chengzeyi committed on
Commit
91865ac
·
verified ·
1 Parent(s): 42d7619

FP8 e4m3 per-channel quantized DiT for LongCat-Video-Avatar-1.5

Browse files
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "LongCatVideoAvatarTransformer3DModel",
3
+ "architectures": [
4
+ "LongCatVideoAvatarTransformer3DModel"
5
+ ],
6
+ "_diffusers_version": "0.32.0",
7
+ "in_channels": 16,
8
+ "out_channels": 16,
9
+ "hidden_size": 4096,
10
+ "depth": 48,
11
+ "num_heads": 32,
12
+ "caption_channels": 4096,
13
+ "model_max_length": 512,
14
+ "mlp_ratio": 4,
15
+ "adaln_tembed_dim": 512,
16
+ "frequency_embedding_size": 256,
17
+ "patch_size": [
18
+ 1,
19
+ 2,
20
+ 2
21
+ ],
22
+ "enable_flashattn3": false,
23
+ "enable_flashattn2": true,
24
+ "enable_xformers": false,
25
+ "enable_bsa": false,
26
+ "bsa_params": null,
27
+ "cp_split_hw": null,
28
+ "text_tokens_zero_pad": true,
29
+ "audio_window": 5,
30
+ "audio_block": 5,
31
+ "audio_channel": 1280,
32
+ "intermediate_dim": 512,
33
+ "output_dim": 768,
34
+ "context_tokens": 32,
35
+ "vae_scale": 4,
36
+ "audio_prenorm": false,
37
+ "class_range": 24,
38
+ "class_interval": 4
39
+ }
quantization_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "quantization_method": "fp8_e4m3_per_channel_dynamic_act_rowwise",
3
+ "weight_dtype": "float8_e4m3fn",
4
+ "weight_scale_shape": "(out_features,)",
5
+ "weight_scale_dtype": "float32",
6
+ "activation_dtype": "float8_e4m3fn",
7
+ "activation_scale": "dynamic_per_row",
8
+ "skip_patterns": [
9
+ "final_layer.linear"
10
+ ],
11
+ "compute_dtype": "bfloat16",
12
+ "description": "Per-output-channel symmetric FP8 (e4m3) weight quantization with dynamic per-row activation quantization. Matmul via torch._scaled_mm (RowWise) on Hopper. Layers matching `skip_patterns` are kept in bfloat16."
13
+ }
quantized_model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fd104833f2bcac6536f26cb7af7a36a75482281111dc316906ac96fa2d7bd48
3
+ size 4264635792
quantized_model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba930a8361f95ef929a266c1eb5b55b97e744c8556ff660b6147a82759b7ebb1
3
+ size 4275232472
quantized_model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f85e225bce212b7dd22e62a4bce8aa023c46ad3882c26db21c5db59791a22406
3
+ size 4275232472
quantized_model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8802746465cf583a94040a3f4f02173c14036870c01317ec8b78655cd555cf45
3
+ size 3065282200
quantized_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff