FP8 e4m3 per-channel quantized DiT for LongCat-Video-Avatar-1.5
Browse files
config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "LongCatVideoAvatarTransformer3DModel",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"LongCatVideoAvatarTransformer3DModel"
|
| 5 |
+
],
|
| 6 |
+
"_diffusers_version": "0.32.0",
|
| 7 |
+
"in_channels": 16,
|
| 8 |
+
"out_channels": 16,
|
| 9 |
+
"hidden_size": 4096,
|
| 10 |
+
"depth": 48,
|
| 11 |
+
"num_heads": 32,
|
| 12 |
+
"caption_channels": 4096,
|
| 13 |
+
"model_max_length": 512,
|
| 14 |
+
"mlp_ratio": 4,
|
| 15 |
+
"adaln_tembed_dim": 512,
|
| 16 |
+
"frequency_embedding_size": 256,
|
| 17 |
+
"patch_size": [
|
| 18 |
+
1,
|
| 19 |
+
2,
|
| 20 |
+
2
|
| 21 |
+
],
|
| 22 |
+
"enable_flashattn3": false,
|
| 23 |
+
"enable_flashattn2": true,
|
| 24 |
+
"enable_xformers": false,
|
| 25 |
+
"enable_bsa": false,
|
| 26 |
+
"bsa_params": null,
|
| 27 |
+
"cp_split_hw": null,
|
| 28 |
+
"text_tokens_zero_pad": true,
|
| 29 |
+
"audio_window": 5,
|
| 30 |
+
"audio_block": 5,
|
| 31 |
+
"audio_channel": 1280,
|
| 32 |
+
"intermediate_dim": 512,
|
| 33 |
+
"output_dim": 768,
|
| 34 |
+
"context_tokens": 32,
|
| 35 |
+
"vae_scale": 4,
|
| 36 |
+
"audio_prenorm": false,
|
| 37 |
+
"class_range": 24,
|
| 38 |
+
"class_interval": 4
|
| 39 |
+
}
|
quantization_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"quantization_method": "fp8_e4m3_per_channel_dynamic_act_rowwise",
|
| 3 |
+
"weight_dtype": "float8_e4m3fn",
|
| 4 |
+
"weight_scale_shape": "(out_features,)",
|
| 5 |
+
"weight_scale_dtype": "float32",
|
| 6 |
+
"activation_dtype": "float8_e4m3fn",
|
| 7 |
+
"activation_scale": "dynamic_per_row",
|
| 8 |
+
"skip_patterns": [
|
| 9 |
+
"final_layer.linear"
|
| 10 |
+
],
|
| 11 |
+
"compute_dtype": "bfloat16",
|
| 12 |
+
"description": "Per-output-channel symmetric FP8 (e4m3) weight quantization with dynamic per-row activation quantization. Matmul via torch._scaled_mm (RowWise) on Hopper. Layers matching `skip_patterns` are kept in bfloat16."
|
| 13 |
+
}
|
quantized_model-00001-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fd104833f2bcac6536f26cb7af7a36a75482281111dc316906ac96fa2d7bd48
|
| 3 |
+
size 4264635792
|
quantized_model-00002-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba930a8361f95ef929a266c1eb5b55b97e744c8556ff660b6147a82759b7ebb1
|
| 3 |
+
size 4275232472
|
quantized_model-00003-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f85e225bce212b7dd22e62a4bce8aa023c46ad3882c26db21c5db59791a22406
|
| 3 |
+
size 4275232472
|
quantized_model-00004-of-00004.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8802746465cf583a94040a3f4f02173c14036870c01317ec8b78655cd555cf45
|
| 3 |
+
size 3065282200
|
quantized_model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|