FP8 e4m3 per-channel quantized DiT for LongCat-Video-Avatar-1.5

Files changed (7) hide show

config.json ADDED Viewed

+{
+  "_class_name": "LongCatVideoAvatarTransformer3DModel",
+  "architectures": [
+    "LongCatVideoAvatarTransformer3DModel"
+  ],
+  "_diffusers_version": "0.32.0",
+  "in_channels": 16,
+  "out_channels": 16,
+  "hidden_size": 4096,
+  "depth": 48,
+  "num_heads": 32,
+  "caption_channels": 4096,
+  "model_max_length": 512,
+  "mlp_ratio": 4,
+  "adaln_tembed_dim": 512,
+  "frequency_embedding_size": 256,
+  "patch_size": [
+    1,
+    2,
+    2
+  ],
+  "enable_flashattn3": false,
+  "enable_flashattn2": true,
+  "enable_xformers": false,
+  "enable_bsa": false,
+  "bsa_params": null,
+  "cp_split_hw": null,
+  "text_tokens_zero_pad": true,
+  "audio_window": 5,
+  "audio_block": 5,
+  "audio_channel": 1280,
+  "intermediate_dim": 512,
+  "output_dim": 768,
+  "context_tokens": 32,
+  "vae_scale": 4,
+  "audio_prenorm": false,
+  "class_range": 24,
+  "class_interval": 4
+}

quantization_config.json ADDED Viewed

+{
+  "quantization_method": "fp8_e4m3_per_channel_dynamic_act_rowwise",
+  "weight_dtype": "float8_e4m3fn",
+  "weight_scale_shape": "(out_features,)",
+  "weight_scale_dtype": "float32",
+  "activation_dtype": "float8_e4m3fn",
+  "activation_scale": "dynamic_per_row",
+  "skip_patterns": [
+    "final_layer.linear"
+  ],
+  "compute_dtype": "bfloat16",
+  "description": "Per-output-channel symmetric FP8 (e4m3) weight quantization with dynamic per-row activation quantization. Matmul via torch._scaled_mm (RowWise) on Hopper. Layers matching `skip_patterns` are kept in bfloat16."
+}

quantized_model-00001-of-00004.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fd104833f2bcac6536f26cb7af7a36a75482281111dc316906ac96fa2d7bd48
+size 4264635792

quantized_model-00002-of-00004.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba930a8361f95ef929a266c1eb5b55b97e744c8556ff660b6147a82759b7ebb1
+size 4275232472

quantized_model-00003-of-00004.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f85e225bce212b7dd22e62a4bce8aa023c46ad3882c26db21c5db59791a22406
+size 4275232472

quantized_model-00004-of-00004.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8802746465cf583a94040a3f4f02173c14036870c01317ec8b78655cd555cf45
+size 3065282200

quantized_model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff