LongCat-Video-Avatar-1.5-e4m3 / quantization_config.json
chengzeyi's picture
FP8 e4m3 per-channel quantized DiT for LongCat-Video-Avatar-1.5
91865ac verified
{
"quantization_method": "fp8_e4m3_per_channel_dynamic_act_rowwise",
"weight_dtype": "float8_e4m3fn",
"weight_scale_shape": "(out_features,)",
"weight_scale_dtype": "float32",
"activation_dtype": "float8_e4m3fn",
"activation_scale": "dynamic_per_row",
"skip_patterns": [
"final_layer.linear"
],
"compute_dtype": "bfloat16",
"description": "Per-output-channel symmetric FP8 (e4m3) weight quantization with dynamic per-row FP8 (e4m3) activation quantization. Matrix multiplication is performed with torch._scaled_mm (row-wise scaling) on Hopper GPUs. Layers matching `skip_patterns` are kept in bfloat16."
}