{
  "quantization_method": "fp8_e4m3_per_channel_dynamic_act_rowwise",
  "weight_dtype": "float8_e4m3fn",
  "weight_scale_shape": "(out_features,)",
  "weight_scale_dtype": "float32",
  "activation_dtype": "float8_e4m3fn",
  "activation_scale": "dynamic_per_row",
  "skip_patterns": [
    "final_layer.linear"
  ],
  "compute_dtype": "bfloat16",
  "description": "Per-output-channel symmetric FP8 (e4m3) weight quantization with dynamic per-row activation quantization. Matmul via torch._scaled_mm (RowWise) on Hopper. Layers matching `skip_patterns` are kept in bfloat16."
}