| { |
| "_class_name": "AutoencoderKLPyraTok", |
| "_diffusers_version": "0.37.0.dev0", |
| "_name_or_path": "/data/onkar/PyraTok/vae", |
| "attn_scales": [], |
| "base_dim": 96, |
| "decoder_base_dim": null, |
| "dim_mult": [ |
| 1, |
| 2, |
| 4, |
| 4 |
| ], |
| "dropout": 0.0, |
| "in_channels": 3, |
| "is_residual": false, |
| "lapq_codebook_dim": 16, |
| "lapq_commitment_weight": 0.5, |
| "lapq_entropy_weight": 0.5, |
| "lapq_inv_temperature": 100.0, |
| "lapq_num_codes": 65536, |
| "lapq_num_quantizers": 4, |
| "lapq_quantize_dropout": false, |
| "lapq_quantize_dropout_cutoff_index": 0, |
| "lapq_quantize_dropout_multiple_of": 1, |
| "lapq_text_condition_heads": 2, |
| "lapq_text_condition_scale": 0.7, |
| "lapq_text_embed_dim": 256, |
| "lapq_text_input_dim": 2560, |
| "lapq_text_mlp_hidden_dim": 1024, |
| "latents_mean": [ |
| -0.7571, |
| -0.7089, |
| -0.9113, |
| 0.1075, |
| -0.1745, |
| 0.9653, |
| -0.1517, |
| 1.5508, |
| 0.4134, |
| -0.0715, |
| 0.5517, |
| -0.3632, |
| -0.1922, |
| -0.9497, |
| 0.2503, |
| -0.2921 |
| ], |
| "latents_std": [ |
| 2.8184, |
| 1.4541, |
| 2.3275, |
| 2.6558, |
| 1.2196, |
| 1.7708, |
| 2.6052, |
| 2.0743, |
| 3.2687, |
| 2.1526, |
| 2.8652, |
| 1.5579, |
| 1.6382, |
| 1.1253, |
| 2.8251, |
| 1.916 |
| ], |
| "num_res_blocks": 2, |
| "out_channels": 3, |
| "patch_size": null, |
| "scale_factor_spatial": 8, |
| "scale_factor_temporal": 4, |
| "temperal_downsample": [ |
| false, |
| true, |
| true |
| ], |
| "use_lapq_quantizer": true, |
| "z_dim": 16 |
| } |
|
|