deepseek-v4-mini-1B-from-flash / model.safetensors.index.json
kshitijthakkar's picture
Partial slice from deepseek-ai/DeepSeek-V4-Flash (16/27 shards)
0073e98 verified
{
"metadata": {
"total_parameters": 1021129744,
"total_size": 2045362944
},
"weight_map": {
"embed.weight": "model-00001-of-00002.safetensors",
"hc_head_base": "model-00001-of-00002.safetensors",
"hc_head_fn": "model-00001-of-00002.safetensors",
"hc_head_scale": "model-00001-of-00002.safetensors",
"head.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.0.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.0.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.0.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.gate.tid2eid": "model-00001-of-00002.safetensors",
"layers.0.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.0.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.0.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.0.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.0.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.0.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.0.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.0.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.1.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.1.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.1.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.1.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.1.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.1.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.1.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.1.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.1.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.gate.tid2eid": "model-00001-of-00002.safetensors",
"layers.1.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.1.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.1.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.1.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.1.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.1.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.1.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.1.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.10.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.10.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.10.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.10.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.10.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.10.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.10.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.10.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.10.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.10.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.10.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.10.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.10.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.10.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.11.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.11.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.11.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.11.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.11.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.11.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.11.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.11.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.11.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.11.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.11.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.11.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.11.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.12.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.12.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.12.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.12.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.12.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.12.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.12.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.12.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.12.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.12.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.12.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.12.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.12.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.12.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.13.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.13.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.13.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.13.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.13.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.13.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.13.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.13.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.13.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.13.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.13.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.13.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.13.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.14.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.14.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.14.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.14.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.14.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.14.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.14.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.14.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.14.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.14.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.14.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.14.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.14.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.14.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.15.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.15.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.15.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.15.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.15.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.15.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.15.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.15.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.15.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.15.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.15.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.15.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.15.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.16.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.16.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.16.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.16.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.16.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.16.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.16.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.16.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.16.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.16.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.16.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.16.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.16.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.16.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.17.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.17.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.17.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.17.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.17.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.17.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.17.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.17.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.17.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.17.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.17.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.17.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.17.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.18.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.18.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.18.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.18.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.18.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.18.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.18.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.18.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.18.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.18.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.18.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.18.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.18.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.18.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.19.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.19.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.19.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.19.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.19.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.19.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.19.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.19.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.19.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.19.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.19.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.19.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.19.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.2.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.2.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.2.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.2.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.2.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.2.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.2.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.2.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.2.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.2.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.2.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.2.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.2.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.2.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.20.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.20.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.20.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.20.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.20.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.20.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.20.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.20.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.20.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.20.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.20.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.20.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.20.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.20.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.21.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.21.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.21.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.21.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.21.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.21.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.21.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.21.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.21.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.21.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.21.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.21.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.21.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.22.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.22.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.22.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.22.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.22.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.22.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.22.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.22.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.22.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.22.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.22.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.22.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.22.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.22.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.23.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.23.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.23.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.23.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.23.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.23.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.23.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.23.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.23.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.23.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.23.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.23.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.23.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.23.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.23.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.23.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.23.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.3.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.3.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.3.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.3.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.3.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.3.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.3.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.3.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.3.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.3.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.3.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.3.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.3.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.4.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.4.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.4.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.4.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.4.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.4.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.4.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.4.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.4.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.4.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.4.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.4.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.4.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.4.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.5.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.5.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.5.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.5.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.5.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.5.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.5.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.5.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.5.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.5.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.5.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.5.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.5.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.6.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.6.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.6.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.6.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.6.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.6.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.6.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.6.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.6.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.6.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.6.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.6.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.6.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.6.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.7.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.7.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.7.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.7.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.7.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.7.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.7.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.7.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.7.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.7.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.7.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.7.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.7.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.8.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.8.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.8.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.indexer.compressor.ape": "model-00001-of-00002.safetensors",
"layers.8.attn.indexer.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.indexer.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.indexer.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.indexer.weights_proj.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.indexer.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.8.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.8.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.8.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.8.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.8.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.8.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.8.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.8.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.8.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.8.hc_ffn_scale": "model-00001-of-00002.safetensors",
"layers.9.attn.attn_sink": "model-00001-of-00002.safetensors",
"layers.9.attn.compressor.ape": "model-00001-of-00002.safetensors",
"layers.9.attn.compressor.norm.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.compressor.wgate.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.compressor.wkv.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.wkv.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"layers.9.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"layers.9.attn_norm.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.10.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.10.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.10.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.11.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.11.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.11.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.12.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.12.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.12.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.13.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.13.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.13.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.14.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.14.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.14.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.15.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.15.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.15.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.2.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.2.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.3.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.3.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.3.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.4.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.4.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.4.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.5.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.5.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.5.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.6.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.6.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.6.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.7.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.7.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.7.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.8.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.8.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.8.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.9.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.9.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.experts.9.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.gate.bias": "model-00001-of-00002.safetensors",
"layers.9.ffn.gate.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.shared_experts.w1.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.shared_experts.w2.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn.shared_experts.w3.weight": "model-00001-of-00002.safetensors",
"layers.9.ffn_norm.weight": "model-00001-of-00002.safetensors",
"layers.9.hc_attn_base": "model-00001-of-00002.safetensors",
"layers.9.hc_attn_fn": "model-00001-of-00002.safetensors",
"layers.9.hc_attn_scale": "model-00001-of-00002.safetensors",
"layers.9.hc_ffn_base": "model-00001-of-00002.safetensors",
"layers.9.hc_ffn_fn": "model-00001-of-00002.safetensors",
"layers.9.hc_ffn_scale": "model-00001-of-00002.safetensors",
"mtp.0.attn.attn_sink": "model-00001-of-00002.safetensors",
"mtp.0.attn.kv_norm.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn.q_norm.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn.wkv.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn.wo_a.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn.wo_b.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn.wq_a.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn.wq_b.weight": "model-00001-of-00002.safetensors",
"mtp.0.attn_norm.weight": "model-00001-of-00002.safetensors",
"mtp.0.e_proj.weight": "model-00001-of-00002.safetensors",
"mtp.0.enorm.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.0.w1.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.0.w2.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.0.w3.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.1.w1.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.1.w2.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.1.w3.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.10.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.10.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.10.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.11.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.11.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.11.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.12.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.12.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.12.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.13.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.13.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.13.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.14.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.14.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.14.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.15.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.15.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.15.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.2.w1.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.experts.2.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.2.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.3.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.3.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.3.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.4.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.4.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.4.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.5.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.5.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.5.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.6.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.6.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.6.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.7.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.7.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.7.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.8.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.8.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.8.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.9.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.9.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.experts.9.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.gate.bias": "model-00001-of-00002.safetensors",
"mtp.0.ffn.gate.weight": "model-00001-of-00002.safetensors",
"mtp.0.ffn.shared_experts.w1.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.shared_experts.w2.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn.shared_experts.w3.weight": "model-00002-of-00002.safetensors",
"mtp.0.ffn_norm.weight": "model-00001-of-00002.safetensors",
"mtp.0.h_proj.weight": "model-00001-of-00002.safetensors",
"mtp.0.hc_attn_base": "model-00001-of-00002.safetensors",
"mtp.0.hc_attn_fn": "model-00001-of-00002.safetensors",
"mtp.0.hc_attn_scale": "model-00001-of-00002.safetensors",
"mtp.0.hc_ffn_base": "model-00001-of-00002.safetensors",
"mtp.0.hc_ffn_fn": "model-00001-of-00002.safetensors",
"mtp.0.hc_ffn_scale": "model-00001-of-00002.safetensors",
"mtp.0.hc_head_base": "model-00001-of-00002.safetensors",
"mtp.0.hc_head_fn": "model-00001-of-00002.safetensors",
"mtp.0.hc_head_scale": "model-00001-of-00002.safetensors",
"mtp.0.hnorm.weight": "model-00001-of-00002.safetensors",
"mtp.0.norm.weight": "model-00002-of-00002.safetensors",
"norm.weight": "model-00001-of-00002.safetensors"
}
}