| { |
| "measurement": { |
| "model.layers.0": { |
| "accuracy": 0.931088128243573, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.1": { |
| "accuracy": 0.9676548447459936, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.2": { |
| "accuracy": 0.9660987094976008, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.3": { |
| "accuracy": 0.9645824813051149, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.4": { |
| "accuracy": 0.9636156821507029, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.5": { |
| "accuracy": 0.9642364114988595, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.6": { |
| "accuracy": 0.9561966508626938, |
| "total_bits": 754581504.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.7": { |
| "accuracy": 0.9612123254919425, |
| "total_bits": 649789440.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.8": { |
| "accuracy": 0.9671264597272966, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.9": { |
| "accuracy": 0.9623621638165787, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.10": { |
| "accuracy": 0.9592596785514615, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.11": { |
| "accuracy": 0.9603466420667246, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.12": { |
| "accuracy": 0.9643587092868984, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.13": { |
| "accuracy": 0.9577175587182865, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.14": { |
| "accuracy": 0.9630259958212264, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.15": { |
| "accuracy": 0.9582303760689683, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.16": { |
| "accuracy": 0.9499164640437812, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.17": { |
| "accuracy": 0.9687414867221378, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.18": { |
| "accuracy": 0.9692069391603582, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.19": { |
| "accuracy": 0.9578957456978969, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.20": { |
| "accuracy": 0.9595417078817263, |
| "total_bits": 585302016.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.21": { |
| "accuracy": 0.9578904592199251, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.22": { |
| "accuracy": 0.9615095227491111, |
| "total_bits": 682033152.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.23": { |
| "accuracy": 0.958842396910768, |
| "total_bits": 682033152.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.24": { |
| "accuracy": 0.9785620045149699, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.25": { |
| "accuracy": 0.9803282860666513, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.26": { |
| "accuracy": 0.9577851566718891, |
| "total_bits": 682033152.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.27": { |
| "accuracy": 0.9793906190898269, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.28": { |
| "accuracy": 0.977548697614111, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.29": { |
| "accuracy": 0.9786167360143736, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.30": { |
| "accuracy": 0.9765483211958781, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.31": { |
| "accuracy": 0.9776295093470253, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.32": { |
| "accuracy": 0.9769191510858946, |
| "total_bits": 786825216.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.33": { |
| "accuracy": 0.9609942462993786, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.34": { |
| "accuracy": 0.9950092360377312, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.35": { |
| "accuracy": 0.9938810579478741, |
| "total_bits": 593362944.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "2": 64 |
| }, |
| "bits": [ |
| 2 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| } |
| } |
| } |