gemma-7b-it-q4f16_1-MLC / ndarray-cache.json
nico-martin's picture
nico-martin HF Staff
Upload 107 files
541a0fb verified
{
"metadata": {
"ParamSize": 283,
"ParamBytes": 4802697216.0,
"BitsPerParam": 4.500235859834132
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 393216000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256000,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216000,
"byteOffset": 0
}
],
"md5sum": "06b94265b52b0f6fe6980cb4502afdd7"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 49152000,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256000,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152000,
"byteOffset": 0
}
],
"md5sum": "67ce689d7e81e4d7e847a5fb1dd51983"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "df8ec890cea72af238733d91f83b5104"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b3cdfcb2cf6aa9b57ac4cab68265e326"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6144
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4724736
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14161920
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14168064
}
],
"md5sum": "1f2f9b6656ebc5aa6ff7f6df7c7aaeee"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "7b46bd8ffb887730f75db95b70a7d79b"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b27cc5d12c92cec500b4e488096fa8bf"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d89f6377fcb29151727b142b424bfd51"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "429d93efc23c4467badab9ebfeb1ea2f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "20a483218b2210c470fa7408ac77b573"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "137a0bf7d99de236597cb8aefc56832f"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "bdca8e26c8538f30e0f1bd03ad86adf4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "4f36d761ce99c2091d01b1d6879ecb88"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "9566754efa36ab54f8e458a505a33fde"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "cd29237688afcff04cd477b9fc1a47fb"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "a53004af032aefd3da4a77c8a9ef156b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "00301a9fbb7c5d390b4bb91a2e42b7c5"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "9e6b4592bc98d488b460245abed7d33d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "9ef0c7def8e0578efc5d9fba0115e872"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "6de884afeef8c32b42d9b4943b7d04a5"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "c83c884c5aec7e6abc542e548c10c18e"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1af8972728fb68280a98a344ae67b44b"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8f8a54b887a979e2944266059eaed5ce"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
}
],
"md5sum": "86bb90e430a0f8502ec11903c18b8e58"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "9e2813486aacf750bebbe9599000c46c"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0ad30b9e986127b360345ef71c71a0ff"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3fc9897eadd20b9fd28f3101c9c38690"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "d14cbbe88fafe2895dc11f656f540589"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "e01a3a01df91b5656ec3c97d7b1fe0ae"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "9e45fb19bf6906f6ce7e5034aa3d47b1"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "7d55df9ca9ae0688481887e800302c2c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "7a0e438511412fd9aac048eafab2acca"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "8f7b6716cd10b6f1d8b4fc70bc3bea74"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9349a69c10aee16bddf49f709128da45"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "fe042ca2207681e774efb995b7792447"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "aa667bd8798e29c7854833227573915a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "699c68d6ae326f097cabba21bd590482"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "429efe6f1b09c99c6a036401eb83e69a"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "2ee3e062eaf71983b18d8f916ea3b574"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3a4e1b1cf4a3c5894ee8a4c805121edd"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "cf374eea0f1518c25b2f3746b8fce2ee"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bb876400b45fd76256805d3836c728bb"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
}
],
"md5sum": "3063b34b94b4f908a03e0279182ad512"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "3df47bfa6929562e91a58b437f8c8040"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "407999a1431cc84017d3bfdb4b159416"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "aecbf76a60bb2822def686e7fe2ed08a"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "1418deb5b8065df5830b82f861debaed"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 28329984,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23605248
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23611392
}
],
"md5sum": "a5597cb0c99122fa8b9f28030483e37a"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 30676992,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9443328
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28317696
}
],
"md5sum": "73a6e9b2b98f97e9059aed8d68751fb5"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "4f105b5c499d93b5bee7c762e777bce0"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "9fb44b2b3f783c403f3f93d22a4ee987"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2253edb249c3daded6940b44ed0eea06"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "8c16b59f2ce3420043eca980b0c7d39d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 30689280,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7077888
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7084032
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11802624
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21245952
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23605248
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29896704
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30683136
}
],
"md5sum": "acd2996dcab7877e29097419dc68a90d"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "cde2174db83cd3c80c4af4f83e189be1"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "05c39c455a662fd9605b6cd925355c5c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "5eb22e61f0fd00b16f3b53fb2858e332"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "90113638e40ecb25f9dffe96f1305399"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "5ef10d085d58e82b356624e4a0d6781c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "8733aa69d27b4953a7def4016bd847f5"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 28329984,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23605248
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23611392
}
],
"md5sum": "09adf7eb35ae1711374510500c20bc3a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 30676992,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9443328
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28317696
}
],
"md5sum": "76eb0bcb80edcb3feed8cca82195c55f"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "b57fdefd812b119363029ebad499af95"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "82872d8a967d23b9ee41bb65ab5beb76"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3a9f69d5b2aee5dd0139fdd34783681b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "3ae70a4f15b63ed943f4074b116a9c80"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30689280,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7077888
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7084032
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11802624
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21245952
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23605248
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29896704
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30683136
}
],
"md5sum": "025a1219d56570c131e565db374ebb38"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "c938a67eeab25b46936567ed6176ffbd"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "3eee0d53fbaf690e8dbdc5b3caed32a4"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "1955607c1058a9f379a24489ded24c92"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "256c9f75680300aeb674d4b9f7a48de5"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "c28b44ac7b6bfd49b589f263a2adb6af"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "2957c38693ac735feda0d92e32736101"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "09d9d29776c00a0564551dfb7b8b0861"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b94f6c703f66d622c3f9350909242d76"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "cd2ac446400b2e1e8cbec1487f26ae03"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "d3331c59e56498296d39d9a87b4cfce9"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "7ed5a11a6b8f9167d2110ade3b1b5b9c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6684882dfdb595d29909c82001479854"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "4fd05e3e25d800b74cfae33e3eeaee30"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "34fab42682a9133f3ccd481f8eaa0fe2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "9ccbdcc8352ba2725b89882378d783c6"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "c306740b10f30ee750399b3958836212"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "87cf46088256c3632dce087b2134dc05"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "76acd6880c43866f0f83816c64cf3c4d"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8295111267c6d8bd5f22642fec5f041d"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3632b4dc3110b4a631ee473a1e477a72"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
}
],
"md5sum": "c948d95674a8eafb0b988d68e387028d"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "d1f229dfd00657d995643f960af2f521"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "97fbef787cc566c812a2b836dda4e239"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "b9c03f7580595a65997c79c2d4c3a52c"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0e9f20735bde6f1f770053b07a5a778c"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 28329984,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23605248
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23611392
}
],
"md5sum": "e7567c448cd751e5bf871f34da8b6a5d"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 30676992,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9443328
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28317696
}
],
"md5sum": "5247b70ee4b83472f58daa3888ea2de4"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "e404de8d90d6103e1e19a2fa02167aed"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "047fe71f862b7be8206e9718b59117a9"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5e300878b37fa017e8c5485f4750a9a9"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "69fdfd00aac5585aba2a761548a8d028"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 30689280,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7077888
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7084032
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11802624
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21245952
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23605248
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29896704
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30683136
}
],
"md5sum": "f7850b7c89b76188c3f2877050382bb3"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d690359b1bc65282c3ea266d67e42710"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "100af05669087ea0f2e600d6d0ad34ac"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 9443328,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
}
],
"md5sum": "73506dca8d5f38ba981d584d956db13d"
}
]
}