{ "metadata": { "ParamSize": 283, "ParamBytes": 4802697216.0, "BitsPerParam": 4.500235859834132 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 393216000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 393216000, "byteOffset": 0 } ], "md5sum": "06b94265b52b0f6fe6980cb4502afdd7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 49152000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152000, "byteOffset": 0 } ], "md5sum": "67ce689d7e81e4d7e847a5fb1dd51983" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "df8ec890cea72af238733d91f83b5104" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b3cdfcb2cf6aa9b57ac4cab68265e326" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6144 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4724736 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14161920 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14168064 } ], "md5sum": "1f2f9b6656ebc5aa6ff7f6df7c7aaeee" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "7b46bd8ffb887730f75db95b70a7d79b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b27cc5d12c92cec500b4e488096fa8bf" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d89f6377fcb29151727b142b424bfd51" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "429d93efc23c4467badab9ebfeb1ea2f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "20a483218b2210c470fa7408ac77b573" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "137a0bf7d99de236597cb8aefc56832f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "bdca8e26c8538f30e0f1bd03ad86adf4" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "4f36d761ce99c2091d01b1d6879ecb88" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9566754efa36ab54f8e458a505a33fde" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cd29237688afcff04cd477b9fc1a47fb" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a53004af032aefd3da4a77c8a9ef156b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "00301a9fbb7c5d390b4bb91a2e42b7c5" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9e6b4592bc98d488b460245abed7d33d" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "9ef0c7def8e0578efc5d9fba0115e872" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "6de884afeef8c32b42d9b4943b7d04a5" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c83c884c5aec7e6abc542e548c10c18e" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1af8972728fb68280a98a344ae67b44b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8f8a54b887a979e2944266059eaed5ce" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 } ], "md5sum": "86bb90e430a0f8502ec11903c18b8e58" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "9e2813486aacf750bebbe9599000c46c" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0ad30b9e986127b360345ef71c71a0ff" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3fc9897eadd20b9fd28f3101c9c38690" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d14cbbe88fafe2895dc11f656f540589" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "e01a3a01df91b5656ec3c97d7b1fe0ae" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9e45fb19bf6906f6ce7e5034aa3d47b1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "7d55df9ca9ae0688481887e800302c2c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "7a0e438511412fd9aac048eafab2acca" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8f7b6716cd10b6f1d8b4fc70bc3bea74" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9349a69c10aee16bddf49f709128da45" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "fe042ca2207681e774efb995b7792447" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "aa667bd8798e29c7854833227573915a" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "699c68d6ae326f097cabba21bd590482" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "429efe6f1b09c99c6a036401eb83e69a" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "2ee3e062eaf71983b18d8f916ea3b574" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3a4e1b1cf4a3c5894ee8a4c805121edd" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cf374eea0f1518c25b2f3746b8fce2ee" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bb876400b45fd76256805d3836c728bb" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 } ], "md5sum": "3063b34b94b4f908a03e0279182ad512" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "3df47bfa6929562e91a58b437f8c8040" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "407999a1431cc84017d3bfdb4b159416" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "aecbf76a60bb2822def686e7fe2ed08a" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1418deb5b8065df5830b82f861debaed" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 28329984, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23605248 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23611392 } ], "md5sum": "a5597cb0c99122fa8b9f28030483e37a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 30676992, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9443328 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28317696 } ], "md5sum": "73a6e9b2b98f97e9059aed8d68751fb5" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "4f105b5c499d93b5bee7c762e777bce0" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9fb44b2b3f783c403f3f93d22a4ee987" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2253edb249c3daded6940b44ed0eea06" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "8c16b59f2ce3420043eca980b0c7d39d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 30689280, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7077888 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 7084032 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11802624 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21239808 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21245952 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 23605248 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29896704 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30683136 } ], "md5sum": "acd2996dcab7877e29097419dc68a90d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cde2174db83cd3c80c4af4f83e189be1" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "05c39c455a662fd9605b6cd925355c5c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "5eb22e61f0fd00b16f3b53fb2858e332" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "90113638e40ecb25f9dffe96f1305399" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "5ef10d085d58e82b356624e4a0d6781c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8733aa69d27b4953a7def4016bd847f5" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 28329984, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23605248 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23611392 } ], "md5sum": "09adf7eb35ae1711374510500c20bc3a" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 30676992, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9443328 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28317696 } ], "md5sum": "76eb0bcb80edcb3feed8cca82195c55f" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "b57fdefd812b119363029ebad499af95" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "82872d8a967d23b9ee41bb65ab5beb76" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3a9f69d5b2aee5dd0139fdd34783681b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "3ae70a4f15b63ed943f4074b116a9c80" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 30689280, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7077888 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 7084032 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11802624 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21239808 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21245952 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 23605248 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29896704 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30683136 } ], "md5sum": "025a1219d56570c131e565db374ebb38" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c938a67eeab25b46936567ed6176ffbd" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "3eee0d53fbaf690e8dbdc5b3caed32a4" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "1955607c1058a9f379a24489ded24c92" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "256c9f75680300aeb674d4b9f7a48de5" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c28b44ac7b6bfd49b589f263a2adb6af" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "2957c38693ac735feda0d92e32736101" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "09d9d29776c00a0564551dfb7b8b0861" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b94f6c703f66d622c3f9350909242d76" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "cd2ac446400b2e1e8cbec1487f26ae03" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d3331c59e56498296d39d9a87b4cfce9" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7ed5a11a6b8f9167d2110ade3b1b5b9c" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6684882dfdb595d29909c82001479854" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "4fd05e3e25d800b74cfae33e3eeaee30" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33048576, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33042432 } ], "md5sum": "34fab42682a9133f3ccd481f8eaa0fe2" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9ccbdcc8352ba2725b89882378d783c6" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "c306740b10f30ee750399b3958836212" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "87cf46088256c3632dce087b2134dc05" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "76acd6880c43866f0f83816c64cf3c4d" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8295111267c6d8bd5f22642fec5f041d" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3632b4dc3110b4a631ee473a1e477a72" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33042432, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23605248 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25964544 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 32256000 } ], "md5sum": "c948d95674a8eafb0b988d68e387028d" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d1f229dfd00657d995643f960af2f521" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "97fbef787cc566c812a2b836dda4e239" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "b9c03f7580595a65997c79c2d4c3a52c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0e9f20735bde6f1f770053b07a5a778c" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 28329984, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9443328 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 14161920 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23599104 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23605248 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23611392 } ], "md5sum": "e7567c448cd751e5bf871f34da8b6a5d" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 30676992, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9443328 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28317696 } ], "md5sum": "5247b70ee4b83472f58daa3888ea2de4" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "e404de8d90d6103e1e19a2fa02167aed" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "047fe71f862b7be8206e9718b59117a9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5e300878b37fa017e8c5485f4750a9a9" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "69fdfd00aac5585aba2a761548a8d028" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 30689280, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7077888 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 7084032 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11802624 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21239808 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21245952 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 23605248 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 29896704 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30683136 } ], "md5sum": "f7850b7c89b76188c3f2877050382bb3" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 49152, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d690359b1bc65282c3ea266d67e42710" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33036288, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 49152, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 4718592 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 14161920 } ], "md5sum": "100af05669087ea0f2e600d6d0ad34ac" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 9443328, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2359296 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 8650752 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9437184 } ], "md5sum": "73506dca8d5f38ba981d584d956db13d" } ] }