qwen3_8b_stage1 / trainer_state.json
shanghong's picture
Upload folder using huggingface_hub
a6ee427 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9302325581395348,
"eval_steps": 50,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.10416666666666663,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 989.0,
"completions/mean_length": 580.8333740234375,
"completions/mean_terminated_length": 529.3023071289062,
"completions/min_length": 273.0,
"completions/min_terminated_length": 273.0,
"epoch": 0,
"eval_loss": 9.23273983062245e-05,
"eval_runtime": 30.5641,
"eval_samples_per_second": 0.654,
"eval_steps_per_second": 0.033,
"kl": 0.000934600830078125,
"num_tokens": 80224.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 0
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.23307291666666663,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1022.5,
"completions/mean_length": 645.7864685058594,
"completions/mean_terminated_length": 531.2034301757812,
"completions/min_length": 225.0,
"completions/min_terminated_length": 225.0,
"epoch": 0.37209302325581395,
"grad_norm": 0.0,
"kl": 0.00021028518676757812,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0,
"num_tokens": 1440148.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.2981770833333333,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1022.0,
"completions/mean_length": 660.4414367675781,
"completions/mean_terminated_length": 506.07061767578125,
"completions/min_length": 204.5,
"completions/min_terminated_length": 204.5,
"epoch": 0.7441860465116279,
"grad_norm": 397.1085205078125,
"kl": 0.9753599166870117,
"learning_rate": 1e-05,
"loss": 0.0975,
"num_tokens": 2834391.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.251953125,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1018.0,
"completions/mean_length": 646.3483276367188,
"completions/mean_terminated_length": 519.0995330810547,
"completions/min_length": 218.0,
"completions/min_terminated_length": 218.0,
"epoch": 1.1860465116279069,
"grad_norm": 46054105088.0,
"kl": 43881017.763025284,
"learning_rate": 9.777864028930705e-06,
"loss": 4398960.0,
"num_tokens": 4201695.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.27213541666666663,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1022.5,
"completions/mean_length": 673.9479370117188,
"completions/mean_terminated_length": 543.1653747558594,
"completions/min_length": 254.5,
"completions/min_terminated_length": 254.5,
"epoch": 1.558139534883721,
"grad_norm": 0.28854840993881226,
"kl": 20.63982391357422,
"learning_rate": 9.131193871579975e-06,
"loss": 2.0676,
"num_tokens": 5603183.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.2740885416666667,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1018.5,
"completions/mean_length": 673.3385620117188,
"completions/mean_terminated_length": 540.8829956054688,
"completions/min_length": 168.0,
"completions/min_terminated_length": 168.0,
"epoch": 1.9302325581395348,
"grad_norm": 0.31550031900405884,
"kl": 0.0182647705078125,
"learning_rate": 8.117449009293668e-06,
"loss": 0.0018,
"num_tokens": 6971974.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.24609375,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1017.5,
"completions/mean_length": 648.3164367675781,
"completions/mean_terminated_length": 526.1214752197266,
"completions/min_length": 184.5,
"completions/min_terminated_length": 184.5,
"epoch": 2.3720930232558137,
"grad_norm": 0.32607021927833557,
"kl": 0.021625518798828125,
"learning_rate": 6.8267051218319766e-06,
"loss": 0.0022,
"num_tokens": 8358137.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.22395833333333331,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1007.5,
"completions/mean_length": 630.4388122558594,
"completions/mean_terminated_length": 516.7716064453125,
"completions/min_length": 134.0,
"completions/min_terminated_length": 134.0,
"epoch": 2.744186046511628,
"grad_norm": 0.12816570699214935,
"kl": 0.0286407470703125,
"learning_rate": 5.373650467932122e-06,
"loss": 0.0029,
"num_tokens": 9705290.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 14
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.2734375,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 998.5,
"completions/mean_length": 662.1341247558594,
"completions/mean_terminated_length": 526.5307464599609,
"completions/min_length": 158.5,
"completions/min_terminated_length": 158.5,
"epoch": 3.186046511627907,
"grad_norm": 2.3328659534454346,
"kl": 0.038421630859375,
"learning_rate": 3.887395330218429e-06,
"loss": 0.0038,
"num_tokens": 11084995.0,
"reward": 0.5,
"reward_std": 0.0,
"rewards/compiled_reward_inst/mean": 0.5,
"rewards/compiled_reward_inst/std": 0.0,
"step": 16
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.18359375,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1015.5,
"completions/mean_length": 614.7044677734375,
"completions/mean_terminated_length": 522.2958679199219,
"completions/min_length": 33.0,
"completions/min_terminated_length": 33.0,
"epoch": 3.558139534883721,
"grad_norm": 0.08445177972316742,
"kl": 0.03482818603515625,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.007,
"num_tokens": 12414672.0,
"reward": 0.5013020932674408,
"reward_std": 0.003682847833260894,
"rewards/compiled_reward_inst/mean": 0.5013020932674408,
"rewards/compiled_reward_inst/std": 0.025515519082546234,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.24088541666666669,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1020.0,
"completions/mean_length": 658.873046875,
"completions/mean_terminated_length": 541.7471008300781,
"completions/min_length": 87.0,
"completions/min_terminated_length": 87.0,
"epoch": 3.9302325581395348,
"grad_norm": 0.2177199274301529,
"kl": 0.0333709716796875,
"learning_rate": 1.3347406408508695e-06,
"loss": 0.0033,
"num_tokens": 13810302.0,
"reward": 0.5013020932674408,
"reward_std": 0.003682847833260894,
"rewards/compiled_reward_inst/mean": 0.5013020932674408,
"rewards/compiled_reward_inst/std": 0.025515519082546234,
"step": 20
}
],
"logging_steps": 2,
"max_steps": 24,
"num_input_tokens_seen": 13810302,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}