| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.9302325581395348, | |
| "eval_steps": 50, | |
| "global_step": 20, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.10416666666666663, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 989.0, | |
| "completions/mean_length": 580.8333740234375, | |
| "completions/mean_terminated_length": 529.3023071289062, | |
| "completions/min_length": 273.0, | |
| "completions/min_terminated_length": 273.0, | |
| "epoch": 0, | |
| "eval_loss": 9.23273983062245e-05, | |
| "eval_runtime": 30.5641, | |
| "eval_samples_per_second": 0.654, | |
| "eval_steps_per_second": 0.033, | |
| "kl": 0.000934600830078125, | |
| "num_tokens": 80224.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 0 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.23307291666666663, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1022.5, | |
| "completions/mean_length": 645.7864685058594, | |
| "completions/mean_terminated_length": 531.2034301757812, | |
| "completions/min_length": 225.0, | |
| "completions/min_terminated_length": 225.0, | |
| "epoch": 0.37209302325581395, | |
| "grad_norm": 0.0, | |
| "kl": 0.00021028518676757812, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0, | |
| "num_tokens": 1440148.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2981770833333333, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1022.0, | |
| "completions/mean_length": 660.4414367675781, | |
| "completions/mean_terminated_length": 506.07061767578125, | |
| "completions/min_length": 204.5, | |
| "completions/min_terminated_length": 204.5, | |
| "epoch": 0.7441860465116279, | |
| "grad_norm": 397.1085205078125, | |
| "kl": 0.9753599166870117, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0975, | |
| "num_tokens": 2834391.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.251953125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1018.0, | |
| "completions/mean_length": 646.3483276367188, | |
| "completions/mean_terminated_length": 519.0995330810547, | |
| "completions/min_length": 218.0, | |
| "completions/min_terminated_length": 218.0, | |
| "epoch": 1.1860465116279069, | |
| "grad_norm": 46054105088.0, | |
| "kl": 43881017.763025284, | |
| "learning_rate": 9.777864028930705e-06, | |
| "loss": 4398960.0, | |
| "num_tokens": 4201695.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.27213541666666663, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1022.5, | |
| "completions/mean_length": 673.9479370117188, | |
| "completions/mean_terminated_length": 543.1653747558594, | |
| "completions/min_length": 254.5, | |
| "completions/min_terminated_length": 254.5, | |
| "epoch": 1.558139534883721, | |
| "grad_norm": 0.28854840993881226, | |
| "kl": 20.63982391357422, | |
| "learning_rate": 9.131193871579975e-06, | |
| "loss": 2.0676, | |
| "num_tokens": 5603183.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2740885416666667, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1018.5, | |
| "completions/mean_length": 673.3385620117188, | |
| "completions/mean_terminated_length": 540.8829956054688, | |
| "completions/min_length": 168.0, | |
| "completions/min_terminated_length": 168.0, | |
| "epoch": 1.9302325581395348, | |
| "grad_norm": 0.31550031900405884, | |
| "kl": 0.0182647705078125, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 6971974.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.24609375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1017.5, | |
| "completions/mean_length": 648.3164367675781, | |
| "completions/mean_terminated_length": 526.1214752197266, | |
| "completions/min_length": 184.5, | |
| "completions/min_terminated_length": 184.5, | |
| "epoch": 2.3720930232558137, | |
| "grad_norm": 0.32607021927833557, | |
| "kl": 0.021625518798828125, | |
| "learning_rate": 6.8267051218319766e-06, | |
| "loss": 0.0022, | |
| "num_tokens": 8358137.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.22395833333333331, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1007.5, | |
| "completions/mean_length": 630.4388122558594, | |
| "completions/mean_terminated_length": 516.7716064453125, | |
| "completions/min_length": 134.0, | |
| "completions/min_terminated_length": 134.0, | |
| "epoch": 2.744186046511628, | |
| "grad_norm": 0.12816570699214935, | |
| "kl": 0.0286407470703125, | |
| "learning_rate": 5.373650467932122e-06, | |
| "loss": 0.0029, | |
| "num_tokens": 9705290.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2734375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 998.5, | |
| "completions/mean_length": 662.1341247558594, | |
| "completions/mean_terminated_length": 526.5307464599609, | |
| "completions/min_length": 158.5, | |
| "completions/min_terminated_length": 158.5, | |
| "epoch": 3.186046511627907, | |
| "grad_norm": 2.3328659534454346, | |
| "kl": 0.038421630859375, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.0038, | |
| "num_tokens": 11084995.0, | |
| "reward": 0.5, | |
| "reward_std": 0.0, | |
| "rewards/compiled_reward_inst/mean": 0.5, | |
| "rewards/compiled_reward_inst/std": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.18359375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1015.5, | |
| "completions/mean_length": 614.7044677734375, | |
| "completions/mean_terminated_length": 522.2958679199219, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "epoch": 3.558139534883721, | |
| "grad_norm": 0.08445177972316742, | |
| "kl": 0.03482818603515625, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.007, | |
| "num_tokens": 12414672.0, | |
| "reward": 0.5013020932674408, | |
| "reward_std": 0.003682847833260894, | |
| "rewards/compiled_reward_inst/mean": 0.5013020932674408, | |
| "rewards/compiled_reward_inst/std": 0.025515519082546234, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.24088541666666669, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1020.0, | |
| "completions/mean_length": 658.873046875, | |
| "completions/mean_terminated_length": 541.7471008300781, | |
| "completions/min_length": 87.0, | |
| "completions/min_terminated_length": 87.0, | |
| "epoch": 3.9302325581395348, | |
| "grad_norm": 0.2177199274301529, | |
| "kl": 0.0333709716796875, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 13810302.0, | |
| "reward": 0.5013020932674408, | |
| "reward_std": 0.003682847833260894, | |
| "rewards/compiled_reward_inst/mean": 0.5013020932674408, | |
| "rewards/compiled_reward_inst/std": 0.025515519082546234, | |
| "step": 20 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 24, | |
| "num_input_tokens_seen": 13810302, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |