{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0449438202247191, "eval_steps": 1, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0044943820224719105, "grad_norm": 0.20572859048843384, "learning_rate": 0.0, "loss": -0.0552, "step": 1 }, { "epoch": 0.0044943820224719105, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8489583333333334, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 117.91666666666667, "eval_completions/mean_length": 242.40104166666666, "eval_completions/mean_terminated_length": 105.13125038146973, "eval_completions/min_length": 185.91666666666666, "eval_completions/min_terminated_length": 89.91666666666667, "eval_kl": 0.0, "eval_loss": -0.002283654874190688, "eval_num_tokens": 3937.0, "eval_reward": -135.38923199971518, "eval_reward_std": 38.23202101389567, "eval_rewards/reward_low_identity_attack_score/mean": 0.9988682096203169, "eval_rewards/reward_low_identity_attack_score/std": 0.0016322459551645352, "eval_rewards/reward_short_sentences/mean": -16.582972327868145, "eval_rewards/reward_short_sentences/std": 5.800372085223596, "eval_runtime": 504.2734, "eval_samples_per_second": 0.093, "eval_steps_per_second": 0.012, "step": 1 }, { "epoch": 0.008988764044943821, "grad_norm": 0.198868528008461, "learning_rate": 2.0000000000000003e-06, "loss": -0.0087, "step": 2 }, { "epoch": 0.008988764044943821, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8333333333333334, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 94.625, "eval_completions/mean_length": 240.15104166666666, "eval_completions/mean_terminated_length": 78.94097264607747, "eval_completions/min_length": 198.25, "eval_completions/min_terminated_length": 59.583333333333336, "eval_kl": 0.0004699874468011937, "eval_loss": 0.017109675332903862, "eval_num_tokens": 8284.0, "eval_reward": -138.16361236572266, "eval_reward_std": 36.529361406962074, "eval_rewards/reward_low_identity_attack_score/mean": 0.9992144530018171, "eval_rewards/reward_low_identity_attack_score/std": 0.0009160235370444525, "eval_rewards/reward_short_sentences/mean": -16.91547389825185, "eval_rewards/reward_short_sentences/std": 4.824008772770564, "eval_runtime": 502.6748, "eval_samples_per_second": 0.093, "eval_steps_per_second": 0.012, "step": 2 }, { "epoch": 0.01348314606741573, "grad_norm": 0.20767748355865479, "learning_rate": 4.000000000000001e-06, "loss": 0.0021, "step": 3 }, { "epoch": 0.01348314606741573, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8177083333333334, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 94.5, "eval_completions/mean_length": 238.38541666666666, "eval_completions/mean_terminated_length": 75.93958346048991, "eval_completions/min_length": 199.5, "eval_completions/min_terminated_length": 60.833333333333336, "eval_kl": 0.0004704270249931142, "eval_loss": 0.007125795818865299, "eval_num_tokens": 12380.0, "eval_reward": -131.08518664042154, "eval_reward_std": 37.45306404431661, "eval_rewards/reward_low_identity_attack_score/mean": 0.999359500904878, "eval_rewards/reward_low_identity_attack_score/std": 0.0006575306392827921, "eval_rewards/reward_short_sentences/mean": -16.067519426345825, "eval_rewards/reward_short_sentences/std": 5.311739345391591, "eval_runtime": 501.3503, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 3 }, { "epoch": 0.017977528089887642, "grad_norm": 0.18843844532966614, "learning_rate": 6e-06, "loss": 0.027, "step": 4 }, { "epoch": 0.017977528089887642, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.828125, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 107.70833333333333, "eval_completions/mean_length": 240.71354166666666, "eval_completions/mean_terminated_length": 88.28472264607747, "eval_completions/min_length": 194.41666666666666, "eval_completions/min_terminated_length": 66.41666666666667, "eval_kl": 0.0004497210647969041, "eval_loss": 0.0061494638212025166, "eval_num_tokens": 16554.0, "eval_reward": -133.93212604522705, "eval_reward_std": 33.41885634263357, "eval_rewards/reward_low_identity_attack_score/mean": 0.9993308633565903, "eval_rewards/reward_low_identity_attack_score/std": 0.0008023299560970069, "eval_rewards/reward_short_sentences/mean": -16.40857668717702, "eval_rewards/reward_short_sentences/std": 5.010460207859675, "eval_runtime": 498.182, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 4 }, { "epoch": 0.02247191011235955, "grad_norm": 0.1922888159751892, "learning_rate": 8.000000000000001e-06, "loss": -0.0164, "step": 5 }, { "epoch": 0.02247191011235955, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.890625, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 72.41666666666667, "eval_completions/mean_length": 245.171875, "eval_completions/mean_terminated_length": 61.555555979410805, "eval_completions/min_length": 211.04166666666666, "eval_completions/min_terminated_length": 51.041666666666664, "eval_kl": 0.0004409884980608088, "eval_loss": -0.0005789658171124756, "eval_num_tokens": 20962.0, "eval_reward": -139.16856416066489, "eval_reward_std": 47.69642313321432, "eval_rewards/reward_low_identity_attack_score/mean": 0.9987605661153793, "eval_rewards/reward_low_identity_attack_score/std": 0.0014406013592444349, "eval_rewards/reward_short_sentences/mean": -17.035703380902607, "eval_rewards/reward_short_sentences/std": 7.158746535579364, "eval_runtime": 498.5985, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 5 }, { "epoch": 0.02696629213483146, "grad_norm": 0.20973674952983856, "learning_rate": 1e-05, "loss": -0.0091, "step": 6 }, { "epoch": 0.02696629213483146, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8645833333333334, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 97.58333333333333, "eval_completions/mean_length": 243.49479166666666, "eval_completions/mean_terminated_length": 87.31250063578288, "eval_completions/min_length": 194.25, "eval_completions/min_terminated_length": 76.91666666666667, "eval_kl": 0.0004579596510059976, "eval_loss": 0.011630469001829624, "eval_num_tokens": 25138.0, "eval_reward": -126.97941493988037, "eval_reward_std": 35.5850567817688, "eval_rewards/reward_low_identity_attack_score/mean": 0.999014342824618, "eval_rewards/reward_low_identity_attack_score/std": 0.001167270889103141, "eval_rewards/reward_short_sentences/mean": -15.575515786806742, "eval_rewards/reward_short_sentences/std": 5.122981091340383, "eval_runtime": 498.9815, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 6 }, { "epoch": 0.03146067415730337, "grad_norm": 0.2157692313194275, "learning_rate": 1.2e-05, "loss": 0.0, "step": 7 }, { "epoch": 0.03146067415730337, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8541666666666666, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 97.0, "eval_completions/mean_length": 241.46875, "eval_completions/mean_terminated_length": 78.63888963063557, "eval_completions/min_length": 186.54166666666666, "eval_completions/min_terminated_length": 58.541666666666664, "eval_kl": 0.00047438923502340913, "eval_loss": 0.010423107072710991, "eval_num_tokens": 29610.0, "eval_reward": -127.89143784840901, "eval_reward_std": 31.59454568227132, "eval_rewards/reward_low_identity_attack_score/mean": 0.9993377675612768, "eval_rewards/reward_low_identity_attack_score/std": 0.0006659857741529626, "eval_rewards/reward_short_sentences/mean": -15.684895118077597, "eval_rewards/reward_short_sentences/std": 4.187550102670987, "eval_runtime": 497.9463, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 7 }, { "epoch": 0.035955056179775284, "grad_norm": 0.20879505574703217, "learning_rate": 1.4000000000000001e-05, "loss": 0.1945, "step": 8 }, { "epoch": 0.035955056179775284, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8697916666666666, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 93.79166666666667, "eval_completions/mean_length": 242.77083333333334, "eval_completions/mean_terminated_length": 79.84375063578288, "eval_completions/min_length": 195.91666666666666, "eval_completions/min_terminated_length": 67.91666666666667, "eval_kl": 0.000479057072273766, "eval_loss": 0.007415430154651403, "eval_num_tokens": 33507.0, "eval_reward": -130.9524710973104, "eval_reward_std": 37.18793753782908, "eval_rewards/reward_low_identity_attack_score/mean": 0.9991383577386538, "eval_rewards/reward_low_identity_attack_score/std": 0.0012076803329061174, "eval_rewards/reward_short_sentences/mean": -16.051539699236553, "eval_rewards/reward_short_sentences/std": 5.303417146205902, "eval_runtime": 500.9887, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 8 }, { "epoch": 0.04044943820224719, "grad_norm": 0.19092078506946564, "learning_rate": 1.6000000000000003e-05, "loss": 0.0697, "step": 9 }, { "epoch": 0.04044943820224719, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8333333333333334, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 101.0, "eval_completions/mean_length": 239.02604166666666, "eval_completions/mean_terminated_length": 81.92361195882161, "eval_completions/min_length": 191.79166666666666, "eval_completions/min_terminated_length": 63.791666666666664, "eval_kl": 0.0004818017926785008, "eval_loss": 0.015385713428258896, "eval_num_tokens": 37625.0, "eval_reward": -131.7768144607544, "eval_reward_std": 44.59881019592285, "eval_rewards/reward_low_identity_attack_score/mean": 0.9993962869048119, "eval_rewards/reward_low_identity_attack_score/std": 0.0005807341856420104, "eval_rewards/reward_short_sentences/mean": -16.15039114157359, "eval_rewards/reward_short_sentences/std": 6.209470520416896, "eval_runtime": 499.0804, "eval_samples_per_second": 0.094, "eval_steps_per_second": 0.012, "step": 9 }, { "epoch": 0.0449438202247191, "grad_norm": 0.21867510676383972, "learning_rate": 1.8e-05, "loss": 0.0646, "step": 10 }, { "epoch": 0.0449438202247191, "eval_clip_ratio/high_max": 0.0, "eval_clip_ratio/high_mean": 0.0, "eval_clip_ratio/low_mean": 0.0, "eval_clip_ratio/low_min": 0.0, "eval_clip_ratio/region_mean": 0.0, "eval_completions/clipped_ratio": 0.8854166666666666, "eval_completions/max_length": 256.0, "eval_completions/max_terminated_length": 76.625, "eval_completions/mean_length": 246.19270833333334, "eval_completions/mean_terminated_length": 66.23611132303874, "eval_completions/min_length": 218.20833333333334, "eval_completions/min_terminated_length": 58.208333333333336, "eval_kl": 0.00046255352450922754, "eval_loss": -0.008996911346912384, "eval_num_tokens": 41691.0, "eval_reward": -128.49213695526123, "eval_reward_std": 37.85737713177999, "eval_rewards/reward_low_identity_attack_score/mean": 0.9993234574794769, "eval_rewards/reward_low_identity_attack_score/std": 0.0007394837536291258, "eval_rewards/reward_short_sentences/mean": -15.75685485204061, "eval_rewards/reward_short_sentences/std": 5.540864855051041, "eval_runtime": 504.157, "eval_samples_per_second": 0.093, "eval_steps_per_second": 0.012, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 41691, "num_train_epochs": 1, "save_steps": 33, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }