Upload folder using huggingface_hub

a6ee427 verified 10 months ago

10.8 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.9302325581395348,
	"eval_steps": 50,
	"global_step": 20,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.10416666666666663,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 989.0,
	"completions/mean_length": 580.8333740234375,
	"completions/mean_terminated_length": 529.3023071289062,
	"completions/min_length": 273.0,
	"completions/min_terminated_length": 273.0,
	"epoch": 0,
	"eval_loss": 9.23273983062245e-05,
	"eval_runtime": 30.5641,
	"eval_samples_per_second": 0.654,
	"eval_steps_per_second": 0.033,
	"kl": 0.000934600830078125,
	"num_tokens": 80224.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 0
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.23307291666666663,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1022.5,
	"completions/mean_length": 645.7864685058594,
	"completions/mean_terminated_length": 531.2034301757812,
	"completions/min_length": 225.0,
	"completions/min_terminated_length": 225.0,
	"epoch": 0.37209302325581395,
	"grad_norm": 0.0,
	"kl": 0.00021028518676757812,
	"learning_rate": 3.3333333333333333e-06,
	"loss": 0.0,
	"num_tokens": 1440148.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 2
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.2981770833333333,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1022.0,
	"completions/mean_length": 660.4414367675781,
	"completions/mean_terminated_length": 506.07061767578125,
	"completions/min_length": 204.5,
	"completions/min_terminated_length": 204.5,
	"epoch": 0.7441860465116279,
	"grad_norm": 397.1085205078125,
	"kl": 0.9753599166870117,
	"learning_rate": 1e-05,
	"loss": 0.0975,
	"num_tokens": 2834391.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 4
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.251953125,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1018.0,
	"completions/mean_length": 646.3483276367188,
	"completions/mean_terminated_length": 519.0995330810547,
	"completions/min_length": 218.0,
	"completions/min_terminated_length": 218.0,
	"epoch": 1.1860465116279069,
	"grad_norm": 46054105088.0,
	"kl": 43881017.763025284,
	"learning_rate": 9.777864028930705e-06,
	"loss": 4398960.0,
	"num_tokens": 4201695.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 6
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.27213541666666663,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1022.5,
	"completions/mean_length": 673.9479370117188,
	"completions/mean_terminated_length": 543.1653747558594,
	"completions/min_length": 254.5,
	"completions/min_terminated_length": 254.5,
	"epoch": 1.558139534883721,
	"grad_norm": 0.28854840993881226,
	"kl": 20.63982391357422,
	"learning_rate": 9.131193871579975e-06,
	"loss": 2.0676,
	"num_tokens": 5603183.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 8
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.2740885416666667,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1018.5,
	"completions/mean_length": 673.3385620117188,
	"completions/mean_terminated_length": 540.8829956054688,
	"completions/min_length": 168.0,
	"completions/min_terminated_length": 168.0,
	"epoch": 1.9302325581395348,
	"grad_norm": 0.31550031900405884,
	"kl": 0.0182647705078125,
	"learning_rate": 8.117449009293668e-06,
	"loss": 0.0018,
	"num_tokens": 6971974.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 10
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.24609375,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1017.5,
	"completions/mean_length": 648.3164367675781,
	"completions/mean_terminated_length": 526.1214752197266,
	"completions/min_length": 184.5,
	"completions/min_terminated_length": 184.5,
	"epoch": 2.3720930232558137,
	"grad_norm": 0.32607021927833557,
	"kl": 0.021625518798828125,
	"learning_rate": 6.8267051218319766e-06,
	"loss": 0.0022,
	"num_tokens": 8358137.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 12
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.22395833333333331,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1007.5,
	"completions/mean_length": 630.4388122558594,
	"completions/mean_terminated_length": 516.7716064453125,
	"completions/min_length": 134.0,
	"completions/min_terminated_length": 134.0,
	"epoch": 2.744186046511628,
	"grad_norm": 0.12816570699214935,
	"kl": 0.0286407470703125,
	"learning_rate": 5.373650467932122e-06,
	"loss": 0.0029,
	"num_tokens": 9705290.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 14
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.2734375,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 998.5,
	"completions/mean_length": 662.1341247558594,
	"completions/mean_terminated_length": 526.5307464599609,
	"completions/min_length": 158.5,
	"completions/min_terminated_length": 158.5,
	"epoch": 3.186046511627907,
	"grad_norm": 2.3328659534454346,
	"kl": 0.038421630859375,
	"learning_rate": 3.887395330218429e-06,
	"loss": 0.0038,
	"num_tokens": 11084995.0,
	"reward": 0.5,
	"reward_std": 0.0,
	"rewards/compiled_reward_inst/mean": 0.5,
	"rewards/compiled_reward_inst/std": 0.0,
	"step": 16
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.18359375,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1015.5,
	"completions/mean_length": 614.7044677734375,
	"completions/mean_terminated_length": 522.2958679199219,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"epoch": 3.558139534883721,
	"grad_norm": 0.08445177972316742,
	"kl": 0.03482818603515625,
	"learning_rate": 2.5000000000000015e-06,
	"loss": 0.007,
	"num_tokens": 12414672.0,
	"reward": 0.5013020932674408,
	"reward_std": 0.003682847833260894,
	"rewards/compiled_reward_inst/mean": 0.5013020932674408,
	"rewards/compiled_reward_inst/std": 0.025515519082546234,
	"step": 18
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.24088541666666669,
	"completions/max_length": 1024.0,
	"completions/max_terminated_length": 1020.0,
	"completions/mean_length": 658.873046875,
	"completions/mean_terminated_length": 541.7471008300781,
	"completions/min_length": 87.0,
	"completions/min_terminated_length": 87.0,
	"epoch": 3.9302325581395348,
	"grad_norm": 0.2177199274301529,
	"kl": 0.0333709716796875,
	"learning_rate": 1.3347406408508695e-06,
	"loss": 0.0033,
	"num_tokens": 13810302.0,
	"reward": 0.5013020932674408,
	"reward_std": 0.003682847833260894,
	"rewards/compiled_reward_inst/mean": 0.5013020932674408,
	"rewards/compiled_reward_inst/std": 0.025515519082546234,
	"step": 20
	}
	],
	"logging_steps": 2,
	"max_steps": 24,
	"num_input_tokens_seen": 13810302,
	"num_train_epochs": 4,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}