OpenCSG-Qwen2.5-3B-GUI / trainer_state.json
yuyijiong's picture
Upload trainer_state.json with huggingface_hub
2a2dc6e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999747481124214,
"eval_steps": 500,
"global_step": 19800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00050503775157193,
"grad_norm": 13.943564139476521,
"learning_rate": 9.996464289322155e-06,
"loss": 1.7553,
"step": 10
},
{
"epoch": 0.00101007550314386,
"grad_norm": 5.789553200928907,
"learning_rate": 9.991413274068087e-06,
"loss": 0.8176,
"step": 20
},
{
"epoch": 0.00151511325471579,
"grad_norm": 7.735975598755883,
"learning_rate": 9.986362258814021e-06,
"loss": 0.7018,
"step": 30
},
{
"epoch": 0.00202015100628772,
"grad_norm": 3.5851607839184143,
"learning_rate": 9.981311243559957e-06,
"loss": 0.6599,
"step": 40
},
{
"epoch": 0.00252518875785965,
"grad_norm": 6.237435656827707,
"learning_rate": 9.976260228305891e-06,
"loss": 0.6038,
"step": 50
},
{
"epoch": 0.00303022650943158,
"grad_norm": 13.568356180772243,
"learning_rate": 9.971209213051824e-06,
"loss": 0.6027,
"step": 60
},
{
"epoch": 0.00353526426100351,
"grad_norm": 3.235105055773784,
"learning_rate": 9.966158197797758e-06,
"loss": 0.5893,
"step": 70
},
{
"epoch": 0.00404030201257544,
"grad_norm": 5.862991115468846,
"learning_rate": 9.961107182543692e-06,
"loss": 0.5863,
"step": 80
},
{
"epoch": 0.00454533976414737,
"grad_norm": 3.3977361363891334,
"learning_rate": 9.956056167289626e-06,
"loss": 0.5736,
"step": 90
},
{
"epoch": 0.0050503775157193,
"grad_norm": 4.281339414007124,
"learning_rate": 9.95100515203556e-06,
"loss": 0.5508,
"step": 100
},
{
"epoch": 0.00555541526729123,
"grad_norm": 3.144694292761742,
"learning_rate": 9.945954136781494e-06,
"loss": 0.5493,
"step": 110
},
{
"epoch": 0.00606045301886316,
"grad_norm": 3.0898861803222566,
"learning_rate": 9.940903121527427e-06,
"loss": 0.5417,
"step": 120
},
{
"epoch": 0.00656549077043509,
"grad_norm": 14.79188542424222,
"learning_rate": 9.935852106273361e-06,
"loss": 0.5444,
"step": 130
},
{
"epoch": 0.00707052852200702,
"grad_norm": 3.493197693686729,
"learning_rate": 9.930801091019295e-06,
"loss": 0.5284,
"step": 140
},
{
"epoch": 0.00757556627357895,
"grad_norm": 10.7485482232531,
"learning_rate": 9.92575007576523e-06,
"loss": 0.53,
"step": 150
},
{
"epoch": 0.00808060402515088,
"grad_norm": 4.095456564556807,
"learning_rate": 9.920699060511163e-06,
"loss": 0.5171,
"step": 160
},
{
"epoch": 0.00858564177672281,
"grad_norm": 18.07561627235401,
"learning_rate": 9.915648045257097e-06,
"loss": 0.5293,
"step": 170
},
{
"epoch": 0.00909067952829474,
"grad_norm": 14.881912561895916,
"learning_rate": 9.910597030003032e-06,
"loss": 0.525,
"step": 180
},
{
"epoch": 0.00959571727986667,
"grad_norm": 3.077548490509973,
"learning_rate": 9.905546014748966e-06,
"loss": 0.4952,
"step": 190
},
{
"epoch": 0.0101007550314386,
"grad_norm": 5.40203767482295,
"learning_rate": 9.9004949994949e-06,
"loss": 0.5172,
"step": 200
},
{
"epoch": 0.01060579278301053,
"grad_norm": 12.22425699537504,
"learning_rate": 9.895443984240834e-06,
"loss": 0.5358,
"step": 210
},
{
"epoch": 0.01111083053458246,
"grad_norm": 7.830646098334567,
"learning_rate": 9.890392968986768e-06,
"loss": 0.5102,
"step": 220
},
{
"epoch": 0.01161586828615439,
"grad_norm": 3.814940763160932,
"learning_rate": 9.8853419537327e-06,
"loss": 0.4948,
"step": 230
},
{
"epoch": 0.01212090603772632,
"grad_norm": 9.822466847144137,
"learning_rate": 9.880290938478635e-06,
"loss": 0.5217,
"step": 240
},
{
"epoch": 0.01262594378929825,
"grad_norm": 46.6756643508386,
"learning_rate": 9.875239923224569e-06,
"loss": 0.5024,
"step": 250
},
{
"epoch": 0.01313098154087018,
"grad_norm": 2.706287487769574,
"learning_rate": 9.870188907970503e-06,
"loss": 0.5161,
"step": 260
},
{
"epoch": 0.01363601929244211,
"grad_norm": 2.211316891350979,
"learning_rate": 9.865137892716437e-06,
"loss": 0.5112,
"step": 270
},
{
"epoch": 0.01414105704401404,
"grad_norm": 2.9091819978240663,
"learning_rate": 9.860086877462371e-06,
"loss": 0.5166,
"step": 280
},
{
"epoch": 0.01464609479558597,
"grad_norm": 3.745541781549167,
"learning_rate": 9.855035862208304e-06,
"loss": 0.5051,
"step": 290
},
{
"epoch": 0.0151511325471579,
"grad_norm": 2.8340353132271394,
"learning_rate": 9.849984846954238e-06,
"loss": 0.5075,
"step": 300
},
{
"epoch": 0.01565617029872983,
"grad_norm": 3.869953185267734,
"learning_rate": 9.844933831700174e-06,
"loss": 0.5216,
"step": 310
},
{
"epoch": 0.01616120805030176,
"grad_norm": 2.211016455052966,
"learning_rate": 9.839882816446108e-06,
"loss": 0.4973,
"step": 320
},
{
"epoch": 0.01666624580187369,
"grad_norm": 3.3086106494157606,
"learning_rate": 9.83483180119204e-06,
"loss": 0.5023,
"step": 330
},
{
"epoch": 0.01717128355344562,
"grad_norm": 3.680801950407282,
"learning_rate": 9.829780785937974e-06,
"loss": 0.4933,
"step": 340
},
{
"epoch": 0.01767632130501755,
"grad_norm": 2.7546156381855935,
"learning_rate": 9.824729770683908e-06,
"loss": 0.5176,
"step": 350
},
{
"epoch": 0.01818135905658948,
"grad_norm": 5.87533624997546,
"learning_rate": 9.819678755429842e-06,
"loss": 0.5014,
"step": 360
},
{
"epoch": 0.01868639680816141,
"grad_norm": 5.806811873538911,
"learning_rate": 9.814627740175777e-06,
"loss": 0.5103,
"step": 370
},
{
"epoch": 0.01919143455973334,
"grad_norm": 2.3849830922620137,
"learning_rate": 9.80957672492171e-06,
"loss": 0.5133,
"step": 380
},
{
"epoch": 0.01969647231130527,
"grad_norm": 7.2668450331901715,
"learning_rate": 9.804525709667643e-06,
"loss": 0.5049,
"step": 390
},
{
"epoch": 0.0202015100628772,
"grad_norm": 2.5520502646771264,
"learning_rate": 9.799474694413577e-06,
"loss": 0.5102,
"step": 400
},
{
"epoch": 0.02070654781444913,
"grad_norm": 2.473700390480294,
"learning_rate": 9.794423679159511e-06,
"loss": 0.4825,
"step": 410
},
{
"epoch": 0.02121158556602106,
"grad_norm": 3.648682107473895,
"learning_rate": 9.789372663905445e-06,
"loss": 0.4983,
"step": 420
},
{
"epoch": 0.02171662331759299,
"grad_norm": 3.458879333685983,
"learning_rate": 9.78432164865138e-06,
"loss": 0.4912,
"step": 430
},
{
"epoch": 0.02222166106916492,
"grad_norm": 3.0729192822937503,
"learning_rate": 9.779270633397314e-06,
"loss": 0.4905,
"step": 440
},
{
"epoch": 0.02272669882073685,
"grad_norm": 17.505451267102025,
"learning_rate": 9.774219618143248e-06,
"loss": 0.4988,
"step": 450
},
{
"epoch": 0.02323173657230878,
"grad_norm": 11.347644323036132,
"learning_rate": 9.769168602889182e-06,
"loss": 0.527,
"step": 460
},
{
"epoch": 0.02373677432388071,
"grad_norm": 3.745152344082931,
"learning_rate": 9.764117587635116e-06,
"loss": 0.4751,
"step": 470
},
{
"epoch": 0.02424181207545264,
"grad_norm": 3.430436888183069,
"learning_rate": 9.75906657238105e-06,
"loss": 0.4925,
"step": 480
},
{
"epoch": 0.02474684982702457,
"grad_norm": 2.5106033133425316,
"learning_rate": 9.754015557126983e-06,
"loss": 0.4909,
"step": 490
},
{
"epoch": 0.0252518875785965,
"grad_norm": 2.9444873201490345,
"learning_rate": 9.748964541872917e-06,
"loss": 0.5046,
"step": 500
},
{
"epoch": 0.02575692533016843,
"grad_norm": 2.2226598370186355,
"learning_rate": 9.743913526618851e-06,
"loss": 0.5019,
"step": 510
},
{
"epoch": 0.02626196308174036,
"grad_norm": 2.1431364606222143,
"learning_rate": 9.738862511364785e-06,
"loss": 0.4978,
"step": 520
},
{
"epoch": 0.02676700083331229,
"grad_norm": 1.9687420001979,
"learning_rate": 9.733811496110719e-06,
"loss": 0.5168,
"step": 530
},
{
"epoch": 0.02727203858488422,
"grad_norm": 1.790704686386335,
"learning_rate": 9.728760480856653e-06,
"loss": 0.4918,
"step": 540
},
{
"epoch": 0.02777707633645615,
"grad_norm": 1.5122456879041994,
"learning_rate": 9.723709465602587e-06,
"loss": 0.5015,
"step": 550
},
{
"epoch": 0.02828211408802808,
"grad_norm": 2.155745175549288,
"learning_rate": 9.71865845034852e-06,
"loss": 0.4958,
"step": 560
},
{
"epoch": 0.02878715183960001,
"grad_norm": 2.2999303399374753,
"learning_rate": 9.713607435094454e-06,
"loss": 0.494,
"step": 570
},
{
"epoch": 0.02929218959117194,
"grad_norm": 1.9957939818387918,
"learning_rate": 9.70855641984039e-06,
"loss": 0.4772,
"step": 580
},
{
"epoch": 0.02979722734274387,
"grad_norm": 2.7790545722653137,
"learning_rate": 9.703505404586324e-06,
"loss": 0.4958,
"step": 590
},
{
"epoch": 0.0303022650943158,
"grad_norm": 1.8902194621075885,
"learning_rate": 9.698454389332256e-06,
"loss": 0.4811,
"step": 600
},
{
"epoch": 0.03080730284588773,
"grad_norm": 2.330090449709308,
"learning_rate": 9.69340337407819e-06,
"loss": 0.4955,
"step": 610
},
{
"epoch": 0.03131234059745966,
"grad_norm": 2.63971178528007,
"learning_rate": 9.688352358824125e-06,
"loss": 0.481,
"step": 620
},
{
"epoch": 0.03181737834903159,
"grad_norm": 3.9470995700087093,
"learning_rate": 9.683301343570059e-06,
"loss": 0.4911,
"step": 630
},
{
"epoch": 0.03232241610060352,
"grad_norm": 3.705578063273332,
"learning_rate": 9.678250328315993e-06,
"loss": 0.4707,
"step": 640
},
{
"epoch": 0.03282745385217545,
"grad_norm": 2.583661159495941,
"learning_rate": 9.673199313061927e-06,
"loss": 0.4599,
"step": 650
},
{
"epoch": 0.03333249160374738,
"grad_norm": 2.0174541627393743,
"learning_rate": 9.66814829780786e-06,
"loss": 0.4941,
"step": 660
},
{
"epoch": 0.03383752935531931,
"grad_norm": 4.576484208609804,
"learning_rate": 9.663097282553793e-06,
"loss": 0.4839,
"step": 670
},
{
"epoch": 0.03434256710689124,
"grad_norm": 5.516602752547856,
"learning_rate": 9.658046267299728e-06,
"loss": 0.4652,
"step": 680
},
{
"epoch": 0.03484760485846317,
"grad_norm": 1.9835012286897338,
"learning_rate": 9.652995252045662e-06,
"loss": 0.4573,
"step": 690
},
{
"epoch": 0.0353526426100351,
"grad_norm": 2.3068456370975596,
"learning_rate": 9.647944236791596e-06,
"loss": 0.4939,
"step": 700
},
{
"epoch": 0.03585768036160703,
"grad_norm": 2.504718982689086,
"learning_rate": 9.64289322153753e-06,
"loss": 0.479,
"step": 710
},
{
"epoch": 0.03636271811317896,
"grad_norm": 3.0659241109345885,
"learning_rate": 9.637842206283464e-06,
"loss": 0.4668,
"step": 720
},
{
"epoch": 0.03686775586475089,
"grad_norm": 5.822499608800306,
"learning_rate": 9.632791191029398e-06,
"loss": 0.4776,
"step": 730
},
{
"epoch": 0.03737279361632282,
"grad_norm": 2.282902491093137,
"learning_rate": 9.627740175775332e-06,
"loss": 0.4809,
"step": 740
},
{
"epoch": 0.03787783136789475,
"grad_norm": 3.6160296320382623,
"learning_rate": 9.622689160521266e-06,
"loss": 0.4772,
"step": 750
},
{
"epoch": 0.03838286911946668,
"grad_norm": 2.168648546785388,
"learning_rate": 9.617638145267199e-06,
"loss": 0.4727,
"step": 760
},
{
"epoch": 0.03888790687103861,
"grad_norm": 2.3243624027787746,
"learning_rate": 9.612587130013133e-06,
"loss": 0.4725,
"step": 770
},
{
"epoch": 0.03939294462261054,
"grad_norm": 2.302262458969543,
"learning_rate": 9.607536114759067e-06,
"loss": 0.4866,
"step": 780
},
{
"epoch": 0.03989798237418247,
"grad_norm": 8.258585402548862,
"learning_rate": 9.602485099505001e-06,
"loss": 0.4732,
"step": 790
},
{
"epoch": 0.0404030201257544,
"grad_norm": 2.1689541135562007,
"learning_rate": 9.597434084250935e-06,
"loss": 0.4761,
"step": 800
},
{
"epoch": 0.04090805787732633,
"grad_norm": 2.087988553230154,
"learning_rate": 9.59238306899687e-06,
"loss": 0.4755,
"step": 810
},
{
"epoch": 0.04141309562889826,
"grad_norm": 2.2958375828670965,
"learning_rate": 9.587332053742802e-06,
"loss": 0.4688,
"step": 820
},
{
"epoch": 0.04191813338047019,
"grad_norm": 2.3117097400641446,
"learning_rate": 9.582281038488736e-06,
"loss": 0.4635,
"step": 830
},
{
"epoch": 0.04242317113204212,
"grad_norm": 8.93438709495605,
"learning_rate": 9.57723002323467e-06,
"loss": 0.4615,
"step": 840
},
{
"epoch": 0.04292820888361405,
"grad_norm": 2.1041537246773134,
"learning_rate": 9.572179007980606e-06,
"loss": 0.493,
"step": 850
},
{
"epoch": 0.04343324663518598,
"grad_norm": 1.73155627574332,
"learning_rate": 9.567127992726538e-06,
"loss": 0.4531,
"step": 860
},
{
"epoch": 0.04393828438675791,
"grad_norm": 1.9302062705761438,
"learning_rate": 9.562076977472473e-06,
"loss": 0.4644,
"step": 870
},
{
"epoch": 0.04444332213832984,
"grad_norm": 1.9821994690328757,
"learning_rate": 9.557025962218407e-06,
"loss": 0.4418,
"step": 880
},
{
"epoch": 0.04494835988990177,
"grad_norm": 2.5134313739910716,
"learning_rate": 9.55197494696434e-06,
"loss": 0.4556,
"step": 890
},
{
"epoch": 0.0454533976414737,
"grad_norm": 2.448160093260678,
"learning_rate": 9.546923931710275e-06,
"loss": 0.4867,
"step": 900
},
{
"epoch": 0.04595843539304563,
"grad_norm": 3.6293868003589456,
"learning_rate": 9.541872916456209e-06,
"loss": 0.4575,
"step": 910
},
{
"epoch": 0.04646347314461756,
"grad_norm": 5.740958333853467,
"learning_rate": 9.536821901202143e-06,
"loss": 0.4602,
"step": 920
},
{
"epoch": 0.04696851089618949,
"grad_norm": 2.6003779275117282,
"learning_rate": 9.531770885948076e-06,
"loss": 0.4631,
"step": 930
},
{
"epoch": 0.04747354864776142,
"grad_norm": 2.412862217111322,
"learning_rate": 9.52671987069401e-06,
"loss": 0.4765,
"step": 940
},
{
"epoch": 0.04797858639933335,
"grad_norm": 6.6681363062446035,
"learning_rate": 9.521668855439944e-06,
"loss": 0.4658,
"step": 950
},
{
"epoch": 0.04848362415090528,
"grad_norm": 5.764214860916012,
"learning_rate": 9.516617840185878e-06,
"loss": 0.4615,
"step": 960
},
{
"epoch": 0.04898866190247721,
"grad_norm": 2.464119785527778,
"learning_rate": 9.511566824931812e-06,
"loss": 0.4496,
"step": 970
},
{
"epoch": 0.04949369965404914,
"grad_norm": 6.238087195845154,
"learning_rate": 9.506515809677746e-06,
"loss": 0.4431,
"step": 980
},
{
"epoch": 0.04999873740562107,
"grad_norm": 5.932767508185511,
"learning_rate": 9.50146479442368e-06,
"loss": 0.4574,
"step": 990
},
{
"epoch": 0.050503775157193,
"grad_norm": 9.775572888274276,
"learning_rate": 9.496413779169614e-06,
"loss": 0.4596,
"step": 1000
},
{
"epoch": 0.05100881290876493,
"grad_norm": 6.307739027249706,
"learning_rate": 9.491362763915549e-06,
"loss": 0.4515,
"step": 1010
},
{
"epoch": 0.05151385066033686,
"grad_norm": 3.187729331697054,
"learning_rate": 9.486311748661483e-06,
"loss": 0.4713,
"step": 1020
},
{
"epoch": 0.05201888841190879,
"grad_norm": 5.592090620849129,
"learning_rate": 9.481260733407415e-06,
"loss": 0.454,
"step": 1030
},
{
"epoch": 0.05252392616348072,
"grad_norm": 5.281749256921326,
"learning_rate": 9.47620971815335e-06,
"loss": 0.4513,
"step": 1040
},
{
"epoch": 0.05302896391505265,
"grad_norm": 3.415542870576186,
"learning_rate": 9.471158702899283e-06,
"loss": 0.4584,
"step": 1050
},
{
"epoch": 0.05353400166662458,
"grad_norm": 3.4909280444023034,
"learning_rate": 9.466107687645218e-06,
"loss": 0.4491,
"step": 1060
},
{
"epoch": 0.05403903941819651,
"grad_norm": 4.87148376874038,
"learning_rate": 9.461056672391152e-06,
"loss": 0.4672,
"step": 1070
},
{
"epoch": 0.05454407716976844,
"grad_norm": 5.691263676353072,
"learning_rate": 9.456005657137086e-06,
"loss": 0.4398,
"step": 1080
},
{
"epoch": 0.05504911492134037,
"grad_norm": 7.086742552442662,
"learning_rate": 9.450954641883018e-06,
"loss": 0.4657,
"step": 1090
},
{
"epoch": 0.0555541526729123,
"grad_norm": 3.941589115392049,
"learning_rate": 9.445903626628952e-06,
"loss": 0.4559,
"step": 1100
},
{
"epoch": 0.05605919042448423,
"grad_norm": 11.794328327356654,
"learning_rate": 9.440852611374886e-06,
"loss": 0.4436,
"step": 1110
},
{
"epoch": 0.05656422817605616,
"grad_norm": 9.586300666497886,
"learning_rate": 9.435801596120822e-06,
"loss": 0.4581,
"step": 1120
},
{
"epoch": 0.05706926592762809,
"grad_norm": 3.5060354246476484,
"learning_rate": 9.430750580866755e-06,
"loss": 0.4414,
"step": 1130
},
{
"epoch": 0.05757430367920002,
"grad_norm": 2.9623447600620136,
"learning_rate": 9.425699565612689e-06,
"loss": 0.4475,
"step": 1140
},
{
"epoch": 0.05807934143077195,
"grad_norm": 10.925832762761535,
"learning_rate": 9.420648550358623e-06,
"loss": 0.4454,
"step": 1150
},
{
"epoch": 0.05858437918234388,
"grad_norm": 3.275733802006869,
"learning_rate": 9.415597535104557e-06,
"loss": 0.4349,
"step": 1160
},
{
"epoch": 0.05908941693391581,
"grad_norm": 4.379989620996673,
"learning_rate": 9.410546519850491e-06,
"loss": 0.455,
"step": 1170
},
{
"epoch": 0.05959445468548774,
"grad_norm": 2.1574051089026893,
"learning_rate": 9.405495504596425e-06,
"loss": 0.4551,
"step": 1180
},
{
"epoch": 0.06009949243705967,
"grad_norm": 2.5566027220209344,
"learning_rate": 9.400444489342358e-06,
"loss": 0.4496,
"step": 1190
},
{
"epoch": 0.0606045301886316,
"grad_norm": 3.3866623066830153,
"learning_rate": 9.395393474088292e-06,
"loss": 0.4705,
"step": 1200
},
{
"epoch": 0.06110956794020353,
"grad_norm": 3.166133789377687,
"learning_rate": 9.390342458834226e-06,
"loss": 0.4556,
"step": 1210
},
{
"epoch": 0.06161460569177546,
"grad_norm": 3.91861184581623,
"learning_rate": 9.38529144358016e-06,
"loss": 0.4673,
"step": 1220
},
{
"epoch": 0.06211964344334739,
"grad_norm": 3.380841845875404,
"learning_rate": 9.380240428326094e-06,
"loss": 0.4551,
"step": 1230
},
{
"epoch": 0.06262468119491932,
"grad_norm": 8.214890466142487,
"learning_rate": 9.375189413072028e-06,
"loss": 0.4436,
"step": 1240
},
{
"epoch": 0.06312971894649125,
"grad_norm": 3.5096622602562904,
"learning_rate": 9.37013839781796e-06,
"loss": 0.4455,
"step": 1250
},
{
"epoch": 0.06363475669806318,
"grad_norm": 3.773235111611304,
"learning_rate": 9.365087382563897e-06,
"loss": 0.4622,
"step": 1260
},
{
"epoch": 0.0641397944496351,
"grad_norm": 6.295753580294585,
"learning_rate": 9.36003636730983e-06,
"loss": 0.4602,
"step": 1270
},
{
"epoch": 0.06464483220120704,
"grad_norm": 2.725162214074834,
"learning_rate": 9.354985352055765e-06,
"loss": 0.4597,
"step": 1280
},
{
"epoch": 0.06514986995277897,
"grad_norm": 2.52312004043461,
"learning_rate": 9.349934336801699e-06,
"loss": 0.4617,
"step": 1290
},
{
"epoch": 0.0656549077043509,
"grad_norm": 2.789794392473564,
"learning_rate": 9.344883321547631e-06,
"loss": 0.4505,
"step": 1300
},
{
"epoch": 0.06615994545592283,
"grad_norm": 3.952687566204272,
"learning_rate": 9.339832306293566e-06,
"loss": 0.4488,
"step": 1310
},
{
"epoch": 0.06666498320749475,
"grad_norm": 2.4979475288074284,
"learning_rate": 9.3347812910395e-06,
"loss": 0.436,
"step": 1320
},
{
"epoch": 0.06717002095906668,
"grad_norm": 3.3362793627660685,
"learning_rate": 9.329730275785434e-06,
"loss": 0.4739,
"step": 1330
},
{
"epoch": 0.06767505871063861,
"grad_norm": 2.4147254145567834,
"learning_rate": 9.324679260531368e-06,
"loss": 0.4627,
"step": 1340
},
{
"epoch": 0.06818009646221056,
"grad_norm": 5.672214495427731,
"learning_rate": 9.319628245277302e-06,
"loss": 0.451,
"step": 1350
},
{
"epoch": 0.06868513421378249,
"grad_norm": 1.8726502280944801,
"learning_rate": 9.314577230023234e-06,
"loss": 0.4417,
"step": 1360
},
{
"epoch": 0.06919017196535442,
"grad_norm": 3.973145301581678,
"learning_rate": 9.309526214769169e-06,
"loss": 0.4253,
"step": 1370
},
{
"epoch": 0.06969520971692635,
"grad_norm": 5.073872511256849,
"learning_rate": 9.304475199515103e-06,
"loss": 0.4408,
"step": 1380
},
{
"epoch": 0.07020024746849828,
"grad_norm": 7.525570163399166,
"learning_rate": 9.299424184261039e-06,
"loss": 0.4307,
"step": 1390
},
{
"epoch": 0.0707052852200702,
"grad_norm": 2.781161378212436,
"learning_rate": 9.294373169006971e-06,
"loss": 0.4536,
"step": 1400
},
{
"epoch": 0.07121032297164213,
"grad_norm": 2.090988491048501,
"learning_rate": 9.289322153752905e-06,
"loss": 0.4583,
"step": 1410
},
{
"epoch": 0.07171536072321406,
"grad_norm": 2.4437267649640715,
"learning_rate": 9.28427113849884e-06,
"loss": 0.4579,
"step": 1420
},
{
"epoch": 0.072220398474786,
"grad_norm": 2.2076648371457033,
"learning_rate": 9.279220123244773e-06,
"loss": 0.4343,
"step": 1430
},
{
"epoch": 0.07272543622635792,
"grad_norm": 1.8617491045028525,
"learning_rate": 9.274169107990707e-06,
"loss": 0.4509,
"step": 1440
},
{
"epoch": 0.07323047397792985,
"grad_norm": 2.222879920477197,
"learning_rate": 9.269118092736642e-06,
"loss": 0.4577,
"step": 1450
},
{
"epoch": 0.07373551172950178,
"grad_norm": 2.5321185321413626,
"learning_rate": 9.264067077482574e-06,
"loss": 0.4391,
"step": 1460
},
{
"epoch": 0.07424054948107371,
"grad_norm": 1.8760025526162245,
"learning_rate": 9.259016062228508e-06,
"loss": 0.474,
"step": 1470
},
{
"epoch": 0.07474558723264564,
"grad_norm": 4.085966137442041,
"learning_rate": 9.253965046974442e-06,
"loss": 0.4444,
"step": 1480
},
{
"epoch": 0.07525062498421757,
"grad_norm": 3.6849288920231547,
"learning_rate": 9.248914031720376e-06,
"loss": 0.4631,
"step": 1490
},
{
"epoch": 0.0757556627357895,
"grad_norm": 2.0812547682300395,
"learning_rate": 9.24386301646631e-06,
"loss": 0.4429,
"step": 1500
},
{
"epoch": 0.07626070048736143,
"grad_norm": 2.1885414591057923,
"learning_rate": 9.238812001212245e-06,
"loss": 0.4453,
"step": 1510
},
{
"epoch": 0.07676573823893336,
"grad_norm": 2.3972513453985234,
"learning_rate": 9.233760985958177e-06,
"loss": 0.4541,
"step": 1520
},
{
"epoch": 0.07727077599050529,
"grad_norm": 1.715103982949988,
"learning_rate": 9.228709970704113e-06,
"loss": 0.4218,
"step": 1530
},
{
"epoch": 0.07777581374207722,
"grad_norm": 2.825431193796437,
"learning_rate": 9.223658955450047e-06,
"loss": 0.4552,
"step": 1540
},
{
"epoch": 0.07828085149364915,
"grad_norm": 4.126140659488343,
"learning_rate": 9.218607940195981e-06,
"loss": 0.4379,
"step": 1550
},
{
"epoch": 0.07878588924522108,
"grad_norm": 4.979196187347629,
"learning_rate": 9.213556924941914e-06,
"loss": 0.4649,
"step": 1560
},
{
"epoch": 0.079290926996793,
"grad_norm": 5.8119930612941735,
"learning_rate": 9.208505909687848e-06,
"loss": 0.4474,
"step": 1570
},
{
"epoch": 0.07979596474836494,
"grad_norm": 2.5598139471551864,
"learning_rate": 9.203454894433782e-06,
"loss": 0.4645,
"step": 1580
},
{
"epoch": 0.08030100249993687,
"grad_norm": 2.9414494655541112,
"learning_rate": 9.198403879179716e-06,
"loss": 0.4431,
"step": 1590
},
{
"epoch": 0.0808060402515088,
"grad_norm": 2.518508877017779,
"learning_rate": 9.19335286392565e-06,
"loss": 0.4513,
"step": 1600
},
{
"epoch": 0.08131107800308073,
"grad_norm": 4.2677352209415425,
"learning_rate": 9.188301848671584e-06,
"loss": 0.4362,
"step": 1610
},
{
"epoch": 0.08181611575465265,
"grad_norm": 2.376190662511493,
"learning_rate": 9.183250833417518e-06,
"loss": 0.4419,
"step": 1620
},
{
"epoch": 0.08232115350622458,
"grad_norm": 2.311824528121457,
"learning_rate": 9.17819981816345e-06,
"loss": 0.4543,
"step": 1630
},
{
"epoch": 0.08282619125779651,
"grad_norm": 2.8591324038418207,
"learning_rate": 9.173148802909385e-06,
"loss": 0.4449,
"step": 1640
},
{
"epoch": 0.08333122900936846,
"grad_norm": 2.0096705761911737,
"learning_rate": 9.168097787655319e-06,
"loss": 0.4413,
"step": 1650
},
{
"epoch": 0.08383626676094039,
"grad_norm": 2.536437573585815,
"learning_rate": 9.163046772401255e-06,
"loss": 0.4527,
"step": 1660
},
{
"epoch": 0.08434130451251232,
"grad_norm": 1.974773461430328,
"learning_rate": 9.157995757147187e-06,
"loss": 0.4471,
"step": 1670
},
{
"epoch": 0.08484634226408425,
"grad_norm": 3.0369504080501755,
"learning_rate": 9.152944741893121e-06,
"loss": 0.4523,
"step": 1680
},
{
"epoch": 0.08535138001565618,
"grad_norm": 2.3986445472475455,
"learning_rate": 9.147893726639055e-06,
"loss": 0.4367,
"step": 1690
},
{
"epoch": 0.0858564177672281,
"grad_norm": 2.808593335583457,
"learning_rate": 9.14284271138499e-06,
"loss": 0.4465,
"step": 1700
},
{
"epoch": 0.08636145551880003,
"grad_norm": 2.516839276650598,
"learning_rate": 9.137791696130924e-06,
"loss": 0.4492,
"step": 1710
},
{
"epoch": 0.08686649327037196,
"grad_norm": 2.458679996734928,
"learning_rate": 9.132740680876858e-06,
"loss": 0.4387,
"step": 1720
},
{
"epoch": 0.0873715310219439,
"grad_norm": 2.691340501537917,
"learning_rate": 9.12768966562279e-06,
"loss": 0.4591,
"step": 1730
},
{
"epoch": 0.08787656877351582,
"grad_norm": 2.2677007816655244,
"learning_rate": 9.122638650368724e-06,
"loss": 0.4463,
"step": 1740
},
{
"epoch": 0.08838160652508775,
"grad_norm": 2.0671070241912797,
"learning_rate": 9.117587635114659e-06,
"loss": 0.4307,
"step": 1750
},
{
"epoch": 0.08888664427665968,
"grad_norm": 2.7605610064361596,
"learning_rate": 9.112536619860593e-06,
"loss": 0.4391,
"step": 1760
},
{
"epoch": 0.08939168202823161,
"grad_norm": 2.437909002135904,
"learning_rate": 9.107485604606527e-06,
"loss": 0.4526,
"step": 1770
},
{
"epoch": 0.08989671977980354,
"grad_norm": 2.99196689672201,
"learning_rate": 9.102434589352461e-06,
"loss": 0.4593,
"step": 1780
},
{
"epoch": 0.09040175753137547,
"grad_norm": 1.8141907580964987,
"learning_rate": 9.097383574098393e-06,
"loss": 0.4611,
"step": 1790
},
{
"epoch": 0.0909067952829474,
"grad_norm": 2.018729489961064,
"learning_rate": 9.092332558844329e-06,
"loss": 0.4594,
"step": 1800
},
{
"epoch": 0.09141183303451933,
"grad_norm": 1.9703582862843025,
"learning_rate": 9.087281543590263e-06,
"loss": 0.4382,
"step": 1810
},
{
"epoch": 0.09191687078609126,
"grad_norm": 1.6921508201701456,
"learning_rate": 9.082230528336197e-06,
"loss": 0.4561,
"step": 1820
},
{
"epoch": 0.09242190853766319,
"grad_norm": 3.029420346852276,
"learning_rate": 9.07717951308213e-06,
"loss": 0.4441,
"step": 1830
},
{
"epoch": 0.09292694628923512,
"grad_norm": 1.9103397185841393,
"learning_rate": 9.072128497828064e-06,
"loss": 0.4345,
"step": 1840
},
{
"epoch": 0.09343198404080705,
"grad_norm": 1.6003619276106154,
"learning_rate": 9.067077482573998e-06,
"loss": 0.454,
"step": 1850
},
{
"epoch": 0.09393702179237898,
"grad_norm": 2.039612541605303,
"learning_rate": 9.062026467319932e-06,
"loss": 0.4472,
"step": 1860
},
{
"epoch": 0.09444205954395091,
"grad_norm": 1.9475921005254484,
"learning_rate": 9.056975452065866e-06,
"loss": 0.4568,
"step": 1870
},
{
"epoch": 0.09494709729552284,
"grad_norm": 1.556252720211933,
"learning_rate": 9.0519244368118e-06,
"loss": 0.4462,
"step": 1880
},
{
"epoch": 0.09545213504709477,
"grad_norm": 2.041709870164745,
"learning_rate": 9.046873421557733e-06,
"loss": 0.4522,
"step": 1890
},
{
"epoch": 0.0959571727986667,
"grad_norm": 1.6042104819290137,
"learning_rate": 9.041822406303667e-06,
"loss": 0.4358,
"step": 1900
},
{
"epoch": 0.09646221055023863,
"grad_norm": 1.7827733522561613,
"learning_rate": 9.036771391049601e-06,
"loss": 0.4541,
"step": 1910
},
{
"epoch": 0.09696724830181055,
"grad_norm": 1.5335434779957613,
"learning_rate": 9.031720375795535e-06,
"loss": 0.462,
"step": 1920
},
{
"epoch": 0.09747228605338248,
"grad_norm": 1.5961467635458402,
"learning_rate": 9.02666936054147e-06,
"loss": 0.444,
"step": 1930
},
{
"epoch": 0.09797732380495441,
"grad_norm": 2.6325242433089637,
"learning_rate": 9.021618345287403e-06,
"loss": 0.4413,
"step": 1940
},
{
"epoch": 0.09848236155652636,
"grad_norm": 2.8583722906916056,
"learning_rate": 9.016567330033338e-06,
"loss": 0.4444,
"step": 1950
},
{
"epoch": 0.09898739930809829,
"grad_norm": 2.3144131776880763,
"learning_rate": 9.011516314779272e-06,
"loss": 0.4444,
"step": 1960
},
{
"epoch": 0.09949243705967022,
"grad_norm": 2.1581296348642938,
"learning_rate": 9.006465299525206e-06,
"loss": 0.4335,
"step": 1970
},
{
"epoch": 0.09999747481124215,
"grad_norm": 17.52409284022625,
"learning_rate": 9.00141428427114e-06,
"loss": 0.4315,
"step": 1980
},
{
"epoch": 0.10050251256281408,
"grad_norm": 4.993014635080338,
"learning_rate": 8.996363269017074e-06,
"loss": 0.4397,
"step": 1990
},
{
"epoch": 0.101007550314386,
"grad_norm": 2.877086759384337,
"learning_rate": 8.991312253763007e-06,
"loss": 0.4399,
"step": 2000
},
{
"epoch": 0.10151258806595793,
"grad_norm": 1.8400304810419659,
"learning_rate": 8.98626123850894e-06,
"loss": 0.4362,
"step": 2010
},
{
"epoch": 0.10201762581752986,
"grad_norm": 2.9024549851445993,
"learning_rate": 8.981210223254875e-06,
"loss": 0.4408,
"step": 2020
},
{
"epoch": 0.1025226635691018,
"grad_norm": 2.0461267797053067,
"learning_rate": 8.976159208000809e-06,
"loss": 0.432,
"step": 2030
},
{
"epoch": 0.10302770132067372,
"grad_norm": 3.503082468120821,
"learning_rate": 8.971108192746743e-06,
"loss": 0.4325,
"step": 2040
},
{
"epoch": 0.10353273907224565,
"grad_norm": 2.388506460672893,
"learning_rate": 8.966057177492677e-06,
"loss": 0.4351,
"step": 2050
},
{
"epoch": 0.10403777682381758,
"grad_norm": 2.1254282185067646,
"learning_rate": 8.96100616223861e-06,
"loss": 0.4471,
"step": 2060
},
{
"epoch": 0.10454281457538951,
"grad_norm": 7.57345126491693,
"learning_rate": 8.955955146984545e-06,
"loss": 0.4397,
"step": 2070
},
{
"epoch": 0.10504785232696144,
"grad_norm": 1.6413054443187884,
"learning_rate": 8.95090413173048e-06,
"loss": 0.4322,
"step": 2080
},
{
"epoch": 0.10555289007853337,
"grad_norm": 1.8503237766357117,
"learning_rate": 8.945853116476414e-06,
"loss": 0.4599,
"step": 2090
},
{
"epoch": 0.1060579278301053,
"grad_norm": 2.122766063633365,
"learning_rate": 8.940802101222346e-06,
"loss": 0.4297,
"step": 2100
},
{
"epoch": 0.10656296558167723,
"grad_norm": 1.859081726279874,
"learning_rate": 8.93575108596828e-06,
"loss": 0.4448,
"step": 2110
},
{
"epoch": 0.10706800333324916,
"grad_norm": 2.477456287176706,
"learning_rate": 8.930700070714214e-06,
"loss": 0.4358,
"step": 2120
},
{
"epoch": 0.10757304108482109,
"grad_norm": 1.642087049827079,
"learning_rate": 8.925649055460148e-06,
"loss": 0.4549,
"step": 2130
},
{
"epoch": 0.10807807883639302,
"grad_norm": 3.1351745453532818,
"learning_rate": 8.920598040206083e-06,
"loss": 0.4482,
"step": 2140
},
{
"epoch": 0.10858311658796495,
"grad_norm": 3.2350948921462805,
"learning_rate": 8.915547024952017e-06,
"loss": 0.4426,
"step": 2150
},
{
"epoch": 0.10908815433953688,
"grad_norm": 2.4276194333582986,
"learning_rate": 8.910496009697949e-06,
"loss": 0.4523,
"step": 2160
},
{
"epoch": 0.10959319209110881,
"grad_norm": 2.425429246229103,
"learning_rate": 8.905444994443883e-06,
"loss": 0.4381,
"step": 2170
},
{
"epoch": 0.11009822984268074,
"grad_norm": 2.8737600406248682,
"learning_rate": 8.900393979189817e-06,
"loss": 0.4355,
"step": 2180
},
{
"epoch": 0.11060326759425267,
"grad_norm": 1.868046294608527,
"learning_rate": 8.895342963935751e-06,
"loss": 0.4445,
"step": 2190
},
{
"epoch": 0.1111083053458246,
"grad_norm": 7.059764277040628,
"learning_rate": 8.890291948681686e-06,
"loss": 0.451,
"step": 2200
},
{
"epoch": 0.11161334309739653,
"grad_norm": 2.155018266737317,
"learning_rate": 8.88524093342762e-06,
"loss": 0.4421,
"step": 2210
},
{
"epoch": 0.11211838084896845,
"grad_norm": 9.177888788340095,
"learning_rate": 8.880189918173554e-06,
"loss": 0.4398,
"step": 2220
},
{
"epoch": 0.11262341860054038,
"grad_norm": 3.143167600541576,
"learning_rate": 8.875138902919488e-06,
"loss": 0.4297,
"step": 2230
},
{
"epoch": 0.11312845635211231,
"grad_norm": 2.152834452055092,
"learning_rate": 8.870087887665422e-06,
"loss": 0.4529,
"step": 2240
},
{
"epoch": 0.11363349410368426,
"grad_norm": 14.793812918224992,
"learning_rate": 8.865036872411356e-06,
"loss": 0.4383,
"step": 2250
},
{
"epoch": 0.11413853185525619,
"grad_norm": 3.45745902653208,
"learning_rate": 8.859985857157289e-06,
"loss": 0.4302,
"step": 2260
},
{
"epoch": 0.11464356960682812,
"grad_norm": 2.7114403885624605,
"learning_rate": 8.854934841903223e-06,
"loss": 0.4251,
"step": 2270
},
{
"epoch": 0.11514860735840005,
"grad_norm": 2.723960534334651,
"learning_rate": 8.849883826649157e-06,
"loss": 0.4286,
"step": 2280
},
{
"epoch": 0.11565364510997198,
"grad_norm": 2.4905833599831415,
"learning_rate": 8.844832811395091e-06,
"loss": 0.4508,
"step": 2290
},
{
"epoch": 0.1161586828615439,
"grad_norm": 20.83563549184929,
"learning_rate": 8.839781796141025e-06,
"loss": 0.4474,
"step": 2300
},
{
"epoch": 0.11666372061311583,
"grad_norm": 3.9724073335359336,
"learning_rate": 8.83473078088696e-06,
"loss": 0.4476,
"step": 2310
},
{
"epoch": 0.11716875836468776,
"grad_norm": 2.1463739260782106,
"learning_rate": 8.829679765632892e-06,
"loss": 0.4189,
"step": 2320
},
{
"epoch": 0.1176737961162597,
"grad_norm": 3.592083818788885,
"learning_rate": 8.824628750378826e-06,
"loss": 0.4242,
"step": 2330
},
{
"epoch": 0.11817883386783162,
"grad_norm": 2.548758001659721,
"learning_rate": 8.819577735124762e-06,
"loss": 0.4112,
"step": 2340
},
{
"epoch": 0.11868387161940355,
"grad_norm": 2.713449956929624,
"learning_rate": 8.814526719870696e-06,
"loss": 0.4173,
"step": 2350
},
{
"epoch": 0.11918890937097548,
"grad_norm": 3.2378838703432016,
"learning_rate": 8.80947570461663e-06,
"loss": 0.4256,
"step": 2360
},
{
"epoch": 0.11969394712254741,
"grad_norm": 3.8665508352583013,
"learning_rate": 8.804424689362562e-06,
"loss": 0.4241,
"step": 2370
},
{
"epoch": 0.12019898487411934,
"grad_norm": 4.556177393216747,
"learning_rate": 8.799373674108496e-06,
"loss": 0.4282,
"step": 2380
},
{
"epoch": 0.12070402262569127,
"grad_norm": 3.687650501163452,
"learning_rate": 8.79432265885443e-06,
"loss": 0.4242,
"step": 2390
},
{
"epoch": 0.1212090603772632,
"grad_norm": 2.9530727834279595,
"learning_rate": 8.789271643600365e-06,
"loss": 0.4301,
"step": 2400
},
{
"epoch": 0.12171409812883513,
"grad_norm": 2.916996176159921,
"learning_rate": 8.784220628346299e-06,
"loss": 0.4284,
"step": 2410
},
{
"epoch": 0.12221913588040706,
"grad_norm": 2.2417118662412734,
"learning_rate": 8.779169613092233e-06,
"loss": 0.4186,
"step": 2420
},
{
"epoch": 0.12272417363197899,
"grad_norm": 2.947908220653578,
"learning_rate": 8.774118597838165e-06,
"loss": 0.4223,
"step": 2430
},
{
"epoch": 0.12322921138355092,
"grad_norm": 2.071616410723938,
"learning_rate": 8.7690675825841e-06,
"loss": 0.4375,
"step": 2440
},
{
"epoch": 0.12373424913512285,
"grad_norm": 2.8361443264190296,
"learning_rate": 8.764016567330034e-06,
"loss": 0.426,
"step": 2450
},
{
"epoch": 0.12423928688669478,
"grad_norm": 2.820818503243756,
"learning_rate": 8.758965552075968e-06,
"loss": 0.4341,
"step": 2460
},
{
"epoch": 0.12474432463826671,
"grad_norm": 3.136410856304739,
"learning_rate": 8.753914536821902e-06,
"loss": 0.4445,
"step": 2470
},
{
"epoch": 0.12524936238983864,
"grad_norm": 8.047547235777753,
"learning_rate": 8.748863521567836e-06,
"loss": 0.4267,
"step": 2480
},
{
"epoch": 0.12575440014141057,
"grad_norm": 3.892449775126266,
"learning_rate": 8.74381250631377e-06,
"loss": 0.419,
"step": 2490
},
{
"epoch": 0.1262594378929825,
"grad_norm": 2.354897593217238,
"learning_rate": 8.738761491059704e-06,
"loss": 0.4272,
"step": 2500
},
{
"epoch": 0.12676447564455443,
"grad_norm": 2.5811852973740566,
"learning_rate": 8.733710475805638e-06,
"loss": 0.4253,
"step": 2510
},
{
"epoch": 0.12726951339612635,
"grad_norm": 6.105044708207583,
"learning_rate": 8.728659460551572e-06,
"loss": 0.4214,
"step": 2520
},
{
"epoch": 0.12777455114769828,
"grad_norm": 6.168633495733829,
"learning_rate": 8.723608445297505e-06,
"loss": 0.4374,
"step": 2530
},
{
"epoch": 0.1282795888992702,
"grad_norm": 3.3331523293740677,
"learning_rate": 8.718557430043439e-06,
"loss": 0.4221,
"step": 2540
},
{
"epoch": 0.12878462665084214,
"grad_norm": 3.1244184035542895,
"learning_rate": 8.713506414789373e-06,
"loss": 0.4362,
"step": 2550
},
{
"epoch": 0.12928966440241407,
"grad_norm": 3.438277611759734,
"learning_rate": 8.708455399535307e-06,
"loss": 0.4193,
"step": 2560
},
{
"epoch": 0.129794702153986,
"grad_norm": 5.520539408202482,
"learning_rate": 8.703404384281241e-06,
"loss": 0.4057,
"step": 2570
},
{
"epoch": 0.13029973990555793,
"grad_norm": 5.422130415998936,
"learning_rate": 8.698353369027176e-06,
"loss": 0.4384,
"step": 2580
},
{
"epoch": 0.13080477765712986,
"grad_norm": 3.341989913378195,
"learning_rate": 8.693302353773108e-06,
"loss": 0.4162,
"step": 2590
},
{
"epoch": 0.1313098154087018,
"grad_norm": 2.475515489410939,
"learning_rate": 8.688251338519042e-06,
"loss": 0.4348,
"step": 2600
},
{
"epoch": 0.13181485316027372,
"grad_norm": 2.550493025405498,
"learning_rate": 8.683200323264976e-06,
"loss": 0.4173,
"step": 2610
},
{
"epoch": 0.13231989091184565,
"grad_norm": 2.0333378304030956,
"learning_rate": 8.678149308010912e-06,
"loss": 0.423,
"step": 2620
},
{
"epoch": 0.13282492866341758,
"grad_norm": 2.2902374319536176,
"learning_rate": 8.673098292756844e-06,
"loss": 0.443,
"step": 2630
},
{
"epoch": 0.1333299664149895,
"grad_norm": 2.774814046594427,
"learning_rate": 8.668047277502779e-06,
"loss": 0.4174,
"step": 2640
},
{
"epoch": 0.13383500416656144,
"grad_norm": 3.8730252519897013,
"learning_rate": 8.662996262248713e-06,
"loss": 0.4248,
"step": 2650
},
{
"epoch": 0.13434004191813337,
"grad_norm": 2.6952038500341127,
"learning_rate": 8.657945246994647e-06,
"loss": 0.4117,
"step": 2660
},
{
"epoch": 0.1348450796697053,
"grad_norm": 2.7361849988399634,
"learning_rate": 8.652894231740581e-06,
"loss": 0.4249,
"step": 2670
},
{
"epoch": 0.13535011742127723,
"grad_norm": 2.9758725333203535,
"learning_rate": 8.647843216486515e-06,
"loss": 0.4381,
"step": 2680
},
{
"epoch": 0.13585515517284916,
"grad_norm": 2.551067170047961,
"learning_rate": 8.642792201232447e-06,
"loss": 0.4214,
"step": 2690
},
{
"epoch": 0.13636019292442111,
"grad_norm": 2.6334795048127724,
"learning_rate": 8.637741185978382e-06,
"loss": 0.427,
"step": 2700
},
{
"epoch": 0.13686523067599304,
"grad_norm": 3.9869976953943866,
"learning_rate": 8.632690170724316e-06,
"loss": 0.4101,
"step": 2710
},
{
"epoch": 0.13737026842756497,
"grad_norm": 3.313933800574665,
"learning_rate": 8.62763915547025e-06,
"loss": 0.4138,
"step": 2720
},
{
"epoch": 0.1378753061791369,
"grad_norm": 3.641595792417612,
"learning_rate": 8.622588140216184e-06,
"loss": 0.4256,
"step": 2730
},
{
"epoch": 0.13838034393070883,
"grad_norm": 5.36751356787116,
"learning_rate": 8.617537124962118e-06,
"loss": 0.4314,
"step": 2740
},
{
"epoch": 0.13888538168228076,
"grad_norm": 2.5991069721105977,
"learning_rate": 8.612486109708052e-06,
"loss": 0.4157,
"step": 2750
},
{
"epoch": 0.1393904194338527,
"grad_norm": 3.1641348139616463,
"learning_rate": 8.607435094453986e-06,
"loss": 0.4023,
"step": 2760
},
{
"epoch": 0.13989545718542462,
"grad_norm": 3.021572554543311,
"learning_rate": 8.60238407919992e-06,
"loss": 0.4138,
"step": 2770
},
{
"epoch": 0.14040049493699655,
"grad_norm": 3.0773716536091063,
"learning_rate": 8.597333063945855e-06,
"loss": 0.4347,
"step": 2780
},
{
"epoch": 0.14090553268856848,
"grad_norm": 3.1572917500612236,
"learning_rate": 8.592282048691789e-06,
"loss": 0.425,
"step": 2790
},
{
"epoch": 0.1414105704401404,
"grad_norm": 3.065547472806378,
"learning_rate": 8.587231033437721e-06,
"loss": 0.4227,
"step": 2800
},
{
"epoch": 0.14191560819171234,
"grad_norm": 3.0283835796400593,
"learning_rate": 8.582180018183655e-06,
"loss": 0.4126,
"step": 2810
},
{
"epoch": 0.14242064594328427,
"grad_norm": 3.173481302073063,
"learning_rate": 8.57712900292959e-06,
"loss": 0.434,
"step": 2820
},
{
"epoch": 0.1429256836948562,
"grad_norm": 3.765724472944031,
"learning_rate": 8.572077987675524e-06,
"loss": 0.4463,
"step": 2830
},
{
"epoch": 0.14343072144642813,
"grad_norm": 5.419243180691885,
"learning_rate": 8.567026972421458e-06,
"loss": 0.4213,
"step": 2840
},
{
"epoch": 0.14393575919800006,
"grad_norm": 7.3295747683680865,
"learning_rate": 8.561975957167392e-06,
"loss": 0.4202,
"step": 2850
},
{
"epoch": 0.144440796949572,
"grad_norm": 6.887688386053364,
"learning_rate": 8.556924941913324e-06,
"loss": 0.4238,
"step": 2860
},
{
"epoch": 0.14494583470114392,
"grad_norm": 2.786632234737629,
"learning_rate": 8.551873926659258e-06,
"loss": 0.431,
"step": 2870
},
{
"epoch": 0.14545087245271585,
"grad_norm": 2.60607889031576,
"learning_rate": 8.546822911405192e-06,
"loss": 0.4306,
"step": 2880
},
{
"epoch": 0.14595591020428778,
"grad_norm": 8.477010546599903,
"learning_rate": 8.541771896151128e-06,
"loss": 0.4419,
"step": 2890
},
{
"epoch": 0.1464609479558597,
"grad_norm": 2.27833113141815,
"learning_rate": 8.53672088089706e-06,
"loss": 0.4226,
"step": 2900
},
{
"epoch": 0.14696598570743163,
"grad_norm": 4.423453943948206,
"learning_rate": 8.531669865642995e-06,
"loss": 0.439,
"step": 2910
},
{
"epoch": 0.14747102345900356,
"grad_norm": 3.6944661359694013,
"learning_rate": 8.526618850388929e-06,
"loss": 0.4221,
"step": 2920
},
{
"epoch": 0.1479760612105755,
"grad_norm": 5.078989868876925,
"learning_rate": 8.521567835134863e-06,
"loss": 0.4317,
"step": 2930
},
{
"epoch": 0.14848109896214742,
"grad_norm": 3.37535562195142,
"learning_rate": 8.516516819880797e-06,
"loss": 0.4365,
"step": 2940
},
{
"epoch": 0.14898613671371935,
"grad_norm": 4.387702072726384,
"learning_rate": 8.511465804626731e-06,
"loss": 0.4133,
"step": 2950
},
{
"epoch": 0.14949117446529128,
"grad_norm": 4.377549617088979,
"learning_rate": 8.506414789372664e-06,
"loss": 0.4332,
"step": 2960
},
{
"epoch": 0.1499962122168632,
"grad_norm": 3.6071548234377033,
"learning_rate": 8.501363774118598e-06,
"loss": 0.424,
"step": 2970
},
{
"epoch": 0.15050124996843514,
"grad_norm": 3.2776129282491953,
"learning_rate": 8.496312758864532e-06,
"loss": 0.431,
"step": 2980
},
{
"epoch": 0.15100628772000707,
"grad_norm": 4.474289915888015,
"learning_rate": 8.491261743610466e-06,
"loss": 0.4289,
"step": 2990
},
{
"epoch": 0.151511325471579,
"grad_norm": 2.562841849224475,
"learning_rate": 8.4862107283564e-06,
"loss": 0.4291,
"step": 3000
},
{
"epoch": 0.15201636322315093,
"grad_norm": 2.9179612414883325,
"learning_rate": 8.481159713102334e-06,
"loss": 0.4187,
"step": 3010
},
{
"epoch": 0.15252140097472286,
"grad_norm": 3.2632959036982703,
"learning_rate": 8.476108697848268e-06,
"loss": 0.4212,
"step": 3020
},
{
"epoch": 0.1530264387262948,
"grad_norm": 3.962692563420196,
"learning_rate": 8.471057682594203e-06,
"loss": 0.4228,
"step": 3030
},
{
"epoch": 0.15353147647786672,
"grad_norm": 3.2213155600732803,
"learning_rate": 8.466006667340137e-06,
"loss": 0.4271,
"step": 3040
},
{
"epoch": 0.15403651422943865,
"grad_norm": 2.3479787665404555,
"learning_rate": 8.460955652086071e-06,
"loss": 0.4329,
"step": 3050
},
{
"epoch": 0.15454155198101058,
"grad_norm": 2.7693374782815035,
"learning_rate": 8.455904636832003e-06,
"loss": 0.4168,
"step": 3060
},
{
"epoch": 0.1550465897325825,
"grad_norm": 2.7643943115933314,
"learning_rate": 8.450853621577937e-06,
"loss": 0.4145,
"step": 3070
},
{
"epoch": 0.15555162748415444,
"grad_norm": 2.6062282259447422,
"learning_rate": 8.445802606323872e-06,
"loss": 0.4191,
"step": 3080
},
{
"epoch": 0.15605666523572637,
"grad_norm": 2.60763373572543,
"learning_rate": 8.440751591069806e-06,
"loss": 0.4161,
"step": 3090
},
{
"epoch": 0.1565617029872983,
"grad_norm": 2.4475412139703,
"learning_rate": 8.43570057581574e-06,
"loss": 0.4273,
"step": 3100
},
{
"epoch": 0.15706674073887023,
"grad_norm": 2.9309156363483084,
"learning_rate": 8.430649560561674e-06,
"loss": 0.4042,
"step": 3110
},
{
"epoch": 0.15757177849044215,
"grad_norm": 2.2191638521741845,
"learning_rate": 8.425598545307608e-06,
"loss": 0.4306,
"step": 3120
},
{
"epoch": 0.15807681624201408,
"grad_norm": 2.0790541165617893,
"learning_rate": 8.42054753005354e-06,
"loss": 0.4219,
"step": 3130
},
{
"epoch": 0.158581853993586,
"grad_norm": 2.344144785550955,
"learning_rate": 8.415496514799475e-06,
"loss": 0.44,
"step": 3140
},
{
"epoch": 0.15908689174515794,
"grad_norm": 3.896107585478935,
"learning_rate": 8.410445499545409e-06,
"loss": 0.4196,
"step": 3150
},
{
"epoch": 0.15959192949672987,
"grad_norm": 3.511331268768427,
"learning_rate": 8.405394484291345e-06,
"loss": 0.4138,
"step": 3160
},
{
"epoch": 0.1600969672483018,
"grad_norm": 4.918842428536537,
"learning_rate": 8.400343469037277e-06,
"loss": 0.426,
"step": 3170
},
{
"epoch": 0.16060200499987373,
"grad_norm": 2.670823674907055,
"learning_rate": 8.395292453783211e-06,
"loss": 0.4099,
"step": 3180
},
{
"epoch": 0.16110704275144566,
"grad_norm": 2.125448197180906,
"learning_rate": 8.390241438529145e-06,
"loss": 0.4186,
"step": 3190
},
{
"epoch": 0.1616120805030176,
"grad_norm": 1.971202039415502,
"learning_rate": 8.38519042327508e-06,
"loss": 0.4307,
"step": 3200
},
{
"epoch": 0.16211711825458952,
"grad_norm": 3.637785251447948,
"learning_rate": 8.380139408021013e-06,
"loss": 0.4315,
"step": 3210
},
{
"epoch": 0.16262215600616145,
"grad_norm": 3.1909501990683027,
"learning_rate": 8.375088392766948e-06,
"loss": 0.4103,
"step": 3220
},
{
"epoch": 0.16312719375773338,
"grad_norm": 3.426808590014172,
"learning_rate": 8.37003737751288e-06,
"loss": 0.4178,
"step": 3230
},
{
"epoch": 0.1636322315093053,
"grad_norm": 2.794301511590198,
"learning_rate": 8.364986362258814e-06,
"loss": 0.421,
"step": 3240
},
{
"epoch": 0.16413726926087724,
"grad_norm": 2.6521369002493196,
"learning_rate": 8.359935347004748e-06,
"loss": 0.4279,
"step": 3250
},
{
"epoch": 0.16464230701244917,
"grad_norm": 3.065473735931392,
"learning_rate": 8.354884331750682e-06,
"loss": 0.4242,
"step": 3260
},
{
"epoch": 0.1651473447640211,
"grad_norm": 2.3676634296220826,
"learning_rate": 8.349833316496616e-06,
"loss": 0.4004,
"step": 3270
},
{
"epoch": 0.16565238251559303,
"grad_norm": 3.0123313610182407,
"learning_rate": 8.34478230124255e-06,
"loss": 0.4168,
"step": 3280
},
{
"epoch": 0.16615742026716496,
"grad_norm": 2.613652117431061,
"learning_rate": 8.339731285988485e-06,
"loss": 0.4032,
"step": 3290
},
{
"epoch": 0.16666245801873691,
"grad_norm": 2.5495103609008383,
"learning_rate": 8.334680270734419e-06,
"loss": 0.4258,
"step": 3300
},
{
"epoch": 0.16716749577030884,
"grad_norm": 1.944110652634474,
"learning_rate": 8.329629255480353e-06,
"loss": 0.4047,
"step": 3310
},
{
"epoch": 0.16767253352188077,
"grad_norm": 3.996554861787568,
"learning_rate": 8.324578240226287e-06,
"loss": 0.4266,
"step": 3320
},
{
"epoch": 0.1681775712734527,
"grad_norm": 3.311463894609568,
"learning_rate": 8.31952722497222e-06,
"loss": 0.4142,
"step": 3330
},
{
"epoch": 0.16868260902502463,
"grad_norm": 2.6266078214610418,
"learning_rate": 8.314476209718154e-06,
"loss": 0.4106,
"step": 3340
},
{
"epoch": 0.16918764677659656,
"grad_norm": 3.3843962429509533,
"learning_rate": 8.309425194464088e-06,
"loss": 0.4023,
"step": 3350
},
{
"epoch": 0.1696926845281685,
"grad_norm": 2.755113613993984,
"learning_rate": 8.304374179210022e-06,
"loss": 0.3974,
"step": 3360
},
{
"epoch": 0.17019772227974042,
"grad_norm": 3.915262353619256,
"learning_rate": 8.299323163955956e-06,
"loss": 0.4233,
"step": 3370
},
{
"epoch": 0.17070276003131235,
"grad_norm": 6.128222421561234,
"learning_rate": 8.29427214870189e-06,
"loss": 0.3987,
"step": 3380
},
{
"epoch": 0.17120779778288428,
"grad_norm": 3.244731601327553,
"learning_rate": 8.289221133447823e-06,
"loss": 0.408,
"step": 3390
},
{
"epoch": 0.1717128355344562,
"grad_norm": 3.415058459654747,
"learning_rate": 8.284170118193757e-06,
"loss": 0.4264,
"step": 3400
},
{
"epoch": 0.17221787328602814,
"grad_norm": 4.870959665296901,
"learning_rate": 8.27911910293969e-06,
"loss": 0.4068,
"step": 3410
},
{
"epoch": 0.17272291103760007,
"grad_norm": 3.1738996571406837,
"learning_rate": 8.274068087685625e-06,
"loss": 0.4238,
"step": 3420
},
{
"epoch": 0.173227948789172,
"grad_norm": 5.015822248252729,
"learning_rate": 8.269017072431559e-06,
"loss": 0.4258,
"step": 3430
},
{
"epoch": 0.17373298654074393,
"grad_norm": 5.826443535049107,
"learning_rate": 8.263966057177493e-06,
"loss": 0.389,
"step": 3440
},
{
"epoch": 0.17423802429231586,
"grad_norm": 3.9268362077276366,
"learning_rate": 8.258915041923427e-06,
"loss": 0.4302,
"step": 3450
},
{
"epoch": 0.1747430620438878,
"grad_norm": 2.7948709929923155,
"learning_rate": 8.253864026669361e-06,
"loss": 0.4179,
"step": 3460
},
{
"epoch": 0.17524809979545972,
"grad_norm": 7.055728348112125,
"learning_rate": 8.248813011415296e-06,
"loss": 0.4249,
"step": 3470
},
{
"epoch": 0.17575313754703165,
"grad_norm": 11.223550772273683,
"learning_rate": 8.24376199616123e-06,
"loss": 0.4198,
"step": 3480
},
{
"epoch": 0.17625817529860358,
"grad_norm": 3.272872614673025,
"learning_rate": 8.238710980907164e-06,
"loss": 0.412,
"step": 3490
},
{
"epoch": 0.1767632130501755,
"grad_norm": 4.687786747360582,
"learning_rate": 8.233659965653096e-06,
"loss": 0.4143,
"step": 3500
},
{
"epoch": 0.17726825080174743,
"grad_norm": 4.2131701830416315,
"learning_rate": 8.22860895039903e-06,
"loss": 0.407,
"step": 3510
},
{
"epoch": 0.17777328855331936,
"grad_norm": 2.7350467089835058,
"learning_rate": 8.223557935144965e-06,
"loss": 0.4065,
"step": 3520
},
{
"epoch": 0.1782783263048913,
"grad_norm": 15.963906126522279,
"learning_rate": 8.218506919890899e-06,
"loss": 0.4053,
"step": 3530
},
{
"epoch": 0.17878336405646322,
"grad_norm": 6.283377242483946,
"learning_rate": 8.213455904636833e-06,
"loss": 0.4398,
"step": 3540
},
{
"epoch": 0.17928840180803515,
"grad_norm": 4.211980280469181,
"learning_rate": 8.208404889382767e-06,
"loss": 0.4279,
"step": 3550
},
{
"epoch": 0.17979343955960708,
"grad_norm": 4.888324462942457,
"learning_rate": 8.2033538741287e-06,
"loss": 0.4279,
"step": 3560
},
{
"epoch": 0.180298477311179,
"grad_norm": 4.806718275911088,
"learning_rate": 8.198302858874635e-06,
"loss": 0.4074,
"step": 3570
},
{
"epoch": 0.18080351506275094,
"grad_norm": 14.618909757426387,
"learning_rate": 8.19325184362057e-06,
"loss": 0.4377,
"step": 3580
},
{
"epoch": 0.18130855281432287,
"grad_norm": 3.0145598452408935,
"learning_rate": 8.188200828366503e-06,
"loss": 0.4244,
"step": 3590
},
{
"epoch": 0.1818135905658948,
"grad_norm": 3.537451616849706,
"learning_rate": 8.183149813112436e-06,
"loss": 0.4115,
"step": 3600
},
{
"epoch": 0.18231862831746673,
"grad_norm": 6.760898974505609,
"learning_rate": 8.17809879785837e-06,
"loss": 0.4183,
"step": 3610
},
{
"epoch": 0.18282366606903866,
"grad_norm": 3.1669599365914927,
"learning_rate": 8.173047782604304e-06,
"loss": 0.4116,
"step": 3620
},
{
"epoch": 0.1833287038206106,
"grad_norm": 15.662389012118272,
"learning_rate": 8.167996767350238e-06,
"loss": 0.4208,
"step": 3630
},
{
"epoch": 0.18383374157218252,
"grad_norm": 7.978949404174967,
"learning_rate": 8.162945752096172e-06,
"loss": 0.4355,
"step": 3640
},
{
"epoch": 0.18433877932375445,
"grad_norm": 3.0917950299227375,
"learning_rate": 8.157894736842106e-06,
"loss": 0.4161,
"step": 3650
},
{
"epoch": 0.18484381707532638,
"grad_norm": 5.585272554418732,
"learning_rate": 8.152843721588039e-06,
"loss": 0.4206,
"step": 3660
},
{
"epoch": 0.1853488548268983,
"grad_norm": 2.7026089034564875,
"learning_rate": 8.147792706333973e-06,
"loss": 0.4092,
"step": 3670
},
{
"epoch": 0.18585389257847024,
"grad_norm": 4.859931210791958,
"learning_rate": 8.142741691079907e-06,
"loss": 0.4064,
"step": 3680
},
{
"epoch": 0.18635893033004217,
"grad_norm": 3.403058240228029,
"learning_rate": 8.137690675825841e-06,
"loss": 0.4082,
"step": 3690
},
{
"epoch": 0.1868639680816141,
"grad_norm": 3.2005619654009076,
"learning_rate": 8.132639660571775e-06,
"loss": 0.4042,
"step": 3700
},
{
"epoch": 0.18736900583318603,
"grad_norm": 2.6974451416766443,
"learning_rate": 8.12758864531771e-06,
"loss": 0.4291,
"step": 3710
},
{
"epoch": 0.18787404358475795,
"grad_norm": 3.226834318122475,
"learning_rate": 8.122537630063644e-06,
"loss": 0.4173,
"step": 3720
},
{
"epoch": 0.18837908133632988,
"grad_norm": 2.6103166184994895,
"learning_rate": 8.117486614809578e-06,
"loss": 0.4112,
"step": 3730
},
{
"epoch": 0.18888411908790181,
"grad_norm": 2.691120701877213,
"learning_rate": 8.112435599555512e-06,
"loss": 0.4083,
"step": 3740
},
{
"epoch": 0.18938915683947374,
"grad_norm": 3.323955083237021,
"learning_rate": 8.107384584301446e-06,
"loss": 0.4096,
"step": 3750
},
{
"epoch": 0.18989419459104567,
"grad_norm": 6.965415675773204,
"learning_rate": 8.102333569047378e-06,
"loss": 0.3977,
"step": 3760
},
{
"epoch": 0.1903992323426176,
"grad_norm": 2.451525045869991,
"learning_rate": 8.097282553793313e-06,
"loss": 0.4195,
"step": 3770
},
{
"epoch": 0.19090427009418953,
"grad_norm": 2.6787347183394243,
"learning_rate": 8.092231538539247e-06,
"loss": 0.4121,
"step": 3780
},
{
"epoch": 0.19140930784576146,
"grad_norm": 6.742330893738729,
"learning_rate": 8.08718052328518e-06,
"loss": 0.4102,
"step": 3790
},
{
"epoch": 0.1919143455973334,
"grad_norm": 2.9458958646737083,
"learning_rate": 8.082129508031115e-06,
"loss": 0.4206,
"step": 3800
},
{
"epoch": 0.19241938334890532,
"grad_norm": 4.018744503984466,
"learning_rate": 8.077078492777049e-06,
"loss": 0.4367,
"step": 3810
},
{
"epoch": 0.19292442110047725,
"grad_norm": 4.594358535206507,
"learning_rate": 8.072027477522983e-06,
"loss": 0.4123,
"step": 3820
},
{
"epoch": 0.19342945885204918,
"grad_norm": 7.765118457446688,
"learning_rate": 8.066976462268916e-06,
"loss": 0.4203,
"step": 3830
},
{
"epoch": 0.1939344966036211,
"grad_norm": 4.571772892446487,
"learning_rate": 8.061925447014851e-06,
"loss": 0.4093,
"step": 3840
},
{
"epoch": 0.19443953435519304,
"grad_norm": 3.0704420404585995,
"learning_rate": 8.056874431760786e-06,
"loss": 0.4195,
"step": 3850
},
{
"epoch": 0.19494457210676497,
"grad_norm": 2.8235987578400334,
"learning_rate": 8.05182341650672e-06,
"loss": 0.4131,
"step": 3860
},
{
"epoch": 0.1954496098583369,
"grad_norm": 4.664925999855343,
"learning_rate": 8.046772401252652e-06,
"loss": 0.4052,
"step": 3870
},
{
"epoch": 0.19595464760990883,
"grad_norm": 3.0583937314253,
"learning_rate": 8.041721385998586e-06,
"loss": 0.4198,
"step": 3880
},
{
"epoch": 0.19645968536148076,
"grad_norm": 2.973956981634855,
"learning_rate": 8.03667037074452e-06,
"loss": 0.4066,
"step": 3890
},
{
"epoch": 0.19696472311305271,
"grad_norm": 2.803835666144933,
"learning_rate": 8.031619355490454e-06,
"loss": 0.4158,
"step": 3900
},
{
"epoch": 0.19746976086462464,
"grad_norm": 4.834280824568942,
"learning_rate": 8.026568340236389e-06,
"loss": 0.4178,
"step": 3910
},
{
"epoch": 0.19797479861619657,
"grad_norm": 10.038307243068243,
"learning_rate": 8.021517324982323e-06,
"loss": 0.4111,
"step": 3920
},
{
"epoch": 0.1984798363677685,
"grad_norm": 7.610030059675689,
"learning_rate": 8.016466309728255e-06,
"loss": 0.4096,
"step": 3930
},
{
"epoch": 0.19898487411934043,
"grad_norm": 3.6050764165551556,
"learning_rate": 8.01141529447419e-06,
"loss": 0.4233,
"step": 3940
},
{
"epoch": 0.19948991187091236,
"grad_norm": 6.736237771102318,
"learning_rate": 8.006364279220123e-06,
"loss": 0.4153,
"step": 3950
},
{
"epoch": 0.1999949496224843,
"grad_norm": 2.9583460730423443,
"learning_rate": 8.001313263966057e-06,
"loss": 0.4211,
"step": 3960
},
{
"epoch": 0.20049998737405622,
"grad_norm": 2.2968014860075576,
"learning_rate": 7.996262248711992e-06,
"loss": 0.4289,
"step": 3970
},
{
"epoch": 0.20100502512562815,
"grad_norm": 2.474011765446813,
"learning_rate": 7.991211233457926e-06,
"loss": 0.4233,
"step": 3980
},
{
"epoch": 0.20151006287720008,
"grad_norm": 3.0063110556198778,
"learning_rate": 7.98616021820386e-06,
"loss": 0.428,
"step": 3990
},
{
"epoch": 0.202015100628772,
"grad_norm": 4.355361962575098,
"learning_rate": 7.981109202949794e-06,
"loss": 0.4226,
"step": 4000
},
{
"epoch": 0.20252013838034394,
"grad_norm": 2.6560259873816747,
"learning_rate": 7.976058187695728e-06,
"loss": 0.4175,
"step": 4010
},
{
"epoch": 0.20302517613191587,
"grad_norm": 2.9619585376031647,
"learning_rate": 7.971007172441662e-06,
"loss": 0.4143,
"step": 4020
},
{
"epoch": 0.2035302138834878,
"grad_norm": 2.066998306222369,
"learning_rate": 7.965956157187595e-06,
"loss": 0.4001,
"step": 4030
},
{
"epoch": 0.20403525163505973,
"grad_norm": 4.632842633228617,
"learning_rate": 7.960905141933529e-06,
"loss": 0.4133,
"step": 4040
},
{
"epoch": 0.20454028938663166,
"grad_norm": 6.512090967359411,
"learning_rate": 7.955854126679463e-06,
"loss": 0.4271,
"step": 4050
},
{
"epoch": 0.2050453271382036,
"grad_norm": 3.4613990642774777,
"learning_rate": 7.950803111425397e-06,
"loss": 0.4035,
"step": 4060
},
{
"epoch": 0.20555036488977552,
"grad_norm": 6.082524106916885,
"learning_rate": 7.945752096171331e-06,
"loss": 0.4186,
"step": 4070
},
{
"epoch": 0.20605540264134745,
"grad_norm": 9.959451587245269,
"learning_rate": 7.940701080917265e-06,
"loss": 0.4136,
"step": 4080
},
{
"epoch": 0.20656044039291938,
"grad_norm": 3.1580403384223694,
"learning_rate": 7.935650065663198e-06,
"loss": 0.4166,
"step": 4090
},
{
"epoch": 0.2070654781444913,
"grad_norm": 43.253412774792665,
"learning_rate": 7.930599050409132e-06,
"loss": 0.4237,
"step": 4100
},
{
"epoch": 0.20757051589606323,
"grad_norm": 4.161811983252057,
"learning_rate": 7.925548035155068e-06,
"loss": 0.426,
"step": 4110
},
{
"epoch": 0.20807555364763516,
"grad_norm": 4.07148032877867,
"learning_rate": 7.920497019901002e-06,
"loss": 0.4046,
"step": 4120
},
{
"epoch": 0.2085805913992071,
"grad_norm": 2.980398901316733,
"learning_rate": 7.915446004646934e-06,
"loss": 0.4018,
"step": 4130
},
{
"epoch": 0.20908562915077902,
"grad_norm": 12.570414912224734,
"learning_rate": 7.910394989392868e-06,
"loss": 0.4249,
"step": 4140
},
{
"epoch": 0.20959066690235095,
"grad_norm": 4.296760517181418,
"learning_rate": 7.905343974138802e-06,
"loss": 0.4134,
"step": 4150
},
{
"epoch": 0.21009570465392288,
"grad_norm": 3.8924058247405795,
"learning_rate": 7.900292958884737e-06,
"loss": 0.4184,
"step": 4160
},
{
"epoch": 0.2106007424054948,
"grad_norm": 5.074708812193615,
"learning_rate": 7.89524194363067e-06,
"loss": 0.4326,
"step": 4170
},
{
"epoch": 0.21110578015706674,
"grad_norm": 4.094894279147386,
"learning_rate": 7.890190928376605e-06,
"loss": 0.427,
"step": 4180
},
{
"epoch": 0.21161081790863867,
"grad_norm": 7.0180188428328245,
"learning_rate": 7.885139913122539e-06,
"loss": 0.4434,
"step": 4190
},
{
"epoch": 0.2121158556602106,
"grad_norm": 3.1522461269078854,
"learning_rate": 7.880088897868471e-06,
"loss": 0.4118,
"step": 4200
},
{
"epoch": 0.21262089341178253,
"grad_norm": 3.1387724966942048,
"learning_rate": 7.875037882614405e-06,
"loss": 0.4188,
"step": 4210
},
{
"epoch": 0.21312593116335446,
"grad_norm": 2.6776667573826005,
"learning_rate": 7.86998686736034e-06,
"loss": 0.432,
"step": 4220
},
{
"epoch": 0.2136309689149264,
"grad_norm": 3.6637419491007046,
"learning_rate": 7.864935852106274e-06,
"loss": 0.435,
"step": 4230
},
{
"epoch": 0.21413600666649832,
"grad_norm": 5.882360941042325,
"learning_rate": 7.859884836852208e-06,
"loss": 0.4308,
"step": 4240
},
{
"epoch": 0.21464104441807025,
"grad_norm": 8.010164762562015,
"learning_rate": 7.854833821598142e-06,
"loss": 0.4035,
"step": 4250
},
{
"epoch": 0.21514608216964218,
"grad_norm": 2.940352809589755,
"learning_rate": 7.849782806344076e-06,
"loss": 0.4045,
"step": 4260
},
{
"epoch": 0.2156511199212141,
"grad_norm": 5.768043762782633,
"learning_rate": 7.84473179109001e-06,
"loss": 0.4106,
"step": 4270
},
{
"epoch": 0.21615615767278604,
"grad_norm": 4.169886235036406,
"learning_rate": 7.839680775835944e-06,
"loss": 0.4238,
"step": 4280
},
{
"epoch": 0.21666119542435797,
"grad_norm": 3.249720384522653,
"learning_rate": 7.834629760581878e-06,
"loss": 0.3969,
"step": 4290
},
{
"epoch": 0.2171662331759299,
"grad_norm": 5.065976709307023,
"learning_rate": 7.829578745327811e-06,
"loss": 0.4069,
"step": 4300
},
{
"epoch": 0.21767127092750183,
"grad_norm": 3.9668832894751107,
"learning_rate": 7.824527730073745e-06,
"loss": 0.4125,
"step": 4310
},
{
"epoch": 0.21817630867907375,
"grad_norm": 3.4619845168591796,
"learning_rate": 7.819476714819679e-06,
"loss": 0.4105,
"step": 4320
},
{
"epoch": 0.21868134643064568,
"grad_norm": 3.886033142263623,
"learning_rate": 7.814425699565613e-06,
"loss": 0.4151,
"step": 4330
},
{
"epoch": 0.21918638418221761,
"grad_norm": 3.5781821573741994,
"learning_rate": 7.809374684311547e-06,
"loss": 0.3923,
"step": 4340
},
{
"epoch": 0.21969142193378954,
"grad_norm": 2.9767702411423325,
"learning_rate": 7.804323669057482e-06,
"loss": 0.3827,
"step": 4350
},
{
"epoch": 0.22019645968536147,
"grad_norm": 3.3595860629684577,
"learning_rate": 7.799272653803414e-06,
"loss": 0.4103,
"step": 4360
},
{
"epoch": 0.2207014974369334,
"grad_norm": 2.0952276159055154,
"learning_rate": 7.794221638549348e-06,
"loss": 0.4171,
"step": 4370
},
{
"epoch": 0.22120653518850533,
"grad_norm": 3.1915802413191376,
"learning_rate": 7.789170623295284e-06,
"loss": 0.4273,
"step": 4380
},
{
"epoch": 0.22171157294007726,
"grad_norm": 1.8629666469771935,
"learning_rate": 7.784119608041218e-06,
"loss": 0.4226,
"step": 4390
},
{
"epoch": 0.2222166106916492,
"grad_norm": 4.627481223873417,
"learning_rate": 7.77906859278715e-06,
"loss": 0.3979,
"step": 4400
},
{
"epoch": 0.22272164844322112,
"grad_norm": 3.4768855391600084,
"learning_rate": 7.774017577533085e-06,
"loss": 0.413,
"step": 4410
},
{
"epoch": 0.22322668619479305,
"grad_norm": 2.8907573807850673,
"learning_rate": 7.768966562279019e-06,
"loss": 0.4201,
"step": 4420
},
{
"epoch": 0.22373172394636498,
"grad_norm": 5.5052911721090805,
"learning_rate": 7.763915547024953e-06,
"loss": 0.4322,
"step": 4430
},
{
"epoch": 0.2242367616979369,
"grad_norm": 2.171757782442608,
"learning_rate": 7.758864531770887e-06,
"loss": 0.4341,
"step": 4440
},
{
"epoch": 0.22474179944950884,
"grad_norm": 2.092006999366198,
"learning_rate": 7.753813516516821e-06,
"loss": 0.422,
"step": 4450
},
{
"epoch": 0.22524683720108077,
"grad_norm": 6.02493579048235,
"learning_rate": 7.748762501262753e-06,
"loss": 0.4105,
"step": 4460
},
{
"epoch": 0.2257518749526527,
"grad_norm": 3.705631561875418,
"learning_rate": 7.743711486008688e-06,
"loss": 0.4081,
"step": 4470
},
{
"epoch": 0.22625691270422463,
"grad_norm": 4.191020612596744,
"learning_rate": 7.738660470754622e-06,
"loss": 0.402,
"step": 4480
},
{
"epoch": 0.22676195045579656,
"grad_norm": 2.9926161263513484,
"learning_rate": 7.733609455500556e-06,
"loss": 0.4105,
"step": 4490
},
{
"epoch": 0.22726698820736851,
"grad_norm": 2.7804371886771304,
"learning_rate": 7.72855844024649e-06,
"loss": 0.4004,
"step": 4500
},
{
"epoch": 0.22777202595894044,
"grad_norm": 3.3532410934862007,
"learning_rate": 7.723507424992424e-06,
"loss": 0.3982,
"step": 4510
},
{
"epoch": 0.22827706371051237,
"grad_norm": 5.67085098197258,
"learning_rate": 7.718456409738358e-06,
"loss": 0.4244,
"step": 4520
},
{
"epoch": 0.2287821014620843,
"grad_norm": 6.390172943127481,
"learning_rate": 7.713405394484292e-06,
"loss": 0.405,
"step": 4530
},
{
"epoch": 0.22928713921365623,
"grad_norm": 2.0493376647118535,
"learning_rate": 7.708354379230226e-06,
"loss": 0.4091,
"step": 4540
},
{
"epoch": 0.22979217696522816,
"grad_norm": 3.2998640142236373,
"learning_rate": 7.70330336397616e-06,
"loss": 0.4116,
"step": 4550
},
{
"epoch": 0.2302972147168001,
"grad_norm": 9.39006495464318,
"learning_rate": 7.698252348722095e-06,
"loss": 0.4047,
"step": 4560
},
{
"epoch": 0.23080225246837202,
"grad_norm": 5.551570211137529,
"learning_rate": 7.693201333468027e-06,
"loss": 0.408,
"step": 4570
},
{
"epoch": 0.23130729021994395,
"grad_norm": 4.557076032338838,
"learning_rate": 7.688150318213961e-06,
"loss": 0.3971,
"step": 4580
},
{
"epoch": 0.23181232797151588,
"grad_norm": 2.606206409240519,
"learning_rate": 7.683099302959895e-06,
"loss": 0.4178,
"step": 4590
},
{
"epoch": 0.2323173657230878,
"grad_norm": 4.447142719835132,
"learning_rate": 7.67804828770583e-06,
"loss": 0.394,
"step": 4600
},
{
"epoch": 0.23282240347465974,
"grad_norm": 3.0541458976753204,
"learning_rate": 7.672997272451764e-06,
"loss": 0.425,
"step": 4610
},
{
"epoch": 0.23332744122623167,
"grad_norm": 2.176856844484498,
"learning_rate": 7.667946257197698e-06,
"loss": 0.4028,
"step": 4620
},
{
"epoch": 0.2338324789778036,
"grad_norm": 2.308455454963439,
"learning_rate": 7.66289524194363e-06,
"loss": 0.4141,
"step": 4630
},
{
"epoch": 0.23433751672937553,
"grad_norm": 2.112712712332131,
"learning_rate": 7.657844226689564e-06,
"loss": 0.419,
"step": 4640
},
{
"epoch": 0.23484255448094746,
"grad_norm": 3.2748057827058634,
"learning_rate": 7.6527932114355e-06,
"loss": 0.4122,
"step": 4650
},
{
"epoch": 0.2353475922325194,
"grad_norm": 4.614165718449686,
"learning_rate": 7.647742196181434e-06,
"loss": 0.4181,
"step": 4660
},
{
"epoch": 0.23585262998409132,
"grad_norm": 3.4633789585985038,
"learning_rate": 7.642691180927367e-06,
"loss": 0.4134,
"step": 4670
},
{
"epoch": 0.23635766773566325,
"grad_norm": 2.459474386028591,
"learning_rate": 7.6376401656733e-06,
"loss": 0.4045,
"step": 4680
},
{
"epoch": 0.23686270548723518,
"grad_norm": 4.467572890592401,
"learning_rate": 7.632589150419235e-06,
"loss": 0.4108,
"step": 4690
},
{
"epoch": 0.2373677432388071,
"grad_norm": 3.2733674833799085,
"learning_rate": 7.627538135165169e-06,
"loss": 0.4247,
"step": 4700
},
{
"epoch": 0.23787278099037903,
"grad_norm": 2.5012350340127583,
"learning_rate": 7.622487119911103e-06,
"loss": 0.4027,
"step": 4710
},
{
"epoch": 0.23837781874195096,
"grad_norm": 2.918885002659697,
"learning_rate": 7.6174361046570365e-06,
"loss": 0.3961,
"step": 4720
},
{
"epoch": 0.2388828564935229,
"grad_norm": 2.151448240091896,
"learning_rate": 7.612385089402971e-06,
"loss": 0.397,
"step": 4730
},
{
"epoch": 0.23938789424509482,
"grad_norm": 3.645676075183417,
"learning_rate": 7.607334074148905e-06,
"loss": 0.4042,
"step": 4740
},
{
"epoch": 0.23989293199666675,
"grad_norm": 4.7934119111079365,
"learning_rate": 7.602283058894838e-06,
"loss": 0.4199,
"step": 4750
},
{
"epoch": 0.24039796974823868,
"grad_norm": 3.024756725193816,
"learning_rate": 7.597232043640772e-06,
"loss": 0.4214,
"step": 4760
},
{
"epoch": 0.2409030074998106,
"grad_norm": 3.8298473971048552,
"learning_rate": 7.592181028386706e-06,
"loss": 0.3971,
"step": 4770
},
{
"epoch": 0.24140804525138254,
"grad_norm": 3.6045455173768897,
"learning_rate": 7.5871300131326395e-06,
"loss": 0.41,
"step": 4780
},
{
"epoch": 0.24191308300295447,
"grad_norm": 2.4793807629242686,
"learning_rate": 7.5820789978785745e-06,
"loss": 0.4027,
"step": 4790
},
{
"epoch": 0.2424181207545264,
"grad_norm": 2.7349340937466837,
"learning_rate": 7.577027982624509e-06,
"loss": 0.4354,
"step": 4800
},
{
"epoch": 0.24292315850609833,
"grad_norm": 3.2738776137303436,
"learning_rate": 7.571976967370443e-06,
"loss": 0.4472,
"step": 4810
},
{
"epoch": 0.24342819625767026,
"grad_norm": 13.785461532124051,
"learning_rate": 7.566925952116376e-06,
"loss": 0.3969,
"step": 4820
},
{
"epoch": 0.2439332340092422,
"grad_norm": 3.313726734189114,
"learning_rate": 7.56187493686231e-06,
"loss": 0.4007,
"step": 4830
},
{
"epoch": 0.24443827176081412,
"grad_norm": 4.099815087006132,
"learning_rate": 7.556823921608244e-06,
"loss": 0.4099,
"step": 4840
},
{
"epoch": 0.24494330951238605,
"grad_norm": 4.723031766050033,
"learning_rate": 7.5517729063541775e-06,
"loss": 0.4204,
"step": 4850
},
{
"epoch": 0.24544834726395798,
"grad_norm": 3.7013488115087085,
"learning_rate": 7.546721891100112e-06,
"loss": 0.3928,
"step": 4860
},
{
"epoch": 0.2459533850155299,
"grad_norm": 7.683672740752602,
"learning_rate": 7.541670875846046e-06,
"loss": 0.4028,
"step": 4870
},
{
"epoch": 0.24645842276710184,
"grad_norm": 5.671010001370461,
"learning_rate": 7.536619860591979e-06,
"loss": 0.433,
"step": 4880
},
{
"epoch": 0.24696346051867377,
"grad_norm": 9.869009780517741,
"learning_rate": 7.531568845337913e-06,
"loss": 0.3989,
"step": 4890
},
{
"epoch": 0.2474684982702457,
"grad_norm": 5.709197913641078,
"learning_rate": 7.526517830083847e-06,
"loss": 0.4031,
"step": 4900
},
{
"epoch": 0.24797353602181763,
"grad_norm": 4.33540159587482,
"learning_rate": 7.521466814829781e-06,
"loss": 0.4062,
"step": 4910
},
{
"epoch": 0.24847857377338955,
"grad_norm": 8.109361669653662,
"learning_rate": 7.5164157995757156e-06,
"loss": 0.3972,
"step": 4920
},
{
"epoch": 0.24898361152496148,
"grad_norm": 13.57377976771627,
"learning_rate": 7.51136478432165e-06,
"loss": 0.4067,
"step": 4930
},
{
"epoch": 0.24948864927653341,
"grad_norm": 4.077593295992246,
"learning_rate": 7.506313769067584e-06,
"loss": 0.3992,
"step": 4940
},
{
"epoch": 0.24999368702810534,
"grad_norm": 6.370437009913983,
"learning_rate": 7.501262753813517e-06,
"loss": 0.41,
"step": 4950
},
{
"epoch": 0.2504987247796773,
"grad_norm": 4.917853551887297,
"learning_rate": 7.496211738559451e-06,
"loss": 0.3967,
"step": 4960
},
{
"epoch": 0.25100376253124923,
"grad_norm": 8.018747776559712,
"learning_rate": 7.491160723305385e-06,
"loss": 0.4174,
"step": 4970
},
{
"epoch": 0.25150880028282113,
"grad_norm": 5.56863245620068,
"learning_rate": 7.486109708051319e-06,
"loss": 0.4161,
"step": 4980
},
{
"epoch": 0.2520138380343931,
"grad_norm": 3.5246503817814405,
"learning_rate": 7.481058692797253e-06,
"loss": 0.4161,
"step": 4990
},
{
"epoch": 0.252518875785965,
"grad_norm": 8.805921139909751,
"learning_rate": 7.476007677543187e-06,
"loss": 0.4045,
"step": 5000
},
{
"epoch": 0.25302391353753695,
"grad_norm": 4.998987522917178,
"learning_rate": 7.47095666228912e-06,
"loss": 0.3859,
"step": 5010
},
{
"epoch": 0.25352895128910885,
"grad_norm": 4.00150580245438,
"learning_rate": 7.465905647035054e-06,
"loss": 0.4164,
"step": 5020
},
{
"epoch": 0.2540339890406808,
"grad_norm": 3.706340372471242,
"learning_rate": 7.460854631780988e-06,
"loss": 0.4176,
"step": 5030
},
{
"epoch": 0.2545390267922527,
"grad_norm": 4.998624600906844,
"learning_rate": 7.4558036165269225e-06,
"loss": 0.41,
"step": 5040
},
{
"epoch": 0.25504406454382467,
"grad_norm": 8.31884957399422,
"learning_rate": 7.450752601272856e-06,
"loss": 0.3998,
"step": 5050
},
{
"epoch": 0.25554910229539657,
"grad_norm": 3.4901102806237314,
"learning_rate": 7.445701586018791e-06,
"loss": 0.3997,
"step": 5060
},
{
"epoch": 0.2560541400469685,
"grad_norm": 4.645395770689467,
"learning_rate": 7.440650570764725e-06,
"loss": 0.4083,
"step": 5070
},
{
"epoch": 0.2565591777985404,
"grad_norm": 6.384417075428531,
"learning_rate": 7.435599555510659e-06,
"loss": 0.4092,
"step": 5080
},
{
"epoch": 0.2570642155501124,
"grad_norm": 2.4454349649277694,
"learning_rate": 7.430548540256592e-06,
"loss": 0.3848,
"step": 5090
},
{
"epoch": 0.2575692533016843,
"grad_norm": 3.558542444645485,
"learning_rate": 7.425497525002526e-06,
"loss": 0.4088,
"step": 5100
},
{
"epoch": 0.25807429105325624,
"grad_norm": 3.0244806698758713,
"learning_rate": 7.4204465097484605e-06,
"loss": 0.4039,
"step": 5110
},
{
"epoch": 0.25857932880482815,
"grad_norm": 3.8966266585093767,
"learning_rate": 7.415395494494394e-06,
"loss": 0.4235,
"step": 5120
},
{
"epoch": 0.2590843665564001,
"grad_norm": 5.6318294513336555,
"learning_rate": 7.410344479240328e-06,
"loss": 0.4037,
"step": 5130
},
{
"epoch": 0.259589404307972,
"grad_norm": 3.0998441234985714,
"learning_rate": 7.405293463986262e-06,
"loss": 0.3978,
"step": 5140
},
{
"epoch": 0.26009444205954396,
"grad_norm": 2.21396747445095,
"learning_rate": 7.400242448732195e-06,
"loss": 0.4145,
"step": 5150
},
{
"epoch": 0.26059947981111586,
"grad_norm": 4.1662815020101585,
"learning_rate": 7.3951914334781294e-06,
"loss": 0.3868,
"step": 5160
},
{
"epoch": 0.2611045175626878,
"grad_norm": 12.395354056627705,
"learning_rate": 7.3901404182240636e-06,
"loss": 0.4154,
"step": 5170
},
{
"epoch": 0.2616095553142597,
"grad_norm": 3.1142309464667712,
"learning_rate": 7.385089402969997e-06,
"loss": 0.413,
"step": 5180
},
{
"epoch": 0.2621145930658317,
"grad_norm": 4.513387929199787,
"learning_rate": 7.380038387715931e-06,
"loss": 0.3921,
"step": 5190
},
{
"epoch": 0.2626196308174036,
"grad_norm": 4.7697729405684735,
"learning_rate": 7.374987372461866e-06,
"loss": 0.4012,
"step": 5200
},
{
"epoch": 0.26312466856897554,
"grad_norm": 4.893393941433008,
"learning_rate": 7.3699363572078e-06,
"loss": 0.4186,
"step": 5210
},
{
"epoch": 0.26362970632054744,
"grad_norm": 3.831293276797052,
"learning_rate": 7.364885341953733e-06,
"loss": 0.4226,
"step": 5220
},
{
"epoch": 0.2641347440721194,
"grad_norm": 9.39587893739541,
"learning_rate": 7.3598343266996675e-06,
"loss": 0.4223,
"step": 5230
},
{
"epoch": 0.2646397818236913,
"grad_norm": 2.5508906516086913,
"learning_rate": 7.354783311445602e-06,
"loss": 0.4204,
"step": 5240
},
{
"epoch": 0.26514481957526326,
"grad_norm": 1.9529316193965132,
"learning_rate": 7.349732296191535e-06,
"loss": 0.4137,
"step": 5250
},
{
"epoch": 0.26564985732683516,
"grad_norm": 2.2836631555730147,
"learning_rate": 7.344681280937469e-06,
"loss": 0.4272,
"step": 5260
},
{
"epoch": 0.2661548950784071,
"grad_norm": 2.740870228644112,
"learning_rate": 7.339630265683403e-06,
"loss": 0.4042,
"step": 5270
},
{
"epoch": 0.266659932829979,
"grad_norm": 2.8335765045366057,
"learning_rate": 7.334579250429336e-06,
"loss": 0.4042,
"step": 5280
},
{
"epoch": 0.267164970581551,
"grad_norm": 6.021567811733276,
"learning_rate": 7.3295282351752705e-06,
"loss": 0.4154,
"step": 5290
},
{
"epoch": 0.2676700083331229,
"grad_norm": 3.8900529078388475,
"learning_rate": 7.324477219921205e-06,
"loss": 0.4066,
"step": 5300
},
{
"epoch": 0.26817504608469483,
"grad_norm": 4.107751019837596,
"learning_rate": 7.319426204667138e-06,
"loss": 0.4314,
"step": 5310
},
{
"epoch": 0.26868008383626674,
"grad_norm": 3.83489471259606,
"learning_rate": 7.314375189413072e-06,
"loss": 0.4289,
"step": 5320
},
{
"epoch": 0.2691851215878387,
"grad_norm": 4.1927101461017955,
"learning_rate": 7.309324174159007e-06,
"loss": 0.4104,
"step": 5330
},
{
"epoch": 0.2696901593394106,
"grad_norm": 4.40200271099351,
"learning_rate": 7.304273158904941e-06,
"loss": 0.4117,
"step": 5340
},
{
"epoch": 0.27019519709098255,
"grad_norm": 2.5719446012019347,
"learning_rate": 7.299222143650874e-06,
"loss": 0.4172,
"step": 5350
},
{
"epoch": 0.27070023484255445,
"grad_norm": 3.550169709846441,
"learning_rate": 7.2941711283968085e-06,
"loss": 0.3961,
"step": 5360
},
{
"epoch": 0.2712052725941264,
"grad_norm": 5.9366850534425994,
"learning_rate": 7.289120113142743e-06,
"loss": 0.4092,
"step": 5370
},
{
"epoch": 0.2717103103456983,
"grad_norm": 2.2429091940544756,
"learning_rate": 7.284069097888676e-06,
"loss": 0.3911,
"step": 5380
},
{
"epoch": 0.27221534809727027,
"grad_norm": 3.417557725862848,
"learning_rate": 7.27901808263461e-06,
"loss": 0.404,
"step": 5390
},
{
"epoch": 0.27272038584884223,
"grad_norm": 2.724964100715556,
"learning_rate": 7.273967067380544e-06,
"loss": 0.3954,
"step": 5400
},
{
"epoch": 0.27322542360041413,
"grad_norm": 2.856161247602667,
"learning_rate": 7.268916052126478e-06,
"loss": 0.4166,
"step": 5410
},
{
"epoch": 0.2737304613519861,
"grad_norm": 3.6154589918708724,
"learning_rate": 7.2638650368724116e-06,
"loss": 0.4194,
"step": 5420
},
{
"epoch": 0.274235499103558,
"grad_norm": 4.796628055616583,
"learning_rate": 7.258814021618346e-06,
"loss": 0.4047,
"step": 5430
},
{
"epoch": 0.27474053685512995,
"grad_norm": 1.8189441498839218,
"learning_rate": 7.25376300636428e-06,
"loss": 0.427,
"step": 5440
},
{
"epoch": 0.27524557460670185,
"grad_norm": 7.024511830423675,
"learning_rate": 7.248711991110213e-06,
"loss": 0.4046,
"step": 5450
},
{
"epoch": 0.2757506123582738,
"grad_norm": 3.5667517176775423,
"learning_rate": 7.243660975856147e-06,
"loss": 0.4104,
"step": 5460
},
{
"epoch": 0.2762556501098457,
"grad_norm": 2.9842087732377456,
"learning_rate": 7.238609960602082e-06,
"loss": 0.3865,
"step": 5470
},
{
"epoch": 0.27676068786141766,
"grad_norm": 3.138211814542453,
"learning_rate": 7.233558945348016e-06,
"loss": 0.421,
"step": 5480
},
{
"epoch": 0.27726572561298957,
"grad_norm": 3.7899763794613603,
"learning_rate": 7.22850793009395e-06,
"loss": 0.4211,
"step": 5490
},
{
"epoch": 0.2777707633645615,
"grad_norm": 6.302295488602417,
"learning_rate": 7.223456914839884e-06,
"loss": 0.414,
"step": 5500
},
{
"epoch": 0.2782758011161334,
"grad_norm": 10.35490362594059,
"learning_rate": 7.218405899585818e-06,
"loss": 0.416,
"step": 5510
},
{
"epoch": 0.2787808388677054,
"grad_norm": 6.310469266434071,
"learning_rate": 7.213354884331751e-06,
"loss": 0.4064,
"step": 5520
},
{
"epoch": 0.2792858766192773,
"grad_norm": 16.515977602019493,
"learning_rate": 7.208303869077685e-06,
"loss": 0.4079,
"step": 5530
},
{
"epoch": 0.27979091437084924,
"grad_norm": 4.823535989192853,
"learning_rate": 7.203252853823619e-06,
"loss": 0.4057,
"step": 5540
},
{
"epoch": 0.28029595212242114,
"grad_norm": 5.510784263870791,
"learning_rate": 7.198201838569553e-06,
"loss": 0.4037,
"step": 5550
},
{
"epoch": 0.2808009898739931,
"grad_norm": 6.358367429720183,
"learning_rate": 7.193150823315487e-06,
"loss": 0.4229,
"step": 5560
},
{
"epoch": 0.281306027625565,
"grad_norm": 6.411947776097414,
"learning_rate": 7.188099808061421e-06,
"loss": 0.4067,
"step": 5570
},
{
"epoch": 0.28181106537713696,
"grad_norm": 7.711569114064788,
"learning_rate": 7.183048792807354e-06,
"loss": 0.4033,
"step": 5580
},
{
"epoch": 0.28231610312870886,
"grad_norm": 7.577084197751842,
"learning_rate": 7.177997777553288e-06,
"loss": 0.4193,
"step": 5590
},
{
"epoch": 0.2828211408802808,
"grad_norm": 8.359668853331197,
"learning_rate": 7.172946762299223e-06,
"loss": 0.4075,
"step": 5600
},
{
"epoch": 0.2833261786318527,
"grad_norm": 3.151405334238333,
"learning_rate": 7.167895747045157e-06,
"loss": 0.3968,
"step": 5610
},
{
"epoch": 0.2838312163834247,
"grad_norm": 5.148673338465559,
"learning_rate": 7.162844731791091e-06,
"loss": 0.4084,
"step": 5620
},
{
"epoch": 0.2843362541349966,
"grad_norm": 4.221476129767122,
"learning_rate": 7.157793716537025e-06,
"loss": 0.4171,
"step": 5630
},
{
"epoch": 0.28484129188656854,
"grad_norm": 13.100880512181337,
"learning_rate": 7.152742701282959e-06,
"loss": 0.4153,
"step": 5640
},
{
"epoch": 0.28534632963814044,
"grad_norm": 4.5479841682511735,
"learning_rate": 7.147691686028892e-06,
"loss": 0.4008,
"step": 5650
},
{
"epoch": 0.2858513673897124,
"grad_norm": 12.774978173115935,
"learning_rate": 7.142640670774826e-06,
"loss": 0.4081,
"step": 5660
},
{
"epoch": 0.2863564051412843,
"grad_norm": 4.74811206992399,
"learning_rate": 7.1375896555207604e-06,
"loss": 0.4111,
"step": 5670
},
{
"epoch": 0.28686144289285626,
"grad_norm": 6.322992937888857,
"learning_rate": 7.132538640266694e-06,
"loss": 0.4147,
"step": 5680
},
{
"epoch": 0.28736648064442816,
"grad_norm": 2.3879730307069913,
"learning_rate": 7.127487625012628e-06,
"loss": 0.413,
"step": 5690
},
{
"epoch": 0.2878715183960001,
"grad_norm": 9.78190300595279,
"learning_rate": 7.122436609758562e-06,
"loss": 0.4082,
"step": 5700
},
{
"epoch": 0.288376556147572,
"grad_norm": 2.9229120102524546,
"learning_rate": 7.117385594504495e-06,
"loss": 0.3991,
"step": 5710
},
{
"epoch": 0.288881593899144,
"grad_norm": 3.0629100552949367,
"learning_rate": 7.112334579250429e-06,
"loss": 0.4219,
"step": 5720
},
{
"epoch": 0.2893866316507159,
"grad_norm": 4.461418623396228,
"learning_rate": 7.1072835639963635e-06,
"loss": 0.3935,
"step": 5730
},
{
"epoch": 0.28989166940228783,
"grad_norm": 4.158346788895872,
"learning_rate": 7.1022325487422985e-06,
"loss": 0.4055,
"step": 5740
},
{
"epoch": 0.29039670715385973,
"grad_norm": 4.423297022005039,
"learning_rate": 7.097181533488232e-06,
"loss": 0.4056,
"step": 5750
},
{
"epoch": 0.2909017449054317,
"grad_norm": 9.748691220694795,
"learning_rate": 7.092130518234166e-06,
"loss": 0.4163,
"step": 5760
},
{
"epoch": 0.2914067826570036,
"grad_norm": 5.583094724720717,
"learning_rate": 7.0870795029801e-06,
"loss": 0.404,
"step": 5770
},
{
"epoch": 0.29191182040857555,
"grad_norm": 5.4361132596130375,
"learning_rate": 7.082028487726034e-06,
"loss": 0.4077,
"step": 5780
},
{
"epoch": 0.29241685816014745,
"grad_norm": 5.4795736369075225,
"learning_rate": 7.076977472471967e-06,
"loss": 0.385,
"step": 5790
},
{
"epoch": 0.2929218959117194,
"grad_norm": 6.295238304978109,
"learning_rate": 7.0719264572179015e-06,
"loss": 0.3865,
"step": 5800
},
{
"epoch": 0.2934269336632913,
"grad_norm": 6.992767843962523,
"learning_rate": 7.066875441963836e-06,
"loss": 0.4016,
"step": 5810
},
{
"epoch": 0.29393197141486327,
"grad_norm": 2.2081931951077762,
"learning_rate": 7.061824426709769e-06,
"loss": 0.4177,
"step": 5820
},
{
"epoch": 0.29443700916643517,
"grad_norm": 2.6034285969774755,
"learning_rate": 7.056773411455703e-06,
"loss": 0.4259,
"step": 5830
},
{
"epoch": 0.29494204691800713,
"grad_norm": 3.409449485576021,
"learning_rate": 7.051722396201637e-06,
"loss": 0.4033,
"step": 5840
},
{
"epoch": 0.29544708466957903,
"grad_norm": 4.0667424482713415,
"learning_rate": 7.04667138094757e-06,
"loss": 0.3996,
"step": 5850
},
{
"epoch": 0.295952122421151,
"grad_norm": 10.124949019281052,
"learning_rate": 7.0416203656935045e-06,
"loss": 0.4006,
"step": 5860
},
{
"epoch": 0.2964571601727229,
"grad_norm": 14.286058521059143,
"learning_rate": 7.0365693504394395e-06,
"loss": 0.4095,
"step": 5870
},
{
"epoch": 0.29696219792429485,
"grad_norm": 6.367565010187276,
"learning_rate": 7.031518335185374e-06,
"loss": 0.3838,
"step": 5880
},
{
"epoch": 0.29746723567586675,
"grad_norm": 5.583399625565326,
"learning_rate": 7.026467319931307e-06,
"loss": 0.4117,
"step": 5890
},
{
"epoch": 0.2979722734274387,
"grad_norm": 3.0731137084379534,
"learning_rate": 7.021416304677241e-06,
"loss": 0.4236,
"step": 5900
},
{
"epoch": 0.2984773111790106,
"grad_norm": 3.0663411110006646,
"learning_rate": 7.016365289423175e-06,
"loss": 0.4053,
"step": 5910
},
{
"epoch": 0.29898234893058256,
"grad_norm": 2.750574161212685,
"learning_rate": 7.0113142741691084e-06,
"loss": 0.4068,
"step": 5920
},
{
"epoch": 0.29948738668215447,
"grad_norm": 6.009688968198948,
"learning_rate": 7.0062632589150426e-06,
"loss": 0.3836,
"step": 5930
},
{
"epoch": 0.2999924244337264,
"grad_norm": 2.9753632251149047,
"learning_rate": 7.001212243660977e-06,
"loss": 0.4004,
"step": 5940
},
{
"epoch": 0.3004974621852983,
"grad_norm": 4.208610201729938,
"learning_rate": 6.99616122840691e-06,
"loss": 0.4076,
"step": 5950
},
{
"epoch": 0.3010024999368703,
"grad_norm": 4.7205159244344435,
"learning_rate": 6.991110213152844e-06,
"loss": 0.4169,
"step": 5960
},
{
"epoch": 0.3015075376884422,
"grad_norm": 5.1888963188216914,
"learning_rate": 6.986059197898778e-06,
"loss": 0.4131,
"step": 5970
},
{
"epoch": 0.30201257544001414,
"grad_norm": 5.94229402465264,
"learning_rate": 6.9810081826447115e-06,
"loss": 0.3973,
"step": 5980
},
{
"epoch": 0.30251761319158604,
"grad_norm": 5.525705257458754,
"learning_rate": 6.975957167390646e-06,
"loss": 0.4055,
"step": 5990
},
{
"epoch": 0.303022650943158,
"grad_norm": 12.459125129727365,
"learning_rate": 6.97090615213658e-06,
"loss": 0.3971,
"step": 6000
},
{
"epoch": 0.30352768869472996,
"grad_norm": 31.656872346789477,
"learning_rate": 6.965855136882515e-06,
"loss": 0.407,
"step": 6010
},
{
"epoch": 0.30403272644630186,
"grad_norm": 9.639606695045442,
"learning_rate": 6.960804121628448e-06,
"loss": 0.3863,
"step": 6020
},
{
"epoch": 0.3045377641978738,
"grad_norm": 7.035557345160716,
"learning_rate": 6.955753106374382e-06,
"loss": 0.3869,
"step": 6030
},
{
"epoch": 0.3050428019494457,
"grad_norm": 4.9371125965383165,
"learning_rate": 6.950702091120316e-06,
"loss": 0.3976,
"step": 6040
},
{
"epoch": 0.3055478397010177,
"grad_norm": 3.933897990218482,
"learning_rate": 6.9456510758662495e-06,
"loss": 0.3897,
"step": 6050
},
{
"epoch": 0.3060528774525896,
"grad_norm": 3.1641621883370337,
"learning_rate": 6.940600060612184e-06,
"loss": 0.3907,
"step": 6060
},
{
"epoch": 0.30655791520416154,
"grad_norm": 8.063219971247296,
"learning_rate": 6.935549045358118e-06,
"loss": 0.4031,
"step": 6070
},
{
"epoch": 0.30706295295573344,
"grad_norm": 5.068712111699617,
"learning_rate": 6.930498030104051e-06,
"loss": 0.3669,
"step": 6080
},
{
"epoch": 0.3075679907073054,
"grad_norm": 2.659761406451919,
"learning_rate": 6.925447014849985e-06,
"loss": 0.4083,
"step": 6090
},
{
"epoch": 0.3080730284588773,
"grad_norm": 2.711900853734696,
"learning_rate": 6.920395999595919e-06,
"loss": 0.3857,
"step": 6100
},
{
"epoch": 0.30857806621044925,
"grad_norm": 3.5691023416952015,
"learning_rate": 6.9153449843418526e-06,
"loss": 0.4075,
"step": 6110
},
{
"epoch": 0.30908310396202116,
"grad_norm": 9.44528498007685,
"learning_rate": 6.910293969087787e-06,
"loss": 0.3907,
"step": 6120
},
{
"epoch": 0.3095881417135931,
"grad_norm": 6.630952946299743,
"learning_rate": 6.905242953833721e-06,
"loss": 0.4166,
"step": 6130
},
{
"epoch": 0.310093179465165,
"grad_norm": 2.240750009167658,
"learning_rate": 6.900191938579655e-06,
"loss": 0.4103,
"step": 6140
},
{
"epoch": 0.31059821721673697,
"grad_norm": 3.4596094945674865,
"learning_rate": 6.89514092332559e-06,
"loss": 0.3979,
"step": 6150
},
{
"epoch": 0.3111032549683089,
"grad_norm": 3.4198773372663984,
"learning_rate": 6.890089908071523e-06,
"loss": 0.4131,
"step": 6160
},
{
"epoch": 0.31160829271988083,
"grad_norm": 2.6133323586176163,
"learning_rate": 6.885038892817457e-06,
"loss": 0.3966,
"step": 6170
},
{
"epoch": 0.31211333047145273,
"grad_norm": 2.7898977693699667,
"learning_rate": 6.879987877563391e-06,
"loss": 0.4005,
"step": 6180
},
{
"epoch": 0.3126183682230247,
"grad_norm": 15.014756094180136,
"learning_rate": 6.874936862309325e-06,
"loss": 0.417,
"step": 6190
},
{
"epoch": 0.3131234059745966,
"grad_norm": 3.877839859697261,
"learning_rate": 6.869885847055259e-06,
"loss": 0.3967,
"step": 6200
},
{
"epoch": 0.31362844372616855,
"grad_norm": 3.642380487638742,
"learning_rate": 6.864834831801193e-06,
"loss": 0.3872,
"step": 6210
},
{
"epoch": 0.31413348147774045,
"grad_norm": 6.1392539662766055,
"learning_rate": 6.859783816547126e-06,
"loss": 0.3969,
"step": 6220
},
{
"epoch": 0.3146385192293124,
"grad_norm": 2.8146690736323206,
"learning_rate": 6.85473280129306e-06,
"loss": 0.4129,
"step": 6230
},
{
"epoch": 0.3151435569808843,
"grad_norm": 4.058997537956916,
"learning_rate": 6.8496817860389945e-06,
"loss": 0.4015,
"step": 6240
},
{
"epoch": 0.31564859473245627,
"grad_norm": 5.742551004803781,
"learning_rate": 6.844630770784928e-06,
"loss": 0.3953,
"step": 6250
},
{
"epoch": 0.31615363248402817,
"grad_norm": 15.598787015706185,
"learning_rate": 6.839579755530862e-06,
"loss": 0.4031,
"step": 6260
},
{
"epoch": 0.3166586702356001,
"grad_norm": 5.020417282923178,
"learning_rate": 6.834528740276796e-06,
"loss": 0.4035,
"step": 6270
},
{
"epoch": 0.317163707987172,
"grad_norm": 4.667007823216816,
"learning_rate": 6.829477725022731e-06,
"loss": 0.406,
"step": 6280
},
{
"epoch": 0.317668745738744,
"grad_norm": 7.463699952057543,
"learning_rate": 6.824426709768664e-06,
"loss": 0.4108,
"step": 6290
},
{
"epoch": 0.3181737834903159,
"grad_norm": 4.262657782964267,
"learning_rate": 6.819375694514598e-06,
"loss": 0.3974,
"step": 6300
},
{
"epoch": 0.31867882124188784,
"grad_norm": 3.3264387869397862,
"learning_rate": 6.8143246792605325e-06,
"loss": 0.3952,
"step": 6310
},
{
"epoch": 0.31918385899345975,
"grad_norm": 12.90071142768197,
"learning_rate": 6.809273664006466e-06,
"loss": 0.3821,
"step": 6320
},
{
"epoch": 0.3196888967450317,
"grad_norm": 6.103897285160824,
"learning_rate": 6.8042226487524e-06,
"loss": 0.387,
"step": 6330
},
{
"epoch": 0.3201939344966036,
"grad_norm": 5.664255551154975,
"learning_rate": 6.799171633498334e-06,
"loss": 0.3939,
"step": 6340
},
{
"epoch": 0.32069897224817556,
"grad_norm": 3.9983006490372746,
"learning_rate": 6.794120618244267e-06,
"loss": 0.4026,
"step": 6350
},
{
"epoch": 0.32120400999974746,
"grad_norm": 4.502616368454671,
"learning_rate": 6.789069602990201e-06,
"loss": 0.3992,
"step": 6360
},
{
"epoch": 0.3217090477513194,
"grad_norm": 6.834315364764452,
"learning_rate": 6.7840185877361355e-06,
"loss": 0.4131,
"step": 6370
},
{
"epoch": 0.3222140855028913,
"grad_norm": 6.26718953681543,
"learning_rate": 6.778967572482069e-06,
"loss": 0.4066,
"step": 6380
},
{
"epoch": 0.3227191232544633,
"grad_norm": 7.341432352989431,
"learning_rate": 6.773916557228003e-06,
"loss": 0.3883,
"step": 6390
},
{
"epoch": 0.3232241610060352,
"grad_norm": 11.668272167662552,
"learning_rate": 6.768865541973937e-06,
"loss": 0.415,
"step": 6400
},
{
"epoch": 0.32372919875760714,
"grad_norm": 18.632730593364624,
"learning_rate": 6.76381452671987e-06,
"loss": 0.4084,
"step": 6410
},
{
"epoch": 0.32423423650917904,
"grad_norm": 9.46350511552533,
"learning_rate": 6.758763511465805e-06,
"loss": 0.3807,
"step": 6420
},
{
"epoch": 0.324739274260751,
"grad_norm": 4.410820137502103,
"learning_rate": 6.7537124962117394e-06,
"loss": 0.3844,
"step": 6430
},
{
"epoch": 0.3252443120123229,
"grad_norm": 9.07873183264923,
"learning_rate": 6.7486614809576736e-06,
"loss": 0.3902,
"step": 6440
},
{
"epoch": 0.32574934976389486,
"grad_norm": 17.65425817976251,
"learning_rate": 6.743610465703607e-06,
"loss": 0.3902,
"step": 6450
},
{
"epoch": 0.32625438751546676,
"grad_norm": 4.109418188768749,
"learning_rate": 6.738559450449541e-06,
"loss": 0.391,
"step": 6460
},
{
"epoch": 0.3267594252670387,
"grad_norm": 4.071871347874285,
"learning_rate": 6.733508435195475e-06,
"loss": 0.4067,
"step": 6470
},
{
"epoch": 0.3272644630186106,
"grad_norm": 4.833923669112235,
"learning_rate": 6.728457419941408e-06,
"loss": 0.3869,
"step": 6480
},
{
"epoch": 0.3277695007701826,
"grad_norm": 5.226542205736505,
"learning_rate": 6.7234064046873425e-06,
"loss": 0.3983,
"step": 6490
},
{
"epoch": 0.3282745385217545,
"grad_norm": 14.955066893117705,
"learning_rate": 6.718355389433277e-06,
"loss": 0.3892,
"step": 6500
},
{
"epoch": 0.32877957627332643,
"grad_norm": 9.826109172231558,
"learning_rate": 6.713304374179211e-06,
"loss": 0.3849,
"step": 6510
},
{
"epoch": 0.32928461402489834,
"grad_norm": 10.365177617347678,
"learning_rate": 6.708253358925144e-06,
"loss": 0.3909,
"step": 6520
},
{
"epoch": 0.3297896517764703,
"grad_norm": 3.902511753305759,
"learning_rate": 6.703202343671078e-06,
"loss": 0.3894,
"step": 6530
},
{
"epoch": 0.3302946895280422,
"grad_norm": 2.5389813606137,
"learning_rate": 6.698151328417012e-06,
"loss": 0.3947,
"step": 6540
},
{
"epoch": 0.33079972727961415,
"grad_norm": 3.076985557483421,
"learning_rate": 6.693100313162947e-06,
"loss": 0.385,
"step": 6550
},
{
"epoch": 0.33130476503118605,
"grad_norm": 3.9649750433361164,
"learning_rate": 6.6880492979088805e-06,
"loss": 0.3904,
"step": 6560
},
{
"epoch": 0.331809802782758,
"grad_norm": 11.994320837772984,
"learning_rate": 6.682998282654815e-06,
"loss": 0.3983,
"step": 6570
},
{
"epoch": 0.3323148405343299,
"grad_norm": 7.214021116152274,
"learning_rate": 6.677947267400749e-06,
"loss": 0.3978,
"step": 6580
},
{
"epoch": 0.33281987828590187,
"grad_norm": 2.6026929269364727,
"learning_rate": 6.672896252146682e-06,
"loss": 0.3775,
"step": 6590
},
{
"epoch": 0.33332491603747383,
"grad_norm": 13.22492686543516,
"learning_rate": 6.667845236892616e-06,
"loss": 0.3872,
"step": 6600
},
{
"epoch": 0.33382995378904573,
"grad_norm": 3.4887621572574443,
"learning_rate": 6.66279422163855e-06,
"loss": 0.3961,
"step": 6610
},
{
"epoch": 0.3343349915406177,
"grad_norm": 4.75022907961459,
"learning_rate": 6.6577432063844835e-06,
"loss": 0.4064,
"step": 6620
},
{
"epoch": 0.3348400292921896,
"grad_norm": 3.0134556897490876,
"learning_rate": 6.652692191130418e-06,
"loss": 0.4082,
"step": 6630
},
{
"epoch": 0.33534506704376155,
"grad_norm": 6.134839516437559,
"learning_rate": 6.647641175876352e-06,
"loss": 0.3807,
"step": 6640
},
{
"epoch": 0.33585010479533345,
"grad_norm": 2.967962385112729,
"learning_rate": 6.642590160622285e-06,
"loss": 0.389,
"step": 6650
},
{
"epoch": 0.3363551425469054,
"grad_norm": 2.530280284171402,
"learning_rate": 6.637539145368219e-06,
"loss": 0.3966,
"step": 6660
},
{
"epoch": 0.3368601802984773,
"grad_norm": 5.8276867252853455,
"learning_rate": 6.632488130114153e-06,
"loss": 0.3899,
"step": 6670
},
{
"epoch": 0.33736521805004926,
"grad_norm": 11.32729799857443,
"learning_rate": 6.627437114860087e-06,
"loss": 0.3977,
"step": 6680
},
{
"epoch": 0.33787025580162117,
"grad_norm": 4.636057747530336,
"learning_rate": 6.6223860996060216e-06,
"loss": 0.3879,
"step": 6690
},
{
"epoch": 0.3383752935531931,
"grad_norm": 3.644150608144144,
"learning_rate": 6.617335084351956e-06,
"loss": 0.4062,
"step": 6700
},
{
"epoch": 0.338880331304765,
"grad_norm": 9.355537955116619,
"learning_rate": 6.61228406909789e-06,
"loss": 0.3866,
"step": 6710
},
{
"epoch": 0.339385369056337,
"grad_norm": 2.4262554363736517,
"learning_rate": 6.607233053843823e-06,
"loss": 0.4244,
"step": 6720
},
{
"epoch": 0.3398904068079089,
"grad_norm": 4.581445110742895,
"learning_rate": 6.602182038589757e-06,
"loss": 0.3843,
"step": 6730
},
{
"epoch": 0.34039544455948084,
"grad_norm": 2.4362996437893902,
"learning_rate": 6.597131023335691e-06,
"loss": 0.3948,
"step": 6740
},
{
"epoch": 0.34090048231105274,
"grad_norm": 4.360969619561423,
"learning_rate": 6.592080008081625e-06,
"loss": 0.3861,
"step": 6750
},
{
"epoch": 0.3414055200626247,
"grad_norm": 3.2345867404100423,
"learning_rate": 6.587028992827559e-06,
"loss": 0.422,
"step": 6760
},
{
"epoch": 0.3419105578141966,
"grad_norm": 5.310935386991515,
"learning_rate": 6.581977977573493e-06,
"loss": 0.4137,
"step": 6770
},
{
"epoch": 0.34241559556576856,
"grad_norm": 10.95834322260054,
"learning_rate": 6.576926962319426e-06,
"loss": 0.4002,
"step": 6780
},
{
"epoch": 0.34292063331734046,
"grad_norm": 7.178923820806059,
"learning_rate": 6.57187594706536e-06,
"loss": 0.3771,
"step": 6790
},
{
"epoch": 0.3434256710689124,
"grad_norm": 16.529256071560802,
"learning_rate": 6.566824931811294e-06,
"loss": 0.4129,
"step": 6800
},
{
"epoch": 0.3439307088204843,
"grad_norm": 3.673940050614789,
"learning_rate": 6.561773916557228e-06,
"loss": 0.4034,
"step": 6810
},
{
"epoch": 0.3444357465720563,
"grad_norm": 2.9280965882270404,
"learning_rate": 6.556722901303163e-06,
"loss": 0.3892,
"step": 6820
},
{
"epoch": 0.3449407843236282,
"grad_norm": 93.05414347131021,
"learning_rate": 6.551671886049097e-06,
"loss": 0.4061,
"step": 6830
},
{
"epoch": 0.34544582207520014,
"grad_norm": 4.709615772022943,
"learning_rate": 6.546620870795031e-06,
"loss": 0.3948,
"step": 6840
},
{
"epoch": 0.34595085982677204,
"grad_norm": 4.852654627656968,
"learning_rate": 6.541569855540965e-06,
"loss": 0.3938,
"step": 6850
},
{
"epoch": 0.346455897578344,
"grad_norm": 4.816390805815548,
"learning_rate": 6.536518840286898e-06,
"loss": 0.3792,
"step": 6860
},
{
"epoch": 0.3469609353299159,
"grad_norm": 3.6013276651698374,
"learning_rate": 6.531467825032832e-06,
"loss": 0.4012,
"step": 6870
},
{
"epoch": 0.34746597308148786,
"grad_norm": 3.4921009820598576,
"learning_rate": 6.5264168097787665e-06,
"loss": 0.4083,
"step": 6880
},
{
"epoch": 0.34797101083305976,
"grad_norm": 6.61814343879125,
"learning_rate": 6.5213657945247e-06,
"loss": 0.4049,
"step": 6890
},
{
"epoch": 0.3484760485846317,
"grad_norm": 4.052623448201924,
"learning_rate": 6.516314779270634e-06,
"loss": 0.388,
"step": 6900
},
{
"epoch": 0.3489810863362036,
"grad_norm": 3.63955593340056,
"learning_rate": 6.511263764016568e-06,
"loss": 0.4061,
"step": 6910
},
{
"epoch": 0.3494861240877756,
"grad_norm": 4.150263872578294,
"learning_rate": 6.506212748762501e-06,
"loss": 0.4054,
"step": 6920
},
{
"epoch": 0.3499911618393475,
"grad_norm": 7.266970150410544,
"learning_rate": 6.5011617335084354e-06,
"loss": 0.3793,
"step": 6930
},
{
"epoch": 0.35049619959091943,
"grad_norm": 7.712101909778269,
"learning_rate": 6.4961107182543696e-06,
"loss": 0.3994,
"step": 6940
},
{
"epoch": 0.35100123734249133,
"grad_norm": 5.509148276281905,
"learning_rate": 6.491059703000303e-06,
"loss": 0.3781,
"step": 6950
},
{
"epoch": 0.3515062750940633,
"grad_norm": 2.589095673859192,
"learning_rate": 6.486008687746238e-06,
"loss": 0.3978,
"step": 6960
},
{
"epoch": 0.3520113128456352,
"grad_norm": 4.122842581672093,
"learning_rate": 6.480957672492172e-06,
"loss": 0.4009,
"step": 6970
},
{
"epoch": 0.35251635059720715,
"grad_norm": 4.1842800217172265,
"learning_rate": 6.475906657238106e-06,
"loss": 0.4029,
"step": 6980
},
{
"epoch": 0.35302138834877905,
"grad_norm": 7.398626485810365,
"learning_rate": 6.470855641984039e-06,
"loss": 0.4078,
"step": 6990
},
{
"epoch": 0.353526426100351,
"grad_norm": 5.396774144943605,
"learning_rate": 6.4658046267299735e-06,
"loss": 0.4059,
"step": 7000
},
{
"epoch": 0.3540314638519229,
"grad_norm": 13.281878929001232,
"learning_rate": 6.460753611475908e-06,
"loss": 0.4024,
"step": 7010
},
{
"epoch": 0.35453650160349487,
"grad_norm": 4.283305275547664,
"learning_rate": 6.455702596221841e-06,
"loss": 0.4108,
"step": 7020
},
{
"epoch": 0.35504153935506677,
"grad_norm": 3.8392385244226794,
"learning_rate": 6.450651580967775e-06,
"loss": 0.4227,
"step": 7030
},
{
"epoch": 0.35554657710663873,
"grad_norm": 5.012629063346216,
"learning_rate": 6.445600565713709e-06,
"loss": 0.4071,
"step": 7040
},
{
"epoch": 0.35605161485821063,
"grad_norm": 13.322782620701876,
"learning_rate": 6.440549550459642e-06,
"loss": 0.3926,
"step": 7050
},
{
"epoch": 0.3565566526097826,
"grad_norm": 5.149647425656023,
"learning_rate": 6.4354985352055765e-06,
"loss": 0.4017,
"step": 7060
},
{
"epoch": 0.3570616903613545,
"grad_norm": 7.5875779832186065,
"learning_rate": 6.430447519951511e-06,
"loss": 0.3867,
"step": 7070
},
{
"epoch": 0.35756672811292645,
"grad_norm": 3.433655269776608,
"learning_rate": 6.425396504697444e-06,
"loss": 0.3788,
"step": 7080
},
{
"epoch": 0.35807176586449835,
"grad_norm": 4.332715392495331,
"learning_rate": 6.420345489443378e-06,
"loss": 0.3672,
"step": 7090
},
{
"epoch": 0.3585768036160703,
"grad_norm": 8.825136973366781,
"learning_rate": 6.415294474189313e-06,
"loss": 0.391,
"step": 7100
},
{
"epoch": 0.3590818413676422,
"grad_norm": 4.896756858317604,
"learning_rate": 6.410243458935247e-06,
"loss": 0.3981,
"step": 7110
},
{
"epoch": 0.35958687911921416,
"grad_norm": 3.0181643910861595,
"learning_rate": 6.40519244368118e-06,
"loss": 0.3998,
"step": 7120
},
{
"epoch": 0.36009191687078607,
"grad_norm": 2.5429084560847346,
"learning_rate": 6.4001414284271145e-06,
"loss": 0.3973,
"step": 7130
},
{
"epoch": 0.360596954622358,
"grad_norm": 4.518411589704886,
"learning_rate": 6.395090413173049e-06,
"loss": 0.3921,
"step": 7140
},
{
"epoch": 0.3611019923739299,
"grad_norm": 2.520647494438761,
"learning_rate": 6.390039397918982e-06,
"loss": 0.3838,
"step": 7150
},
{
"epoch": 0.3616070301255019,
"grad_norm": 5.45309379468731,
"learning_rate": 6.384988382664916e-06,
"loss": 0.3966,
"step": 7160
},
{
"epoch": 0.3621120678770738,
"grad_norm": 5.822220380763848,
"learning_rate": 6.37993736741085e-06,
"loss": 0.4041,
"step": 7170
},
{
"epoch": 0.36261710562864574,
"grad_norm": 4.920376454170573,
"learning_rate": 6.3748863521567835e-06,
"loss": 0.3994,
"step": 7180
},
{
"epoch": 0.3631221433802177,
"grad_norm": 2.5736259947121707,
"learning_rate": 6.369835336902718e-06,
"loss": 0.3944,
"step": 7190
},
{
"epoch": 0.3636271811317896,
"grad_norm": 2.749872482951078,
"learning_rate": 6.364784321648652e-06,
"loss": 0.3985,
"step": 7200
},
{
"epoch": 0.36413221888336156,
"grad_norm": 2.5194888325972267,
"learning_rate": 6.359733306394586e-06,
"loss": 0.3939,
"step": 7210
},
{
"epoch": 0.36463725663493346,
"grad_norm": 3.574666468822497,
"learning_rate": 6.354682291140519e-06,
"loss": 0.4234,
"step": 7220
},
{
"epoch": 0.3651422943865054,
"grad_norm": 3.0608012516572325,
"learning_rate": 6.349631275886454e-06,
"loss": 0.3894,
"step": 7230
},
{
"epoch": 0.3656473321380773,
"grad_norm": 2.888576087403178,
"learning_rate": 6.344580260632388e-06,
"loss": 0.4149,
"step": 7240
},
{
"epoch": 0.3661523698896493,
"grad_norm": 2.6426553837219378,
"learning_rate": 6.339529245378322e-06,
"loss": 0.3788,
"step": 7250
},
{
"epoch": 0.3666574076412212,
"grad_norm": 8.704787479012706,
"learning_rate": 6.334478230124256e-06,
"loss": 0.4047,
"step": 7260
},
{
"epoch": 0.36716244539279314,
"grad_norm": 2.497691163861801,
"learning_rate": 6.32942721487019e-06,
"loss": 0.3959,
"step": 7270
},
{
"epoch": 0.36766748314436504,
"grad_norm": 2.8835395006733306,
"learning_rate": 6.324376199616124e-06,
"loss": 0.3964,
"step": 7280
},
{
"epoch": 0.368172520895937,
"grad_norm": 4.011809451507155,
"learning_rate": 6.319325184362057e-06,
"loss": 0.4196,
"step": 7290
},
{
"epoch": 0.3686775586475089,
"grad_norm": 9.138568921825629,
"learning_rate": 6.314274169107991e-06,
"loss": 0.4075,
"step": 7300
},
{
"epoch": 0.36918259639908085,
"grad_norm": 2.6137236844527676,
"learning_rate": 6.309223153853925e-06,
"loss": 0.3967,
"step": 7310
},
{
"epoch": 0.36968763415065276,
"grad_norm": 5.521296938399183,
"learning_rate": 6.304172138599859e-06,
"loss": 0.4005,
"step": 7320
},
{
"epoch": 0.3701926719022247,
"grad_norm": 7.608318289570054,
"learning_rate": 6.299121123345793e-06,
"loss": 0.4103,
"step": 7330
},
{
"epoch": 0.3706977096537966,
"grad_norm": 3.952351490468356,
"learning_rate": 6.294070108091727e-06,
"loss": 0.3955,
"step": 7340
},
{
"epoch": 0.37120274740536857,
"grad_norm": 3.2275756395096367,
"learning_rate": 6.28901909283766e-06,
"loss": 0.3691,
"step": 7350
},
{
"epoch": 0.3717077851569405,
"grad_norm": 5.1929124366289185,
"learning_rate": 6.283968077583594e-06,
"loss": 0.4109,
"step": 7360
},
{
"epoch": 0.37221282290851243,
"grad_norm": 3.7102445761239147,
"learning_rate": 6.278917062329529e-06,
"loss": 0.3855,
"step": 7370
},
{
"epoch": 0.37271786066008433,
"grad_norm": 4.102719494934126,
"learning_rate": 6.273866047075463e-06,
"loss": 0.4085,
"step": 7380
},
{
"epoch": 0.3732228984116563,
"grad_norm": 2.882775662800612,
"learning_rate": 6.268815031821397e-06,
"loss": 0.3949,
"step": 7390
},
{
"epoch": 0.3737279361632282,
"grad_norm": 3.847808178042115,
"learning_rate": 6.263764016567331e-06,
"loss": 0.3893,
"step": 7400
},
{
"epoch": 0.37423297391480015,
"grad_norm": 11.984324118193372,
"learning_rate": 6.258713001313265e-06,
"loss": 0.376,
"step": 7410
},
{
"epoch": 0.37473801166637205,
"grad_norm": 3.3001953512413897,
"learning_rate": 6.253661986059198e-06,
"loss": 0.3823,
"step": 7420
},
{
"epoch": 0.375243049417944,
"grad_norm": 5.373513774406177,
"learning_rate": 6.248610970805132e-06,
"loss": 0.4099,
"step": 7430
},
{
"epoch": 0.3757480871695159,
"grad_norm": 2.1585330518152492,
"learning_rate": 6.2435599555510664e-06,
"loss": 0.3977,
"step": 7440
},
{
"epoch": 0.37625312492108787,
"grad_norm": 5.400683846446516,
"learning_rate": 6.238508940297e-06,
"loss": 0.3917,
"step": 7450
},
{
"epoch": 0.37675816267265977,
"grad_norm": 11.58546846012599,
"learning_rate": 6.233457925042934e-06,
"loss": 0.4104,
"step": 7460
},
{
"epoch": 0.3772632004242317,
"grad_norm": 6.158451538704526,
"learning_rate": 6.228406909788868e-06,
"loss": 0.3817,
"step": 7470
},
{
"epoch": 0.37776823817580363,
"grad_norm": 3.220415459261749,
"learning_rate": 6.223355894534801e-06,
"loss": 0.4143,
"step": 7480
},
{
"epoch": 0.3782732759273756,
"grad_norm": 6.483927128344251,
"learning_rate": 6.218304879280735e-06,
"loss": 0.4009,
"step": 7490
},
{
"epoch": 0.3787783136789475,
"grad_norm": 3.1374527379369628,
"learning_rate": 6.21325386402667e-06,
"loss": 0.3825,
"step": 7500
},
{
"epoch": 0.37928335143051944,
"grad_norm": 1.9293391844723216,
"learning_rate": 6.2082028487726045e-06,
"loss": 0.3986,
"step": 7510
},
{
"epoch": 0.37978838918209135,
"grad_norm": 2.8612302362457704,
"learning_rate": 6.203151833518538e-06,
"loss": 0.4093,
"step": 7520
},
{
"epoch": 0.3802934269336633,
"grad_norm": 27.52073154576333,
"learning_rate": 6.198100818264472e-06,
"loss": 0.41,
"step": 7530
},
{
"epoch": 0.3807984646852352,
"grad_norm": 18.495429795467157,
"learning_rate": 6.193049803010406e-06,
"loss": 0.3918,
"step": 7540
},
{
"epoch": 0.38130350243680716,
"grad_norm": 1.9149364107527063,
"learning_rate": 6.187998787756339e-06,
"loss": 0.4059,
"step": 7550
},
{
"epoch": 0.38180854018837906,
"grad_norm": 3.265575132967205,
"learning_rate": 6.182947772502273e-06,
"loss": 0.3761,
"step": 7560
},
{
"epoch": 0.382313577939951,
"grad_norm": 2.9358960635943485,
"learning_rate": 6.1778967572482075e-06,
"loss": 0.3868,
"step": 7570
},
{
"epoch": 0.3828186156915229,
"grad_norm": 2.7564424046901324,
"learning_rate": 6.172845741994142e-06,
"loss": 0.4058,
"step": 7580
},
{
"epoch": 0.3833236534430949,
"grad_norm": 2.2542661022048374,
"learning_rate": 6.167794726740075e-06,
"loss": 0.3878,
"step": 7590
},
{
"epoch": 0.3838286911946668,
"grad_norm": 3.185426579737794,
"learning_rate": 6.162743711486009e-06,
"loss": 0.399,
"step": 7600
},
{
"epoch": 0.38433372894623874,
"grad_norm": 3.3739552822928665,
"learning_rate": 6.157692696231943e-06,
"loss": 0.3915,
"step": 7610
},
{
"epoch": 0.38483876669781064,
"grad_norm": 6.412123283698192,
"learning_rate": 6.1526416809778764e-06,
"loss": 0.3815,
"step": 7620
},
{
"epoch": 0.3853438044493826,
"grad_norm": 5.5629296211963215,
"learning_rate": 6.1475906657238105e-06,
"loss": 0.3794,
"step": 7630
},
{
"epoch": 0.3858488422009545,
"grad_norm": 3.130873953540236,
"learning_rate": 6.1425396504697455e-06,
"loss": 0.3971,
"step": 7640
},
{
"epoch": 0.38635387995252646,
"grad_norm": 2.92441715259481,
"learning_rate": 6.13748863521568e-06,
"loss": 0.396,
"step": 7650
},
{
"epoch": 0.38685891770409836,
"grad_norm": 4.577909010601426,
"learning_rate": 6.132437619961613e-06,
"loss": 0.4041,
"step": 7660
},
{
"epoch": 0.3873639554556703,
"grad_norm": 6.930598411493494,
"learning_rate": 6.127386604707547e-06,
"loss": 0.4031,
"step": 7670
},
{
"epoch": 0.3878689932072422,
"grad_norm": 3.4417274152302513,
"learning_rate": 6.122335589453481e-06,
"loss": 0.3806,
"step": 7680
},
{
"epoch": 0.3883740309588142,
"grad_norm": 8.637005031113738,
"learning_rate": 6.1172845741994144e-06,
"loss": 0.3847,
"step": 7690
},
{
"epoch": 0.3888790687103861,
"grad_norm": 5.039168163562101,
"learning_rate": 6.1122335589453486e-06,
"loss": 0.3927,
"step": 7700
},
{
"epoch": 0.38938410646195803,
"grad_norm": 2.3470164311901307,
"learning_rate": 6.107182543691283e-06,
"loss": 0.38,
"step": 7710
},
{
"epoch": 0.38988914421352994,
"grad_norm": 10.576642650070218,
"learning_rate": 6.102131528437216e-06,
"loss": 0.3792,
"step": 7720
},
{
"epoch": 0.3903941819651019,
"grad_norm": 4.420199483186855,
"learning_rate": 6.09708051318315e-06,
"loss": 0.4023,
"step": 7730
},
{
"epoch": 0.3908992197166738,
"grad_norm": 4.194934818506613,
"learning_rate": 6.092029497929084e-06,
"loss": 0.3889,
"step": 7740
},
{
"epoch": 0.39140425746824575,
"grad_norm": 4.111966133004914,
"learning_rate": 6.0869784826750175e-06,
"loss": 0.4035,
"step": 7750
},
{
"epoch": 0.39190929521981765,
"grad_norm": 2.451588743941197,
"learning_rate": 6.081927467420952e-06,
"loss": 0.3713,
"step": 7760
},
{
"epoch": 0.3924143329713896,
"grad_norm": 4.679648209446588,
"learning_rate": 6.076876452166886e-06,
"loss": 0.3841,
"step": 7770
},
{
"epoch": 0.3929193707229615,
"grad_norm": 5.530044736355814,
"learning_rate": 6.071825436912821e-06,
"loss": 0.3775,
"step": 7780
},
{
"epoch": 0.39342440847453347,
"grad_norm": 8.103071297854475,
"learning_rate": 6.066774421658754e-06,
"loss": 0.3977,
"step": 7790
},
{
"epoch": 0.39392944622610543,
"grad_norm": 3.4887947505414787,
"learning_rate": 6.061723406404688e-06,
"loss": 0.3855,
"step": 7800
},
{
"epoch": 0.39443448397767733,
"grad_norm": 8.590868528737689,
"learning_rate": 6.056672391150622e-06,
"loss": 0.4039,
"step": 7810
},
{
"epoch": 0.3949395217292493,
"grad_norm": 6.459621411276858,
"learning_rate": 6.0516213758965555e-06,
"loss": 0.3877,
"step": 7820
},
{
"epoch": 0.3954445594808212,
"grad_norm": 5.585759607891893,
"learning_rate": 6.04657036064249e-06,
"loss": 0.382,
"step": 7830
},
{
"epoch": 0.39594959723239315,
"grad_norm": 13.909241772822636,
"learning_rate": 6.041519345388424e-06,
"loss": 0.3604,
"step": 7840
},
{
"epoch": 0.39645463498396505,
"grad_norm": 8.283993010778675,
"learning_rate": 6.036468330134357e-06,
"loss": 0.3892,
"step": 7850
},
{
"epoch": 0.396959672735537,
"grad_norm": 5.670024832457869,
"learning_rate": 6.031417314880291e-06,
"loss": 0.3758,
"step": 7860
},
{
"epoch": 0.3974647104871089,
"grad_norm": 6.589956544879753,
"learning_rate": 6.026366299626225e-06,
"loss": 0.3788,
"step": 7870
},
{
"epoch": 0.39796974823868086,
"grad_norm": 38.93534106041679,
"learning_rate": 6.0213152843721586e-06,
"loss": 0.3959,
"step": 7880
},
{
"epoch": 0.39847478599025277,
"grad_norm": 9.349039394830367,
"learning_rate": 6.016264269118093e-06,
"loss": 0.4026,
"step": 7890
},
{
"epoch": 0.3989798237418247,
"grad_norm": 12.651279564480014,
"learning_rate": 6.011213253864027e-06,
"loss": 0.3953,
"step": 7900
},
{
"epoch": 0.3994848614933966,
"grad_norm": 12.369720860667183,
"learning_rate": 6.006162238609962e-06,
"loss": 0.3834,
"step": 7910
},
{
"epoch": 0.3999898992449686,
"grad_norm": 6.935402086209793,
"learning_rate": 6.001111223355895e-06,
"loss": 0.3855,
"step": 7920
},
{
"epoch": 0.4004949369965405,
"grad_norm": 14.485519276092187,
"learning_rate": 5.996060208101829e-06,
"loss": 0.3794,
"step": 7930
},
{
"epoch": 0.40099997474811244,
"grad_norm": 7.8583919362498795,
"learning_rate": 5.991009192847763e-06,
"loss": 0.3922,
"step": 7940
},
{
"epoch": 0.40150501249968434,
"grad_norm": 11.298566008159142,
"learning_rate": 5.985958177593697e-06,
"loss": 0.4002,
"step": 7950
},
{
"epoch": 0.4020100502512563,
"grad_norm": 28.291667710287033,
"learning_rate": 5.980907162339631e-06,
"loss": 0.3621,
"step": 7960
},
{
"epoch": 0.4025150880028282,
"grad_norm": 17.877462639004314,
"learning_rate": 5.975856147085565e-06,
"loss": 0.3968,
"step": 7970
},
{
"epoch": 0.40302012575440016,
"grad_norm": 8.068693573421475,
"learning_rate": 5.970805131831499e-06,
"loss": 0.3795,
"step": 7980
},
{
"epoch": 0.40352516350597206,
"grad_norm": 6.199946065309128,
"learning_rate": 5.965754116577432e-06,
"loss": 0.3909,
"step": 7990
},
{
"epoch": 0.404030201257544,
"grad_norm": 13.7274262284846,
"learning_rate": 5.960703101323366e-06,
"loss": 0.3728,
"step": 8000
},
{
"epoch": 0.4045352390091159,
"grad_norm": 15.37764496794447,
"learning_rate": 5.9556520860693005e-06,
"loss": 0.3868,
"step": 8010
},
{
"epoch": 0.4050402767606879,
"grad_norm": 7.104894136071469,
"learning_rate": 5.950601070815234e-06,
"loss": 0.3869,
"step": 8020
},
{
"epoch": 0.4055453145122598,
"grad_norm": 22.085557636681344,
"learning_rate": 5.945550055561168e-06,
"loss": 0.3842,
"step": 8030
},
{
"epoch": 0.40605035226383174,
"grad_norm": 8.557783879131767,
"learning_rate": 5.940499040307102e-06,
"loss": 0.3823,
"step": 8040
},
{
"epoch": 0.40655539001540364,
"grad_norm": 19.231211160885596,
"learning_rate": 5.935448025053037e-06,
"loss": 0.3819,
"step": 8050
},
{
"epoch": 0.4070604277669756,
"grad_norm": 7.452519753759336,
"learning_rate": 5.93039700979897e-06,
"loss": 0.3789,
"step": 8060
},
{
"epoch": 0.4075654655185475,
"grad_norm": 25.150570679742312,
"learning_rate": 5.925345994544904e-06,
"loss": 0.3829,
"step": 8070
},
{
"epoch": 0.40807050327011946,
"grad_norm": 12.1446176511635,
"learning_rate": 5.9202949792908385e-06,
"loss": 0.3925,
"step": 8080
},
{
"epoch": 0.40857554102169136,
"grad_norm": 8.119469151619079,
"learning_rate": 5.915243964036772e-06,
"loss": 0.3857,
"step": 8090
},
{
"epoch": 0.4090805787732633,
"grad_norm": 8.948591507408596,
"learning_rate": 5.910192948782706e-06,
"loss": 0.3757,
"step": 8100
},
{
"epoch": 0.4095856165248352,
"grad_norm": 4.621733850994045,
"learning_rate": 5.90514193352864e-06,
"loss": 0.4093,
"step": 8110
},
{
"epoch": 0.4100906542764072,
"grad_norm": 4.153858890970974,
"learning_rate": 5.900090918274573e-06,
"loss": 0.3834,
"step": 8120
},
{
"epoch": 0.4105956920279791,
"grad_norm": 7.250782877265542,
"learning_rate": 5.895039903020507e-06,
"loss": 0.3914,
"step": 8130
},
{
"epoch": 0.41110072977955103,
"grad_norm": 7.214997449416598,
"learning_rate": 5.8899888877664415e-06,
"loss": 0.3763,
"step": 8140
},
{
"epoch": 0.41160576753112293,
"grad_norm": 7.292312503512012,
"learning_rate": 5.884937872512375e-06,
"loss": 0.3696,
"step": 8150
},
{
"epoch": 0.4121108052826949,
"grad_norm": 8.564034765070186,
"learning_rate": 5.879886857258309e-06,
"loss": 0.36,
"step": 8160
},
{
"epoch": 0.4126158430342668,
"grad_norm": 5.03381059488658,
"learning_rate": 5.874835842004243e-06,
"loss": 0.3915,
"step": 8170
},
{
"epoch": 0.41312088078583875,
"grad_norm": 7.1010748327814275,
"learning_rate": 5.869784826750178e-06,
"loss": 0.3831,
"step": 8180
},
{
"epoch": 0.41362591853741065,
"grad_norm": 3.8185356541714177,
"learning_rate": 5.864733811496111e-06,
"loss": 0.3821,
"step": 8190
},
{
"epoch": 0.4141309562889826,
"grad_norm": 15.840372845269206,
"learning_rate": 5.8596827962420454e-06,
"loss": 0.3703,
"step": 8200
},
{
"epoch": 0.4146359940405545,
"grad_norm": 8.469089513751111,
"learning_rate": 5.8546317809879796e-06,
"loss": 0.3672,
"step": 8210
},
{
"epoch": 0.41514103179212647,
"grad_norm": 5.590802338147382,
"learning_rate": 5.849580765733913e-06,
"loss": 0.3893,
"step": 8220
},
{
"epoch": 0.41564606954369837,
"grad_norm": 8.673788237261087,
"learning_rate": 5.844529750479847e-06,
"loss": 0.3802,
"step": 8230
},
{
"epoch": 0.41615110729527033,
"grad_norm": 4.9263977139934,
"learning_rate": 5.839478735225781e-06,
"loss": 0.3699,
"step": 8240
},
{
"epoch": 0.41665614504684223,
"grad_norm": 2.3618077978561134,
"learning_rate": 5.834427719971714e-06,
"loss": 0.4169,
"step": 8250
},
{
"epoch": 0.4171611827984142,
"grad_norm": 3.4421082504609215,
"learning_rate": 5.8293767047176485e-06,
"loss": 0.3895,
"step": 8260
},
{
"epoch": 0.4176662205499861,
"grad_norm": 9.036063807272322,
"learning_rate": 5.824325689463583e-06,
"loss": 0.3911,
"step": 8270
},
{
"epoch": 0.41817125830155805,
"grad_norm": 2.726905279515538,
"learning_rate": 5.819274674209517e-06,
"loss": 0.3906,
"step": 8280
},
{
"epoch": 0.41867629605312995,
"grad_norm": 3.1988219571320173,
"learning_rate": 5.81422365895545e-06,
"loss": 0.3864,
"step": 8290
},
{
"epoch": 0.4191813338047019,
"grad_norm": 3.45185619640904,
"learning_rate": 5.809172643701384e-06,
"loss": 0.3985,
"step": 8300
},
{
"epoch": 0.4196863715562738,
"grad_norm": 5.02491265508067,
"learning_rate": 5.804121628447318e-06,
"loss": 0.3907,
"step": 8310
},
{
"epoch": 0.42019140930784576,
"grad_norm": 3.5658556661908443,
"learning_rate": 5.799070613193253e-06,
"loss": 0.3923,
"step": 8320
},
{
"epoch": 0.42069644705941767,
"grad_norm": 5.978581813826559,
"learning_rate": 5.7940195979391865e-06,
"loss": 0.3844,
"step": 8330
},
{
"epoch": 0.4212014848109896,
"grad_norm": 8.14240158938725,
"learning_rate": 5.788968582685121e-06,
"loss": 0.3978,
"step": 8340
},
{
"epoch": 0.4217065225625615,
"grad_norm": 3.181846621853647,
"learning_rate": 5.783917567431055e-06,
"loss": 0.3992,
"step": 8350
},
{
"epoch": 0.4222115603141335,
"grad_norm": 8.322284252076804,
"learning_rate": 5.778866552176988e-06,
"loss": 0.3836,
"step": 8360
},
{
"epoch": 0.4227165980657054,
"grad_norm": 4.76585783468973,
"learning_rate": 5.773815536922922e-06,
"loss": 0.3904,
"step": 8370
},
{
"epoch": 0.42322163581727734,
"grad_norm": 3.857044578534056,
"learning_rate": 5.768764521668856e-06,
"loss": 0.3851,
"step": 8380
},
{
"epoch": 0.4237266735688493,
"grad_norm": 5.058592219879147,
"learning_rate": 5.7637135064147895e-06,
"loss": 0.3867,
"step": 8390
},
{
"epoch": 0.4242317113204212,
"grad_norm": 15.788479310500968,
"learning_rate": 5.758662491160724e-06,
"loss": 0.3876,
"step": 8400
},
{
"epoch": 0.42473674907199316,
"grad_norm": 14.796150435899415,
"learning_rate": 5.753611475906658e-06,
"loss": 0.3713,
"step": 8410
},
{
"epoch": 0.42524178682356506,
"grad_norm": 6.198372860755761,
"learning_rate": 5.748560460652591e-06,
"loss": 0.3959,
"step": 8420
},
{
"epoch": 0.425746824575137,
"grad_norm": 4.004395808383159,
"learning_rate": 5.743509445398525e-06,
"loss": 0.3879,
"step": 8430
},
{
"epoch": 0.4262518623267089,
"grad_norm": 24.753829770708524,
"learning_rate": 5.738458430144459e-06,
"loss": 0.3907,
"step": 8440
},
{
"epoch": 0.4267569000782809,
"grad_norm": 3.79416616415335,
"learning_rate": 5.733407414890394e-06,
"loss": 0.3716,
"step": 8450
},
{
"epoch": 0.4272619378298528,
"grad_norm": 16.793962044251266,
"learning_rate": 5.7283563996363276e-06,
"loss": 0.38,
"step": 8460
},
{
"epoch": 0.42776697558142474,
"grad_norm": 6.987194101161238,
"learning_rate": 5.723305384382262e-06,
"loss": 0.3741,
"step": 8470
},
{
"epoch": 0.42827201333299664,
"grad_norm": 6.1003803215690295,
"learning_rate": 5.718254369128196e-06,
"loss": 0.3836,
"step": 8480
},
{
"epoch": 0.4287770510845686,
"grad_norm": 11.39191383765078,
"learning_rate": 5.713203353874129e-06,
"loss": 0.3785,
"step": 8490
},
{
"epoch": 0.4292820888361405,
"grad_norm": 4.216231578608531,
"learning_rate": 5.708152338620063e-06,
"loss": 0.3849,
"step": 8500
},
{
"epoch": 0.42978712658771245,
"grad_norm": 26.748832800632403,
"learning_rate": 5.703101323365997e-06,
"loss": 0.3877,
"step": 8510
},
{
"epoch": 0.43029216433928436,
"grad_norm": 5.3822939480121414,
"learning_rate": 5.698050308111931e-06,
"loss": 0.3692,
"step": 8520
},
{
"epoch": 0.4307972020908563,
"grad_norm": 4.249521852101111,
"learning_rate": 5.692999292857865e-06,
"loss": 0.3817,
"step": 8530
},
{
"epoch": 0.4313022398424282,
"grad_norm": 6.698259673877979,
"learning_rate": 5.687948277603799e-06,
"loss": 0.3881,
"step": 8540
},
{
"epoch": 0.43180727759400017,
"grad_norm": 6.493588412853115,
"learning_rate": 5.682897262349732e-06,
"loss": 0.3977,
"step": 8550
},
{
"epoch": 0.4323123153455721,
"grad_norm": 3.733362569921758,
"learning_rate": 5.677846247095666e-06,
"loss": 0.3982,
"step": 8560
},
{
"epoch": 0.43281735309714403,
"grad_norm": 4.847622020933204,
"learning_rate": 5.6727952318416e-06,
"loss": 0.3769,
"step": 8570
},
{
"epoch": 0.43332239084871593,
"grad_norm": 7.5594667479754785,
"learning_rate": 5.667744216587534e-06,
"loss": 0.3907,
"step": 8580
},
{
"epoch": 0.4338274286002879,
"grad_norm": 3.201682379163367,
"learning_rate": 5.662693201333469e-06,
"loss": 0.3729,
"step": 8590
},
{
"epoch": 0.4343324663518598,
"grad_norm": 2.5878076554319898,
"learning_rate": 5.657642186079403e-06,
"loss": 0.3956,
"step": 8600
},
{
"epoch": 0.43483750410343175,
"grad_norm": 17.488965325778878,
"learning_rate": 5.652591170825337e-06,
"loss": 0.3712,
"step": 8610
},
{
"epoch": 0.43534254185500365,
"grad_norm": 4.116982393940921,
"learning_rate": 5.64754015557127e-06,
"loss": 0.3893,
"step": 8620
},
{
"epoch": 0.4358475796065756,
"grad_norm": 3.9148601756933554,
"learning_rate": 5.642489140317204e-06,
"loss": 0.3887,
"step": 8630
},
{
"epoch": 0.4363526173581475,
"grad_norm": 7.764634135356721,
"learning_rate": 5.637438125063138e-06,
"loss": 0.3776,
"step": 8640
},
{
"epoch": 0.43685765510971947,
"grad_norm": 9.506827681627305,
"learning_rate": 5.6323871098090725e-06,
"loss": 0.3989,
"step": 8650
},
{
"epoch": 0.43736269286129137,
"grad_norm": 3.2832190526050242,
"learning_rate": 5.627336094555006e-06,
"loss": 0.3982,
"step": 8660
},
{
"epoch": 0.4378677306128633,
"grad_norm": 4.082534248878624,
"learning_rate": 5.62228507930094e-06,
"loss": 0.3901,
"step": 8670
},
{
"epoch": 0.43837276836443523,
"grad_norm": 3.077297177968144,
"learning_rate": 5.617234064046874e-06,
"loss": 0.3669,
"step": 8680
},
{
"epoch": 0.4388778061160072,
"grad_norm": 2.9330241633656167,
"learning_rate": 5.612183048792807e-06,
"loss": 0.3934,
"step": 8690
},
{
"epoch": 0.4393828438675791,
"grad_norm": 3.3633206477485222,
"learning_rate": 5.6071320335387414e-06,
"loss": 0.3949,
"step": 8700
},
{
"epoch": 0.43988788161915104,
"grad_norm": 4.3033520111069,
"learning_rate": 5.6020810182846756e-06,
"loss": 0.3989,
"step": 8710
},
{
"epoch": 0.44039291937072295,
"grad_norm": 3.106225393468076,
"learning_rate": 5.597030003030609e-06,
"loss": 0.3827,
"step": 8720
},
{
"epoch": 0.4408979571222949,
"grad_norm": 3.245685369854172,
"learning_rate": 5.591978987776544e-06,
"loss": 0.3892,
"step": 8730
},
{
"epoch": 0.4414029948738668,
"grad_norm": 3.381863989802099,
"learning_rate": 5.586927972522478e-06,
"loss": 0.3945,
"step": 8740
},
{
"epoch": 0.44190803262543876,
"grad_norm": 5.237035603972838,
"learning_rate": 5.581876957268412e-06,
"loss": 0.3852,
"step": 8750
},
{
"epoch": 0.44241307037701066,
"grad_norm": 3.927311917890188,
"learning_rate": 5.576825942014345e-06,
"loss": 0.396,
"step": 8760
},
{
"epoch": 0.4429181081285826,
"grad_norm": 3.9833756210512634,
"learning_rate": 5.5717749267602795e-06,
"loss": 0.3823,
"step": 8770
},
{
"epoch": 0.4434231458801545,
"grad_norm": 4.82672597736031,
"learning_rate": 5.566723911506214e-06,
"loss": 0.3837,
"step": 8780
},
{
"epoch": 0.4439281836317265,
"grad_norm": 6.6207510733703,
"learning_rate": 5.561672896252147e-06,
"loss": 0.3846,
"step": 8790
},
{
"epoch": 0.4444332213832984,
"grad_norm": 4.761029989854409,
"learning_rate": 5.556621880998081e-06,
"loss": 0.3851,
"step": 8800
},
{
"epoch": 0.44493825913487034,
"grad_norm": 7.583147217262274,
"learning_rate": 5.551570865744015e-06,
"loss": 0.3897,
"step": 8810
},
{
"epoch": 0.44544329688644224,
"grad_norm": 8.267756155680992,
"learning_rate": 5.546519850489948e-06,
"loss": 0.3924,
"step": 8820
},
{
"epoch": 0.4459483346380142,
"grad_norm": 10.81298502824958,
"learning_rate": 5.5414688352358825e-06,
"loss": 0.3768,
"step": 8830
},
{
"epoch": 0.4464533723895861,
"grad_norm": 5.44259386052073,
"learning_rate": 5.536417819981817e-06,
"loss": 0.3766,
"step": 8840
},
{
"epoch": 0.44695841014115806,
"grad_norm": 19.848594487097966,
"learning_rate": 5.53136680472775e-06,
"loss": 0.386,
"step": 8850
},
{
"epoch": 0.44746344789272996,
"grad_norm": 9.264967854622206,
"learning_rate": 5.526315789473685e-06,
"loss": 0.3916,
"step": 8860
},
{
"epoch": 0.4479684856443019,
"grad_norm": 11.970151482632554,
"learning_rate": 5.521264774219619e-06,
"loss": 0.3878,
"step": 8870
},
{
"epoch": 0.4484735233958738,
"grad_norm": 4.562463656013562,
"learning_rate": 5.516213758965553e-06,
"loss": 0.3827,
"step": 8880
},
{
"epoch": 0.4489785611474458,
"grad_norm": 8.92273565377152,
"learning_rate": 5.511162743711486e-06,
"loss": 0.3691,
"step": 8890
},
{
"epoch": 0.4494835988990177,
"grad_norm": 6.168748754085141,
"learning_rate": 5.5061117284574205e-06,
"loss": 0.3762,
"step": 8900
},
{
"epoch": 0.44998863665058964,
"grad_norm": 9.45298676374925,
"learning_rate": 5.501060713203355e-06,
"loss": 0.39,
"step": 8910
},
{
"epoch": 0.45049367440216154,
"grad_norm": 8.9825721118566,
"learning_rate": 5.496009697949288e-06,
"loss": 0.3946,
"step": 8920
},
{
"epoch": 0.4509987121537335,
"grad_norm": 3.970319391221007,
"learning_rate": 5.490958682695222e-06,
"loss": 0.3766,
"step": 8930
},
{
"epoch": 0.4515037499053054,
"grad_norm": 10.786278133368974,
"learning_rate": 5.485907667441156e-06,
"loss": 0.3609,
"step": 8940
},
{
"epoch": 0.45200878765687735,
"grad_norm": 10.415656893523584,
"learning_rate": 5.4808566521870895e-06,
"loss": 0.381,
"step": 8950
},
{
"epoch": 0.45251382540844926,
"grad_norm": 6.642215363932757,
"learning_rate": 5.475805636933024e-06,
"loss": 0.3958,
"step": 8960
},
{
"epoch": 0.4530188631600212,
"grad_norm": 23.15078550027285,
"learning_rate": 5.470754621678958e-06,
"loss": 0.3765,
"step": 8970
},
{
"epoch": 0.4535239009115931,
"grad_norm": 12.962039757228773,
"learning_rate": 5.465703606424891e-06,
"loss": 0.3796,
"step": 8980
},
{
"epoch": 0.45402893866316507,
"grad_norm": 10.834056396167355,
"learning_rate": 5.460652591170825e-06,
"loss": 0.3825,
"step": 8990
},
{
"epoch": 0.45453397641473703,
"grad_norm": 31.830300939594174,
"learning_rate": 5.45560157591676e-06,
"loss": 0.3799,
"step": 9000
},
{
"epoch": 0.45503901416630893,
"grad_norm": 7.317578019104587,
"learning_rate": 5.450550560662694e-06,
"loss": 0.3617,
"step": 9010
},
{
"epoch": 0.4555440519178809,
"grad_norm": 18.51705064335606,
"learning_rate": 5.445499545408628e-06,
"loss": 0.3843,
"step": 9020
},
{
"epoch": 0.4560490896694528,
"grad_norm": 12.73637775470279,
"learning_rate": 5.440448530154562e-06,
"loss": 0.3533,
"step": 9030
},
{
"epoch": 0.45655412742102475,
"grad_norm": 13.089783277969067,
"learning_rate": 5.435397514900496e-06,
"loss": 0.3844,
"step": 9040
},
{
"epoch": 0.45705916517259665,
"grad_norm": 38.50162494773606,
"learning_rate": 5.43034649964643e-06,
"loss": 0.3839,
"step": 9050
},
{
"epoch": 0.4575642029241686,
"grad_norm": 5.6808406753592715,
"learning_rate": 5.425295484392363e-06,
"loss": 0.3938,
"step": 9060
},
{
"epoch": 0.4580692406757405,
"grad_norm": 9.303424412707237,
"learning_rate": 5.420244469138297e-06,
"loss": 0.3864,
"step": 9070
},
{
"epoch": 0.45857427842731247,
"grad_norm": 6.58855670966557,
"learning_rate": 5.415193453884231e-06,
"loss": 0.3802,
"step": 9080
},
{
"epoch": 0.45907931617888437,
"grad_norm": 7.626250897081635,
"learning_rate": 5.410142438630165e-06,
"loss": 0.3779,
"step": 9090
},
{
"epoch": 0.4595843539304563,
"grad_norm": 7.20955512967183,
"learning_rate": 5.405091423376099e-06,
"loss": 0.3852,
"step": 9100
},
{
"epoch": 0.4600893916820282,
"grad_norm": 3.600490312288401,
"learning_rate": 5.400040408122033e-06,
"loss": 0.3734,
"step": 9110
},
{
"epoch": 0.4605944294336002,
"grad_norm": 8.58899517905405,
"learning_rate": 5.394989392867966e-06,
"loss": 0.3789,
"step": 9120
},
{
"epoch": 0.4610994671851721,
"grad_norm": 6.98087393231808,
"learning_rate": 5.389938377613901e-06,
"loss": 0.3833,
"step": 9130
},
{
"epoch": 0.46160450493674404,
"grad_norm": 4.3141382007799765,
"learning_rate": 5.384887362359835e-06,
"loss": 0.3772,
"step": 9140
},
{
"epoch": 0.46210954268831594,
"grad_norm": 4.9903497470267615,
"learning_rate": 5.379836347105769e-06,
"loss": 0.3777,
"step": 9150
},
{
"epoch": 0.4626145804398879,
"grad_norm": 5.928389544490131,
"learning_rate": 5.374785331851703e-06,
"loss": 0.3862,
"step": 9160
},
{
"epoch": 0.4631196181914598,
"grad_norm": 4.797470596868366,
"learning_rate": 5.369734316597637e-06,
"loss": 0.3997,
"step": 9170
},
{
"epoch": 0.46362465594303176,
"grad_norm": 8.670905903895585,
"learning_rate": 5.364683301343571e-06,
"loss": 0.3678,
"step": 9180
},
{
"epoch": 0.46412969369460366,
"grad_norm": 6.464493728216097,
"learning_rate": 5.359632286089504e-06,
"loss": 0.3815,
"step": 9190
},
{
"epoch": 0.4646347314461756,
"grad_norm": 12.803983058209836,
"learning_rate": 5.354581270835438e-06,
"loss": 0.38,
"step": 9200
},
{
"epoch": 0.4651397691977475,
"grad_norm": 9.24913498804073,
"learning_rate": 5.3495302555813724e-06,
"loss": 0.3818,
"step": 9210
},
{
"epoch": 0.4656448069493195,
"grad_norm": 6.124104683054,
"learning_rate": 5.344479240327306e-06,
"loss": 0.3857,
"step": 9220
},
{
"epoch": 0.4661498447008914,
"grad_norm": 19.982398897538843,
"learning_rate": 5.33942822507324e-06,
"loss": 0.3759,
"step": 9230
},
{
"epoch": 0.46665488245246334,
"grad_norm": 6.143149352411427,
"learning_rate": 5.334377209819174e-06,
"loss": 0.3901,
"step": 9240
},
{
"epoch": 0.46715992020403524,
"grad_norm": 11.010467552273573,
"learning_rate": 5.329326194565107e-06,
"loss": 0.3798,
"step": 9250
},
{
"epoch": 0.4676649579556072,
"grad_norm": 5.615638251464779,
"learning_rate": 5.324275179311041e-06,
"loss": 0.3855,
"step": 9260
},
{
"epoch": 0.4681699957071791,
"grad_norm": 5.10238166508234,
"learning_rate": 5.319224164056976e-06,
"loss": 0.3793,
"step": 9270
},
{
"epoch": 0.46867503345875106,
"grad_norm": 11.264038165848222,
"learning_rate": 5.3141731488029105e-06,
"loss": 0.3912,
"step": 9280
},
{
"epoch": 0.46918007121032296,
"grad_norm": 4.638158847573904,
"learning_rate": 5.309122133548844e-06,
"loss": 0.3874,
"step": 9290
},
{
"epoch": 0.4696851089618949,
"grad_norm": 5.40563230905546,
"learning_rate": 5.304071118294778e-06,
"loss": 0.3688,
"step": 9300
},
{
"epoch": 0.4701901467134668,
"grad_norm": 7.336455357223306,
"learning_rate": 5.299020103040712e-06,
"loss": 0.3595,
"step": 9310
},
{
"epoch": 0.4706951844650388,
"grad_norm": 6.604255767738522,
"learning_rate": 5.293969087786645e-06,
"loss": 0.3851,
"step": 9320
},
{
"epoch": 0.4712002222166107,
"grad_norm": 8.434342768902814,
"learning_rate": 5.288918072532579e-06,
"loss": 0.3752,
"step": 9330
},
{
"epoch": 0.47170525996818263,
"grad_norm": 7.991264568372306,
"learning_rate": 5.2838670572785135e-06,
"loss": 0.3847,
"step": 9340
},
{
"epoch": 0.47221029771975453,
"grad_norm": 7.689424953061599,
"learning_rate": 5.278816042024447e-06,
"loss": 0.381,
"step": 9350
},
{
"epoch": 0.4727153354713265,
"grad_norm": 8.92289056823504,
"learning_rate": 5.273765026770381e-06,
"loss": 0.3766,
"step": 9360
},
{
"epoch": 0.4732203732228984,
"grad_norm": 6.798146146074497,
"learning_rate": 5.268714011516315e-06,
"loss": 0.3973,
"step": 9370
},
{
"epoch": 0.47372541097447035,
"grad_norm": 4.0943212163341105,
"learning_rate": 5.263662996262249e-06,
"loss": 0.3852,
"step": 9380
},
{
"epoch": 0.47423044872604225,
"grad_norm": 7.923369250973975,
"learning_rate": 5.2586119810081824e-06,
"loss": 0.3759,
"step": 9390
},
{
"epoch": 0.4747354864776142,
"grad_norm": 6.958844060305958,
"learning_rate": 5.253560965754117e-06,
"loss": 0.3814,
"step": 9400
},
{
"epoch": 0.4752405242291861,
"grad_norm": 5.637050541114169,
"learning_rate": 5.2485099505000515e-06,
"loss": 0.3665,
"step": 9410
},
{
"epoch": 0.47574556198075807,
"grad_norm": 7.801257727546048,
"learning_rate": 5.243458935245986e-06,
"loss": 0.3744,
"step": 9420
},
{
"epoch": 0.47625059973232997,
"grad_norm": 8.37919084606113,
"learning_rate": 5.238407919991919e-06,
"loss": 0.394,
"step": 9430
},
{
"epoch": 0.47675563748390193,
"grad_norm": 4.463876576531971,
"learning_rate": 5.233356904737853e-06,
"loss": 0.3712,
"step": 9440
},
{
"epoch": 0.47726067523547383,
"grad_norm": 3.5151581874917253,
"learning_rate": 5.228305889483787e-06,
"loss": 0.3733,
"step": 9450
},
{
"epoch": 0.4777657129870458,
"grad_norm": 6.525584094885654,
"learning_rate": 5.2232548742297204e-06,
"loss": 0.3769,
"step": 9460
},
{
"epoch": 0.4782707507386177,
"grad_norm": 3.5568609941598344,
"learning_rate": 5.2182038589756546e-06,
"loss": 0.3902,
"step": 9470
},
{
"epoch": 0.47877578849018965,
"grad_norm": 4.842870672953731,
"learning_rate": 5.213152843721589e-06,
"loss": 0.3794,
"step": 9480
},
{
"epoch": 0.47928082624176155,
"grad_norm": 6.265357359911366,
"learning_rate": 5.208101828467522e-06,
"loss": 0.3803,
"step": 9490
},
{
"epoch": 0.4797858639933335,
"grad_norm": 8.175660778042406,
"learning_rate": 5.203050813213456e-06,
"loss": 0.3835,
"step": 9500
},
{
"epoch": 0.4802909017449054,
"grad_norm": 2.9195122634550112,
"learning_rate": 5.19799979795939e-06,
"loss": 0.3813,
"step": 9510
},
{
"epoch": 0.48079593949647736,
"grad_norm": 3.632448992408266,
"learning_rate": 5.1929487827053235e-06,
"loss": 0.3684,
"step": 9520
},
{
"epoch": 0.48130097724804927,
"grad_norm": 4.0935522105259095,
"learning_rate": 5.187897767451258e-06,
"loss": 0.4025,
"step": 9530
},
{
"epoch": 0.4818060149996212,
"grad_norm": 2.3275681028787005,
"learning_rate": 5.182846752197193e-06,
"loss": 0.388,
"step": 9540
},
{
"epoch": 0.4823110527511931,
"grad_norm": 2.0956548268446458,
"learning_rate": 5.177795736943127e-06,
"loss": 0.4031,
"step": 9550
},
{
"epoch": 0.4828160905027651,
"grad_norm": 2.173446663356269,
"learning_rate": 5.17274472168906e-06,
"loss": 0.3984,
"step": 9560
},
{
"epoch": 0.483321128254337,
"grad_norm": 3.2812553226056713,
"learning_rate": 5.167693706434994e-06,
"loss": 0.384,
"step": 9570
},
{
"epoch": 0.48382616600590894,
"grad_norm": 3.590239737946557,
"learning_rate": 5.162642691180928e-06,
"loss": 0.3877,
"step": 9580
},
{
"epoch": 0.4843312037574809,
"grad_norm": 2.83308197947474,
"learning_rate": 5.1575916759268615e-06,
"loss": 0.3934,
"step": 9590
},
{
"epoch": 0.4848362415090528,
"grad_norm": 3.847792584135844,
"learning_rate": 5.152540660672796e-06,
"loss": 0.3735,
"step": 9600
},
{
"epoch": 0.48534127926062476,
"grad_norm": 20.75585092309958,
"learning_rate": 5.14748964541873e-06,
"loss": 0.3788,
"step": 9610
},
{
"epoch": 0.48584631701219666,
"grad_norm": 4.31507443188382,
"learning_rate": 5.142438630164663e-06,
"loss": 0.4073,
"step": 9620
},
{
"epoch": 0.4863513547637686,
"grad_norm": 2.675763121628948,
"learning_rate": 5.137387614910597e-06,
"loss": 0.3853,
"step": 9630
},
{
"epoch": 0.4868563925153405,
"grad_norm": 3.180499977172799,
"learning_rate": 5.132336599656531e-06,
"loss": 0.3933,
"step": 9640
},
{
"epoch": 0.4873614302669125,
"grad_norm": 3.2780129127069633,
"learning_rate": 5.1272855844024646e-06,
"loss": 0.3862,
"step": 9650
},
{
"epoch": 0.4878664680184844,
"grad_norm": 2.650097724922774,
"learning_rate": 5.122234569148399e-06,
"loss": 0.3769,
"step": 9660
},
{
"epoch": 0.48837150577005634,
"grad_norm": 19.664449194387533,
"learning_rate": 5.117183553894333e-06,
"loss": 0.3909,
"step": 9670
},
{
"epoch": 0.48887654352162824,
"grad_norm": 3.055574678383631,
"learning_rate": 5.112132538640268e-06,
"loss": 0.368,
"step": 9680
},
{
"epoch": 0.4893815812732002,
"grad_norm": 4.1275772825798285,
"learning_rate": 5.107081523386201e-06,
"loss": 0.3807,
"step": 9690
},
{
"epoch": 0.4898866190247721,
"grad_norm": 2.680370704403155,
"learning_rate": 5.102030508132135e-06,
"loss": 0.3852,
"step": 9700
},
{
"epoch": 0.49039165677634405,
"grad_norm": 5.918612399643231,
"learning_rate": 5.096979492878069e-06,
"loss": 0.3838,
"step": 9710
},
{
"epoch": 0.49089669452791596,
"grad_norm": 3.682371592006304,
"learning_rate": 5.091928477624003e-06,
"loss": 0.4054,
"step": 9720
},
{
"epoch": 0.4914017322794879,
"grad_norm": 14.462676090252211,
"learning_rate": 5.086877462369937e-06,
"loss": 0.3992,
"step": 9730
},
{
"epoch": 0.4919067700310598,
"grad_norm": 8.666562939588516,
"learning_rate": 5.081826447115871e-06,
"loss": 0.3942,
"step": 9740
},
{
"epoch": 0.49241180778263177,
"grad_norm": 8.758850115026672,
"learning_rate": 5.076775431861805e-06,
"loss": 0.3895,
"step": 9750
},
{
"epoch": 0.4929168455342037,
"grad_norm": 4.418838276589228,
"learning_rate": 5.071724416607738e-06,
"loss": 0.3696,
"step": 9760
},
{
"epoch": 0.49342188328577563,
"grad_norm": 3.2634112325138016,
"learning_rate": 5.066673401353672e-06,
"loss": 0.392,
"step": 9770
},
{
"epoch": 0.49392692103734753,
"grad_norm": 9.038919625118139,
"learning_rate": 5.0616223860996065e-06,
"loss": 0.3763,
"step": 9780
},
{
"epoch": 0.4944319587889195,
"grad_norm": 9.874288914817955,
"learning_rate": 5.05657137084554e-06,
"loss": 0.3749,
"step": 9790
},
{
"epoch": 0.4949369965404914,
"grad_norm": 4.177942828246587,
"learning_rate": 5.051520355591474e-06,
"loss": 0.3803,
"step": 9800
},
{
"epoch": 0.49544203429206335,
"grad_norm": 5.209048638874338,
"learning_rate": 5.046469340337409e-06,
"loss": 0.377,
"step": 9810
},
{
"epoch": 0.49594707204363525,
"grad_norm": 6.559103955892935,
"learning_rate": 5.041418325083343e-06,
"loss": 0.3779,
"step": 9820
},
{
"epoch": 0.4964521097952072,
"grad_norm": 4.815427953732501,
"learning_rate": 5.036367309829276e-06,
"loss": 0.3832,
"step": 9830
},
{
"epoch": 0.4969571475467791,
"grad_norm": 4.801884846686052,
"learning_rate": 5.03131629457521e-06,
"loss": 0.3648,
"step": 9840
},
{
"epoch": 0.49746218529835107,
"grad_norm": 4.707895499699569,
"learning_rate": 5.0262652793211445e-06,
"loss": 0.3974,
"step": 9850
},
{
"epoch": 0.49796722304992297,
"grad_norm": 3.1632503133911176,
"learning_rate": 5.021214264067078e-06,
"loss": 0.3645,
"step": 9860
},
{
"epoch": 0.4984722608014949,
"grad_norm": 3.187410732261863,
"learning_rate": 5.016163248813012e-06,
"loss": 0.3964,
"step": 9870
},
{
"epoch": 0.49897729855306683,
"grad_norm": 18.87501682400254,
"learning_rate": 5.011112233558946e-06,
"loss": 0.394,
"step": 9880
},
{
"epoch": 0.4994823363046388,
"grad_norm": 2.727776305600166,
"learning_rate": 5.006061218304879e-06,
"loss": 0.384,
"step": 9890
},
{
"epoch": 0.4999873740562107,
"grad_norm": 2.835732786228447,
"learning_rate": 5.001010203050813e-06,
"loss": 0.3826,
"step": 9900
},
{
"epoch": 0.5004924118077826,
"grad_norm": 12.721805859040408,
"learning_rate": 4.9959591877967475e-06,
"loss": 0.3965,
"step": 9910
},
{
"epoch": 0.5009974495593545,
"grad_norm": 4.309473567917906,
"learning_rate": 4.990908172542682e-06,
"loss": 0.3867,
"step": 9920
},
{
"epoch": 0.5015024873109265,
"grad_norm": 4.385559429438772,
"learning_rate": 4.985857157288616e-06,
"loss": 0.3819,
"step": 9930
},
{
"epoch": 0.5020075250624985,
"grad_norm": 3.1179751000194917,
"learning_rate": 4.980806142034549e-06,
"loss": 0.3678,
"step": 9940
},
{
"epoch": 0.5025125628140703,
"grad_norm": 6.661473319925103,
"learning_rate": 4.975755126780483e-06,
"loss": 0.3803,
"step": 9950
},
{
"epoch": 0.5030176005656423,
"grad_norm": 5.422867145087551,
"learning_rate": 4.970704111526417e-06,
"loss": 0.3724,
"step": 9960
},
{
"epoch": 0.5035226383172142,
"grad_norm": 5.116363457400816,
"learning_rate": 4.965653096272351e-06,
"loss": 0.3796,
"step": 9970
},
{
"epoch": 0.5040276760687862,
"grad_norm": 4.36781162605322,
"learning_rate": 4.9606020810182856e-06,
"loss": 0.3758,
"step": 9980
},
{
"epoch": 0.504532713820358,
"grad_norm": 4.280820474874582,
"learning_rate": 4.955551065764219e-06,
"loss": 0.3747,
"step": 9990
},
{
"epoch": 0.50503775157193,
"grad_norm": 57.44106262644066,
"learning_rate": 4.950500050510153e-06,
"loss": 0.371,
"step": 10000
},
{
"epoch": 0.5055427893235019,
"grad_norm": 4.841001034679017,
"learning_rate": 4.945449035256087e-06,
"loss": 0.3779,
"step": 10010
},
{
"epoch": 0.5060478270750739,
"grad_norm": 11.797410635025113,
"learning_rate": 4.94039802000202e-06,
"loss": 0.3683,
"step": 10020
},
{
"epoch": 0.5065528648266457,
"grad_norm": 4.75455698304815,
"learning_rate": 4.9353470047479545e-06,
"loss": 0.3705,
"step": 10030
},
{
"epoch": 0.5070579025782177,
"grad_norm": 4.360031532937485,
"learning_rate": 4.930295989493889e-06,
"loss": 0.398,
"step": 10040
},
{
"epoch": 0.5075629403297897,
"grad_norm": 5.827246838677423,
"learning_rate": 4.925244974239823e-06,
"loss": 0.3841,
"step": 10050
},
{
"epoch": 0.5080679780813616,
"grad_norm": 7.5891846258257765,
"learning_rate": 4.920193958985757e-06,
"loss": 0.3744,
"step": 10060
},
{
"epoch": 0.5085730158329335,
"grad_norm": 5.9635081800997805,
"learning_rate": 4.91514294373169e-06,
"loss": 0.3986,
"step": 10070
},
{
"epoch": 0.5090780535845054,
"grad_norm": 5.484063460889767,
"learning_rate": 4.910091928477624e-06,
"loss": 0.3839,
"step": 10080
},
{
"epoch": 0.5095830913360774,
"grad_norm": 5.174164528404758,
"learning_rate": 4.905040913223558e-06,
"loss": 0.3619,
"step": 10090
},
{
"epoch": 0.5100881290876493,
"grad_norm": 2.90936959880962,
"learning_rate": 4.8999898979694925e-06,
"loss": 0.3756,
"step": 10100
},
{
"epoch": 0.5105931668392212,
"grad_norm": 4.3229203248018715,
"learning_rate": 4.894938882715426e-06,
"loss": 0.3832,
"step": 10110
},
{
"epoch": 0.5110982045907931,
"grad_norm": 2.500902066389639,
"learning_rate": 4.889887867461361e-06,
"loss": 0.3826,
"step": 10120
},
{
"epoch": 0.5116032423423651,
"grad_norm": 8.290016106139367,
"learning_rate": 4.884836852207294e-06,
"loss": 0.386,
"step": 10130
},
{
"epoch": 0.512108280093937,
"grad_norm": 2.6008214460714894,
"learning_rate": 4.879785836953228e-06,
"loss": 0.3676,
"step": 10140
},
{
"epoch": 0.5126133178455089,
"grad_norm": 2.1333825641801902,
"learning_rate": 4.874734821699162e-06,
"loss": 0.3898,
"step": 10150
},
{
"epoch": 0.5131183555970809,
"grad_norm": 3.013718964658717,
"learning_rate": 4.8696838064450955e-06,
"loss": 0.3887,
"step": 10160
},
{
"epoch": 0.5136233933486528,
"grad_norm": 3.6467351317483816,
"learning_rate": 4.86463279119103e-06,
"loss": 0.3782,
"step": 10170
},
{
"epoch": 0.5141284311002248,
"grad_norm": 2.950522126672218,
"learning_rate": 4.859581775936964e-06,
"loss": 0.377,
"step": 10180
},
{
"epoch": 0.5146334688517967,
"grad_norm": 2.252426318052208,
"learning_rate": 4.854530760682898e-06,
"loss": 0.3758,
"step": 10190
},
{
"epoch": 0.5151385066033686,
"grad_norm": 4.755817616459457,
"learning_rate": 4.849479745428832e-06,
"loss": 0.3807,
"step": 10200
},
{
"epoch": 0.5156435443549405,
"grad_norm": 4.046138186931513,
"learning_rate": 4.844428730174765e-06,
"loss": 0.391,
"step": 10210
},
{
"epoch": 0.5161485821065125,
"grad_norm": 19.99100500952114,
"learning_rate": 4.8393777149206994e-06,
"loss": 0.3781,
"step": 10220
},
{
"epoch": 0.5166536198580844,
"grad_norm": 5.479232805622522,
"learning_rate": 4.8343266996666336e-06,
"loss": 0.3708,
"step": 10230
},
{
"epoch": 0.5171586576096563,
"grad_norm": 8.844870574151285,
"learning_rate": 4.829275684412567e-06,
"loss": 0.3711,
"step": 10240
},
{
"epoch": 0.5176636953612282,
"grad_norm": 7.57710951045164,
"learning_rate": 4.824224669158502e-06,
"loss": 0.3854,
"step": 10250
},
{
"epoch": 0.5181687331128002,
"grad_norm": 7.850442473809185,
"learning_rate": 4.819173653904435e-06,
"loss": 0.3786,
"step": 10260
},
{
"epoch": 0.5186737708643722,
"grad_norm": 6.316522286246354,
"learning_rate": 4.814122638650369e-06,
"loss": 0.3838,
"step": 10270
},
{
"epoch": 0.519178808615944,
"grad_norm": 11.182819849962994,
"learning_rate": 4.809071623396303e-06,
"loss": 0.385,
"step": 10280
},
{
"epoch": 0.519683846367516,
"grad_norm": 9.46133568238808,
"learning_rate": 4.804020608142237e-06,
"loss": 0.3703,
"step": 10290
},
{
"epoch": 0.5201888841190879,
"grad_norm": 7.691807268341042,
"learning_rate": 4.798969592888171e-06,
"loss": 0.3682,
"step": 10300
},
{
"epoch": 0.5206939218706599,
"grad_norm": 9.93078233486807,
"learning_rate": 4.793918577634105e-06,
"loss": 0.3802,
"step": 10310
},
{
"epoch": 0.5211989596222317,
"grad_norm": 7.510658702823232,
"learning_rate": 4.788867562380039e-06,
"loss": 0.3886,
"step": 10320
},
{
"epoch": 0.5217039973738037,
"grad_norm": 22.882646270297737,
"learning_rate": 4.783816547125973e-06,
"loss": 0.393,
"step": 10330
},
{
"epoch": 0.5222090351253756,
"grad_norm": 14.517964204994225,
"learning_rate": 4.778765531871906e-06,
"loss": 0.3803,
"step": 10340
},
{
"epoch": 0.5227140728769476,
"grad_norm": 6.854534356382802,
"learning_rate": 4.7737145166178405e-06,
"loss": 0.3931,
"step": 10350
},
{
"epoch": 0.5232191106285194,
"grad_norm": 11.309739741500756,
"learning_rate": 4.768663501363775e-06,
"loss": 0.3876,
"step": 10360
},
{
"epoch": 0.5237241483800914,
"grad_norm": 8.35728833434225,
"learning_rate": 4.763612486109708e-06,
"loss": 0.3789,
"step": 10370
},
{
"epoch": 0.5242291861316634,
"grad_norm": 8.871743836416062,
"learning_rate": 4.758561470855642e-06,
"loss": 0.3981,
"step": 10380
},
{
"epoch": 0.5247342238832353,
"grad_norm": 15.15117759530781,
"learning_rate": 4.753510455601576e-06,
"loss": 0.3577,
"step": 10390
},
{
"epoch": 0.5252392616348072,
"grad_norm": 10.766312797504908,
"learning_rate": 4.74845944034751e-06,
"loss": 0.3744,
"step": 10400
},
{
"epoch": 0.5257442993863791,
"grad_norm": 44.757871998844294,
"learning_rate": 4.743408425093444e-06,
"loss": 0.3697,
"step": 10410
},
{
"epoch": 0.5262493371379511,
"grad_norm": 16.586045391427703,
"learning_rate": 4.738357409839378e-06,
"loss": 0.3746,
"step": 10420
},
{
"epoch": 0.526754374889523,
"grad_norm": 50.773287263268436,
"learning_rate": 4.733306394585312e-06,
"loss": 0.373,
"step": 10430
},
{
"epoch": 0.5272594126410949,
"grad_norm": 10.502847442362183,
"learning_rate": 4.728255379331246e-06,
"loss": 0.387,
"step": 10440
},
{
"epoch": 0.5277644503926668,
"grad_norm": 16.528099312938107,
"learning_rate": 4.72320436407718e-06,
"loss": 0.383,
"step": 10450
},
{
"epoch": 0.5282694881442388,
"grad_norm": 17.8799111844064,
"learning_rate": 4.718153348823114e-06,
"loss": 0.381,
"step": 10460
},
{
"epoch": 0.5287745258958108,
"grad_norm": 14.63753428467757,
"learning_rate": 4.713102333569048e-06,
"loss": 0.3837,
"step": 10470
},
{
"epoch": 0.5292795636473826,
"grad_norm": 13.42251301006677,
"learning_rate": 4.7080513183149816e-06,
"loss": 0.3595,
"step": 10480
},
{
"epoch": 0.5297846013989546,
"grad_norm": 30.157056141735794,
"learning_rate": 4.703000303060916e-06,
"loss": 0.3648,
"step": 10490
},
{
"epoch": 0.5302896391505265,
"grad_norm": 28.508594444052306,
"learning_rate": 4.69794928780685e-06,
"loss": 0.3793,
"step": 10500
},
{
"epoch": 0.5307946769020985,
"grad_norm": 28.922781791221336,
"learning_rate": 4.692898272552783e-06,
"loss": 0.3745,
"step": 10510
},
{
"epoch": 0.5312997146536703,
"grad_norm": 13.721901669008831,
"learning_rate": 4.687847257298718e-06,
"loss": 0.375,
"step": 10520
},
{
"epoch": 0.5318047524052423,
"grad_norm": 65.55837391568407,
"learning_rate": 4.682796242044651e-06,
"loss": 0.3801,
"step": 10530
},
{
"epoch": 0.5323097901568142,
"grad_norm": 16.351443187824426,
"learning_rate": 4.6777452267905855e-06,
"loss": 0.3727,
"step": 10540
},
{
"epoch": 0.5328148279083862,
"grad_norm": 24.727000388948138,
"learning_rate": 4.67269421153652e-06,
"loss": 0.4,
"step": 10550
},
{
"epoch": 0.533319865659958,
"grad_norm": 16.92930292363904,
"learning_rate": 4.667643196282453e-06,
"loss": 0.3818,
"step": 10560
},
{
"epoch": 0.53382490341153,
"grad_norm": 8.293298262780807,
"learning_rate": 4.662592181028387e-06,
"loss": 0.3914,
"step": 10570
},
{
"epoch": 0.534329941163102,
"grad_norm": 7.923596292500085,
"learning_rate": 4.657541165774321e-06,
"loss": 0.3835,
"step": 10580
},
{
"epoch": 0.5348349789146739,
"grad_norm": 21.525089803513207,
"learning_rate": 4.652490150520255e-06,
"loss": 0.3621,
"step": 10590
},
{
"epoch": 0.5353400166662458,
"grad_norm": 5.744657346480749,
"learning_rate": 4.647439135266189e-06,
"loss": 0.3674,
"step": 10600
},
{
"epoch": 0.5358450544178177,
"grad_norm": 4.54613701131844,
"learning_rate": 4.642388120012123e-06,
"loss": 0.3804,
"step": 10610
},
{
"epoch": 0.5363500921693897,
"grad_norm": 14.97113129892423,
"learning_rate": 4.637337104758057e-06,
"loss": 0.3555,
"step": 10620
},
{
"epoch": 0.5368551299209616,
"grad_norm": 11.639674507484868,
"learning_rate": 4.632286089503991e-06,
"loss": 0.3632,
"step": 10630
},
{
"epoch": 0.5373601676725335,
"grad_norm": 19.897945155280457,
"learning_rate": 4.627235074249924e-06,
"loss": 0.3747,
"step": 10640
},
{
"epoch": 0.5378652054241054,
"grad_norm": 6.364339652707885,
"learning_rate": 4.622184058995858e-06,
"loss": 0.3786,
"step": 10650
},
{
"epoch": 0.5383702431756774,
"grad_norm": 9.457676241588533,
"learning_rate": 4.617133043741792e-06,
"loss": 0.3756,
"step": 10660
},
{
"epoch": 0.5388752809272493,
"grad_norm": 44.15827262329639,
"learning_rate": 4.6120820284877265e-06,
"loss": 0.3741,
"step": 10670
},
{
"epoch": 0.5393803186788212,
"grad_norm": 17.038339271477003,
"learning_rate": 4.607031013233661e-06,
"loss": 0.3736,
"step": 10680
},
{
"epoch": 0.5398853564303931,
"grad_norm": 7.117484724149919,
"learning_rate": 4.601979997979594e-06,
"loss": 0.3723,
"step": 10690
},
{
"epoch": 0.5403903941819651,
"grad_norm": 42.36231217164134,
"learning_rate": 4.596928982725528e-06,
"loss": 0.3729,
"step": 10700
},
{
"epoch": 0.5408954319335371,
"grad_norm": 8.564465700855026,
"learning_rate": 4.591877967471462e-06,
"loss": 0.3789,
"step": 10710
},
{
"epoch": 0.5414004696851089,
"grad_norm": 8.67428139976207,
"learning_rate": 4.5868269522173955e-06,
"loss": 0.3713,
"step": 10720
},
{
"epoch": 0.5419055074366809,
"grad_norm": 3.39326548400569,
"learning_rate": 4.5817759369633304e-06,
"loss": 0.385,
"step": 10730
},
{
"epoch": 0.5424105451882528,
"grad_norm": 8.005255858360202,
"learning_rate": 4.576724921709264e-06,
"loss": 0.3707,
"step": 10740
},
{
"epoch": 0.5429155829398248,
"grad_norm": 5.061235443799371,
"learning_rate": 4.571673906455198e-06,
"loss": 0.3685,
"step": 10750
},
{
"epoch": 0.5434206206913966,
"grad_norm": 3.7186572910401816,
"learning_rate": 4.566622891201132e-06,
"loss": 0.3793,
"step": 10760
},
{
"epoch": 0.5439256584429686,
"grad_norm": 3.941979325788329,
"learning_rate": 4.561571875947065e-06,
"loss": 0.3723,
"step": 10770
},
{
"epoch": 0.5444306961945405,
"grad_norm": 6.68200128954829,
"learning_rate": 4.556520860692999e-06,
"loss": 0.3699,
"step": 10780
},
{
"epoch": 0.5449357339461125,
"grad_norm": 3.8742413460943204,
"learning_rate": 4.5514698454389335e-06,
"loss": 0.3712,
"step": 10790
},
{
"epoch": 0.5454407716976845,
"grad_norm": 3.5527678739232327,
"learning_rate": 4.546418830184868e-06,
"loss": 0.3797,
"step": 10800
},
{
"epoch": 0.5459458094492563,
"grad_norm": 3.293145624427697,
"learning_rate": 4.541367814930802e-06,
"loss": 0.392,
"step": 10810
},
{
"epoch": 0.5464508472008283,
"grad_norm": 43.93745381251672,
"learning_rate": 4.536316799676736e-06,
"loss": 0.3848,
"step": 10820
},
{
"epoch": 0.5469558849524002,
"grad_norm": 3.1532560256164466,
"learning_rate": 4.531265784422669e-06,
"loss": 0.3833,
"step": 10830
},
{
"epoch": 0.5474609227039722,
"grad_norm": 6.167488921508967,
"learning_rate": 4.526214769168603e-06,
"loss": 0.4006,
"step": 10840
},
{
"epoch": 0.547965960455544,
"grad_norm": 12.229490845691188,
"learning_rate": 4.521163753914537e-06,
"loss": 0.3803,
"step": 10850
},
{
"epoch": 0.548470998207116,
"grad_norm": 4.1950075492065615,
"learning_rate": 4.5161127386604715e-06,
"loss": 0.3842,
"step": 10860
},
{
"epoch": 0.5489760359586879,
"grad_norm": 3.654034176301022,
"learning_rate": 4.511061723406406e-06,
"loss": 0.396,
"step": 10870
},
{
"epoch": 0.5494810737102599,
"grad_norm": 12.04048410661582,
"learning_rate": 4.506010708152339e-06,
"loss": 0.3902,
"step": 10880
},
{
"epoch": 0.5499861114618317,
"grad_norm": 5.763873510396362,
"learning_rate": 4.500959692898273e-06,
"loss": 0.3803,
"step": 10890
},
{
"epoch": 0.5504911492134037,
"grad_norm": 5.97333873812127,
"learning_rate": 4.495908677644207e-06,
"loss": 0.3787,
"step": 10900
},
{
"epoch": 0.5509961869649757,
"grad_norm": 4.51021628683305,
"learning_rate": 4.49085766239014e-06,
"loss": 0.3835,
"step": 10910
},
{
"epoch": 0.5515012247165476,
"grad_norm": 3.902274506866627,
"learning_rate": 4.4858066471360745e-06,
"loss": 0.3762,
"step": 10920
},
{
"epoch": 0.5520062624681195,
"grad_norm": 3.0980828622359335,
"learning_rate": 4.480755631882009e-06,
"loss": 0.3873,
"step": 10930
},
{
"epoch": 0.5525113002196914,
"grad_norm": 2.5033456579398674,
"learning_rate": 4.475704616627943e-06,
"loss": 0.4027,
"step": 10940
},
{
"epoch": 0.5530163379712634,
"grad_norm": 2.888693890103198,
"learning_rate": 4.470653601373877e-06,
"loss": 0.3828,
"step": 10950
},
{
"epoch": 0.5535213757228353,
"grad_norm": 3.5149007219506205,
"learning_rate": 4.46560258611981e-06,
"loss": 0.3797,
"step": 10960
},
{
"epoch": 0.5540264134744072,
"grad_norm": 2.3262448954191894,
"learning_rate": 4.460551570865744e-06,
"loss": 0.385,
"step": 10970
},
{
"epoch": 0.5545314512259791,
"grad_norm": 2.021941174461837,
"learning_rate": 4.4555005556116784e-06,
"loss": 0.3902,
"step": 10980
},
{
"epoch": 0.5550364889775511,
"grad_norm": 3.2136340880143694,
"learning_rate": 4.450449540357612e-06,
"loss": 0.3767,
"step": 10990
},
{
"epoch": 0.555541526729123,
"grad_norm": 6.405644243339184,
"learning_rate": 4.445398525103547e-06,
"loss": 0.394,
"step": 11000
},
{
"epoch": 0.5560465644806949,
"grad_norm": 3.698639548037572,
"learning_rate": 4.44034750984948e-06,
"loss": 0.3733,
"step": 11010
},
{
"epoch": 0.5565516022322669,
"grad_norm": 1.8438864220839366,
"learning_rate": 4.435296494595414e-06,
"loss": 0.387,
"step": 11020
},
{
"epoch": 0.5570566399838388,
"grad_norm": 2.9672016536550654,
"learning_rate": 4.430245479341348e-06,
"loss": 0.3767,
"step": 11030
},
{
"epoch": 0.5575616777354108,
"grad_norm": 9.95951552056093,
"learning_rate": 4.4251944640872815e-06,
"loss": 0.3796,
"step": 11040
},
{
"epoch": 0.5580667154869826,
"grad_norm": 3.844799275005631,
"learning_rate": 4.420143448833216e-06,
"loss": 0.3721,
"step": 11050
},
{
"epoch": 0.5585717532385546,
"grad_norm": 3.606527812089754,
"learning_rate": 4.41509243357915e-06,
"loss": 0.3726,
"step": 11060
},
{
"epoch": 0.5590767909901265,
"grad_norm": 3.99898244357326,
"learning_rate": 4.410041418325084e-06,
"loss": 0.3822,
"step": 11070
},
{
"epoch": 0.5595818287416985,
"grad_norm": 2.365878798228634,
"learning_rate": 4.404990403071018e-06,
"loss": 0.3858,
"step": 11080
},
{
"epoch": 0.5600868664932703,
"grad_norm": 2.0114937465222105,
"learning_rate": 4.399939387816951e-06,
"loss": 0.3619,
"step": 11090
},
{
"epoch": 0.5605919042448423,
"grad_norm": 4.267506041644642,
"learning_rate": 4.394888372562885e-06,
"loss": 0.3959,
"step": 11100
},
{
"epoch": 0.5610969419964142,
"grad_norm": 2.3885028725522663,
"learning_rate": 4.3898373573088195e-06,
"loss": 0.3689,
"step": 11110
},
{
"epoch": 0.5616019797479862,
"grad_norm": 8.297216447663791,
"learning_rate": 4.384786342054753e-06,
"loss": 0.3775,
"step": 11120
},
{
"epoch": 0.562107017499558,
"grad_norm": 2.8469924568130516,
"learning_rate": 4.379735326800687e-06,
"loss": 0.3898,
"step": 11130
},
{
"epoch": 0.56261205525113,
"grad_norm": 6.231958717493552,
"learning_rate": 4.374684311546621e-06,
"loss": 0.3837,
"step": 11140
},
{
"epoch": 0.563117093002702,
"grad_norm": 4.309341792972094,
"learning_rate": 4.369633296292555e-06,
"loss": 0.3828,
"step": 11150
},
{
"epoch": 0.5636221307542739,
"grad_norm": 3.203446767568462,
"learning_rate": 4.364582281038489e-06,
"loss": 0.3903,
"step": 11160
},
{
"epoch": 0.5641271685058458,
"grad_norm": 4.843549458393687,
"learning_rate": 4.359531265784423e-06,
"loss": 0.3855,
"step": 11170
},
{
"epoch": 0.5646322062574177,
"grad_norm": 2.525777084557924,
"learning_rate": 4.354480250530357e-06,
"loss": 0.3898,
"step": 11180
},
{
"epoch": 0.5651372440089897,
"grad_norm": 1.9829345093280084,
"learning_rate": 4.349429235276291e-06,
"loss": 0.3981,
"step": 11190
},
{
"epoch": 0.5656422817605616,
"grad_norm": 2.2195349808444162,
"learning_rate": 4.344378220022225e-06,
"loss": 0.3627,
"step": 11200
},
{
"epoch": 0.5661473195121335,
"grad_norm": 1.8120887020230037,
"learning_rate": 4.339327204768159e-06,
"loss": 0.3785,
"step": 11210
},
{
"epoch": 0.5666523572637054,
"grad_norm": 3.909050155051535,
"learning_rate": 4.334276189514093e-06,
"loss": 0.3893,
"step": 11220
},
{
"epoch": 0.5671573950152774,
"grad_norm": 110.40544319977835,
"learning_rate": 4.3292251742600264e-06,
"loss": 0.3835,
"step": 11230
},
{
"epoch": 0.5676624327668494,
"grad_norm": 2.734463901696715,
"learning_rate": 4.3241741590059606e-06,
"loss": 0.3996,
"step": 11240
},
{
"epoch": 0.5681674705184212,
"grad_norm": 2.044621346060538,
"learning_rate": 4.319123143751895e-06,
"loss": 0.3963,
"step": 11250
},
{
"epoch": 0.5686725082699932,
"grad_norm": 2.6336975727846346,
"learning_rate": 4.314072128497828e-06,
"loss": 0.3862,
"step": 11260
},
{
"epoch": 0.5691775460215651,
"grad_norm": 2.817059832465792,
"learning_rate": 4.309021113243763e-06,
"loss": 0.3944,
"step": 11270
},
{
"epoch": 0.5696825837731371,
"grad_norm": 3.118909901449835,
"learning_rate": 4.303970097989696e-06,
"loss": 0.3746,
"step": 11280
},
{
"epoch": 0.5701876215247089,
"grad_norm": 2.419014264509884,
"learning_rate": 4.29891908273563e-06,
"loss": 0.3756,
"step": 11290
},
{
"epoch": 0.5706926592762809,
"grad_norm": 2.009124011568514,
"learning_rate": 4.2938680674815645e-06,
"loss": 0.381,
"step": 11300
},
{
"epoch": 0.5711976970278528,
"grad_norm": 1.7912588198575379,
"learning_rate": 4.288817052227498e-06,
"loss": 0.3854,
"step": 11310
},
{
"epoch": 0.5717027347794248,
"grad_norm": 2.188948676804761,
"learning_rate": 4.283766036973432e-06,
"loss": 0.4011,
"step": 11320
},
{
"epoch": 0.5722077725309966,
"grad_norm": 5.787714109914013,
"learning_rate": 4.278715021719366e-06,
"loss": 0.3953,
"step": 11330
},
{
"epoch": 0.5727128102825686,
"grad_norm": 2.895115220305218,
"learning_rate": 4.2736640064653e-06,
"loss": 0.3835,
"step": 11340
},
{
"epoch": 0.5732178480341406,
"grad_norm": 3.0581001395729013,
"learning_rate": 4.268612991211234e-06,
"loss": 0.3765,
"step": 11350
},
{
"epoch": 0.5737228857857125,
"grad_norm": 2.8763642234703015,
"learning_rate": 4.2635619759571675e-06,
"loss": 0.3844,
"step": 11360
},
{
"epoch": 0.5742279235372844,
"grad_norm": 2.73350988345317,
"learning_rate": 4.258510960703102e-06,
"loss": 0.3838,
"step": 11370
},
{
"epoch": 0.5747329612888563,
"grad_norm": 2.187379524359724,
"learning_rate": 4.253459945449036e-06,
"loss": 0.3819,
"step": 11380
},
{
"epoch": 0.5752379990404283,
"grad_norm": 2.5046900898952518,
"learning_rate": 4.248408930194969e-06,
"loss": 0.3959,
"step": 11390
},
{
"epoch": 0.5757430367920002,
"grad_norm": 2.058073939600919,
"learning_rate": 4.243357914940903e-06,
"loss": 0.3931,
"step": 11400
},
{
"epoch": 0.5762480745435722,
"grad_norm": 2.15231990076239,
"learning_rate": 4.238306899686837e-06,
"loss": 0.3755,
"step": 11410
},
{
"epoch": 0.576753112295144,
"grad_norm": 2.050683275677417,
"learning_rate": 4.233255884432771e-06,
"loss": 0.3736,
"step": 11420
},
{
"epoch": 0.577258150046716,
"grad_norm": 2.6748692786354624,
"learning_rate": 4.2282048691787055e-06,
"loss": 0.3766,
"step": 11430
},
{
"epoch": 0.577763187798288,
"grad_norm": 3.596459258619974,
"learning_rate": 4.223153853924639e-06,
"loss": 0.3798,
"step": 11440
},
{
"epoch": 0.5782682255498599,
"grad_norm": 3.0261933816595987,
"learning_rate": 4.218102838670573e-06,
"loss": 0.3958,
"step": 11450
},
{
"epoch": 0.5787732633014318,
"grad_norm": 5.146945239564686,
"learning_rate": 4.213051823416507e-06,
"loss": 0.3725,
"step": 11460
},
{
"epoch": 0.5792783010530037,
"grad_norm": 2.4698008594522896,
"learning_rate": 4.20800080816244e-06,
"loss": 0.376,
"step": 11470
},
{
"epoch": 0.5797833388045757,
"grad_norm": 2.107316320060359,
"learning_rate": 4.202949792908375e-06,
"loss": 0.3629,
"step": 11480
},
{
"epoch": 0.5802883765561476,
"grad_norm": 2.0980153420904575,
"learning_rate": 4.197898777654309e-06,
"loss": 0.4014,
"step": 11490
},
{
"epoch": 0.5807934143077195,
"grad_norm": 3.2058729016983136,
"learning_rate": 4.192847762400243e-06,
"loss": 0.3828,
"step": 11500
},
{
"epoch": 0.5812984520592914,
"grad_norm": 2.71859111643498,
"learning_rate": 4.187796747146177e-06,
"loss": 0.3819,
"step": 11510
},
{
"epoch": 0.5818034898108634,
"grad_norm": 2.4534281139850793,
"learning_rate": 4.182745731892111e-06,
"loss": 0.3928,
"step": 11520
},
{
"epoch": 0.5823085275624353,
"grad_norm": 1.8301553116699396,
"learning_rate": 4.177694716638044e-06,
"loss": 0.3654,
"step": 11530
},
{
"epoch": 0.5828135653140072,
"grad_norm": 4.477875878428584,
"learning_rate": 4.172643701383979e-06,
"loss": 0.3748,
"step": 11540
},
{
"epoch": 0.5833186030655791,
"grad_norm": 13.967741045162365,
"learning_rate": 4.1675926861299125e-06,
"loss": 0.3756,
"step": 11550
},
{
"epoch": 0.5838236408171511,
"grad_norm": 3.872741784787074,
"learning_rate": 4.162541670875847e-06,
"loss": 0.3771,
"step": 11560
},
{
"epoch": 0.5843286785687231,
"grad_norm": 4.469317092124396,
"learning_rate": 4.157490655621781e-06,
"loss": 0.374,
"step": 11570
},
{
"epoch": 0.5848337163202949,
"grad_norm": 4.299796050846851,
"learning_rate": 4.152439640367714e-06,
"loss": 0.3721,
"step": 11580
},
{
"epoch": 0.5853387540718669,
"grad_norm": 2.72784485464612,
"learning_rate": 4.147388625113648e-06,
"loss": 0.3752,
"step": 11590
},
{
"epoch": 0.5858437918234388,
"grad_norm": 2.4059258940294574,
"learning_rate": 4.142337609859582e-06,
"loss": 0.3725,
"step": 11600
},
{
"epoch": 0.5863488295750108,
"grad_norm": 3.311734782523416,
"learning_rate": 4.137286594605516e-06,
"loss": 0.3798,
"step": 11610
},
{
"epoch": 0.5868538673265826,
"grad_norm": 3.792159108792209,
"learning_rate": 4.1322355793514505e-06,
"loss": 0.3611,
"step": 11620
},
{
"epoch": 0.5873589050781546,
"grad_norm": 3.7603434596067684,
"learning_rate": 4.127184564097384e-06,
"loss": 0.3807,
"step": 11630
},
{
"epoch": 0.5878639428297265,
"grad_norm": 2.082805585556261,
"learning_rate": 4.122133548843318e-06,
"loss": 0.3713,
"step": 11640
},
{
"epoch": 0.5883689805812985,
"grad_norm": 3.7335861923532847,
"learning_rate": 4.117082533589252e-06,
"loss": 0.37,
"step": 11650
},
{
"epoch": 0.5888740183328703,
"grad_norm": 2.56465711212247,
"learning_rate": 4.112031518335185e-06,
"loss": 0.3902,
"step": 11660
},
{
"epoch": 0.5893790560844423,
"grad_norm": 2.1557448667197843,
"learning_rate": 4.106980503081119e-06,
"loss": 0.3943,
"step": 11670
},
{
"epoch": 0.5898840938360143,
"grad_norm": 4.763988402275346,
"learning_rate": 4.1019294878270535e-06,
"loss": 0.3775,
"step": 11680
},
{
"epoch": 0.5903891315875862,
"grad_norm": 5.278109696777853,
"learning_rate": 4.096878472572988e-06,
"loss": 0.3893,
"step": 11690
},
{
"epoch": 0.5908941693391581,
"grad_norm": 2.6015113581109466,
"learning_rate": 4.091827457318922e-06,
"loss": 0.3746,
"step": 11700
},
{
"epoch": 0.59139920709073,
"grad_norm": 2.851515770660457,
"learning_rate": 4.086776442064855e-06,
"loss": 0.3819,
"step": 11710
},
{
"epoch": 0.591904244842302,
"grad_norm": 2.5969899871366215,
"learning_rate": 4.081725426810789e-06,
"loss": 0.3728,
"step": 11720
},
{
"epoch": 0.5924092825938739,
"grad_norm": 1.9757415738509565,
"learning_rate": 4.076674411556723e-06,
"loss": 0.3724,
"step": 11730
},
{
"epoch": 0.5929143203454458,
"grad_norm": 3.277581750970118,
"learning_rate": 4.071623396302657e-06,
"loss": 0.3688,
"step": 11740
},
{
"epoch": 0.5934193580970177,
"grad_norm": 4.8469992620214155,
"learning_rate": 4.0665723810485916e-06,
"loss": 0.3562,
"step": 11750
},
{
"epoch": 0.5939243958485897,
"grad_norm": 2.323267430569943,
"learning_rate": 4.061521365794525e-06,
"loss": 0.3907,
"step": 11760
},
{
"epoch": 0.5944294336001616,
"grad_norm": 1.9799231963200372,
"learning_rate": 4.056470350540459e-06,
"loss": 0.3894,
"step": 11770
},
{
"epoch": 0.5949344713517335,
"grad_norm": 2.576381259976751,
"learning_rate": 4.051419335286393e-06,
"loss": 0.3709,
"step": 11780
},
{
"epoch": 0.5954395091033055,
"grad_norm": 3.283926951217961,
"learning_rate": 4.046368320032326e-06,
"loss": 0.3938,
"step": 11790
},
{
"epoch": 0.5959445468548774,
"grad_norm": 3.29273998482733,
"learning_rate": 4.0413173047782605e-06,
"loss": 0.3896,
"step": 11800
},
{
"epoch": 0.5964495846064494,
"grad_norm": 2.332643126609973,
"learning_rate": 4.036266289524195e-06,
"loss": 0.3719,
"step": 11810
},
{
"epoch": 0.5969546223580212,
"grad_norm": 3.2558173835297963,
"learning_rate": 4.031215274270129e-06,
"loss": 0.3741,
"step": 11820
},
{
"epoch": 0.5974596601095932,
"grad_norm": 6.835841939224201,
"learning_rate": 4.026164259016063e-06,
"loss": 0.363,
"step": 11830
},
{
"epoch": 0.5979646978611651,
"grad_norm": 2.1914586837180687,
"learning_rate": 4.021113243761996e-06,
"loss": 0.3618,
"step": 11840
},
{
"epoch": 0.5984697356127371,
"grad_norm": 2.360393516962277,
"learning_rate": 4.01606222850793e-06,
"loss": 0.3726,
"step": 11850
},
{
"epoch": 0.5989747733643089,
"grad_norm": 2.46922458613829,
"learning_rate": 4.011011213253864e-06,
"loss": 0.3649,
"step": 11860
},
{
"epoch": 0.5994798111158809,
"grad_norm": 1.886510326818929,
"learning_rate": 4.005960197999798e-06,
"loss": 0.371,
"step": 11870
},
{
"epoch": 0.5999848488674528,
"grad_norm": 4.097224486709572,
"learning_rate": 4.000909182745733e-06,
"loss": 0.3944,
"step": 11880
},
{
"epoch": 0.6004898866190248,
"grad_norm": 7.100119835211903,
"learning_rate": 3.995858167491667e-06,
"loss": 0.3665,
"step": 11890
},
{
"epoch": 0.6009949243705967,
"grad_norm": 1.7863799930508097,
"learning_rate": 3.9908071522376e-06,
"loss": 0.3748,
"step": 11900
},
{
"epoch": 0.6014999621221686,
"grad_norm": 3.0047977958425487,
"learning_rate": 3.985756136983534e-06,
"loss": 0.3869,
"step": 11910
},
{
"epoch": 0.6020049998737406,
"grad_norm": 3.077659231433922,
"learning_rate": 3.980705121729468e-06,
"loss": 0.3618,
"step": 11920
},
{
"epoch": 0.6025100376253125,
"grad_norm": 5.808559896098692,
"learning_rate": 3.9756541064754015e-06,
"loss": 0.3735,
"step": 11930
},
{
"epoch": 0.6030150753768844,
"grad_norm": 2.1197853890011285,
"learning_rate": 3.970603091221336e-06,
"loss": 0.3901,
"step": 11940
},
{
"epoch": 0.6035201131284563,
"grad_norm": 2.0357339213647054,
"learning_rate": 3.96555207596727e-06,
"loss": 0.3768,
"step": 11950
},
{
"epoch": 0.6040251508800283,
"grad_norm": 1.9386614623259042,
"learning_rate": 3.960501060713204e-06,
"loss": 0.371,
"step": 11960
},
{
"epoch": 0.6045301886316002,
"grad_norm": 3.573185030358563,
"learning_rate": 3.955450045459138e-06,
"loss": 0.3748,
"step": 11970
},
{
"epoch": 0.6050352263831721,
"grad_norm": 2.6490276092763225,
"learning_rate": 3.950399030205071e-06,
"loss": 0.3907,
"step": 11980
},
{
"epoch": 0.605540264134744,
"grad_norm": 1.9765526236319297,
"learning_rate": 3.9453480149510054e-06,
"loss": 0.3851,
"step": 11990
},
{
"epoch": 0.606045301886316,
"grad_norm": 2.6148181435511866,
"learning_rate": 3.9402969996969396e-06,
"loss": 0.3829,
"step": 12000
},
{
"epoch": 0.606550339637888,
"grad_norm": 2.0137917601843194,
"learning_rate": 3.935245984442873e-06,
"loss": 0.3876,
"step": 12010
},
{
"epoch": 0.6070553773894599,
"grad_norm": 2.183286213214996,
"learning_rate": 3.930194969188808e-06,
"loss": 0.3852,
"step": 12020
},
{
"epoch": 0.6075604151410318,
"grad_norm": 1.8996262451525328,
"learning_rate": 3.925143953934741e-06,
"loss": 0.3737,
"step": 12030
},
{
"epoch": 0.6080654528926037,
"grad_norm": 2.1458215777612293,
"learning_rate": 3.920092938680675e-06,
"loss": 0.3705,
"step": 12040
},
{
"epoch": 0.6085704906441757,
"grad_norm": 1.611205289782356,
"learning_rate": 3.915041923426609e-06,
"loss": 0.3774,
"step": 12050
},
{
"epoch": 0.6090755283957476,
"grad_norm": 1.8784320811926083,
"learning_rate": 3.909990908172543e-06,
"loss": 0.3705,
"step": 12060
},
{
"epoch": 0.6095805661473195,
"grad_norm": 2.0214114750705283,
"learning_rate": 3.904939892918477e-06,
"loss": 0.3579,
"step": 12070
},
{
"epoch": 0.6100856038988914,
"grad_norm": 2.52455375915045,
"learning_rate": 3.899888877664411e-06,
"loss": 0.3757,
"step": 12080
},
{
"epoch": 0.6105906416504634,
"grad_norm": 2.7488427549157963,
"learning_rate": 3.894837862410345e-06,
"loss": 0.3753,
"step": 12090
},
{
"epoch": 0.6110956794020354,
"grad_norm": 2.366101694404488,
"learning_rate": 3.889786847156279e-06,
"loss": 0.3583,
"step": 12100
},
{
"epoch": 0.6116007171536072,
"grad_norm": 4.526955084775627,
"learning_rate": 3.884735831902212e-06,
"loss": 0.373,
"step": 12110
},
{
"epoch": 0.6121057549051792,
"grad_norm": 1.6632942025151354,
"learning_rate": 3.8796848166481465e-06,
"loss": 0.3797,
"step": 12120
},
{
"epoch": 0.6126107926567511,
"grad_norm": 2.122544662038796,
"learning_rate": 3.874633801394081e-06,
"loss": 0.3763,
"step": 12130
},
{
"epoch": 0.6131158304083231,
"grad_norm": 2.136310843503549,
"learning_rate": 3.869582786140014e-06,
"loss": 0.3822,
"step": 12140
},
{
"epoch": 0.6136208681598949,
"grad_norm": 1.701221883208955,
"learning_rate": 3.864531770885949e-06,
"loss": 0.3636,
"step": 12150
},
{
"epoch": 0.6141259059114669,
"grad_norm": 1.9524548661829169,
"learning_rate": 3.859480755631882e-06,
"loss": 0.3913,
"step": 12160
},
{
"epoch": 0.6146309436630388,
"grad_norm": 2.58084193291891,
"learning_rate": 3.854429740377816e-06,
"loss": 0.3647,
"step": 12170
},
{
"epoch": 0.6151359814146108,
"grad_norm": 2.6683083927745552,
"learning_rate": 3.84937872512375e-06,
"loss": 0.3834,
"step": 12180
},
{
"epoch": 0.6156410191661826,
"grad_norm": 5.415821035201854,
"learning_rate": 3.844327709869684e-06,
"loss": 0.3679,
"step": 12190
},
{
"epoch": 0.6161460569177546,
"grad_norm": 1.7693494201780369,
"learning_rate": 3.839276694615618e-06,
"loss": 0.3675,
"step": 12200
},
{
"epoch": 0.6166510946693265,
"grad_norm": 2.025826626656601,
"learning_rate": 3.834225679361552e-06,
"loss": 0.3807,
"step": 12210
},
{
"epoch": 0.6171561324208985,
"grad_norm": 2.409768770452221,
"learning_rate": 3.829174664107486e-06,
"loss": 0.3718,
"step": 12220
},
{
"epoch": 0.6176611701724704,
"grad_norm": 2.81111834040087,
"learning_rate": 3.82412364885342e-06,
"loss": 0.3809,
"step": 12230
},
{
"epoch": 0.6181662079240423,
"grad_norm": 2.261752998915838,
"learning_rate": 3.8190726335993535e-06,
"loss": 0.392,
"step": 12240
},
{
"epoch": 0.6186712456756143,
"grad_norm": 4.1776939292020066,
"learning_rate": 3.8140216183452876e-06,
"loss": 0.3738,
"step": 12250
},
{
"epoch": 0.6191762834271862,
"grad_norm": 2.1437198828433197,
"learning_rate": 3.8089706030912217e-06,
"loss": 0.3719,
"step": 12260
},
{
"epoch": 0.6196813211787581,
"grad_norm": 3.075348229616783,
"learning_rate": 3.8039195878371554e-06,
"loss": 0.3706,
"step": 12270
},
{
"epoch": 0.62018635893033,
"grad_norm": 2.0146213511322566,
"learning_rate": 3.798868572583089e-06,
"loss": 0.3897,
"step": 12280
},
{
"epoch": 0.620691396681902,
"grad_norm": 3.0389012573946625,
"learning_rate": 3.7938175573290236e-06,
"loss": 0.3744,
"step": 12290
},
{
"epoch": 0.6211964344334739,
"grad_norm": 3.500847066826284,
"learning_rate": 3.7887665420749573e-06,
"loss": 0.3645,
"step": 12300
},
{
"epoch": 0.6217014721850458,
"grad_norm": 4.562441605553007,
"learning_rate": 3.7837155268208915e-06,
"loss": 0.3738,
"step": 12310
},
{
"epoch": 0.6222065099366177,
"grad_norm": 2.8281857847564957,
"learning_rate": 3.778664511566825e-06,
"loss": 0.3645,
"step": 12320
},
{
"epoch": 0.6227115476881897,
"grad_norm": 4.569461603791169,
"learning_rate": 3.7736134963127593e-06,
"loss": 0.3525,
"step": 12330
},
{
"epoch": 0.6232165854397617,
"grad_norm": 3.3932139149292198,
"learning_rate": 3.768562481058693e-06,
"loss": 0.3686,
"step": 12340
},
{
"epoch": 0.6237216231913335,
"grad_norm": 3.208108830968236,
"learning_rate": 3.7635114658046267e-06,
"loss": 0.3675,
"step": 12350
},
{
"epoch": 0.6242266609429055,
"grad_norm": 3.325030777041727,
"learning_rate": 3.7584604505505612e-06,
"loss": 0.3827,
"step": 12360
},
{
"epoch": 0.6247316986944774,
"grad_norm": 3.001621471243721,
"learning_rate": 3.753409435296495e-06,
"loss": 0.3673,
"step": 12370
},
{
"epoch": 0.6252367364460494,
"grad_norm": 2.7314438383701263,
"learning_rate": 3.748358420042429e-06,
"loss": 0.3831,
"step": 12380
},
{
"epoch": 0.6257417741976212,
"grad_norm": 4.37429312079951,
"learning_rate": 3.7433074047883628e-06,
"loss": 0.3665,
"step": 12390
},
{
"epoch": 0.6262468119491932,
"grad_norm": 3.9619988373008077,
"learning_rate": 3.7382563895342965e-06,
"loss": 0.3747,
"step": 12400
},
{
"epoch": 0.6267518497007651,
"grad_norm": 4.80089629937948,
"learning_rate": 3.7332053742802306e-06,
"loss": 0.3777,
"step": 12410
},
{
"epoch": 0.6272568874523371,
"grad_norm": 3.8675160835398485,
"learning_rate": 3.7281543590261643e-06,
"loss": 0.3644,
"step": 12420
},
{
"epoch": 0.6277619252039089,
"grad_norm": 2.235819960025121,
"learning_rate": 3.723103343772099e-06,
"loss": 0.37,
"step": 12430
},
{
"epoch": 0.6282669629554809,
"grad_norm": 2.1802012458708164,
"learning_rate": 3.7180523285180325e-06,
"loss": 0.3783,
"step": 12440
},
{
"epoch": 0.6287720007070529,
"grad_norm": 5.254753451223298,
"learning_rate": 3.7130013132639662e-06,
"loss": 0.3778,
"step": 12450
},
{
"epoch": 0.6292770384586248,
"grad_norm": 3.0569298328433794,
"learning_rate": 3.7079502980099004e-06,
"loss": 0.3708,
"step": 12460
},
{
"epoch": 0.6297820762101967,
"grad_norm": 3.453756701658818,
"learning_rate": 3.702899282755834e-06,
"loss": 0.3614,
"step": 12470
},
{
"epoch": 0.6302871139617686,
"grad_norm": 3.0406418341037105,
"learning_rate": 3.6978482675017678e-06,
"loss": 0.3605,
"step": 12480
},
{
"epoch": 0.6307921517133406,
"grad_norm": 5.948695461444471,
"learning_rate": 3.6927972522477023e-06,
"loss": 0.3769,
"step": 12490
},
{
"epoch": 0.6312971894649125,
"grad_norm": 3.6565401647116986,
"learning_rate": 3.687746236993636e-06,
"loss": 0.3598,
"step": 12500
},
{
"epoch": 0.6318022272164844,
"grad_norm": 2.386153355802283,
"learning_rate": 3.68269522173957e-06,
"loss": 0.3668,
"step": 12510
},
{
"epoch": 0.6323072649680563,
"grad_norm": 5.317495105141212,
"learning_rate": 3.677644206485504e-06,
"loss": 0.3678,
"step": 12520
},
{
"epoch": 0.6328123027196283,
"grad_norm": 6.576082498743726,
"learning_rate": 3.672593191231438e-06,
"loss": 0.3668,
"step": 12530
},
{
"epoch": 0.6333173404712003,
"grad_norm": 6.268613985781669,
"learning_rate": 3.6675421759773717e-06,
"loss": 0.3635,
"step": 12540
},
{
"epoch": 0.6338223782227721,
"grad_norm": 5.931534215833603,
"learning_rate": 3.6624911607233054e-06,
"loss": 0.3836,
"step": 12550
},
{
"epoch": 0.634327415974344,
"grad_norm": 3.414474702810524,
"learning_rate": 3.65744014546924e-06,
"loss": 0.3638,
"step": 12560
},
{
"epoch": 0.634832453725916,
"grad_norm": 2.5786921065802906,
"learning_rate": 3.6523891302151736e-06,
"loss": 0.3792,
"step": 12570
},
{
"epoch": 0.635337491477488,
"grad_norm": 3.3296465725491973,
"learning_rate": 3.6473381149611077e-06,
"loss": 0.3726,
"step": 12580
},
{
"epoch": 0.6358425292290599,
"grad_norm": 5.112120958135022,
"learning_rate": 3.6422870997070414e-06,
"loss": 0.3608,
"step": 12590
},
{
"epoch": 0.6363475669806318,
"grad_norm": 4.712824755681933,
"learning_rate": 3.637236084452975e-06,
"loss": 0.361,
"step": 12600
},
{
"epoch": 0.6368526047322037,
"grad_norm": 2.557395610295011,
"learning_rate": 3.6321850691989093e-06,
"loss": 0.3646,
"step": 12610
},
{
"epoch": 0.6373576424837757,
"grad_norm": 2.4265764249895962,
"learning_rate": 3.627134053944843e-06,
"loss": 0.3673,
"step": 12620
},
{
"epoch": 0.6378626802353476,
"grad_norm": 9.111738338010397,
"learning_rate": 3.6220830386907775e-06,
"loss": 0.3748,
"step": 12630
},
{
"epoch": 0.6383677179869195,
"grad_norm": 13.21927265302919,
"learning_rate": 3.617032023436711e-06,
"loss": 0.3736,
"step": 12640
},
{
"epoch": 0.6388727557384914,
"grad_norm": 3.433344571738311,
"learning_rate": 3.611981008182645e-06,
"loss": 0.3804,
"step": 12650
},
{
"epoch": 0.6393777934900634,
"grad_norm": 3.3546640582148393,
"learning_rate": 3.606929992928579e-06,
"loss": 0.3512,
"step": 12660
},
{
"epoch": 0.6398828312416354,
"grad_norm": 2.665395597695038,
"learning_rate": 3.6018789776745127e-06,
"loss": 0.3777,
"step": 12670
},
{
"epoch": 0.6403878689932072,
"grad_norm": 3.7554388445793343,
"learning_rate": 3.5968279624204464e-06,
"loss": 0.3745,
"step": 12680
},
{
"epoch": 0.6408929067447792,
"grad_norm": 5.3470856792032695,
"learning_rate": 3.5917769471663805e-06,
"loss": 0.3712,
"step": 12690
},
{
"epoch": 0.6413979444963511,
"grad_norm": 3.090410688880816,
"learning_rate": 3.586725931912315e-06,
"loss": 0.3621,
"step": 12700
},
{
"epoch": 0.6419029822479231,
"grad_norm": 2.664925787957406,
"learning_rate": 3.581674916658249e-06,
"loss": 0.3543,
"step": 12710
},
{
"epoch": 0.6424080199994949,
"grad_norm": 2.449259146215423,
"learning_rate": 3.5766239014041825e-06,
"loss": 0.3637,
"step": 12720
},
{
"epoch": 0.6429130577510669,
"grad_norm": 3.159318328572562,
"learning_rate": 3.5715728861501166e-06,
"loss": 0.3793,
"step": 12730
},
{
"epoch": 0.6434180955026388,
"grad_norm": 2.7133150428468835,
"learning_rate": 3.5665218708960503e-06,
"loss": 0.3798,
"step": 12740
},
{
"epoch": 0.6439231332542108,
"grad_norm": 3.374200452364885,
"learning_rate": 3.561470855641984e-06,
"loss": 0.3624,
"step": 12750
},
{
"epoch": 0.6444281710057826,
"grad_norm": 2.9622292324304365,
"learning_rate": 3.556419840387918e-06,
"loss": 0.3553,
"step": 12760
},
{
"epoch": 0.6449332087573546,
"grad_norm": 2.609047480998997,
"learning_rate": 3.5513688251338523e-06,
"loss": 0.3721,
"step": 12770
},
{
"epoch": 0.6454382465089266,
"grad_norm": 5.058720798344808,
"learning_rate": 3.5463178098797864e-06,
"loss": 0.3486,
"step": 12780
},
{
"epoch": 0.6459432842604985,
"grad_norm": 2.2596797012742558,
"learning_rate": 3.54126679462572e-06,
"loss": 0.3696,
"step": 12790
},
{
"epoch": 0.6464483220120704,
"grad_norm": 2.5737345357996815,
"learning_rate": 3.536215779371654e-06,
"loss": 0.3534,
"step": 12800
},
{
"epoch": 0.6469533597636423,
"grad_norm": 3.9591498502643305,
"learning_rate": 3.531164764117588e-06,
"loss": 0.3635,
"step": 12810
},
{
"epoch": 0.6474583975152143,
"grad_norm": 4.5793714339332805,
"learning_rate": 3.5261137488635216e-06,
"loss": 0.3633,
"step": 12820
},
{
"epoch": 0.6479634352667862,
"grad_norm": 2.4696926794812173,
"learning_rate": 3.521062733609456e-06,
"loss": 0.3681,
"step": 12830
},
{
"epoch": 0.6484684730183581,
"grad_norm": 10.025961417450299,
"learning_rate": 3.51601171835539e-06,
"loss": 0.3645,
"step": 12840
},
{
"epoch": 0.64897351076993,
"grad_norm": 2.2863708800017934,
"learning_rate": 3.5109607031013236e-06,
"loss": 0.3686,
"step": 12850
},
{
"epoch": 0.649478548521502,
"grad_norm": 1.9177686162549965,
"learning_rate": 3.5059096878472577e-06,
"loss": 0.3673,
"step": 12860
},
{
"epoch": 0.649983586273074,
"grad_norm": 5.445065512861327,
"learning_rate": 3.5008586725931914e-06,
"loss": 0.3801,
"step": 12870
},
{
"epoch": 0.6504886240246458,
"grad_norm": 2.6676713316438336,
"learning_rate": 3.4958076573391255e-06,
"loss": 0.3779,
"step": 12880
},
{
"epoch": 0.6509936617762178,
"grad_norm": 3.8611942525569374,
"learning_rate": 3.490756642085059e-06,
"loss": 0.375,
"step": 12890
},
{
"epoch": 0.6514986995277897,
"grad_norm": 3.774007520734188,
"learning_rate": 3.4857056268309938e-06,
"loss": 0.3768,
"step": 12900
},
{
"epoch": 0.6520037372793617,
"grad_norm": 3.200552688232275,
"learning_rate": 3.4806546115769275e-06,
"loss": 0.3831,
"step": 12910
},
{
"epoch": 0.6525087750309335,
"grad_norm": 1.9098229490724261,
"learning_rate": 3.475603596322861e-06,
"loss": 0.379,
"step": 12920
},
{
"epoch": 0.6530138127825055,
"grad_norm": 3.943477029717605,
"learning_rate": 3.4705525810687953e-06,
"loss": 0.3712,
"step": 12930
},
{
"epoch": 0.6535188505340774,
"grad_norm": 4.9375810935188165,
"learning_rate": 3.465501565814729e-06,
"loss": 0.3727,
"step": 12940
},
{
"epoch": 0.6540238882856494,
"grad_norm": 5.033670107006043,
"learning_rate": 3.4604505505606627e-06,
"loss": 0.3663,
"step": 12950
},
{
"epoch": 0.6545289260372212,
"grad_norm": 3.064561646751532,
"learning_rate": 3.455399535306597e-06,
"loss": 0.3777,
"step": 12960
},
{
"epoch": 0.6550339637887932,
"grad_norm": 2.3278833688543594,
"learning_rate": 3.450348520052531e-06,
"loss": 0.3539,
"step": 12970
},
{
"epoch": 0.6555390015403652,
"grad_norm": 2.194839952891626,
"learning_rate": 3.445297504798465e-06,
"loss": 0.3623,
"step": 12980
},
{
"epoch": 0.6560440392919371,
"grad_norm": 4.478106964413342,
"learning_rate": 3.4402464895443987e-06,
"loss": 0.3717,
"step": 12990
},
{
"epoch": 0.656549077043509,
"grad_norm": 3.862763827323303,
"learning_rate": 3.4351954742903324e-06,
"loss": 0.3497,
"step": 13000
},
{
"epoch": 0.6570541147950809,
"grad_norm": 6.994733245951842,
"learning_rate": 3.4301444590362666e-06,
"loss": 0.3814,
"step": 13010
},
{
"epoch": 0.6575591525466529,
"grad_norm": 5.798821632935566,
"learning_rate": 3.4250934437822003e-06,
"loss": 0.3632,
"step": 13020
},
{
"epoch": 0.6580641902982248,
"grad_norm": 4.04056214450326,
"learning_rate": 3.420042428528134e-06,
"loss": 0.3848,
"step": 13030
},
{
"epoch": 0.6585692280497967,
"grad_norm": 2.567673036911413,
"learning_rate": 3.4149914132740685e-06,
"loss": 0.3613,
"step": 13040
},
{
"epoch": 0.6590742658013686,
"grad_norm": 4.569693836912058,
"learning_rate": 3.4099403980200022e-06,
"loss": 0.3689,
"step": 13050
},
{
"epoch": 0.6595793035529406,
"grad_norm": 7.024167809375034,
"learning_rate": 3.4048893827659363e-06,
"loss": 0.3752,
"step": 13060
},
{
"epoch": 0.6600843413045125,
"grad_norm": 15.307342736787076,
"learning_rate": 3.39983836751187e-06,
"loss": 0.3563,
"step": 13070
},
{
"epoch": 0.6605893790560844,
"grad_norm": 5.154751774553765,
"learning_rate": 3.394787352257804e-06,
"loss": 0.374,
"step": 13080
},
{
"epoch": 0.6610944168076563,
"grad_norm": 6.4670558523466894,
"learning_rate": 3.389736337003738e-06,
"loss": 0.3662,
"step": 13090
},
{
"epoch": 0.6615994545592283,
"grad_norm": 4.631202509755095,
"learning_rate": 3.3846853217496724e-06,
"loss": 0.3719,
"step": 13100
},
{
"epoch": 0.6621044923108003,
"grad_norm": 2.638649824542641,
"learning_rate": 3.379634306495606e-06,
"loss": 0.3649,
"step": 13110
},
{
"epoch": 0.6626095300623721,
"grad_norm": 4.93421678581268,
"learning_rate": 3.37458329124154e-06,
"loss": 0.3583,
"step": 13120
},
{
"epoch": 0.6631145678139441,
"grad_norm": 8.596147858750863,
"learning_rate": 3.369532275987474e-06,
"loss": 0.3757,
"step": 13130
},
{
"epoch": 0.663619605565516,
"grad_norm": 2.389686125334587,
"learning_rate": 3.3644812607334076e-06,
"loss": 0.3665,
"step": 13140
},
{
"epoch": 0.664124643317088,
"grad_norm": 7.517201643559354,
"learning_rate": 3.3594302454793413e-06,
"loss": 0.3772,
"step": 13150
},
{
"epoch": 0.6646296810686598,
"grad_norm": 3.2962729985486843,
"learning_rate": 3.3543792302252755e-06,
"loss": 0.3662,
"step": 13160
},
{
"epoch": 0.6651347188202318,
"grad_norm": 2.696587145771749,
"learning_rate": 3.3493282149712096e-06,
"loss": 0.3784,
"step": 13170
},
{
"epoch": 0.6656397565718037,
"grad_norm": 9.007208510801052,
"learning_rate": 3.3442771997171437e-06,
"loss": 0.3754,
"step": 13180
},
{
"epoch": 0.6661447943233757,
"grad_norm": 3.6378171433319384,
"learning_rate": 3.3392261844630774e-06,
"loss": 0.36,
"step": 13190
},
{
"epoch": 0.6666498320749477,
"grad_norm": 5.542336432011706,
"learning_rate": 3.334175169209011e-06,
"loss": 0.3594,
"step": 13200
},
{
"epoch": 0.6671548698265195,
"grad_norm": 3.865761953639028,
"learning_rate": 3.3291241539549452e-06,
"loss": 0.3627,
"step": 13210
},
{
"epoch": 0.6676599075780915,
"grad_norm": 8.688331722475619,
"learning_rate": 3.324073138700879e-06,
"loss": 0.3583,
"step": 13220
},
{
"epoch": 0.6681649453296634,
"grad_norm": 7.506416675568487,
"learning_rate": 3.319022123446813e-06,
"loss": 0.389,
"step": 13230
},
{
"epoch": 0.6686699830812354,
"grad_norm": 5.431932592827668,
"learning_rate": 3.313971108192747e-06,
"loss": 0.3544,
"step": 13240
},
{
"epoch": 0.6691750208328072,
"grad_norm": 5.709981208456748,
"learning_rate": 3.3089200929386813e-06,
"loss": 0.3872,
"step": 13250
},
{
"epoch": 0.6696800585843792,
"grad_norm": 3.72615004588395,
"learning_rate": 3.303869077684615e-06,
"loss": 0.3694,
"step": 13260
},
{
"epoch": 0.6701850963359511,
"grad_norm": 4.672631711696132,
"learning_rate": 3.2988180624305487e-06,
"loss": 0.3552,
"step": 13270
},
{
"epoch": 0.6706901340875231,
"grad_norm": 4.166097239004073,
"learning_rate": 3.293767047176483e-06,
"loss": 0.3705,
"step": 13280
},
{
"epoch": 0.6711951718390949,
"grad_norm": 9.195919275508842,
"learning_rate": 3.2887160319224165e-06,
"loss": 0.3688,
"step": 13290
},
{
"epoch": 0.6717002095906669,
"grad_norm": 6.6636791033610185,
"learning_rate": 3.2836650166683502e-06,
"loss": 0.3621,
"step": 13300
},
{
"epoch": 0.6722052473422389,
"grad_norm": 5.256124870002979,
"learning_rate": 3.2786140014142848e-06,
"loss": 0.3611,
"step": 13310
},
{
"epoch": 0.6727102850938108,
"grad_norm": 7.691842196761616,
"learning_rate": 3.2735629861602185e-06,
"loss": 0.3634,
"step": 13320
},
{
"epoch": 0.6732153228453827,
"grad_norm": 4.350658776477639,
"learning_rate": 3.2685119709061526e-06,
"loss": 0.3588,
"step": 13330
},
{
"epoch": 0.6737203605969546,
"grad_norm": 11.623446284645299,
"learning_rate": 3.2634609556520863e-06,
"loss": 0.371,
"step": 13340
},
{
"epoch": 0.6742253983485266,
"grad_norm": 3.656024443945701,
"learning_rate": 3.25840994039802e-06,
"loss": 0.3496,
"step": 13350
},
{
"epoch": 0.6747304361000985,
"grad_norm": 4.747809479838541,
"learning_rate": 3.253358925143954e-06,
"loss": 0.369,
"step": 13360
},
{
"epoch": 0.6752354738516704,
"grad_norm": 3.045504794009347,
"learning_rate": 3.248307909889888e-06,
"loss": 0.3675,
"step": 13370
},
{
"epoch": 0.6757405116032423,
"grad_norm": 4.5225509793672405,
"learning_rate": 3.2432568946358224e-06,
"loss": 0.364,
"step": 13380
},
{
"epoch": 0.6762455493548143,
"grad_norm": 6.376859379432204,
"learning_rate": 3.238205879381756e-06,
"loss": 0.3538,
"step": 13390
},
{
"epoch": 0.6767505871063862,
"grad_norm": 4.459340178610681,
"learning_rate": 3.2331548641276898e-06,
"loss": 0.3549,
"step": 13400
},
{
"epoch": 0.6772556248579581,
"grad_norm": 4.009290064517063,
"learning_rate": 3.228103848873624e-06,
"loss": 0.3635,
"step": 13410
},
{
"epoch": 0.67776066260953,
"grad_norm": 4.352335627454303,
"learning_rate": 3.2230528336195576e-06,
"loss": 0.3642,
"step": 13420
},
{
"epoch": 0.678265700361102,
"grad_norm": 8.538415021418777,
"learning_rate": 3.2180018183654917e-06,
"loss": 0.3638,
"step": 13430
},
{
"epoch": 0.678770738112674,
"grad_norm": 9.929796560384965,
"learning_rate": 3.212950803111426e-06,
"loss": 0.3733,
"step": 13440
},
{
"epoch": 0.6792757758642458,
"grad_norm": 10.24812538481589,
"learning_rate": 3.20789978785736e-06,
"loss": 0.3733,
"step": 13450
},
{
"epoch": 0.6797808136158178,
"grad_norm": 4.911901994444236,
"learning_rate": 3.2028487726032937e-06,
"loss": 0.3546,
"step": 13460
},
{
"epoch": 0.6802858513673897,
"grad_norm": 9.459161115684884,
"learning_rate": 3.1977977573492274e-06,
"loss": 0.3604,
"step": 13470
},
{
"epoch": 0.6807908891189617,
"grad_norm": 58.44069759388177,
"learning_rate": 3.1927467420951615e-06,
"loss": 0.3682,
"step": 13480
},
{
"epoch": 0.6812959268705335,
"grad_norm": 5.773542468847822,
"learning_rate": 3.187695726841095e-06,
"loss": 0.3654,
"step": 13490
},
{
"epoch": 0.6818009646221055,
"grad_norm": 7.236039686643189,
"learning_rate": 3.182644711587029e-06,
"loss": 0.3581,
"step": 13500
},
{
"epoch": 0.6823060023736774,
"grad_norm": 8.164527538084936,
"learning_rate": 3.1775936963329634e-06,
"loss": 0.3719,
"step": 13510
},
{
"epoch": 0.6828110401252494,
"grad_norm": 8.750460822533052,
"learning_rate": 3.172542681078897e-06,
"loss": 0.3685,
"step": 13520
},
{
"epoch": 0.6833160778768212,
"grad_norm": 8.31780664281577,
"learning_rate": 3.1674916658248313e-06,
"loss": 0.3738,
"step": 13530
},
{
"epoch": 0.6838211156283932,
"grad_norm": 39.92814930883877,
"learning_rate": 3.162440650570765e-06,
"loss": 0.3561,
"step": 13540
},
{
"epoch": 0.6843261533799652,
"grad_norm": 7.250183885140758,
"learning_rate": 3.1573896353166987e-06,
"loss": 0.3763,
"step": 13550
},
{
"epoch": 0.6848311911315371,
"grad_norm": 7.545512249578723,
"learning_rate": 3.1523386200626328e-06,
"loss": 0.3619,
"step": 13560
},
{
"epoch": 0.685336228883109,
"grad_norm": 13.136078405966021,
"learning_rate": 3.1472876048085665e-06,
"loss": 0.3506,
"step": 13570
},
{
"epoch": 0.6858412666346809,
"grad_norm": 12.847273364738184,
"learning_rate": 3.142236589554501e-06,
"loss": 0.3718,
"step": 13580
},
{
"epoch": 0.6863463043862529,
"grad_norm": 5.131472126489263,
"learning_rate": 3.1371855743004347e-06,
"loss": 0.3573,
"step": 13590
},
{
"epoch": 0.6868513421378248,
"grad_norm": 8.846171149150827,
"learning_rate": 3.132134559046369e-06,
"loss": 0.3645,
"step": 13600
},
{
"epoch": 0.6873563798893967,
"grad_norm": 7.08666632645302,
"learning_rate": 3.1270835437923026e-06,
"loss": 0.3863,
"step": 13610
},
{
"epoch": 0.6878614176409686,
"grad_norm": 9.125152763348956,
"learning_rate": 3.1220325285382363e-06,
"loss": 0.3612,
"step": 13620
},
{
"epoch": 0.6883664553925406,
"grad_norm": 22.795792171268904,
"learning_rate": 3.1169815132841704e-06,
"loss": 0.3514,
"step": 13630
},
{
"epoch": 0.6888714931441126,
"grad_norm": 13.972423486929486,
"learning_rate": 3.111930498030104e-06,
"loss": 0.343,
"step": 13640
},
{
"epoch": 0.6893765308956844,
"grad_norm": 7.146064054239514,
"learning_rate": 3.1068794827760386e-06,
"loss": 0.3634,
"step": 13650
},
{
"epoch": 0.6898815686472564,
"grad_norm": 12.633883151844028,
"learning_rate": 3.1018284675219723e-06,
"loss": 0.3622,
"step": 13660
},
{
"epoch": 0.6903866063988283,
"grad_norm": 10.567266022971053,
"learning_rate": 3.096777452267906e-06,
"loss": 0.3775,
"step": 13670
},
{
"epoch": 0.6908916441504003,
"grad_norm": 10.665161639920084,
"learning_rate": 3.09172643701384e-06,
"loss": 0.3473,
"step": 13680
},
{
"epoch": 0.6913966819019721,
"grad_norm": 13.421618433035983,
"learning_rate": 3.086675421759774e-06,
"loss": 0.3566,
"step": 13690
},
{
"epoch": 0.6919017196535441,
"grad_norm": 19.50453127590542,
"learning_rate": 3.0816244065057076e-06,
"loss": 0.3524,
"step": 13700
},
{
"epoch": 0.692406757405116,
"grad_norm": 11.56456458693372,
"learning_rate": 3.0765733912516417e-06,
"loss": 0.3489,
"step": 13710
},
{
"epoch": 0.692911795156688,
"grad_norm": 9.379959657537876,
"learning_rate": 3.071522375997576e-06,
"loss": 0.3736,
"step": 13720
},
{
"epoch": 0.6934168329082598,
"grad_norm": 22.486832278224945,
"learning_rate": 3.06647136074351e-06,
"loss": 0.3586,
"step": 13730
},
{
"epoch": 0.6939218706598318,
"grad_norm": 10.367615306255258,
"learning_rate": 3.0614203454894436e-06,
"loss": 0.3598,
"step": 13740
},
{
"epoch": 0.6944269084114038,
"grad_norm": 13.659141357148844,
"learning_rate": 3.0563693302353773e-06,
"loss": 0.3559,
"step": 13750
},
{
"epoch": 0.6949319461629757,
"grad_norm": 28.090360883092867,
"learning_rate": 3.0513183149813114e-06,
"loss": 0.3572,
"step": 13760
},
{
"epoch": 0.6954369839145476,
"grad_norm": 18.453465716800892,
"learning_rate": 3.046267299727245e-06,
"loss": 0.3724,
"step": 13770
},
{
"epoch": 0.6959420216661195,
"grad_norm": 29.69383075969026,
"learning_rate": 3.0412162844731797e-06,
"loss": 0.3715,
"step": 13780
},
{
"epoch": 0.6964470594176915,
"grad_norm": 5.487134314962381,
"learning_rate": 3.0361652692191134e-06,
"loss": 0.3498,
"step": 13790
},
{
"epoch": 0.6969520971692634,
"grad_norm": 5.672011269458868,
"learning_rate": 3.0311142539650475e-06,
"loss": 0.3681,
"step": 13800
},
{
"epoch": 0.6974571349208354,
"grad_norm": 4.839559671681653,
"learning_rate": 3.0260632387109812e-06,
"loss": 0.3623,
"step": 13810
},
{
"epoch": 0.6979621726724072,
"grad_norm": 4.235787248204053,
"learning_rate": 3.021012223456915e-06,
"loss": 0.3607,
"step": 13820
},
{
"epoch": 0.6984672104239792,
"grad_norm": 4.608668652433986,
"learning_rate": 3.015961208202849e-06,
"loss": 0.3477,
"step": 13830
},
{
"epoch": 0.6989722481755511,
"grad_norm": 4.704716321355951,
"learning_rate": 3.0109101929487827e-06,
"loss": 0.3685,
"step": 13840
},
{
"epoch": 0.6994772859271231,
"grad_norm": 6.503590629238218,
"learning_rate": 3.0058591776947173e-06,
"loss": 0.3721,
"step": 13850
},
{
"epoch": 0.699982323678695,
"grad_norm": 4.428750198778166,
"learning_rate": 3.000808162440651e-06,
"loss": 0.3712,
"step": 13860
},
{
"epoch": 0.7004873614302669,
"grad_norm": 8.223361489268525,
"learning_rate": 2.9957571471865847e-06,
"loss": 0.3615,
"step": 13870
},
{
"epoch": 0.7009923991818389,
"grad_norm": 3.8707110375759908,
"learning_rate": 2.990706131932519e-06,
"loss": 0.368,
"step": 13880
},
{
"epoch": 0.7014974369334108,
"grad_norm": 3.3033672047102853,
"learning_rate": 2.9856551166784525e-06,
"loss": 0.3662,
"step": 13890
},
{
"epoch": 0.7020024746849827,
"grad_norm": 5.8378570543101045,
"learning_rate": 2.9806041014243862e-06,
"loss": 0.3766,
"step": 13900
},
{
"epoch": 0.7025075124365546,
"grad_norm": 3.6651748652891425,
"learning_rate": 2.9755530861703203e-06,
"loss": 0.3661,
"step": 13910
},
{
"epoch": 0.7030125501881266,
"grad_norm": 5.784855966194933,
"learning_rate": 2.9705020709162545e-06,
"loss": 0.367,
"step": 13920
},
{
"epoch": 0.7035175879396985,
"grad_norm": 7.312016198568584,
"learning_rate": 2.9654510556621886e-06,
"loss": 0.3672,
"step": 13930
},
{
"epoch": 0.7040226256912704,
"grad_norm": 4.875104979534643,
"learning_rate": 2.9604000404081223e-06,
"loss": 0.3557,
"step": 13940
},
{
"epoch": 0.7045276634428423,
"grad_norm": 64.55120292569038,
"learning_rate": 2.955349025154056e-06,
"loss": 0.357,
"step": 13950
},
{
"epoch": 0.7050327011944143,
"grad_norm": 4.765695435475721,
"learning_rate": 2.95029800989999e-06,
"loss": 0.3625,
"step": 13960
},
{
"epoch": 0.7055377389459863,
"grad_norm": 9.706426400727505,
"learning_rate": 2.945246994645924e-06,
"loss": 0.3692,
"step": 13970
},
{
"epoch": 0.7060427766975581,
"grad_norm": 5.7703156609987785,
"learning_rate": 2.940195979391858e-06,
"loss": 0.3496,
"step": 13980
},
{
"epoch": 0.7065478144491301,
"grad_norm": 10.413216240197633,
"learning_rate": 2.935144964137792e-06,
"loss": 0.3435,
"step": 13990
},
{
"epoch": 0.707052852200702,
"grad_norm": 7.078239510812093,
"learning_rate": 2.930093948883726e-06,
"loss": 0.3661,
"step": 14000
},
{
"epoch": 0.707557889952274,
"grad_norm": 10.541563423283328,
"learning_rate": 2.92504293362966e-06,
"loss": 0.3569,
"step": 14010
},
{
"epoch": 0.7080629277038458,
"grad_norm": 4.886876661534467,
"learning_rate": 2.9199919183755936e-06,
"loss": 0.368,
"step": 14020
},
{
"epoch": 0.7085679654554178,
"grad_norm": 15.262848195897316,
"learning_rate": 2.9149409031215277e-06,
"loss": 0.3682,
"step": 14030
},
{
"epoch": 0.7090730032069897,
"grad_norm": 12.794932541476035,
"learning_rate": 2.9098898878674614e-06,
"loss": 0.3592,
"step": 14040
},
{
"epoch": 0.7095780409585617,
"grad_norm": 10.61866026419293,
"learning_rate": 2.904838872613395e-06,
"loss": 0.3604,
"step": 14050
},
{
"epoch": 0.7100830787101335,
"grad_norm": 25.4476962457588,
"learning_rate": 2.8997878573593296e-06,
"loss": 0.3824,
"step": 14060
},
{
"epoch": 0.7105881164617055,
"grad_norm": 7.669338040232453,
"learning_rate": 2.8947368421052634e-06,
"loss": 0.3595,
"step": 14070
},
{
"epoch": 0.7110931542132775,
"grad_norm": 11.74527716840071,
"learning_rate": 2.8896858268511975e-06,
"loss": 0.3543,
"step": 14080
},
{
"epoch": 0.7115981919648494,
"grad_norm": 6.646635907284065,
"learning_rate": 2.884634811597131e-06,
"loss": 0.372,
"step": 14090
},
{
"epoch": 0.7121032297164213,
"grad_norm": 7.097527430156463,
"learning_rate": 2.879583796343065e-06,
"loss": 0.3551,
"step": 14100
},
{
"epoch": 0.7126082674679932,
"grad_norm": 13.591824996252463,
"learning_rate": 2.874532781088999e-06,
"loss": 0.3681,
"step": 14110
},
{
"epoch": 0.7131133052195652,
"grad_norm": 6.164884039122544,
"learning_rate": 2.869481765834933e-06,
"loss": 0.3587,
"step": 14120
},
{
"epoch": 0.7136183429711371,
"grad_norm": 4.715084112453213,
"learning_rate": 2.8644307505808672e-06,
"loss": 0.3573,
"step": 14130
},
{
"epoch": 0.714123380722709,
"grad_norm": 6.717756492187504,
"learning_rate": 2.859379735326801e-06,
"loss": 0.3686,
"step": 14140
},
{
"epoch": 0.7146284184742809,
"grad_norm": 3.2667847864627855,
"learning_rate": 2.854328720072735e-06,
"loss": 0.3623,
"step": 14150
},
{
"epoch": 0.7151334562258529,
"grad_norm": 4.553377349461034,
"learning_rate": 2.8492777048186688e-06,
"loss": 0.3696,
"step": 14160
},
{
"epoch": 0.7156384939774248,
"grad_norm": 6.765157372727796,
"learning_rate": 2.8442266895646025e-06,
"loss": 0.3828,
"step": 14170
},
{
"epoch": 0.7161435317289967,
"grad_norm": 11.922453637732414,
"learning_rate": 2.8391756743105366e-06,
"loss": 0.362,
"step": 14180
},
{
"epoch": 0.7166485694805687,
"grad_norm": 14.304651599986355,
"learning_rate": 2.8341246590564707e-06,
"loss": 0.3609,
"step": 14190
},
{
"epoch": 0.7171536072321406,
"grad_norm": 6.623357145482479,
"learning_rate": 2.829073643802405e-06,
"loss": 0.3697,
"step": 14200
},
{
"epoch": 0.7176586449837126,
"grad_norm": 8.462478930205787,
"learning_rate": 2.8240226285483385e-06,
"loss": 0.3909,
"step": 14210
},
{
"epoch": 0.7181636827352844,
"grad_norm": 4.464396808912767,
"learning_rate": 2.8189716132942722e-06,
"loss": 0.3653,
"step": 14220
},
{
"epoch": 0.7186687204868564,
"grad_norm": 5.427275414848616,
"learning_rate": 2.8139205980402064e-06,
"loss": 0.3615,
"step": 14230
},
{
"epoch": 0.7191737582384283,
"grad_norm": 6.5602036666267285,
"learning_rate": 2.80886958278614e-06,
"loss": 0.3793,
"step": 14240
},
{
"epoch": 0.7196787959900003,
"grad_norm": 8.382640709296943,
"learning_rate": 2.8038185675320738e-06,
"loss": 0.3839,
"step": 14250
},
{
"epoch": 0.7201838337415721,
"grad_norm": 8.38849977200125,
"learning_rate": 2.7987675522780083e-06,
"loss": 0.3632,
"step": 14260
},
{
"epoch": 0.7206888714931441,
"grad_norm": 4.838800155468274,
"learning_rate": 2.793716537023942e-06,
"loss": 0.3654,
"step": 14270
},
{
"epoch": 0.721193909244716,
"grad_norm": 3.202140029458944,
"learning_rate": 2.788665521769876e-06,
"loss": 0.3457,
"step": 14280
},
{
"epoch": 0.721698946996288,
"grad_norm": 3.096801851946618,
"learning_rate": 2.78361450651581e-06,
"loss": 0.3624,
"step": 14290
},
{
"epoch": 0.7222039847478599,
"grad_norm": 5.2594336987978885,
"learning_rate": 2.7785634912617435e-06,
"loss": 0.3595,
"step": 14300
},
{
"epoch": 0.7227090224994318,
"grad_norm": 5.738417846740695,
"learning_rate": 2.7735124760076777e-06,
"loss": 0.3471,
"step": 14310
},
{
"epoch": 0.7232140602510038,
"grad_norm": 3.3615923332479283,
"learning_rate": 2.7684614607536114e-06,
"loss": 0.3781,
"step": 14320
},
{
"epoch": 0.7237190980025757,
"grad_norm": 13.026933882432127,
"learning_rate": 2.763410445499546e-06,
"loss": 0.3765,
"step": 14330
},
{
"epoch": 0.7242241357541476,
"grad_norm": 2.4861597750527884,
"learning_rate": 2.7583594302454796e-06,
"loss": 0.3528,
"step": 14340
},
{
"epoch": 0.7247291735057195,
"grad_norm": 6.6503607629591,
"learning_rate": 2.7533084149914137e-06,
"loss": 0.3618,
"step": 14350
},
{
"epoch": 0.7252342112572915,
"grad_norm": 2.8124337865494464,
"learning_rate": 2.7482573997373474e-06,
"loss": 0.3791,
"step": 14360
},
{
"epoch": 0.7257392490088634,
"grad_norm": 5.334310989331839,
"learning_rate": 2.743206384483281e-06,
"loss": 0.3702,
"step": 14370
},
{
"epoch": 0.7262442867604354,
"grad_norm": 4.197944825898265,
"learning_rate": 2.7381553692292153e-06,
"loss": 0.3644,
"step": 14380
},
{
"epoch": 0.7267493245120072,
"grad_norm": 3.4074066682001076,
"learning_rate": 2.7331043539751494e-06,
"loss": 0.3694,
"step": 14390
},
{
"epoch": 0.7272543622635792,
"grad_norm": 2.5186168503160817,
"learning_rate": 2.7280533387210835e-06,
"loss": 0.3675,
"step": 14400
},
{
"epoch": 0.7277594000151512,
"grad_norm": 2.7527199189922134,
"learning_rate": 2.723002323467017e-06,
"loss": 0.3474,
"step": 14410
},
{
"epoch": 0.7282644377667231,
"grad_norm": 10.29943264391808,
"learning_rate": 2.717951308212951e-06,
"loss": 0.3627,
"step": 14420
},
{
"epoch": 0.728769475518295,
"grad_norm": 3.012910231072848,
"learning_rate": 2.712900292958885e-06,
"loss": 0.359,
"step": 14430
},
{
"epoch": 0.7292745132698669,
"grad_norm": 4.291318577581795,
"learning_rate": 2.7078492777048187e-06,
"loss": 0.3657,
"step": 14440
},
{
"epoch": 0.7297795510214389,
"grad_norm": 3.161513732407597,
"learning_rate": 2.7027982624507524e-06,
"loss": 0.3774,
"step": 14450
},
{
"epoch": 0.7302845887730108,
"grad_norm": 6.231424663495901,
"learning_rate": 2.697747247196687e-06,
"loss": 0.3496,
"step": 14460
},
{
"epoch": 0.7307896265245827,
"grad_norm": 9.93800800520396,
"learning_rate": 2.6926962319426207e-06,
"loss": 0.3574,
"step": 14470
},
{
"epoch": 0.7312946642761546,
"grad_norm": 5.792304139685358,
"learning_rate": 2.687645216688555e-06,
"loss": 0.3641,
"step": 14480
},
{
"epoch": 0.7317997020277266,
"grad_norm": 6.779155670529348,
"learning_rate": 2.6825942014344885e-06,
"loss": 0.3545,
"step": 14490
},
{
"epoch": 0.7323047397792986,
"grad_norm": 3.8135773883209527,
"learning_rate": 2.6775431861804226e-06,
"loss": 0.3722,
"step": 14500
},
{
"epoch": 0.7328097775308704,
"grad_norm": 3.3544383619935805,
"learning_rate": 2.6724921709263563e-06,
"loss": 0.3603,
"step": 14510
},
{
"epoch": 0.7333148152824424,
"grad_norm": 4.268409877609442,
"learning_rate": 2.66744115567229e-06,
"loss": 0.3682,
"step": 14520
},
{
"epoch": 0.7338198530340143,
"grad_norm": 6.571521428968219,
"learning_rate": 2.6623901404182246e-06,
"loss": 0.376,
"step": 14530
},
{
"epoch": 0.7343248907855863,
"grad_norm": 2.9026381605364935,
"learning_rate": 2.6573391251641583e-06,
"loss": 0.3705,
"step": 14540
},
{
"epoch": 0.7348299285371581,
"grad_norm": 3.6365813772686635,
"learning_rate": 2.6522881099100924e-06,
"loss": 0.3556,
"step": 14550
},
{
"epoch": 0.7353349662887301,
"grad_norm": 4.744776659753617,
"learning_rate": 2.647237094656026e-06,
"loss": 0.381,
"step": 14560
},
{
"epoch": 0.735840004040302,
"grad_norm": 3.8265663489625172,
"learning_rate": 2.64218607940196e-06,
"loss": 0.3675,
"step": 14570
},
{
"epoch": 0.736345041791874,
"grad_norm": 4.046535114955657,
"learning_rate": 2.637135064147894e-06,
"loss": 0.354,
"step": 14580
},
{
"epoch": 0.7368500795434458,
"grad_norm": 2.011858995754094,
"learning_rate": 2.6320840488938276e-06,
"loss": 0.3674,
"step": 14590
},
{
"epoch": 0.7373551172950178,
"grad_norm": 8.920805773112406,
"learning_rate": 2.627033033639762e-06,
"loss": 0.3479,
"step": 14600
},
{
"epoch": 0.7378601550465897,
"grad_norm": 3.136158587893057,
"learning_rate": 2.621982018385696e-06,
"loss": 0.3626,
"step": 14610
},
{
"epoch": 0.7383651927981617,
"grad_norm": 3.2282462284034175,
"learning_rate": 2.6169310031316296e-06,
"loss": 0.3612,
"step": 14620
},
{
"epoch": 0.7388702305497336,
"grad_norm": 2.7063221534655018,
"learning_rate": 2.6118799878775637e-06,
"loss": 0.3658,
"step": 14630
},
{
"epoch": 0.7393752683013055,
"grad_norm": 2.0705371802094663,
"learning_rate": 2.6068289726234974e-06,
"loss": 0.3619,
"step": 14640
},
{
"epoch": 0.7398803060528775,
"grad_norm": 2.452888217750629,
"learning_rate": 2.601777957369431e-06,
"loss": 0.3745,
"step": 14650
},
{
"epoch": 0.7403853438044494,
"grad_norm": 3.5210431082573987,
"learning_rate": 2.596726942115365e-06,
"loss": 0.3621,
"step": 14660
},
{
"epoch": 0.7408903815560213,
"grad_norm": 2.705820608986338,
"learning_rate": 2.5916759268612993e-06,
"loss": 0.3722,
"step": 14670
},
{
"epoch": 0.7413954193075932,
"grad_norm": 3.8301828984326005,
"learning_rate": 2.5866249116072335e-06,
"loss": 0.3475,
"step": 14680
},
{
"epoch": 0.7419004570591652,
"grad_norm": 2.506550997138522,
"learning_rate": 2.581573896353167e-06,
"loss": 0.379,
"step": 14690
},
{
"epoch": 0.7424054948107371,
"grad_norm": 4.732673442087478,
"learning_rate": 2.5765228810991013e-06,
"loss": 0.3646,
"step": 14700
},
{
"epoch": 0.742910532562309,
"grad_norm": 3.5697728957238297,
"learning_rate": 2.571471865845035e-06,
"loss": 0.3749,
"step": 14710
},
{
"epoch": 0.743415570313881,
"grad_norm": 3.408786643280019,
"learning_rate": 2.5664208505909687e-06,
"loss": 0.3729,
"step": 14720
},
{
"epoch": 0.7439206080654529,
"grad_norm": 3.2680972265488584,
"learning_rate": 2.5613698353369032e-06,
"loss": 0.3709,
"step": 14730
},
{
"epoch": 0.7444256458170249,
"grad_norm": 2.830354639883803,
"learning_rate": 2.556318820082837e-06,
"loss": 0.3575,
"step": 14740
},
{
"epoch": 0.7449306835685967,
"grad_norm": 2.1934046044941216,
"learning_rate": 2.551267804828771e-06,
"loss": 0.3615,
"step": 14750
},
{
"epoch": 0.7454357213201687,
"grad_norm": 3.7209556097533363,
"learning_rate": 2.5462167895747048e-06,
"loss": 0.3759,
"step": 14760
},
{
"epoch": 0.7459407590717406,
"grad_norm": 2.171529108489969,
"learning_rate": 2.5411657743206385e-06,
"loss": 0.3736,
"step": 14770
},
{
"epoch": 0.7464457968233126,
"grad_norm": 3.2881080411249903,
"learning_rate": 2.5361147590665726e-06,
"loss": 0.3856,
"step": 14780
},
{
"epoch": 0.7469508345748844,
"grad_norm": 2.1841022217022004,
"learning_rate": 2.5310637438125063e-06,
"loss": 0.3594,
"step": 14790
},
{
"epoch": 0.7474558723264564,
"grad_norm": 11.74984061570617,
"learning_rate": 2.526012728558441e-06,
"loss": 0.3691,
"step": 14800
},
{
"epoch": 0.7479609100780283,
"grad_norm": 2.64035990962851,
"learning_rate": 2.5209617133043745e-06,
"loss": 0.3426,
"step": 14810
},
{
"epoch": 0.7484659478296003,
"grad_norm": 1.7507289069126455,
"learning_rate": 2.5159106980503082e-06,
"loss": 0.3652,
"step": 14820
},
{
"epoch": 0.7489709855811721,
"grad_norm": 2.720025461643553,
"learning_rate": 2.5108596827962423e-06,
"loss": 0.3757,
"step": 14830
},
{
"epoch": 0.7494760233327441,
"grad_norm": 2.595489021549687,
"learning_rate": 2.505808667542176e-06,
"loss": 0.3659,
"step": 14840
},
{
"epoch": 0.7499810610843161,
"grad_norm": 3.4867867255116884,
"learning_rate": 2.50075765228811e-06,
"loss": 0.3733,
"step": 14850
},
{
"epoch": 0.750486098835888,
"grad_norm": 2.716151183546756,
"learning_rate": 2.4957066370340443e-06,
"loss": 0.3609,
"step": 14860
},
{
"epoch": 0.7509911365874599,
"grad_norm": 2.0400103144299737,
"learning_rate": 2.490655621779978e-06,
"loss": 0.3639,
"step": 14870
},
{
"epoch": 0.7514961743390318,
"grad_norm": 9.410235472131538,
"learning_rate": 2.4856046065259117e-06,
"loss": 0.3483,
"step": 14880
},
{
"epoch": 0.7520012120906038,
"grad_norm": 2.4049800584504584,
"learning_rate": 2.480553591271846e-06,
"loss": 0.3569,
"step": 14890
},
{
"epoch": 0.7525062498421757,
"grad_norm": 3.7384168124978565,
"learning_rate": 2.47550257601778e-06,
"loss": 0.3542,
"step": 14900
},
{
"epoch": 0.7530112875937476,
"grad_norm": 2.589659210465427,
"learning_rate": 2.4704515607637136e-06,
"loss": 0.368,
"step": 14910
},
{
"epoch": 0.7535163253453195,
"grad_norm": 2.2792717912641383,
"learning_rate": 2.4654005455096478e-06,
"loss": 0.3575,
"step": 14920
},
{
"epoch": 0.7540213630968915,
"grad_norm": 5.546504545797571,
"learning_rate": 2.4603495302555815e-06,
"loss": 0.361,
"step": 14930
},
{
"epoch": 0.7545264008484635,
"grad_norm": 4.5236138626508415,
"learning_rate": 2.4552985150015156e-06,
"loss": 0.3652,
"step": 14940
},
{
"epoch": 0.7550314386000353,
"grad_norm": 5.458941587498548,
"learning_rate": 2.4502474997474497e-06,
"loss": 0.3565,
"step": 14950
},
{
"epoch": 0.7555364763516073,
"grad_norm": 2.182667398028913,
"learning_rate": 2.4451964844933834e-06,
"loss": 0.3574,
"step": 14960
},
{
"epoch": 0.7560415141031792,
"grad_norm": 2.3501955437712563,
"learning_rate": 2.440145469239317e-06,
"loss": 0.3409,
"step": 14970
},
{
"epoch": 0.7565465518547512,
"grad_norm": 2.8564528610222473,
"learning_rate": 2.4350944539852512e-06,
"loss": 0.3629,
"step": 14980
},
{
"epoch": 0.7570515896063231,
"grad_norm": 3.888970347389462,
"learning_rate": 2.4300434387311854e-06,
"loss": 0.361,
"step": 14990
},
{
"epoch": 0.757556627357895,
"grad_norm": 7.312481133609781,
"learning_rate": 2.424992423477119e-06,
"loss": 0.3614,
"step": 15000
},
{
"epoch": 0.7580616651094669,
"grad_norm": 2.1317079758826813,
"learning_rate": 2.4199414082230528e-06,
"loss": 0.3738,
"step": 15010
},
{
"epoch": 0.7585667028610389,
"grad_norm": 3.6516684684220366,
"learning_rate": 2.414890392968987e-06,
"loss": 0.3648,
"step": 15020
},
{
"epoch": 0.7590717406126108,
"grad_norm": 2.6072438507069555,
"learning_rate": 2.409839377714921e-06,
"loss": 0.3621,
"step": 15030
},
{
"epoch": 0.7595767783641827,
"grad_norm": 6.351203220775499,
"learning_rate": 2.4047883624608547e-06,
"loss": 0.3558,
"step": 15040
},
{
"epoch": 0.7600818161157546,
"grad_norm": 4.130236652741164,
"learning_rate": 2.399737347206789e-06,
"loss": 0.369,
"step": 15050
},
{
"epoch": 0.7605868538673266,
"grad_norm": 4.6672030495120564,
"learning_rate": 2.394686331952723e-06,
"loss": 0.3622,
"step": 15060
},
{
"epoch": 0.7610918916188986,
"grad_norm": 12.886235429525053,
"learning_rate": 2.3896353166986567e-06,
"loss": 0.3622,
"step": 15070
},
{
"epoch": 0.7615969293704704,
"grad_norm": 15.156964415943117,
"learning_rate": 2.3845843014445904e-06,
"loss": 0.3467,
"step": 15080
},
{
"epoch": 0.7621019671220424,
"grad_norm": 2.87845766689377,
"learning_rate": 2.3795332861905245e-06,
"loss": 0.3505,
"step": 15090
},
{
"epoch": 0.7626070048736143,
"grad_norm": 15.116534480941452,
"learning_rate": 2.3744822709364586e-06,
"loss": 0.3483,
"step": 15100
},
{
"epoch": 0.7631120426251863,
"grad_norm": 7.257991090958527,
"learning_rate": 2.3694312556823923e-06,
"loss": 0.3822,
"step": 15110
},
{
"epoch": 0.7636170803767581,
"grad_norm": 3.4000262016509697,
"learning_rate": 2.3643802404283264e-06,
"loss": 0.3609,
"step": 15120
},
{
"epoch": 0.7641221181283301,
"grad_norm": 5.8302722893046734,
"learning_rate": 2.35932922517426e-06,
"loss": 0.3532,
"step": 15130
},
{
"epoch": 0.764627155879902,
"grad_norm": 2.980190177193747,
"learning_rate": 2.3542782099201943e-06,
"loss": 0.3532,
"step": 15140
},
{
"epoch": 0.765132193631474,
"grad_norm": 2.886158227100219,
"learning_rate": 2.349227194666128e-06,
"loss": 0.3747,
"step": 15150
},
{
"epoch": 0.7656372313830458,
"grad_norm": 3.5525222031207635,
"learning_rate": 2.344176179412062e-06,
"loss": 0.3627,
"step": 15160
},
{
"epoch": 0.7661422691346178,
"grad_norm": 5.160435812895103,
"learning_rate": 2.3391251641579958e-06,
"loss": 0.3638,
"step": 15170
},
{
"epoch": 0.7666473068861898,
"grad_norm": 11.69613721594715,
"learning_rate": 2.33407414890393e-06,
"loss": 0.354,
"step": 15180
},
{
"epoch": 0.7671523446377617,
"grad_norm": 2.5755050921925604,
"learning_rate": 2.329023133649864e-06,
"loss": 0.3571,
"step": 15190
},
{
"epoch": 0.7676573823893336,
"grad_norm": 6.875578222537554,
"learning_rate": 2.3239721183957977e-06,
"loss": 0.3494,
"step": 15200
},
{
"epoch": 0.7681624201409055,
"grad_norm": 3.951283263659072,
"learning_rate": 2.3189211031417314e-06,
"loss": 0.3642,
"step": 15210
},
{
"epoch": 0.7686674578924775,
"grad_norm": 6.490351712773479,
"learning_rate": 2.3138700878876655e-06,
"loss": 0.3625,
"step": 15220
},
{
"epoch": 0.7691724956440494,
"grad_norm": 6.723804099342641,
"learning_rate": 2.3088190726335997e-06,
"loss": 0.3542,
"step": 15230
},
{
"epoch": 0.7696775333956213,
"grad_norm": 3.266407086981815,
"learning_rate": 2.3037680573795334e-06,
"loss": 0.3593,
"step": 15240
},
{
"epoch": 0.7701825711471932,
"grad_norm": 3.9284527391422093,
"learning_rate": 2.2987170421254675e-06,
"loss": 0.3614,
"step": 15250
},
{
"epoch": 0.7706876088987652,
"grad_norm": 15.223624137338168,
"learning_rate": 2.2936660268714016e-06,
"loss": 0.3662,
"step": 15260
},
{
"epoch": 0.7711926466503372,
"grad_norm": 4.028959949491209,
"learning_rate": 2.2886150116173353e-06,
"loss": 0.3499,
"step": 15270
},
{
"epoch": 0.771697684401909,
"grad_norm": 6.41110230244689,
"learning_rate": 2.283563996363269e-06,
"loss": 0.3753,
"step": 15280
},
{
"epoch": 0.772202722153481,
"grad_norm": 4.076986580941685,
"learning_rate": 2.278512981109203e-06,
"loss": 0.334,
"step": 15290
},
{
"epoch": 0.7727077599050529,
"grad_norm": 3.74862169818398,
"learning_rate": 2.2734619658551373e-06,
"loss": 0.3491,
"step": 15300
},
{
"epoch": 0.7732127976566249,
"grad_norm": 3.385686236758005,
"learning_rate": 2.268410950601071e-06,
"loss": 0.3505,
"step": 15310
},
{
"epoch": 0.7737178354081967,
"grad_norm": 4.72702362279656,
"learning_rate": 2.2633599353470047e-06,
"loss": 0.362,
"step": 15320
},
{
"epoch": 0.7742228731597687,
"grad_norm": 15.01423003858265,
"learning_rate": 2.2583089200929388e-06,
"loss": 0.3557,
"step": 15330
},
{
"epoch": 0.7747279109113406,
"grad_norm": 7.0698788662071275,
"learning_rate": 2.253257904838873e-06,
"loss": 0.3414,
"step": 15340
},
{
"epoch": 0.7752329486629126,
"grad_norm": 3.524087806752161,
"learning_rate": 2.2482068895848066e-06,
"loss": 0.372,
"step": 15350
},
{
"epoch": 0.7757379864144844,
"grad_norm": 3.595099576572771,
"learning_rate": 2.2431558743307407e-06,
"loss": 0.3621,
"step": 15360
},
{
"epoch": 0.7762430241660564,
"grad_norm": 3.3403877064629786,
"learning_rate": 2.2381048590766744e-06,
"loss": 0.3565,
"step": 15370
},
{
"epoch": 0.7767480619176284,
"grad_norm": 5.0138361529572,
"learning_rate": 2.2330538438226086e-06,
"loss": 0.374,
"step": 15380
},
{
"epoch": 0.7772530996692003,
"grad_norm": 6.076755730180162,
"learning_rate": 2.2280028285685423e-06,
"loss": 0.3548,
"step": 15390
},
{
"epoch": 0.7777581374207722,
"grad_norm": 7.698809050653406,
"learning_rate": 2.2229518133144764e-06,
"loss": 0.3667,
"step": 15400
},
{
"epoch": 0.7782631751723441,
"grad_norm": 3.4467924224973046,
"learning_rate": 2.2179007980604105e-06,
"loss": 0.3628,
"step": 15410
},
{
"epoch": 0.7787682129239161,
"grad_norm": 4.893559485834325,
"learning_rate": 2.212849782806344e-06,
"loss": 0.3644,
"step": 15420
},
{
"epoch": 0.779273250675488,
"grad_norm": 4.646808808665608,
"learning_rate": 2.2077987675522783e-06,
"loss": 0.3615,
"step": 15430
},
{
"epoch": 0.7797782884270599,
"grad_norm": 5.1159354751441395,
"learning_rate": 2.202747752298212e-06,
"loss": 0.3719,
"step": 15440
},
{
"epoch": 0.7802833261786318,
"grad_norm": 3.6881998697625784,
"learning_rate": 2.197696737044146e-06,
"loss": 0.3607,
"step": 15450
},
{
"epoch": 0.7807883639302038,
"grad_norm": 2.337125255187875,
"learning_rate": 2.1926457217900803e-06,
"loss": 0.3757,
"step": 15460
},
{
"epoch": 0.7812934016817757,
"grad_norm": 4.428433133367473,
"learning_rate": 2.187594706536014e-06,
"loss": 0.3588,
"step": 15470
},
{
"epoch": 0.7817984394333476,
"grad_norm": 3.458785718908454,
"learning_rate": 2.1825436912819477e-06,
"loss": 0.3705,
"step": 15480
},
{
"epoch": 0.7823034771849195,
"grad_norm": 3.5942204693831936,
"learning_rate": 2.177492676027882e-06,
"loss": 0.3683,
"step": 15490
},
{
"epoch": 0.7828085149364915,
"grad_norm": 5.889169421367107,
"learning_rate": 2.172441660773816e-06,
"loss": 0.3573,
"step": 15500
},
{
"epoch": 0.7833135526880635,
"grad_norm": 3.261189892920889,
"learning_rate": 2.1673906455197496e-06,
"loss": 0.3512,
"step": 15510
},
{
"epoch": 0.7838185904396353,
"grad_norm": 8.352842752027808,
"learning_rate": 2.1623396302656833e-06,
"loss": 0.3629,
"step": 15520
},
{
"epoch": 0.7843236281912073,
"grad_norm": 2.96386077067302,
"learning_rate": 2.1572886150116174e-06,
"loss": 0.3636,
"step": 15530
},
{
"epoch": 0.7848286659427792,
"grad_norm": 5.283581255544922,
"learning_rate": 2.1522375997575516e-06,
"loss": 0.3676,
"step": 15540
},
{
"epoch": 0.7853337036943512,
"grad_norm": 2.6179107461634796,
"learning_rate": 2.1471865845034853e-06,
"loss": 0.354,
"step": 15550
},
{
"epoch": 0.785838741445923,
"grad_norm": 7.9036355130392755,
"learning_rate": 2.142135569249419e-06,
"loss": 0.3621,
"step": 15560
},
{
"epoch": 0.786343779197495,
"grad_norm": 4.522869551719609,
"learning_rate": 2.137084553995353e-06,
"loss": 0.3718,
"step": 15570
},
{
"epoch": 0.7868488169490669,
"grad_norm": 4.684014168319032,
"learning_rate": 2.1320335387412872e-06,
"loss": 0.3543,
"step": 15580
},
{
"epoch": 0.7873538547006389,
"grad_norm": 3.8914673062545697,
"learning_rate": 2.126982523487221e-06,
"loss": 0.3535,
"step": 15590
},
{
"epoch": 0.7878588924522109,
"grad_norm": 5.961270787701582,
"learning_rate": 2.121931508233155e-06,
"loss": 0.3716,
"step": 15600
},
{
"epoch": 0.7883639302037827,
"grad_norm": 30.183292311995285,
"learning_rate": 2.116880492979089e-06,
"loss": 0.3613,
"step": 15610
},
{
"epoch": 0.7888689679553547,
"grad_norm": 6.697417465261187,
"learning_rate": 2.111829477725023e-06,
"loss": 0.3507,
"step": 15620
},
{
"epoch": 0.7893740057069266,
"grad_norm": 3.0372920526034783,
"learning_rate": 2.106778462470957e-06,
"loss": 0.3556,
"step": 15630
},
{
"epoch": 0.7898790434584986,
"grad_norm": 2.8257416604165764,
"learning_rate": 2.1017274472168907e-06,
"loss": 0.3558,
"step": 15640
},
{
"epoch": 0.7903840812100704,
"grad_norm": 4.371382594369755,
"learning_rate": 2.096676431962825e-06,
"loss": 0.3708,
"step": 15650
},
{
"epoch": 0.7908891189616424,
"grad_norm": 2.2916826306015388,
"learning_rate": 2.0916254167087585e-06,
"loss": 0.3575,
"step": 15660
},
{
"epoch": 0.7913941567132143,
"grad_norm": 4.127089263253249,
"learning_rate": 2.0865744014546926e-06,
"loss": 0.3727,
"step": 15670
},
{
"epoch": 0.7918991944647863,
"grad_norm": 4.5838298238269095,
"learning_rate": 2.0815233862006263e-06,
"loss": 0.3631,
"step": 15680
},
{
"epoch": 0.7924042322163581,
"grad_norm": 2.13347121181111,
"learning_rate": 2.0764723709465605e-06,
"loss": 0.3577,
"step": 15690
},
{
"epoch": 0.7929092699679301,
"grad_norm": 2.865333714554876,
"learning_rate": 2.0714213556924946e-06,
"loss": 0.3646,
"step": 15700
},
{
"epoch": 0.793414307719502,
"grad_norm": 3.3712098781575968,
"learning_rate": 2.0663703404384283e-06,
"loss": 0.3665,
"step": 15710
},
{
"epoch": 0.793919345471074,
"grad_norm": 6.140656487678135,
"learning_rate": 2.061319325184362e-06,
"loss": 0.3562,
"step": 15720
},
{
"epoch": 0.7944243832226459,
"grad_norm": 6.1891047783662785,
"learning_rate": 2.056268309930296e-06,
"loss": 0.3724,
"step": 15730
},
{
"epoch": 0.7949294209742178,
"grad_norm": 9.134267646827375,
"learning_rate": 2.0512172946762302e-06,
"loss": 0.3733,
"step": 15740
},
{
"epoch": 0.7954344587257898,
"grad_norm": 3.939505362182306,
"learning_rate": 2.046166279422164e-06,
"loss": 0.356,
"step": 15750
},
{
"epoch": 0.7959394964773617,
"grad_norm": 5.797259273365989,
"learning_rate": 2.041115264168098e-06,
"loss": 0.3657,
"step": 15760
},
{
"epoch": 0.7964445342289336,
"grad_norm": 4.772342944803025,
"learning_rate": 2.036064248914032e-06,
"loss": 0.3565,
"step": 15770
},
{
"epoch": 0.7969495719805055,
"grad_norm": 5.611273163023503,
"learning_rate": 2.031013233659966e-06,
"loss": 0.342,
"step": 15780
},
{
"epoch": 0.7974546097320775,
"grad_norm": 4.35699870135289,
"learning_rate": 2.0259622184058996e-06,
"loss": 0.3564,
"step": 15790
},
{
"epoch": 0.7979596474836494,
"grad_norm": 4.981366634684693,
"learning_rate": 2.0209112031518337e-06,
"loss": 0.374,
"step": 15800
},
{
"epoch": 0.7984646852352213,
"grad_norm": 5.47051690828411,
"learning_rate": 2.015860187897768e-06,
"loss": 0.3648,
"step": 15810
},
{
"epoch": 0.7989697229867933,
"grad_norm": 2.7267511419608192,
"learning_rate": 2.0108091726437015e-06,
"loss": 0.3633,
"step": 15820
},
{
"epoch": 0.7994747607383652,
"grad_norm": 5.550689226148647,
"learning_rate": 2.0057581573896352e-06,
"loss": 0.3729,
"step": 15830
},
{
"epoch": 0.7999797984899372,
"grad_norm": 3.302912522583322,
"learning_rate": 2.0007071421355694e-06,
"loss": 0.3594,
"step": 15840
},
{
"epoch": 0.800484836241509,
"grad_norm": 5.487680267201285,
"learning_rate": 1.9956561268815035e-06,
"loss": 0.3529,
"step": 15850
},
{
"epoch": 0.800989873993081,
"grad_norm": 2.2707759816513016,
"learning_rate": 1.990605111627437e-06,
"loss": 0.3608,
"step": 15860
},
{
"epoch": 0.8014949117446529,
"grad_norm": 7.6358774792742805,
"learning_rate": 1.9855540963733713e-06,
"loss": 0.3664,
"step": 15870
},
{
"epoch": 0.8019999494962249,
"grad_norm": 4.568057200617405,
"learning_rate": 1.980503081119305e-06,
"loss": 0.3625,
"step": 15880
},
{
"epoch": 0.8025049872477967,
"grad_norm": 15.31945090537045,
"learning_rate": 1.975452065865239e-06,
"loss": 0.3489,
"step": 15890
},
{
"epoch": 0.8030100249993687,
"grad_norm": 8.04419756034429,
"learning_rate": 1.970401050611173e-06,
"loss": 0.3658,
"step": 15900
},
{
"epoch": 0.8035150627509406,
"grad_norm": 2.473274324466601,
"learning_rate": 1.965350035357107e-06,
"loss": 0.375,
"step": 15910
},
{
"epoch": 0.8040201005025126,
"grad_norm": 2.6342811148775076,
"learning_rate": 1.9602990201030406e-06,
"loss": 0.3636,
"step": 15920
},
{
"epoch": 0.8045251382540844,
"grad_norm": 2.6430322700309214,
"learning_rate": 1.9552480048489748e-06,
"loss": 0.3533,
"step": 15930
},
{
"epoch": 0.8050301760056564,
"grad_norm": 4.757917337464147,
"learning_rate": 1.950196989594909e-06,
"loss": 0.3727,
"step": 15940
},
{
"epoch": 0.8055352137572284,
"grad_norm": 4.511805579600671,
"learning_rate": 1.9451459743408426e-06,
"loss": 0.337,
"step": 15950
},
{
"epoch": 0.8060402515088003,
"grad_norm": 4.838220936104523,
"learning_rate": 1.9400949590867767e-06,
"loss": 0.3577,
"step": 15960
},
{
"epoch": 0.8065452892603722,
"grad_norm": 16.008417651939627,
"learning_rate": 1.935043943832711e-06,
"loss": 0.3475,
"step": 15970
},
{
"epoch": 0.8070503270119441,
"grad_norm": 6.303632411143941,
"learning_rate": 1.9299929285786445e-06,
"loss": 0.3659,
"step": 15980
},
{
"epoch": 0.8075553647635161,
"grad_norm": 5.190471720901927,
"learning_rate": 1.9249419133245782e-06,
"loss": 0.3546,
"step": 15990
},
{
"epoch": 0.808060402515088,
"grad_norm": 3.242336430162645,
"learning_rate": 1.9198908980705124e-06,
"loss": 0.3509,
"step": 16000
},
{
"epoch": 0.8085654402666599,
"grad_norm": 109.07082323352427,
"learning_rate": 1.9148398828164465e-06,
"loss": 0.3576,
"step": 16010
},
{
"epoch": 0.8090704780182318,
"grad_norm": 4.924092447335461,
"learning_rate": 1.90978886756238e-06,
"loss": 0.3513,
"step": 16020
},
{
"epoch": 0.8095755157698038,
"grad_norm": 4.008651128754475,
"learning_rate": 1.904737852308314e-06,
"loss": 0.3632,
"step": 16030
},
{
"epoch": 0.8100805535213758,
"grad_norm": 2.704598654274715,
"learning_rate": 1.8996868370542482e-06,
"loss": 0.3789,
"step": 16040
},
{
"epoch": 0.8105855912729476,
"grad_norm": 3.9535618478053838,
"learning_rate": 1.894635821800182e-06,
"loss": 0.366,
"step": 16050
},
{
"epoch": 0.8110906290245196,
"grad_norm": 2.647502432727123,
"learning_rate": 1.8895848065461158e-06,
"loss": 0.3591,
"step": 16060
},
{
"epoch": 0.8115956667760915,
"grad_norm": 3.1410244190511296,
"learning_rate": 1.8845337912920497e-06,
"loss": 0.371,
"step": 16070
},
{
"epoch": 0.8121007045276635,
"grad_norm": 2.681292853976926,
"learning_rate": 1.8794827760379839e-06,
"loss": 0.3625,
"step": 16080
},
{
"epoch": 0.8126057422792353,
"grad_norm": 3.2981049374910203,
"learning_rate": 1.8744317607839178e-06,
"loss": 0.3524,
"step": 16090
},
{
"epoch": 0.8131107800308073,
"grad_norm": 3.021432379433984,
"learning_rate": 1.8693807455298515e-06,
"loss": 0.3787,
"step": 16100
},
{
"epoch": 0.8136158177823792,
"grad_norm": 4.129295733061501,
"learning_rate": 1.8643297302757856e-06,
"loss": 0.3565,
"step": 16110
},
{
"epoch": 0.8141208555339512,
"grad_norm": 2.7563655099935587,
"learning_rate": 1.8592787150217195e-06,
"loss": 0.3594,
"step": 16120
},
{
"epoch": 0.814625893285523,
"grad_norm": 17.34216993322677,
"learning_rate": 1.8542276997676534e-06,
"loss": 0.364,
"step": 16130
},
{
"epoch": 0.815130931037095,
"grad_norm": 9.964020505556881,
"learning_rate": 1.8491766845135876e-06,
"loss": 0.365,
"step": 16140
},
{
"epoch": 0.815635968788667,
"grad_norm": 3.075416765785676,
"learning_rate": 1.8441256692595213e-06,
"loss": 0.3635,
"step": 16150
},
{
"epoch": 0.8161410065402389,
"grad_norm": 5.243244426066463,
"learning_rate": 1.8390746540054552e-06,
"loss": 0.3684,
"step": 16160
},
{
"epoch": 0.8166460442918108,
"grad_norm": 12.518246409079454,
"learning_rate": 1.834023638751389e-06,
"loss": 0.3622,
"step": 16170
},
{
"epoch": 0.8171510820433827,
"grad_norm": 7.312613998018914,
"learning_rate": 1.8289726234973232e-06,
"loss": 0.3657,
"step": 16180
},
{
"epoch": 0.8176561197949547,
"grad_norm": 3.588424798034187,
"learning_rate": 1.8239216082432571e-06,
"loss": 0.3704,
"step": 16190
},
{
"epoch": 0.8181611575465266,
"grad_norm": 5.675252481804342,
"learning_rate": 1.8188705929891908e-06,
"loss": 0.3769,
"step": 16200
},
{
"epoch": 0.8186661952980986,
"grad_norm": 4.439591443663197,
"learning_rate": 1.813819577735125e-06,
"loss": 0.3613,
"step": 16210
},
{
"epoch": 0.8191712330496704,
"grad_norm": 3.8799599946003904,
"learning_rate": 1.8087685624810589e-06,
"loss": 0.3507,
"step": 16220
},
{
"epoch": 0.8196762708012424,
"grad_norm": 6.706337065885557,
"learning_rate": 1.8037175472269928e-06,
"loss": 0.3524,
"step": 16230
},
{
"epoch": 0.8201813085528143,
"grad_norm": 9.215432805836578,
"learning_rate": 1.7986665319729269e-06,
"loss": 0.3503,
"step": 16240
},
{
"epoch": 0.8206863463043863,
"grad_norm": 14.076599744963326,
"learning_rate": 1.7936155167188608e-06,
"loss": 0.3628,
"step": 16250
},
{
"epoch": 0.8211913840559582,
"grad_norm": 3.743352271180992,
"learning_rate": 1.7885645014647945e-06,
"loss": 0.3661,
"step": 16260
},
{
"epoch": 0.8216964218075301,
"grad_norm": 5.7001568185616085,
"learning_rate": 1.7835134862107284e-06,
"loss": 0.3549,
"step": 16270
},
{
"epoch": 0.8222014595591021,
"grad_norm": 7.558296626118487,
"learning_rate": 1.7784624709566625e-06,
"loss": 0.3595,
"step": 16280
},
{
"epoch": 0.822706497310674,
"grad_norm": 4.177362518260731,
"learning_rate": 1.7734114557025964e-06,
"loss": 0.3649,
"step": 16290
},
{
"epoch": 0.8232115350622459,
"grad_norm": 3.640941034887769,
"learning_rate": 1.7683604404485301e-06,
"loss": 0.3545,
"step": 16300
},
{
"epoch": 0.8237165728138178,
"grad_norm": 3.349967752869647,
"learning_rate": 1.7633094251944643e-06,
"loss": 0.3489,
"step": 16310
},
{
"epoch": 0.8242216105653898,
"grad_norm": 8.902832535316834,
"learning_rate": 1.7582584099403982e-06,
"loss": 0.3613,
"step": 16320
},
{
"epoch": 0.8247266483169617,
"grad_norm": 4.87569549754752,
"learning_rate": 1.753207394686332e-06,
"loss": 0.3466,
"step": 16330
},
{
"epoch": 0.8252316860685336,
"grad_norm": 3.113899384550637,
"learning_rate": 1.748156379432266e-06,
"loss": 0.356,
"step": 16340
},
{
"epoch": 0.8257367238201055,
"grad_norm": 3.908258669462424,
"learning_rate": 1.7431053641782001e-06,
"loss": 0.3542,
"step": 16350
},
{
"epoch": 0.8262417615716775,
"grad_norm": 4.63530589205007,
"learning_rate": 1.7380543489241338e-06,
"loss": 0.3627,
"step": 16360
},
{
"epoch": 0.8267467993232495,
"grad_norm": 8.784657750253414,
"learning_rate": 1.7330033336700677e-06,
"loss": 0.3578,
"step": 16370
},
{
"epoch": 0.8272518370748213,
"grad_norm": 6.545444728348479,
"learning_rate": 1.7279523184160019e-06,
"loss": 0.3539,
"step": 16380
},
{
"epoch": 0.8277568748263933,
"grad_norm": 12.650331415485839,
"learning_rate": 1.7229013031619358e-06,
"loss": 0.3477,
"step": 16390
},
{
"epoch": 0.8282619125779652,
"grad_norm": 6.830225414304928,
"learning_rate": 1.7178502879078695e-06,
"loss": 0.3503,
"step": 16400
},
{
"epoch": 0.8287669503295372,
"grad_norm": 4.071173355925771,
"learning_rate": 1.7127992726538036e-06,
"loss": 0.3556,
"step": 16410
},
{
"epoch": 0.829271988081109,
"grad_norm": 4.66654000796166,
"learning_rate": 1.7077482573997375e-06,
"loss": 0.3639,
"step": 16420
},
{
"epoch": 0.829777025832681,
"grad_norm": 3.6997372517957183,
"learning_rate": 1.7026972421456714e-06,
"loss": 0.3624,
"step": 16430
},
{
"epoch": 0.8302820635842529,
"grad_norm": 2.8900578489503537,
"learning_rate": 1.6976462268916053e-06,
"loss": 0.352,
"step": 16440
},
{
"epoch": 0.8307871013358249,
"grad_norm": 7.911446253848937,
"learning_rate": 1.6925952116375395e-06,
"loss": 0.3439,
"step": 16450
},
{
"epoch": 0.8312921390873967,
"grad_norm": 4.387449984988524,
"learning_rate": 1.6875441963834732e-06,
"loss": 0.3556,
"step": 16460
},
{
"epoch": 0.8317971768389687,
"grad_norm": 3.423134745091436,
"learning_rate": 1.682493181129407e-06,
"loss": 0.3534,
"step": 16470
},
{
"epoch": 0.8323022145905407,
"grad_norm": 6.067996848489734,
"learning_rate": 1.6774421658753412e-06,
"loss": 0.3579,
"step": 16480
},
{
"epoch": 0.8328072523421126,
"grad_norm": 4.7407053967222,
"learning_rate": 1.6723911506212751e-06,
"loss": 0.3479,
"step": 16490
},
{
"epoch": 0.8333122900936845,
"grad_norm": 11.91783824543377,
"learning_rate": 1.6673401353672088e-06,
"loss": 0.3457,
"step": 16500
},
{
"epoch": 0.8338173278452564,
"grad_norm": 4.2675850830619115,
"learning_rate": 1.6622891201131427e-06,
"loss": 0.3652,
"step": 16510
},
{
"epoch": 0.8343223655968284,
"grad_norm": 6.818897096480064,
"learning_rate": 1.6572381048590768e-06,
"loss": 0.3758,
"step": 16520
},
{
"epoch": 0.8348274033484003,
"grad_norm": 4.98946971011121,
"learning_rate": 1.6521870896050108e-06,
"loss": 0.3701,
"step": 16530
},
{
"epoch": 0.8353324410999722,
"grad_norm": 9.186502501610118,
"learning_rate": 1.6471360743509447e-06,
"loss": 0.3621,
"step": 16540
},
{
"epoch": 0.8358374788515441,
"grad_norm": 4.424346513819785,
"learning_rate": 1.6420850590968788e-06,
"loss": 0.355,
"step": 16550
},
{
"epoch": 0.8363425166031161,
"grad_norm": 6.493343915545971,
"learning_rate": 1.6370340438428125e-06,
"loss": 0.3606,
"step": 16560
},
{
"epoch": 0.836847554354688,
"grad_norm": 9.200578558866452,
"learning_rate": 1.6319830285887464e-06,
"loss": 0.3586,
"step": 16570
},
{
"epoch": 0.8373525921062599,
"grad_norm": 3.604785576320196,
"learning_rate": 1.6269320133346805e-06,
"loss": 0.3659,
"step": 16580
},
{
"epoch": 0.8378576298578319,
"grad_norm": 6.963237973002329,
"learning_rate": 1.6218809980806144e-06,
"loss": 0.3585,
"step": 16590
},
{
"epoch": 0.8383626676094038,
"grad_norm": 2.3248924867977525,
"learning_rate": 1.6168299828265481e-06,
"loss": 0.361,
"step": 16600
},
{
"epoch": 0.8388677053609758,
"grad_norm": 2.6056154769998776,
"learning_rate": 1.611778967572482e-06,
"loss": 0.3551,
"step": 16610
},
{
"epoch": 0.8393727431125476,
"grad_norm": 5.347914332983081,
"learning_rate": 1.6067279523184162e-06,
"loss": 0.3626,
"step": 16620
},
{
"epoch": 0.8398777808641196,
"grad_norm": 4.2282210080509275,
"learning_rate": 1.60167693706435e-06,
"loss": 0.3408,
"step": 16630
},
{
"epoch": 0.8403828186156915,
"grad_norm": 64.68616959956208,
"learning_rate": 1.596625921810284e-06,
"loss": 0.361,
"step": 16640
},
{
"epoch": 0.8408878563672635,
"grad_norm": 4.552604163372721,
"learning_rate": 1.5915749065562181e-06,
"loss": 0.3583,
"step": 16650
},
{
"epoch": 0.8413928941188353,
"grad_norm": 5.37530591780756,
"learning_rate": 1.5865238913021518e-06,
"loss": 0.3677,
"step": 16660
},
{
"epoch": 0.8418979318704073,
"grad_norm": 5.851389647033064,
"learning_rate": 1.5814728760480857e-06,
"loss": 0.3496,
"step": 16670
},
{
"epoch": 0.8424029696219792,
"grad_norm": 12.669284701519452,
"learning_rate": 1.5764218607940196e-06,
"loss": 0.3537,
"step": 16680
},
{
"epoch": 0.8429080073735512,
"grad_norm": 4.126283598831685,
"learning_rate": 1.5713708455399538e-06,
"loss": 0.3508,
"step": 16690
},
{
"epoch": 0.843413045125123,
"grad_norm": 5.925620788856901,
"learning_rate": 1.5663198302858877e-06,
"loss": 0.3575,
"step": 16700
},
{
"epoch": 0.843918082876695,
"grad_norm": 4.664307589807468,
"learning_rate": 1.5612688150318214e-06,
"loss": 0.3649,
"step": 16710
},
{
"epoch": 0.844423120628267,
"grad_norm": 11.693975175758714,
"learning_rate": 1.5562177997777555e-06,
"loss": 0.3524,
"step": 16720
},
{
"epoch": 0.8449281583798389,
"grad_norm": 4.379171141956925,
"learning_rate": 1.5511667845236894e-06,
"loss": 0.3636,
"step": 16730
},
{
"epoch": 0.8454331961314108,
"grad_norm": 4.966223200480838,
"learning_rate": 1.5461157692696233e-06,
"loss": 0.3754,
"step": 16740
},
{
"epoch": 0.8459382338829827,
"grad_norm": 6.214250837875937,
"learning_rate": 1.5410647540155575e-06,
"loss": 0.3591,
"step": 16750
},
{
"epoch": 0.8464432716345547,
"grad_norm": 3.0308071228407876,
"learning_rate": 1.5360137387614912e-06,
"loss": 0.3542,
"step": 16760
},
{
"epoch": 0.8469483093861266,
"grad_norm": 2.6208353352508613,
"learning_rate": 1.530962723507425e-06,
"loss": 0.3685,
"step": 16770
},
{
"epoch": 0.8474533471376986,
"grad_norm": 6.849704130112083,
"learning_rate": 1.525911708253359e-06,
"loss": 0.3656,
"step": 16780
},
{
"epoch": 0.8479583848892704,
"grad_norm": 4.032506964164272,
"learning_rate": 1.520860692999293e-06,
"loss": 0.3432,
"step": 16790
},
{
"epoch": 0.8484634226408424,
"grad_norm": 3.5062245447178464,
"learning_rate": 1.515809677745227e-06,
"loss": 0.3635,
"step": 16800
},
{
"epoch": 0.8489684603924144,
"grad_norm": 3.098328874271106,
"learning_rate": 1.5107586624911607e-06,
"loss": 0.3625,
"step": 16810
},
{
"epoch": 0.8494734981439863,
"grad_norm": 4.466142042227375,
"learning_rate": 1.5057076472370948e-06,
"loss": 0.3613,
"step": 16820
},
{
"epoch": 0.8499785358955582,
"grad_norm": 5.448485215579026,
"learning_rate": 1.5006566319830287e-06,
"loss": 0.3677,
"step": 16830
},
{
"epoch": 0.8504835736471301,
"grad_norm": 3.7693651118255795,
"learning_rate": 1.4956056167289627e-06,
"loss": 0.3597,
"step": 16840
},
{
"epoch": 0.8509886113987021,
"grad_norm": 3.0048683020938265,
"learning_rate": 1.4905546014748964e-06,
"loss": 0.3458,
"step": 16850
},
{
"epoch": 0.851493649150274,
"grad_norm": 3.6083434998949278,
"learning_rate": 1.4855035862208305e-06,
"loss": 0.3524,
"step": 16860
},
{
"epoch": 0.8519986869018459,
"grad_norm": 3.8301361676049375,
"learning_rate": 1.4804525709667644e-06,
"loss": 0.354,
"step": 16870
},
{
"epoch": 0.8525037246534178,
"grad_norm": 9.70263976797423,
"learning_rate": 1.4754015557126983e-06,
"loss": 0.335,
"step": 16880
},
{
"epoch": 0.8530087624049898,
"grad_norm": 2.9237142901845368,
"learning_rate": 1.4703505404586324e-06,
"loss": 0.3569,
"step": 16890
},
{
"epoch": 0.8535138001565618,
"grad_norm": 9.647980801949249,
"learning_rate": 1.4652995252045663e-06,
"loss": 0.3514,
"step": 16900
},
{
"epoch": 0.8540188379081336,
"grad_norm": 10.070888822677043,
"learning_rate": 1.4602485099505e-06,
"loss": 0.3576,
"step": 16910
},
{
"epoch": 0.8545238756597056,
"grad_norm": 3.273605158275998,
"learning_rate": 1.4551974946964342e-06,
"loss": 0.3707,
"step": 16920
},
{
"epoch": 0.8550289134112775,
"grad_norm": 4.9136034513233575,
"learning_rate": 1.450146479442368e-06,
"loss": 0.3606,
"step": 16930
},
{
"epoch": 0.8555339511628495,
"grad_norm": 2.4213339644373426,
"learning_rate": 1.445095464188302e-06,
"loss": 0.3511,
"step": 16940
},
{
"epoch": 0.8560389889144213,
"grad_norm": 5.947198902514986,
"learning_rate": 1.4400444489342357e-06,
"loss": 0.3441,
"step": 16950
},
{
"epoch": 0.8565440266659933,
"grad_norm": 4.128142354654801,
"learning_rate": 1.4349934336801698e-06,
"loss": 0.3421,
"step": 16960
},
{
"epoch": 0.8570490644175652,
"grad_norm": 3.699537501488419,
"learning_rate": 1.4299424184261037e-06,
"loss": 0.3517,
"step": 16970
},
{
"epoch": 0.8575541021691372,
"grad_norm": 2.4855687780449074,
"learning_rate": 1.4248914031720376e-06,
"loss": 0.3571,
"step": 16980
},
{
"epoch": 0.858059139920709,
"grad_norm": 4.059592927833855,
"learning_rate": 1.4198403879179718e-06,
"loss": 0.3515,
"step": 16990
},
{
"epoch": 0.858564177672281,
"grad_norm": 10.362480051840812,
"learning_rate": 1.4147893726639057e-06,
"loss": 0.3599,
"step": 17000
},
{
"epoch": 0.859069215423853,
"grad_norm": 4.66559692240262,
"learning_rate": 1.4097383574098394e-06,
"loss": 0.3659,
"step": 17010
},
{
"epoch": 0.8595742531754249,
"grad_norm": 3.0262644076926244,
"learning_rate": 1.4046873421557733e-06,
"loss": 0.3627,
"step": 17020
},
{
"epoch": 0.8600792909269968,
"grad_norm": 6.4805418875740335,
"learning_rate": 1.3996363269017074e-06,
"loss": 0.3552,
"step": 17030
},
{
"epoch": 0.8605843286785687,
"grad_norm": 6.48974005858549,
"learning_rate": 1.3945853116476413e-06,
"loss": 0.3742,
"step": 17040
},
{
"epoch": 0.8610893664301407,
"grad_norm": 12.933277403296861,
"learning_rate": 1.389534296393575e-06,
"loss": 0.3688,
"step": 17050
},
{
"epoch": 0.8615944041817126,
"grad_norm": 5.0135946224215076,
"learning_rate": 1.3844832811395094e-06,
"loss": 0.3495,
"step": 17060
},
{
"epoch": 0.8620994419332845,
"grad_norm": 4.664374173808275,
"learning_rate": 1.379432265885443e-06,
"loss": 0.3479,
"step": 17070
},
{
"epoch": 0.8626044796848564,
"grad_norm": 3.413827294110899,
"learning_rate": 1.374381250631377e-06,
"loss": 0.363,
"step": 17080
},
{
"epoch": 0.8631095174364284,
"grad_norm": 4.110151494898355,
"learning_rate": 1.369330235377311e-06,
"loss": 0.3601,
"step": 17090
},
{
"epoch": 0.8636145551880003,
"grad_norm": 4.057869159667351,
"learning_rate": 1.364279220123245e-06,
"loss": 0.3524,
"step": 17100
},
{
"epoch": 0.8641195929395722,
"grad_norm": 3.345052422207178,
"learning_rate": 1.3592282048691787e-06,
"loss": 0.3464,
"step": 17110
},
{
"epoch": 0.8646246306911441,
"grad_norm": 17.60499598627659,
"learning_rate": 1.3541771896151126e-06,
"loss": 0.3457,
"step": 17120
},
{
"epoch": 0.8651296684427161,
"grad_norm": 3.1318074334341395,
"learning_rate": 1.3491261743610467e-06,
"loss": 0.3519,
"step": 17130
},
{
"epoch": 0.8656347061942881,
"grad_norm": 8.924077465063698,
"learning_rate": 1.3440751591069806e-06,
"loss": 0.3536,
"step": 17140
},
{
"epoch": 0.8661397439458599,
"grad_norm": 5.697646524473763,
"learning_rate": 1.3390241438529146e-06,
"loss": 0.3528,
"step": 17150
},
{
"epoch": 0.8666447816974319,
"grad_norm": 2.759497306215723,
"learning_rate": 1.3339731285988487e-06,
"loss": 0.3619,
"step": 17160
},
{
"epoch": 0.8671498194490038,
"grad_norm": 7.1692765874676905,
"learning_rate": 1.3289221133447824e-06,
"loss": 0.3719,
"step": 17170
},
{
"epoch": 0.8676548572005758,
"grad_norm": 7.494321248279156,
"learning_rate": 1.3238710980907163e-06,
"loss": 0.345,
"step": 17180
},
{
"epoch": 0.8681598949521476,
"grad_norm": 3.809737865656533,
"learning_rate": 1.3188200828366502e-06,
"loss": 0.3482,
"step": 17190
},
{
"epoch": 0.8686649327037196,
"grad_norm": 2.566570715695477,
"learning_rate": 1.3137690675825843e-06,
"loss": 0.3606,
"step": 17200
},
{
"epoch": 0.8691699704552915,
"grad_norm": 5.642929958428042,
"learning_rate": 1.308718052328518e-06,
"loss": 0.3563,
"step": 17210
},
{
"epoch": 0.8696750082068635,
"grad_norm": 7.32713592653821,
"learning_rate": 1.303667037074452e-06,
"loss": 0.3546,
"step": 17220
},
{
"epoch": 0.8701800459584353,
"grad_norm": 3.958850560726077,
"learning_rate": 1.298616021820386e-06,
"loss": 0.3455,
"step": 17230
},
{
"epoch": 0.8706850837100073,
"grad_norm": 6.811151879320974,
"learning_rate": 1.29356500656632e-06,
"loss": 0.3549,
"step": 17240
},
{
"epoch": 0.8711901214615793,
"grad_norm": 7.69433841766783,
"learning_rate": 1.2885139913122539e-06,
"loss": 0.3521,
"step": 17250
},
{
"epoch": 0.8716951592131512,
"grad_norm": 2.703944056955305,
"learning_rate": 1.283462976058188e-06,
"loss": 0.364,
"step": 17260
},
{
"epoch": 0.8722001969647231,
"grad_norm": 2.6933087195097296,
"learning_rate": 1.2784119608041217e-06,
"loss": 0.3611,
"step": 17270
},
{
"epoch": 0.872705234716295,
"grad_norm": 5.320181701389919,
"learning_rate": 1.2733609455500556e-06,
"loss": 0.3645,
"step": 17280
},
{
"epoch": 0.873210272467867,
"grad_norm": 3.361877630451869,
"learning_rate": 1.2683099302959895e-06,
"loss": 0.3519,
"step": 17290
},
{
"epoch": 0.8737153102194389,
"grad_norm": 2.951104064745123,
"learning_rate": 1.2632589150419237e-06,
"loss": 0.3587,
"step": 17300
},
{
"epoch": 0.8742203479710108,
"grad_norm": 6.829151598791138,
"learning_rate": 1.2582078997878574e-06,
"loss": 0.3449,
"step": 17310
},
{
"epoch": 0.8747253857225827,
"grad_norm": 2.350852229769939,
"learning_rate": 1.2531568845337913e-06,
"loss": 0.3478,
"step": 17320
},
{
"epoch": 0.8752304234741547,
"grad_norm": 3.348683817462015,
"learning_rate": 1.2481058692797254e-06,
"loss": 0.3542,
"step": 17330
},
{
"epoch": 0.8757354612257267,
"grad_norm": 13.330436598568463,
"learning_rate": 1.2430548540256593e-06,
"loss": 0.3606,
"step": 17340
},
{
"epoch": 0.8762404989772985,
"grad_norm": 2.9324631629214233,
"learning_rate": 1.2380038387715932e-06,
"loss": 0.3403,
"step": 17350
},
{
"epoch": 0.8767455367288705,
"grad_norm": 12.462910905007668,
"learning_rate": 1.2329528235175271e-06,
"loss": 0.36,
"step": 17360
},
{
"epoch": 0.8772505744804424,
"grad_norm": 6.774737202335634,
"learning_rate": 1.227901808263461e-06,
"loss": 0.3529,
"step": 17370
},
{
"epoch": 0.8777556122320144,
"grad_norm": 6.599803354341647,
"learning_rate": 1.222850793009395e-06,
"loss": 0.349,
"step": 17380
},
{
"epoch": 0.8782606499835863,
"grad_norm": 4.087712138749244,
"learning_rate": 1.2177997777553289e-06,
"loss": 0.3585,
"step": 17390
},
{
"epoch": 0.8787656877351582,
"grad_norm": 5.90307508489925,
"learning_rate": 1.2127487625012628e-06,
"loss": 0.3779,
"step": 17400
},
{
"epoch": 0.8792707254867301,
"grad_norm": 2.9671035952158404,
"learning_rate": 1.2076977472471967e-06,
"loss": 0.3581,
"step": 17410
},
{
"epoch": 0.8797757632383021,
"grad_norm": 3.797739053184357,
"learning_rate": 1.2026467319931308e-06,
"loss": 0.3523,
"step": 17420
},
{
"epoch": 0.880280800989874,
"grad_norm": 2.753560543414613,
"learning_rate": 1.1975957167390647e-06,
"loss": 0.3525,
"step": 17430
},
{
"epoch": 0.8807858387414459,
"grad_norm": 9.720571030998466,
"learning_rate": 1.1925447014849986e-06,
"loss": 0.3541,
"step": 17440
},
{
"epoch": 0.8812908764930178,
"grad_norm": 3.0251393418296226,
"learning_rate": 1.1874936862309326e-06,
"loss": 0.346,
"step": 17450
},
{
"epoch": 0.8817959142445898,
"grad_norm": 3.841487482571985,
"learning_rate": 1.1824426709768665e-06,
"loss": 0.3667,
"step": 17460
},
{
"epoch": 0.8823009519961618,
"grad_norm": 3.3891055128420655,
"learning_rate": 1.1773916557228004e-06,
"loss": 0.34,
"step": 17470
},
{
"epoch": 0.8828059897477336,
"grad_norm": 6.131210762114432,
"learning_rate": 1.1723406404687343e-06,
"loss": 0.3631,
"step": 17480
},
{
"epoch": 0.8833110274993056,
"grad_norm": 3.2369764099154117,
"learning_rate": 1.1672896252146682e-06,
"loss": 0.3588,
"step": 17490
},
{
"epoch": 0.8838160652508775,
"grad_norm": 5.076564547768568,
"learning_rate": 1.1622386099606021e-06,
"loss": 0.3556,
"step": 17500
},
{
"epoch": 0.8843211030024495,
"grad_norm": 4.679877306058496,
"learning_rate": 1.1571875947065362e-06,
"loss": 0.344,
"step": 17510
},
{
"epoch": 0.8848261407540213,
"grad_norm": 11.330975476714967,
"learning_rate": 1.15213657945247e-06,
"loss": 0.3602,
"step": 17520
},
{
"epoch": 0.8853311785055933,
"grad_norm": 3.0144585760929576,
"learning_rate": 1.147085564198404e-06,
"loss": 0.3489,
"step": 17530
},
{
"epoch": 0.8858362162571652,
"grad_norm": 6.287423740993079,
"learning_rate": 1.142034548944338e-06,
"loss": 0.3558,
"step": 17540
},
{
"epoch": 0.8863412540087372,
"grad_norm": 3.489576000710248,
"learning_rate": 1.1369835336902719e-06,
"loss": 0.3531,
"step": 17550
},
{
"epoch": 0.886846291760309,
"grad_norm": 3.733608437865078,
"learning_rate": 1.1319325184362058e-06,
"loss": 0.375,
"step": 17560
},
{
"epoch": 0.887351329511881,
"grad_norm": 5.217817871919478,
"learning_rate": 1.1268815031821397e-06,
"loss": 0.3542,
"step": 17570
},
{
"epoch": 0.887856367263453,
"grad_norm": 2.799146326616282,
"learning_rate": 1.1218304879280736e-06,
"loss": 0.3483,
"step": 17580
},
{
"epoch": 0.8883614050150249,
"grad_norm": 2.6219946573680275,
"learning_rate": 1.1167794726740075e-06,
"loss": 0.3553,
"step": 17590
},
{
"epoch": 0.8888664427665968,
"grad_norm": 3.3194236691159764,
"learning_rate": 1.1117284574199414e-06,
"loss": 0.3714,
"step": 17600
},
{
"epoch": 0.8893714805181687,
"grad_norm": 7.465096104263228,
"learning_rate": 1.1066774421658756e-06,
"loss": 0.358,
"step": 17610
},
{
"epoch": 0.8898765182697407,
"grad_norm": 7.584088341048675,
"learning_rate": 1.1016264269118093e-06,
"loss": 0.3474,
"step": 17620
},
{
"epoch": 0.8903815560213126,
"grad_norm": 4.658333080329891,
"learning_rate": 1.0965754116577434e-06,
"loss": 0.3491,
"step": 17630
},
{
"epoch": 0.8908865937728845,
"grad_norm": 3.8001771174540377,
"learning_rate": 1.091524396403677e-06,
"loss": 0.354,
"step": 17640
},
{
"epoch": 0.8913916315244564,
"grad_norm": 2.5239942090163834,
"learning_rate": 1.0864733811496112e-06,
"loss": 0.3526,
"step": 17650
},
{
"epoch": 0.8918966692760284,
"grad_norm": 3.192080136278505,
"learning_rate": 1.0814223658955451e-06,
"loss": 0.3674,
"step": 17660
},
{
"epoch": 0.8924017070276004,
"grad_norm": 5.19946883439612,
"learning_rate": 1.076371350641479e-06,
"loss": 0.3575,
"step": 17670
},
{
"epoch": 0.8929067447791722,
"grad_norm": 4.214472251420109,
"learning_rate": 1.071320335387413e-06,
"loss": 0.3389,
"step": 17680
},
{
"epoch": 0.8934117825307442,
"grad_norm": 10.692110937064982,
"learning_rate": 1.0662693201333469e-06,
"loss": 0.3731,
"step": 17690
},
{
"epoch": 0.8939168202823161,
"grad_norm": 6.139646910075302,
"learning_rate": 1.0612183048792808e-06,
"loss": 0.3472,
"step": 17700
},
{
"epoch": 0.8944218580338881,
"grad_norm": 3.5048760100552157,
"learning_rate": 1.056167289625215e-06,
"loss": 0.3479,
"step": 17710
},
{
"epoch": 0.8949268957854599,
"grad_norm": 3.4931757105634254,
"learning_rate": 1.0511162743711486e-06,
"loss": 0.3546,
"step": 17720
},
{
"epoch": 0.8954319335370319,
"grad_norm": 7.742107901473751,
"learning_rate": 1.0460652591170827e-06,
"loss": 0.3501,
"step": 17730
},
{
"epoch": 0.8959369712886038,
"grad_norm": 2.8474052373770107,
"learning_rate": 1.0410142438630164e-06,
"loss": 0.3509,
"step": 17740
},
{
"epoch": 0.8964420090401758,
"grad_norm": 3.2173387588789186,
"learning_rate": 1.0359632286089505e-06,
"loss": 0.3513,
"step": 17750
},
{
"epoch": 0.8969470467917476,
"grad_norm": 6.294405766765221,
"learning_rate": 1.0309122133548845e-06,
"loss": 0.3566,
"step": 17760
},
{
"epoch": 0.8974520845433196,
"grad_norm": 2.4812196184280344,
"learning_rate": 1.0258611981008184e-06,
"loss": 0.3517,
"step": 17770
},
{
"epoch": 0.8979571222948916,
"grad_norm": 34.26237777325275,
"learning_rate": 1.0208101828467523e-06,
"loss": 0.3448,
"step": 17780
},
{
"epoch": 0.8984621600464635,
"grad_norm": 62.559391948683924,
"learning_rate": 1.0157591675926862e-06,
"loss": 0.361,
"step": 17790
},
{
"epoch": 0.8989671977980354,
"grad_norm": 3.0126489507596106,
"learning_rate": 1.01070815233862e-06,
"loss": 0.3594,
"step": 17800
},
{
"epoch": 0.8994722355496073,
"grad_norm": 4.4144846462481935,
"learning_rate": 1.0056571370845542e-06,
"loss": 0.3569,
"step": 17810
},
{
"epoch": 0.8999772733011793,
"grad_norm": 6.230988179874581,
"learning_rate": 1.000606121830488e-06,
"loss": 0.3383,
"step": 17820
},
{
"epoch": 0.9004823110527512,
"grad_norm": 2.294744478892904,
"learning_rate": 9.95555106576422e-07,
"loss": 0.3443,
"step": 17830
},
{
"epoch": 0.9009873488043231,
"grad_norm": 5.478102588014345,
"learning_rate": 9.905040913223558e-07,
"loss": 0.3469,
"step": 17840
},
{
"epoch": 0.901492386555895,
"grad_norm": 2.756012421267679,
"learning_rate": 9.854530760682899e-07,
"loss": 0.3629,
"step": 17850
},
{
"epoch": 0.901997424307467,
"grad_norm": 2.9051333748854504,
"learning_rate": 9.804020608142236e-07,
"loss": 0.3493,
"step": 17860
},
{
"epoch": 0.902502462059039,
"grad_norm": 2.942294785390223,
"learning_rate": 9.753510455601577e-07,
"loss": 0.3497,
"step": 17870
},
{
"epoch": 0.9030074998106108,
"grad_norm": 3.2373052706846135,
"learning_rate": 9.703000303060916e-07,
"loss": 0.3423,
"step": 17880
},
{
"epoch": 0.9035125375621827,
"grad_norm": 3.2500158251075266,
"learning_rate": 9.652490150520255e-07,
"loss": 0.3539,
"step": 17890
},
{
"epoch": 0.9040175753137547,
"grad_norm": 3.5580015551516104,
"learning_rate": 9.601979997979594e-07,
"loss": 0.3332,
"step": 17900
},
{
"epoch": 0.9045226130653267,
"grad_norm": 2.9893729880240203,
"learning_rate": 9.551469845438933e-07,
"loss": 0.3421,
"step": 17910
},
{
"epoch": 0.9050276508168985,
"grad_norm": 3.1028575176827182,
"learning_rate": 9.500959692898274e-07,
"loss": 0.3408,
"step": 17920
},
{
"epoch": 0.9055326885684705,
"grad_norm": 3.018466429936209,
"learning_rate": 9.450449540357613e-07,
"loss": 0.3444,
"step": 17930
},
{
"epoch": 0.9060377263200424,
"grad_norm": 5.330235007532234,
"learning_rate": 9.399939387816952e-07,
"loss": 0.3635,
"step": 17940
},
{
"epoch": 0.9065427640716144,
"grad_norm": 8.530098317004866,
"learning_rate": 9.349429235276292e-07,
"loss": 0.3499,
"step": 17950
},
{
"epoch": 0.9070478018231862,
"grad_norm": 5.312690518311376,
"learning_rate": 9.29891908273563e-07,
"loss": 0.3542,
"step": 17960
},
{
"epoch": 0.9075528395747582,
"grad_norm": 4.197543188110089,
"learning_rate": 9.24840893019497e-07,
"loss": 0.3545,
"step": 17970
},
{
"epoch": 0.9080578773263301,
"grad_norm": 8.064198853556876,
"learning_rate": 9.197898777654309e-07,
"loss": 0.3642,
"step": 17980
},
{
"epoch": 0.9085629150779021,
"grad_norm": 2.6318426760910656,
"learning_rate": 9.147388625113649e-07,
"loss": 0.3671,
"step": 17990
},
{
"epoch": 0.9090679528294741,
"grad_norm": 5.88110776094459,
"learning_rate": 9.096878472572989e-07,
"loss": 0.3278,
"step": 18000
},
{
"epoch": 0.9095729905810459,
"grad_norm": 7.327649689814793,
"learning_rate": 9.046368320032327e-07,
"loss": 0.3612,
"step": 18010
},
{
"epoch": 0.9100780283326179,
"grad_norm": 5.1865153794476235,
"learning_rate": 8.995858167491667e-07,
"loss": 0.3382,
"step": 18020
},
{
"epoch": 0.9105830660841898,
"grad_norm": 3.0447013025219696,
"learning_rate": 8.945348014951005e-07,
"loss": 0.3543,
"step": 18030
},
{
"epoch": 0.9110881038357618,
"grad_norm": 3.6031825002335753,
"learning_rate": 8.894837862410345e-07,
"loss": 0.3447,
"step": 18040
},
{
"epoch": 0.9115931415873336,
"grad_norm": 3.724192694212046,
"learning_rate": 8.844327709869685e-07,
"loss": 0.3435,
"step": 18050
},
{
"epoch": 0.9120981793389056,
"grad_norm": 3.9945852787473015,
"learning_rate": 8.793817557329023e-07,
"loss": 0.3618,
"step": 18060
},
{
"epoch": 0.9126032170904775,
"grad_norm": 4.943722371461341,
"learning_rate": 8.743307404788364e-07,
"loss": 0.3546,
"step": 18070
},
{
"epoch": 0.9131082548420495,
"grad_norm": 3.3003892909027313,
"learning_rate": 8.692797252247702e-07,
"loss": 0.3341,
"step": 18080
},
{
"epoch": 0.9136132925936213,
"grad_norm": 2.5050619983833826,
"learning_rate": 8.642287099707042e-07,
"loss": 0.3434,
"step": 18090
},
{
"epoch": 0.9141183303451933,
"grad_norm": 3.7372168096985408,
"learning_rate": 8.591776947166382e-07,
"loss": 0.3456,
"step": 18100
},
{
"epoch": 0.9146233680967653,
"grad_norm": 2.587657901087928,
"learning_rate": 8.54126679462572e-07,
"loss": 0.3442,
"step": 18110
},
{
"epoch": 0.9151284058483372,
"grad_norm": 2.9656578110467207,
"learning_rate": 8.49075664208506e-07,
"loss": 0.3588,
"step": 18120
},
{
"epoch": 0.9156334435999091,
"grad_norm": 3.3573402419593186,
"learning_rate": 8.440246489544398e-07,
"loss": 0.3277,
"step": 18130
},
{
"epoch": 0.916138481351481,
"grad_norm": 2.7211678821317205,
"learning_rate": 8.389736337003738e-07,
"loss": 0.3531,
"step": 18140
},
{
"epoch": 0.916643519103053,
"grad_norm": 5.733816461438319,
"learning_rate": 8.339226184463079e-07,
"loss": 0.3554,
"step": 18150
},
{
"epoch": 0.9171485568546249,
"grad_norm": 2.9296826900284536,
"learning_rate": 8.288716031922417e-07,
"loss": 0.3484,
"step": 18160
},
{
"epoch": 0.9176535946061968,
"grad_norm": 2.660140218702211,
"learning_rate": 8.238205879381757e-07,
"loss": 0.3456,
"step": 18170
},
{
"epoch": 0.9181586323577687,
"grad_norm": 2.5446985764311996,
"learning_rate": 8.187695726841095e-07,
"loss": 0.3609,
"step": 18180
},
{
"epoch": 0.9186636701093407,
"grad_norm": 4.659560104675861,
"learning_rate": 8.137185574300435e-07,
"loss": 0.3438,
"step": 18190
},
{
"epoch": 0.9191687078609126,
"grad_norm": 2.727989773393059,
"learning_rate": 8.086675421759773e-07,
"loss": 0.3573,
"step": 18200
},
{
"epoch": 0.9196737456124845,
"grad_norm": 2.415446839777949,
"learning_rate": 8.036165269219113e-07,
"loss": 0.353,
"step": 18210
},
{
"epoch": 0.9201787833640565,
"grad_norm": 4.842714491783439,
"learning_rate": 7.985655116678454e-07,
"loss": 0.3616,
"step": 18220
},
{
"epoch": 0.9206838211156284,
"grad_norm": 4.320254893741863,
"learning_rate": 7.935144964137792e-07,
"loss": 0.3441,
"step": 18230
},
{
"epoch": 0.9211888588672004,
"grad_norm": 7.676940904287274,
"learning_rate": 7.884634811597132e-07,
"loss": 0.3469,
"step": 18240
},
{
"epoch": 0.9216938966187722,
"grad_norm": 4.236663818861765,
"learning_rate": 7.834124659056471e-07,
"loss": 0.3415,
"step": 18250
},
{
"epoch": 0.9221989343703442,
"grad_norm": 2.5909282600390977,
"learning_rate": 7.78361450651581e-07,
"loss": 0.3735,
"step": 18260
},
{
"epoch": 0.9227039721219161,
"grad_norm": 3.095824958631971,
"learning_rate": 7.73310435397515e-07,
"loss": 0.3448,
"step": 18270
},
{
"epoch": 0.9232090098734881,
"grad_norm": 2.915058703898021,
"learning_rate": 7.682594201434488e-07,
"loss": 0.3605,
"step": 18280
},
{
"epoch": 0.9237140476250599,
"grad_norm": 4.0628438496453985,
"learning_rate": 7.632084048893828e-07,
"loss": 0.3522,
"step": 18290
},
{
"epoch": 0.9242190853766319,
"grad_norm": 5.339121668100427,
"learning_rate": 7.581573896353168e-07,
"loss": 0.348,
"step": 18300
},
{
"epoch": 0.9247241231282038,
"grad_norm": 2.9403014748581366,
"learning_rate": 7.531063743812507e-07,
"loss": 0.3436,
"step": 18310
},
{
"epoch": 0.9252291608797758,
"grad_norm": 4.4297759422952065,
"learning_rate": 7.480553591271847e-07,
"loss": 0.3688,
"step": 18320
},
{
"epoch": 0.9257341986313476,
"grad_norm": 8.718344498904465,
"learning_rate": 7.430043438731185e-07,
"loss": 0.3493,
"step": 18330
},
{
"epoch": 0.9262392363829196,
"grad_norm": 17.16627380229125,
"learning_rate": 7.379533286190525e-07,
"loss": 0.3635,
"step": 18340
},
{
"epoch": 0.9267442741344916,
"grad_norm": 6.9347616228201785,
"learning_rate": 7.329023133649864e-07,
"loss": 0.3443,
"step": 18350
},
{
"epoch": 0.9272493118860635,
"grad_norm": 4.584801866934316,
"learning_rate": 7.278512981109203e-07,
"loss": 0.3381,
"step": 18360
},
{
"epoch": 0.9277543496376354,
"grad_norm": 6.526194404732607,
"learning_rate": 7.228002828568542e-07,
"loss": 0.3795,
"step": 18370
},
{
"epoch": 0.9282593873892073,
"grad_norm": 2.8075224191637536,
"learning_rate": 7.177492676027882e-07,
"loss": 0.3473,
"step": 18380
},
{
"epoch": 0.9287644251407793,
"grad_norm": 8.151912813354414,
"learning_rate": 7.126982523487222e-07,
"loss": 0.3358,
"step": 18390
},
{
"epoch": 0.9292694628923512,
"grad_norm": 4.7898553816163245,
"learning_rate": 7.076472370946561e-07,
"loss": 0.3662,
"step": 18400
},
{
"epoch": 0.9297745006439231,
"grad_norm": 4.443345676540152,
"learning_rate": 7.0259622184059e-07,
"loss": 0.3576,
"step": 18410
},
{
"epoch": 0.930279538395495,
"grad_norm": 2.1359533523104464,
"learning_rate": 6.975452065865239e-07,
"loss": 0.3469,
"step": 18420
},
{
"epoch": 0.930784576147067,
"grad_norm": 37.17325982324839,
"learning_rate": 6.924941913324579e-07,
"loss": 0.3581,
"step": 18430
},
{
"epoch": 0.931289613898639,
"grad_norm": 9.673960641714096,
"learning_rate": 6.874431760783918e-07,
"loss": 0.3522,
"step": 18440
},
{
"epoch": 0.9317946516502108,
"grad_norm": 3.176528472152659,
"learning_rate": 6.823921608243258e-07,
"loss": 0.3464,
"step": 18450
},
{
"epoch": 0.9322996894017828,
"grad_norm": 6.300715383126615,
"learning_rate": 6.773411455702597e-07,
"loss": 0.3497,
"step": 18460
},
{
"epoch": 0.9328047271533547,
"grad_norm": 8.137473676823792,
"learning_rate": 6.722901303161936e-07,
"loss": 0.3542,
"step": 18470
},
{
"epoch": 0.9333097649049267,
"grad_norm": 3.7878098971135756,
"learning_rate": 6.672391150621276e-07,
"loss": 0.331,
"step": 18480
},
{
"epoch": 0.9338148026564985,
"grad_norm": 3.4537119018992883,
"learning_rate": 6.621880998080615e-07,
"loss": 0.349,
"step": 18490
},
{
"epoch": 0.9343198404080705,
"grad_norm": 5.7332136754175425,
"learning_rate": 6.571370845539954e-07,
"loss": 0.3686,
"step": 18500
},
{
"epoch": 0.9348248781596424,
"grad_norm": 2.40088922999737,
"learning_rate": 6.520860692999293e-07,
"loss": 0.3283,
"step": 18510
},
{
"epoch": 0.9353299159112144,
"grad_norm": 3.363962036008761,
"learning_rate": 6.470350540458632e-07,
"loss": 0.3438,
"step": 18520
},
{
"epoch": 0.9358349536627862,
"grad_norm": 4.168660835516454,
"learning_rate": 6.419840387917973e-07,
"loss": 0.3454,
"step": 18530
},
{
"epoch": 0.9363399914143582,
"grad_norm": 31.57273809211181,
"learning_rate": 6.369330235377312e-07,
"loss": 0.3525,
"step": 18540
},
{
"epoch": 0.9368450291659302,
"grad_norm": 2.6132048866458453,
"learning_rate": 6.318820082836651e-07,
"loss": 0.3445,
"step": 18550
},
{
"epoch": 0.9373500669175021,
"grad_norm": 3.121982815268045,
"learning_rate": 6.26830993029599e-07,
"loss": 0.3548,
"step": 18560
},
{
"epoch": 0.937855104669074,
"grad_norm": 3.878175783357159,
"learning_rate": 6.21779977775533e-07,
"loss": 0.3551,
"step": 18570
},
{
"epoch": 0.9383601424206459,
"grad_norm": 4.9399268768023035,
"learning_rate": 6.167289625214669e-07,
"loss": 0.3406,
"step": 18580
},
{
"epoch": 0.9388651801722179,
"grad_norm": 4.102118329658351,
"learning_rate": 6.116779472674008e-07,
"loss": 0.3415,
"step": 18590
},
{
"epoch": 0.9393702179237898,
"grad_norm": 7.576480207566005,
"learning_rate": 6.066269320133347e-07,
"loss": 0.3474,
"step": 18600
},
{
"epoch": 0.9398752556753618,
"grad_norm": 5.205648741417787,
"learning_rate": 6.015759167592687e-07,
"loss": 0.3701,
"step": 18610
},
{
"epoch": 0.9403802934269336,
"grad_norm": 2.574703645382877,
"learning_rate": 5.965249015052026e-07,
"loss": 0.3711,
"step": 18620
},
{
"epoch": 0.9408853311785056,
"grad_norm": 2.1710201302013608,
"learning_rate": 5.914738862511366e-07,
"loss": 0.3474,
"step": 18630
},
{
"epoch": 0.9413903689300775,
"grad_norm": 4.238488989950128,
"learning_rate": 5.864228709970705e-07,
"loss": 0.3458,
"step": 18640
},
{
"epoch": 0.9418954066816495,
"grad_norm": 3.9298261035180815,
"learning_rate": 5.813718557430044e-07,
"loss": 0.3517,
"step": 18650
},
{
"epoch": 0.9424004444332214,
"grad_norm": 4.183286579494331,
"learning_rate": 5.763208404889383e-07,
"loss": 0.3535,
"step": 18660
},
{
"epoch": 0.9429054821847933,
"grad_norm": 3.787432945522304,
"learning_rate": 5.712698252348722e-07,
"loss": 0.3438,
"step": 18670
},
{
"epoch": 0.9434105199363653,
"grad_norm": 42.44616395667764,
"learning_rate": 5.662188099808063e-07,
"loss": 0.3487,
"step": 18680
},
{
"epoch": 0.9439155576879372,
"grad_norm": 2.467350994396614,
"learning_rate": 5.611677947267402e-07,
"loss": 0.3495,
"step": 18690
},
{
"epoch": 0.9444205954395091,
"grad_norm": 3.4763055801021197,
"learning_rate": 5.561167794726741e-07,
"loss": 0.3471,
"step": 18700
},
{
"epoch": 0.944925633191081,
"grad_norm": 2.5579533898925604,
"learning_rate": 5.51065764218608e-07,
"loss": 0.3534,
"step": 18710
},
{
"epoch": 0.945430670942653,
"grad_norm": 4.548010779885983,
"learning_rate": 5.460147489645419e-07,
"loss": 0.3471,
"step": 18720
},
{
"epoch": 0.9459357086942249,
"grad_norm": 6.469724337112102,
"learning_rate": 5.409637337104758e-07,
"loss": 0.3519,
"step": 18730
},
{
"epoch": 0.9464407464457968,
"grad_norm": 3.2909861731073007,
"learning_rate": 5.359127184564098e-07,
"loss": 0.3455,
"step": 18740
},
{
"epoch": 0.9469457841973687,
"grad_norm": 3.9954669422093674,
"learning_rate": 5.308617032023437e-07,
"loss": 0.3484,
"step": 18750
},
{
"epoch": 0.9474508219489407,
"grad_norm": 4.918272865216922,
"learning_rate": 5.258106879482777e-07,
"loss": 0.3543,
"step": 18760
},
{
"epoch": 0.9479558597005127,
"grad_norm": 7.001698550237223,
"learning_rate": 5.207596726942116e-07,
"loss": 0.35,
"step": 18770
},
{
"epoch": 0.9484608974520845,
"grad_norm": 5.052929072371654,
"learning_rate": 5.157086574401455e-07,
"loss": 0.3442,
"step": 18780
},
{
"epoch": 0.9489659352036565,
"grad_norm": 4.671072953558266,
"learning_rate": 5.106576421860794e-07,
"loss": 0.3413,
"step": 18790
},
{
"epoch": 0.9494709729552284,
"grad_norm": 2.2274459931275943,
"learning_rate": 5.056066269320134e-07,
"loss": 0.3579,
"step": 18800
},
{
"epoch": 0.9499760107068004,
"grad_norm": 3.679235828488556,
"learning_rate": 5.005556116779473e-07,
"loss": 0.3482,
"step": 18810
},
{
"epoch": 0.9504810484583722,
"grad_norm": 5.113998461225806,
"learning_rate": 4.955045964238812e-07,
"loss": 0.3492,
"step": 18820
},
{
"epoch": 0.9509860862099442,
"grad_norm": 2.313685561005164,
"learning_rate": 4.904535811698151e-07,
"loss": 0.3498,
"step": 18830
},
{
"epoch": 0.9514911239615161,
"grad_norm": 4.548823482232416,
"learning_rate": 4.854025659157491e-07,
"loss": 0.3381,
"step": 18840
},
{
"epoch": 0.9519961617130881,
"grad_norm": 5.44596462224921,
"learning_rate": 4.803515506616831e-07,
"loss": 0.3499,
"step": 18850
},
{
"epoch": 0.9525011994646599,
"grad_norm": 5.653637436781184,
"learning_rate": 4.75300535407617e-07,
"loss": 0.3606,
"step": 18860
},
{
"epoch": 0.9530062372162319,
"grad_norm": 17.20756057482154,
"learning_rate": 4.702495201535509e-07,
"loss": 0.3513,
"step": 18870
},
{
"epoch": 0.9535112749678039,
"grad_norm": 2.6415906411811094,
"learning_rate": 4.651985048994848e-07,
"loss": 0.3584,
"step": 18880
},
{
"epoch": 0.9540163127193758,
"grad_norm": 3.3045378472333926,
"learning_rate": 4.601474896454188e-07,
"loss": 0.3672,
"step": 18890
},
{
"epoch": 0.9545213504709477,
"grad_norm": 2.494761338609364,
"learning_rate": 4.550964743913527e-07,
"loss": 0.3479,
"step": 18900
},
{
"epoch": 0.9550263882225196,
"grad_norm": 3.7846428264832346,
"learning_rate": 4.5004545913728665e-07,
"loss": 0.3594,
"step": 18910
},
{
"epoch": 0.9555314259740916,
"grad_norm": 11.41402543242729,
"learning_rate": 4.4499444388322056e-07,
"loss": 0.3391,
"step": 18920
},
{
"epoch": 0.9560364637256635,
"grad_norm": 23.109764846483394,
"learning_rate": 4.399434286291545e-07,
"loss": 0.3657,
"step": 18930
},
{
"epoch": 0.9565415014772354,
"grad_norm": 2.8683015901455313,
"learning_rate": 4.3489241337508844e-07,
"loss": 0.365,
"step": 18940
},
{
"epoch": 0.9570465392288073,
"grad_norm": 3.310696390265973,
"learning_rate": 4.2984139812102235e-07,
"loss": 0.3692,
"step": 18950
},
{
"epoch": 0.9575515769803793,
"grad_norm": 3.223732467814084,
"learning_rate": 4.247903828669563e-07,
"loss": 0.3268,
"step": 18960
},
{
"epoch": 0.9580566147319513,
"grad_norm": 3.067086167535032,
"learning_rate": 4.1973936761289023e-07,
"loss": 0.353,
"step": 18970
},
{
"epoch": 0.9585616524835231,
"grad_norm": 4.735815421841188,
"learning_rate": 4.1468835235882414e-07,
"loss": 0.3444,
"step": 18980
},
{
"epoch": 0.959066690235095,
"grad_norm": 3.425925550819413,
"learning_rate": 4.096373371047581e-07,
"loss": 0.3385,
"step": 18990
},
{
"epoch": 0.959571727986667,
"grad_norm": 4.169456686292674,
"learning_rate": 4.04586321850692e-07,
"loss": 0.3635,
"step": 19000
},
{
"epoch": 0.960076765738239,
"grad_norm": 2.7883327435971967,
"learning_rate": 3.9953530659662593e-07,
"loss": 0.3496,
"step": 19010
},
{
"epoch": 0.9605818034898108,
"grad_norm": 6.631719541255136,
"learning_rate": 3.944842913425599e-07,
"loss": 0.3536,
"step": 19020
},
{
"epoch": 0.9610868412413828,
"grad_norm": 6.199402518918171,
"learning_rate": 3.8943327608849386e-07,
"loss": 0.3492,
"step": 19030
},
{
"epoch": 0.9615918789929547,
"grad_norm": 3.832122639682839,
"learning_rate": 3.8438226083442777e-07,
"loss": 0.3559,
"step": 19040
},
{
"epoch": 0.9620969167445267,
"grad_norm": 5.054933419698706,
"learning_rate": 3.793312455803617e-07,
"loss": 0.3396,
"step": 19050
},
{
"epoch": 0.9626019544960985,
"grad_norm": 4.931557429835319,
"learning_rate": 3.742802303262956e-07,
"loss": 0.361,
"step": 19060
},
{
"epoch": 0.9631069922476705,
"grad_norm": 2.8243354412545663,
"learning_rate": 3.692292150722295e-07,
"loss": 0.3519,
"step": 19070
},
{
"epoch": 0.9636120299992424,
"grad_norm": 2.9047769826763843,
"learning_rate": 3.641781998181635e-07,
"loss": 0.352,
"step": 19080
},
{
"epoch": 0.9641170677508144,
"grad_norm": 3.5750399190488586,
"learning_rate": 3.5912718456409744e-07,
"loss": 0.3515,
"step": 19090
},
{
"epoch": 0.9646221055023863,
"grad_norm": 4.083793717766302,
"learning_rate": 3.5407616931003135e-07,
"loss": 0.3364,
"step": 19100
},
{
"epoch": 0.9651271432539582,
"grad_norm": 2.237959887146547,
"learning_rate": 3.4902515405596526e-07,
"loss": 0.3451,
"step": 19110
},
{
"epoch": 0.9656321810055302,
"grad_norm": 3.1768900583422597,
"learning_rate": 3.4397413880189917e-07,
"loss": 0.3468,
"step": 19120
},
{
"epoch": 0.9661372187571021,
"grad_norm": 4.21068826694376,
"learning_rate": 3.389231235478332e-07,
"loss": 0.3412,
"step": 19130
},
{
"epoch": 0.966642256508674,
"grad_norm": 5.657422019205803,
"learning_rate": 3.338721082937671e-07,
"loss": 0.3439,
"step": 19140
},
{
"epoch": 0.9671472942602459,
"grad_norm": 6.306482550657807,
"learning_rate": 3.28821093039701e-07,
"loss": 0.3668,
"step": 19150
},
{
"epoch": 0.9676523320118179,
"grad_norm": 3.949528139283546,
"learning_rate": 3.237700777856349e-07,
"loss": 0.3521,
"step": 19160
},
{
"epoch": 0.9681573697633898,
"grad_norm": 3.8577102555565457,
"learning_rate": 3.1871906253156884e-07,
"loss": 0.3432,
"step": 19170
},
{
"epoch": 0.9686624075149618,
"grad_norm": 10.01075740910154,
"learning_rate": 3.1366804727750275e-07,
"loss": 0.349,
"step": 19180
},
{
"epoch": 0.9691674452665336,
"grad_norm": 5.366835858110643,
"learning_rate": 3.086170320234367e-07,
"loss": 0.3347,
"step": 19190
},
{
"epoch": 0.9696724830181056,
"grad_norm": 3.0600066927692553,
"learning_rate": 3.035660167693707e-07,
"loss": 0.3484,
"step": 19200
},
{
"epoch": 0.9701775207696776,
"grad_norm": 3.3199457968898933,
"learning_rate": 2.985150015153046e-07,
"loss": 0.3662,
"step": 19210
},
{
"epoch": 0.9706825585212495,
"grad_norm": 8.305484409899465,
"learning_rate": 2.934639862612385e-07,
"loss": 0.3401,
"step": 19220
},
{
"epoch": 0.9711875962728214,
"grad_norm": 4.5596702016391575,
"learning_rate": 2.8841297100717247e-07,
"loss": 0.3453,
"step": 19230
},
{
"epoch": 0.9716926340243933,
"grad_norm": 3.1404063614755056,
"learning_rate": 2.833619557531064e-07,
"loss": 0.3568,
"step": 19240
},
{
"epoch": 0.9721976717759653,
"grad_norm": 2.1279962723109542,
"learning_rate": 2.7831094049904034e-07,
"loss": 0.3652,
"step": 19250
},
{
"epoch": 0.9727027095275372,
"grad_norm": 8.103709676163785,
"learning_rate": 2.7325992524497426e-07,
"loss": 0.3599,
"step": 19260
},
{
"epoch": 0.9732077472791091,
"grad_norm": 3.9079606526402695,
"learning_rate": 2.6820890999090817e-07,
"loss": 0.3332,
"step": 19270
},
{
"epoch": 0.973712785030681,
"grad_norm": 2.834072428824953,
"learning_rate": 2.6315789473684213e-07,
"loss": 0.3587,
"step": 19280
},
{
"epoch": 0.974217822782253,
"grad_norm": 3.331437469521485,
"learning_rate": 2.5810687948277604e-07,
"loss": 0.3532,
"step": 19290
},
{
"epoch": 0.974722860533825,
"grad_norm": 4.5771090753301955,
"learning_rate": 2.5305586422871e-07,
"loss": 0.3619,
"step": 19300
},
{
"epoch": 0.9752278982853968,
"grad_norm": 2.1169984635243897,
"learning_rate": 2.480048489746439e-07,
"loss": 0.3338,
"step": 19310
},
{
"epoch": 0.9757329360369688,
"grad_norm": 2.4727042377727075,
"learning_rate": 2.429538337205779e-07,
"loss": 0.348,
"step": 19320
},
{
"epoch": 0.9762379737885407,
"grad_norm": 4.382104299041976,
"learning_rate": 2.3790281846651177e-07,
"loss": 0.3608,
"step": 19330
},
{
"epoch": 0.9767430115401127,
"grad_norm": 24.576345877505307,
"learning_rate": 2.3285180321244574e-07,
"loss": 0.3344,
"step": 19340
},
{
"epoch": 0.9772480492916845,
"grad_norm": 5.068003336801587,
"learning_rate": 2.2780078795837965e-07,
"loss": 0.34,
"step": 19350
},
{
"epoch": 0.9777530870432565,
"grad_norm": 5.036323992558192,
"learning_rate": 2.2274977270431359e-07,
"loss": 0.3534,
"step": 19360
},
{
"epoch": 0.9782581247948284,
"grad_norm": 8.0901350202218,
"learning_rate": 2.1769875745024752e-07,
"loss": 0.3563,
"step": 19370
},
{
"epoch": 0.9787631625464004,
"grad_norm": 3.2338907871203575,
"learning_rate": 2.1264774219618146e-07,
"loss": 0.342,
"step": 19380
},
{
"epoch": 0.9792682002979722,
"grad_norm": 7.152607151035173,
"learning_rate": 2.0759672694211537e-07,
"loss": 0.3585,
"step": 19390
},
{
"epoch": 0.9797732380495442,
"grad_norm": 2.8408399191335234,
"learning_rate": 2.025457116880493e-07,
"loss": 0.3408,
"step": 19400
},
{
"epoch": 0.9802782758011162,
"grad_norm": 3.335420575405322,
"learning_rate": 1.9749469643398325e-07,
"loss": 0.3626,
"step": 19410
},
{
"epoch": 0.9807833135526881,
"grad_norm": 4.427107484896195,
"learning_rate": 1.9244368117991716e-07,
"loss": 0.3385,
"step": 19420
},
{
"epoch": 0.98128835130426,
"grad_norm": 5.431153322507852,
"learning_rate": 1.8739266592585113e-07,
"loss": 0.3513,
"step": 19430
},
{
"epoch": 0.9817933890558319,
"grad_norm": 3.813640172056062,
"learning_rate": 1.8234165067178504e-07,
"loss": 0.3393,
"step": 19440
},
{
"epoch": 0.9822984268074039,
"grad_norm": 3.898589096762485,
"learning_rate": 1.7729063541771895e-07,
"loss": 0.3405,
"step": 19450
},
{
"epoch": 0.9828034645589758,
"grad_norm": 3.037916644567326,
"learning_rate": 1.7223962016365292e-07,
"loss": 0.3519,
"step": 19460
},
{
"epoch": 0.9833085023105477,
"grad_norm": 3.190273896432863,
"learning_rate": 1.6718860490958683e-07,
"loss": 0.3533,
"step": 19470
},
{
"epoch": 0.9838135400621196,
"grad_norm": 5.858585403848123,
"learning_rate": 1.621375896555208e-07,
"loss": 0.3443,
"step": 19480
},
{
"epoch": 0.9843185778136916,
"grad_norm": 3.441915956155067,
"learning_rate": 1.570865744014547e-07,
"loss": 0.3554,
"step": 19490
},
{
"epoch": 0.9848236155652635,
"grad_norm": 3.7424368716852348,
"learning_rate": 1.5203555914738864e-07,
"loss": 0.3587,
"step": 19500
},
{
"epoch": 0.9853286533168354,
"grad_norm": 7.071299158156384,
"learning_rate": 1.4698454389332256e-07,
"loss": 0.3621,
"step": 19510
},
{
"epoch": 0.9858336910684073,
"grad_norm": 3.8198683754171587,
"learning_rate": 1.419335286392565e-07,
"loss": 0.3724,
"step": 19520
},
{
"epoch": 0.9863387288199793,
"grad_norm": 2.731071788924555,
"learning_rate": 1.3688251338519043e-07,
"loss": 0.3416,
"step": 19530
},
{
"epoch": 0.9868437665715513,
"grad_norm": 3.7212383700962652,
"learning_rate": 1.3183149813112437e-07,
"loss": 0.3473,
"step": 19540
},
{
"epoch": 0.9873488043231231,
"grad_norm": 3.302454861812917,
"learning_rate": 1.267804828770583e-07,
"loss": 0.3567,
"step": 19550
},
{
"epoch": 0.9878538420746951,
"grad_norm": 13.424373803274289,
"learning_rate": 1.2172946762299225e-07,
"loss": 0.3462,
"step": 19560
},
{
"epoch": 0.988358879826267,
"grad_norm": 2.5094569211373683,
"learning_rate": 1.1667845236892617e-07,
"loss": 0.3506,
"step": 19570
},
{
"epoch": 0.988863917577839,
"grad_norm": 8.676691313011856,
"learning_rate": 1.116274371148601e-07,
"loss": 0.3399,
"step": 19580
},
{
"epoch": 0.9893689553294108,
"grad_norm": 5.111065406394146,
"learning_rate": 1.0657642186079402e-07,
"loss": 0.3486,
"step": 19590
},
{
"epoch": 0.9898739930809828,
"grad_norm": 3.1643666611399976,
"learning_rate": 1.0152540660672796e-07,
"loss": 0.3463,
"step": 19600
},
{
"epoch": 0.9903790308325547,
"grad_norm": 3.7401113905250845,
"learning_rate": 9.647439135266189e-08,
"loss": 0.3462,
"step": 19610
},
{
"epoch": 0.9908840685841267,
"grad_norm": 3.1161613433451696,
"learning_rate": 9.142337609859582e-08,
"loss": 0.3595,
"step": 19620
},
{
"epoch": 0.9913891063356985,
"grad_norm": 7.461525216502298,
"learning_rate": 8.637236084452976e-08,
"loss": 0.3478,
"step": 19630
},
{
"epoch": 0.9918941440872705,
"grad_norm": 3.278360748256368,
"learning_rate": 8.13213455904637e-08,
"loss": 0.3468,
"step": 19640
},
{
"epoch": 0.9923991818388425,
"grad_norm": 2.3662646040461026,
"learning_rate": 7.627033033639761e-08,
"loss": 0.3334,
"step": 19650
},
{
"epoch": 0.9929042195904144,
"grad_norm": 4.061819314850138,
"learning_rate": 7.121931508233155e-08,
"loss": 0.339,
"step": 19660
},
{
"epoch": 0.9934092573419863,
"grad_norm": 3.0268165058919387,
"learning_rate": 6.616829982826549e-08,
"loss": 0.3358,
"step": 19670
},
{
"epoch": 0.9939142950935582,
"grad_norm": 4.547975258460046,
"learning_rate": 6.111728457419941e-08,
"loss": 0.3338,
"step": 19680
},
{
"epoch": 0.9944193328451302,
"grad_norm": 5.770374809427592,
"learning_rate": 5.606626932013335e-08,
"loss": 0.3448,
"step": 19690
},
{
"epoch": 0.9949243705967021,
"grad_norm": 2.864633009769063,
"learning_rate": 5.1015254066067285e-08,
"loss": 0.3305,
"step": 19700
},
{
"epoch": 0.995429408348274,
"grad_norm": 2.15308465344697,
"learning_rate": 4.5964238812001217e-08,
"loss": 0.3756,
"step": 19710
},
{
"epoch": 0.9959344460998459,
"grad_norm": 4.787969766327734,
"learning_rate": 4.091322355793514e-08,
"loss": 0.343,
"step": 19720
},
{
"epoch": 0.9964394838514179,
"grad_norm": 9.985119863463977,
"learning_rate": 3.586220830386908e-08,
"loss": 0.3436,
"step": 19730
},
{
"epoch": 0.9969445216029899,
"grad_norm": 2.6886443082611673,
"learning_rate": 3.081119304980301e-08,
"loss": 0.3517,
"step": 19740
},
{
"epoch": 0.9974495593545617,
"grad_norm": 7.198367620435292,
"learning_rate": 2.5760177795736944e-08,
"loss": 0.3608,
"step": 19750
},
{
"epoch": 0.9979545971061337,
"grad_norm": 2.4433957061834106,
"learning_rate": 2.0709162541670875e-08,
"loss": 0.3587,
"step": 19760
},
{
"epoch": 0.9984596348577056,
"grad_norm": 43.92952696515746,
"learning_rate": 1.565814728760481e-08,
"loss": 0.352,
"step": 19770
},
{
"epoch": 0.9989646726092776,
"grad_norm": 5.395974747202963,
"learning_rate": 1.0607132033538742e-08,
"loss": 0.3339,
"step": 19780
},
{
"epoch": 0.9994697103608495,
"grad_norm": 2.2549607655291393,
"learning_rate": 5.556116779472674e-09,
"loss": 0.3437,
"step": 19790
},
{
"epoch": 0.9999747481124214,
"grad_norm": 4.084387999304972,
"learning_rate": 5.051015254066067e-10,
"loss": 0.3312,
"step": 19800
}
],
"logging_steps": 10,
"max_steps": 19800,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.475990740795392e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}