Commit ·
7463b1b
1
Parent(s): 2cdcc16
Training in progress, step 600000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +311 -3
- pytorch_model.bin +1 -1
- runs/Feb25_19-25-50_t1v-n-15e54913-w-0/events.out.tfevents.1677353360.t1v-n-15e54913-w-0.2265434.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3480942553
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28ee67af53aba46278b2f67f403ff852d746365440eecdf8bce9fcd80cd0bb83
|
| 3 |
size 3480942553
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1740493675
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf3a2440240f6a8a3e9f4556d4121d36465b190a0240767d5fa77daee75c8504
|
| 3 |
size 1740493675
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f6d3f5c9c843b9212bde5d89446d0571e28dbb8ebe90a8cd6f7cc64ee039aa7
|
| 3 |
size 13611
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f595742cd0d96240559aaf1ff72fa8686f62da9f07c5878ab2af30ab1e4f0a07
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -3394,11 +3394,319 @@
|
|
| 3394 |
"eval_samples_per_second": 27.652,
|
| 3395 |
"eval_steps_per_second": 0.437,
|
| 3396 |
"step": 550000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3397 |
}
|
| 3398 |
],
|
| 3399 |
"max_steps": 1000000,
|
| 3400 |
"num_train_epochs": 9223372036854775807,
|
| 3401 |
-
"total_flos": 3.
|
| 3402 |
"trial_name": null,
|
| 3403 |
"trial_params": null
|
| 3404 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.6,
|
| 5 |
+
"global_step": 600000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 3394 |
"eval_samples_per_second": 27.652,
|
| 3395 |
"eval_steps_per_second": 0.437,
|
| 3396 |
"step": 550000
|
| 3397 |
+
},
|
| 3398 |
+
{
|
| 3399 |
+
"epoch": 0.55,
|
| 3400 |
+
"learning_rate": 4.570627336558915e-05,
|
| 3401 |
+
"loss": 0.8088,
|
| 3402 |
+
"step": 551000
|
| 3403 |
+
},
|
| 3404 |
+
{
|
| 3405 |
+
"epoch": 0.55,
|
| 3406 |
+
"learning_rate": 4.554156096030149e-05,
|
| 3407 |
+
"loss": 0.8035,
|
| 3408 |
+
"step": 552000
|
| 3409 |
+
},
|
| 3410 |
+
{
|
| 3411 |
+
"epoch": 0.55,
|
| 3412 |
+
"learning_rate": 4.537689731178883e-05,
|
| 3413 |
+
"loss": 0.7899,
|
| 3414 |
+
"step": 553000
|
| 3415 |
+
},
|
| 3416 |
+
{
|
| 3417 |
+
"epoch": 0.55,
|
| 3418 |
+
"learning_rate": 4.5212284220786494e-05,
|
| 3419 |
+
"loss": 0.8265,
|
| 3420 |
+
"step": 554000
|
| 3421 |
+
},
|
| 3422 |
+
{
|
| 3423 |
+
"epoch": 0.56,
|
| 3424 |
+
"learning_rate": 4.504772348747687e-05,
|
| 3425 |
+
"loss": 0.7993,
|
| 3426 |
+
"step": 555000
|
| 3427 |
+
},
|
| 3428 |
+
{
|
| 3429 |
+
"epoch": 0.56,
|
| 3430 |
+
"learning_rate": 4.488321691146975e-05,
|
| 3431 |
+
"loss": 0.7958,
|
| 3432 |
+
"step": 556000
|
| 3433 |
+
},
|
| 3434 |
+
{
|
| 3435 |
+
"epoch": 0.56,
|
| 3436 |
+
"learning_rate": 4.471876629178273e-05,
|
| 3437 |
+
"loss": 0.8024,
|
| 3438 |
+
"step": 557000
|
| 3439 |
+
},
|
| 3440 |
+
{
|
| 3441 |
+
"epoch": 0.56,
|
| 3442 |
+
"learning_rate": 4.4554373426821374e-05,
|
| 3443 |
+
"loss": 0.8121,
|
| 3444 |
+
"step": 558000
|
| 3445 |
+
},
|
| 3446 |
+
{
|
| 3447 |
+
"epoch": 0.56,
|
| 3448 |
+
"learning_rate": 4.439004011435979e-05,
|
| 3449 |
+
"loss": 0.7992,
|
| 3450 |
+
"step": 559000
|
| 3451 |
+
},
|
| 3452 |
+
{
|
| 3453 |
+
"epoch": 0.56,
|
| 3454 |
+
"learning_rate": 4.4225768151520694e-05,
|
| 3455 |
+
"loss": 0.7822,
|
| 3456 |
+
"step": 560000
|
| 3457 |
+
},
|
| 3458 |
+
{
|
| 3459 |
+
"epoch": 0.56,
|
| 3460 |
+
"learning_rate": 4.406155933475599e-05,
|
| 3461 |
+
"loss": 0.7942,
|
| 3462 |
+
"step": 561000
|
| 3463 |
+
},
|
| 3464 |
+
{
|
| 3465 |
+
"epoch": 0.56,
|
| 3466 |
+
"learning_rate": 4.3897415459827e-05,
|
| 3467 |
+
"loss": 0.7942,
|
| 3468 |
+
"step": 562000
|
| 3469 |
+
},
|
| 3470 |
+
{
|
| 3471 |
+
"epoch": 0.56,
|
| 3472 |
+
"learning_rate": 4.373333832178478e-05,
|
| 3473 |
+
"loss": 0.7832,
|
| 3474 |
+
"step": 563000
|
| 3475 |
+
},
|
| 3476 |
+
{
|
| 3477 |
+
"epoch": 0.56,
|
| 3478 |
+
"learning_rate": 4.3569329714950704e-05,
|
| 3479 |
+
"loss": 0.7995,
|
| 3480 |
+
"step": 564000
|
| 3481 |
+
},
|
| 3482 |
+
{
|
| 3483 |
+
"epoch": 0.56,
|
| 3484 |
+
"learning_rate": 4.3405391432896555e-05,
|
| 3485 |
+
"loss": 0.8227,
|
| 3486 |
+
"step": 565000
|
| 3487 |
+
},
|
| 3488 |
+
{
|
| 3489 |
+
"epoch": 0.57,
|
| 3490 |
+
"learning_rate": 4.324152526842517e-05,
|
| 3491 |
+
"loss": 0.8254,
|
| 3492 |
+
"step": 566000
|
| 3493 |
+
},
|
| 3494 |
+
{
|
| 3495 |
+
"epoch": 0.57,
|
| 3496 |
+
"learning_rate": 4.307773301355062e-05,
|
| 3497 |
+
"loss": 0.8075,
|
| 3498 |
+
"step": 567000
|
| 3499 |
+
},
|
| 3500 |
+
{
|
| 3501 |
+
"epoch": 0.57,
|
| 3502 |
+
"learning_rate": 4.291401645947879e-05,
|
| 3503 |
+
"loss": 0.8268,
|
| 3504 |
+
"step": 568000
|
| 3505 |
+
},
|
| 3506 |
+
{
|
| 3507 |
+
"epoch": 0.57,
|
| 3508 |
+
"learning_rate": 4.275037739658771e-05,
|
| 3509 |
+
"loss": 0.8367,
|
| 3510 |
+
"step": 569000
|
| 3511 |
+
},
|
| 3512 |
+
{
|
| 3513 |
+
"epoch": 0.57,
|
| 3514 |
+
"learning_rate": 4.2586817614407895e-05,
|
| 3515 |
+
"loss": 0.8323,
|
| 3516 |
+
"step": 570000
|
| 3517 |
+
},
|
| 3518 |
+
{
|
| 3519 |
+
"epoch": 0.57,
|
| 3520 |
+
"learning_rate": 4.2423338901602985e-05,
|
| 3521 |
+
"loss": 0.8278,
|
| 3522 |
+
"step": 571000
|
| 3523 |
+
},
|
| 3524 |
+
{
|
| 3525 |
+
"epoch": 0.57,
|
| 3526 |
+
"learning_rate": 4.2259943045949934e-05,
|
| 3527 |
+
"loss": 0.8258,
|
| 3528 |
+
"step": 572000
|
| 3529 |
+
},
|
| 3530 |
+
{
|
| 3531 |
+
"epoch": 0.57,
|
| 3532 |
+
"learning_rate": 4.209663183431969e-05,
|
| 3533 |
+
"loss": 0.7842,
|
| 3534 |
+
"step": 573000
|
| 3535 |
+
},
|
| 3536 |
+
{
|
| 3537 |
+
"epoch": 0.57,
|
| 3538 |
+
"learning_rate": 4.1933407052657456e-05,
|
| 3539 |
+
"loss": 0.8079,
|
| 3540 |
+
"step": 574000
|
| 3541 |
+
},
|
| 3542 |
+
{
|
| 3543 |
+
"epoch": 0.57,
|
| 3544 |
+
"learning_rate": 4.17702704859633e-05,
|
| 3545 |
+
"loss": 0.782,
|
| 3546 |
+
"step": 575000
|
| 3547 |
+
},
|
| 3548 |
+
{
|
| 3549 |
+
"epoch": 0.58,
|
| 3550 |
+
"learning_rate": 4.160722391827262e-05,
|
| 3551 |
+
"loss": 0.7742,
|
| 3552 |
+
"step": 576000
|
| 3553 |
+
},
|
| 3554 |
+
{
|
| 3555 |
+
"epoch": 0.58,
|
| 3556 |
+
"learning_rate": 4.14442691326365e-05,
|
| 3557 |
+
"loss": 0.7748,
|
| 3558 |
+
"step": 577000
|
| 3559 |
+
},
|
| 3560 |
+
{
|
| 3561 |
+
"epoch": 0.58,
|
| 3562 |
+
"learning_rate": 4.1281407911102425e-05,
|
| 3563 |
+
"loss": 0.7921,
|
| 3564 |
+
"step": 578000
|
| 3565 |
+
},
|
| 3566 |
+
{
|
| 3567 |
+
"epoch": 0.58,
|
| 3568 |
+
"learning_rate": 4.111864203469457e-05,
|
| 3569 |
+
"loss": 0.7633,
|
| 3570 |
+
"step": 579000
|
| 3571 |
+
},
|
| 3572 |
+
{
|
| 3573 |
+
"epoch": 0.58,
|
| 3574 |
+
"learning_rate": 4.095597328339452e-05,
|
| 3575 |
+
"loss": 0.7463,
|
| 3576 |
+
"step": 580000
|
| 3577 |
+
},
|
| 3578 |
+
{
|
| 3579 |
+
"epoch": 0.58,
|
| 3580 |
+
"learning_rate": 4.079340343612165e-05,
|
| 3581 |
+
"loss": 0.7693,
|
| 3582 |
+
"step": 581000
|
| 3583 |
+
},
|
| 3584 |
+
{
|
| 3585 |
+
"epoch": 0.58,
|
| 3586 |
+
"learning_rate": 4.063093427071376e-05,
|
| 3587 |
+
"loss": 0.7814,
|
| 3588 |
+
"step": 582000
|
| 3589 |
+
},
|
| 3590 |
+
{
|
| 3591 |
+
"epoch": 0.58,
|
| 3592 |
+
"learning_rate": 4.046856756390767e-05,
|
| 3593 |
+
"loss": 0.7844,
|
| 3594 |
+
"step": 583000
|
| 3595 |
+
},
|
| 3596 |
+
{
|
| 3597 |
+
"epoch": 0.58,
|
| 3598 |
+
"learning_rate": 4.0306305091319595e-05,
|
| 3599 |
+
"loss": 0.7944,
|
| 3600 |
+
"step": 584000
|
| 3601 |
+
},
|
| 3602 |
+
{
|
| 3603 |
+
"epoch": 0.58,
|
| 3604 |
+
"learning_rate": 4.0144148627425993e-05,
|
| 3605 |
+
"loss": 0.7793,
|
| 3606 |
+
"step": 585000
|
| 3607 |
+
},
|
| 3608 |
+
{
|
| 3609 |
+
"epoch": 0.59,
|
| 3610 |
+
"learning_rate": 3.9982099945543945e-05,
|
| 3611 |
+
"loss": 0.7749,
|
| 3612 |
+
"step": 586000
|
| 3613 |
+
},
|
| 3614 |
+
{
|
| 3615 |
+
"epoch": 0.59,
|
| 3616 |
+
"learning_rate": 3.982016081781189e-05,
|
| 3617 |
+
"loss": 0.7831,
|
| 3618 |
+
"step": 587000
|
| 3619 |
+
},
|
| 3620 |
+
{
|
| 3621 |
+
"epoch": 0.59,
|
| 3622 |
+
"learning_rate": 3.965833301517017e-05,
|
| 3623 |
+
"loss": 0.7851,
|
| 3624 |
+
"step": 588000
|
| 3625 |
+
},
|
| 3626 |
+
{
|
| 3627 |
+
"epoch": 0.59,
|
| 3628 |
+
"learning_rate": 3.949661830734172e-05,
|
| 3629 |
+
"loss": 0.7685,
|
| 3630 |
+
"step": 589000
|
| 3631 |
+
},
|
| 3632 |
+
{
|
| 3633 |
+
"epoch": 0.59,
|
| 3634 |
+
"learning_rate": 3.933501846281267e-05,
|
| 3635 |
+
"loss": 0.7739,
|
| 3636 |
+
"step": 590000
|
| 3637 |
+
},
|
| 3638 |
+
{
|
| 3639 |
+
"epoch": 0.59,
|
| 3640 |
+
"learning_rate": 3.917353524881302e-05,
|
| 3641 |
+
"loss": 0.7596,
|
| 3642 |
+
"step": 591000
|
| 3643 |
+
},
|
| 3644 |
+
{
|
| 3645 |
+
"epoch": 0.59,
|
| 3646 |
+
"learning_rate": 3.901217043129735e-05,
|
| 3647 |
+
"loss": 0.7514,
|
| 3648 |
+
"step": 592000
|
| 3649 |
+
},
|
| 3650 |
+
{
|
| 3651 |
+
"epoch": 0.59,
|
| 3652 |
+
"learning_rate": 3.8850925774925425e-05,
|
| 3653 |
+
"loss": 0.7885,
|
| 3654 |
+
"step": 593000
|
| 3655 |
+
},
|
| 3656 |
+
{
|
| 3657 |
+
"epoch": 0.59,
|
| 3658 |
+
"learning_rate": 3.8689803043043e-05,
|
| 3659 |
+
"loss": 0.7904,
|
| 3660 |
+
"step": 594000
|
| 3661 |
+
},
|
| 3662 |
+
{
|
| 3663 |
+
"epoch": 0.59,
|
| 3664 |
+
"learning_rate": 3.852880399766243e-05,
|
| 3665 |
+
"loss": 0.7525,
|
| 3666 |
+
"step": 595000
|
| 3667 |
+
},
|
| 3668 |
+
{
|
| 3669 |
+
"epoch": 0.6,
|
| 3670 |
+
"learning_rate": 3.836793039944349e-05,
|
| 3671 |
+
"loss": 0.7758,
|
| 3672 |
+
"step": 596000
|
| 3673 |
+
},
|
| 3674 |
+
{
|
| 3675 |
+
"epoch": 0.6,
|
| 3676 |
+
"learning_rate": 3.820718400767409e-05,
|
| 3677 |
+
"loss": 0.7627,
|
| 3678 |
+
"step": 597000
|
| 3679 |
+
},
|
| 3680 |
+
{
|
| 3681 |
+
"epoch": 0.6,
|
| 3682 |
+
"learning_rate": 3.8046566580251e-05,
|
| 3683 |
+
"loss": 0.7454,
|
| 3684 |
+
"step": 598000
|
| 3685 |
+
},
|
| 3686 |
+
{
|
| 3687 |
+
"epoch": 0.6,
|
| 3688 |
+
"learning_rate": 3.788607987366069e-05,
|
| 3689 |
+
"loss": 0.7745,
|
| 3690 |
+
"step": 599000
|
| 3691 |
+
},
|
| 3692 |
+
{
|
| 3693 |
+
"epoch": 0.6,
|
| 3694 |
+
"learning_rate": 3.772572564296005e-05,
|
| 3695 |
+
"loss": 0.768,
|
| 3696 |
+
"step": 600000
|
| 3697 |
+
},
|
| 3698 |
+
{
|
| 3699 |
+
"epoch": 0.6,
|
| 3700 |
+
"eval_loss": 0.41385677456855774,
|
| 3701 |
+
"eval_runtime": 177.5998,
|
| 3702 |
+
"eval_samples_per_second": 28.153,
|
| 3703 |
+
"eval_steps_per_second": 0.445,
|
| 3704 |
+
"step": 600000
|
| 3705 |
}
|
| 3706 |
],
|
| 3707 |
"max_steps": 1000000,
|
| 3708 |
"num_train_epochs": 9223372036854775807,
|
| 3709 |
+
"total_flos": 3.58012636102656e+19,
|
| 3710 |
"trial_name": null,
|
| 3711 |
"trial_params": null
|
| 3712 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1740493675
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf3a2440240f6a8a3e9f4556d4121d36465b190a0240767d5fa77daee75c8504
|
| 3 |
size 1740493675
|
runs/Feb25_19-25-50_t1v-n-15e54913-w-0/events.out.tfevents.1677353360.t1v-n-15e54913-w-0.2265434.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83e5e2e456e56194105e17b452fd8c58cd77d132fbe96a705b9e5b711228eb6e
|
| 3 |
+
size 45193
|