| { | |
| "best_metric": 0.7937062937062938, | |
| "best_model_checkpoint": "wav2vec2-5Class-train-test-finetune/checkpoint-4122", | |
| "epoch": 224.0, | |
| "eval_steps": 500, | |
| "global_step": 5432, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.5984586477279663, | |
| "eval_runtime": 5.3437, | |
| "eval_samples_per_second": 53.521, | |
| "eval_steps_per_second": 3.368, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_accuracy": 0.33916083916083917, | |
| "eval_loss": 1.5969289541244507, | |
| "eval_runtime": 3.8653, | |
| "eval_samples_per_second": 73.992, | |
| "eval_steps_per_second": 4.657, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.0544973611831665, | |
| "learning_rate": 2.4999999999999998e-06, | |
| "loss": 1.5969, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 0.32867132867132864, | |
| "eval_loss": 1.5943816900253296, | |
| "eval_runtime": 6.1748, | |
| "eval_samples_per_second": 46.317, | |
| "eval_steps_per_second": 2.915, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.3146853146853147, | |
| "eval_loss": 1.5906767845153809, | |
| "eval_runtime": 5.1678, | |
| "eval_samples_per_second": 55.343, | |
| "eval_steps_per_second": 3.483, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.8443157076835632, | |
| "learning_rate": 4.9999999999999996e-06, | |
| "loss": 1.5896, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_accuracy": 0.2972027972027972, | |
| "eval_loss": 1.5860023498535156, | |
| "eval_runtime": 4.9416, | |
| "eval_samples_per_second": 57.876, | |
| "eval_steps_per_second": 3.643, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_accuracy": 0.2692307692307692, | |
| "eval_loss": 1.5806005001068115, | |
| "eval_runtime": 4.1837, | |
| "eval_samples_per_second": 68.36, | |
| "eval_steps_per_second": 4.302, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "grad_norm": 1.0938074588775635, | |
| "learning_rate": 7.5e-06, | |
| "loss": 1.5743, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "eval_accuracy": 0.25874125874125875, | |
| "eval_loss": 1.5742768049240112, | |
| "eval_runtime": 7.1914, | |
| "eval_samples_per_second": 39.77, | |
| "eval_steps_per_second": 2.503, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.23426573426573427, | |
| "eval_loss": 1.5664165019989014, | |
| "eval_runtime": 5.6489, | |
| "eval_samples_per_second": 50.629, | |
| "eval_steps_per_second": 3.186, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "grad_norm": 0.9692079424858093, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 1.5508, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.557572841644287, | |
| "eval_runtime": 5.5182, | |
| "eval_samples_per_second": 51.828, | |
| "eval_steps_per_second": 3.262, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5482373237609863, | |
| "eval_runtime": 5.3205, | |
| "eval_samples_per_second": 53.754, | |
| "eval_steps_per_second": 3.383, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "grad_norm": 1.02046799659729, | |
| "learning_rate": 1.25e-05, | |
| "loss": 1.5157, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.539355993270874, | |
| "eval_runtime": 6.3116, | |
| "eval_samples_per_second": 45.313, | |
| "eval_steps_per_second": 2.852, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5350520610809326, | |
| "eval_runtime": 4.3422, | |
| "eval_samples_per_second": 65.865, | |
| "eval_steps_per_second": 4.145, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 12.37, | |
| "grad_norm": 1.6058833599090576, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.4534, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5525730848312378, | |
| "eval_runtime": 5.245, | |
| "eval_samples_per_second": 54.528, | |
| "eval_steps_per_second": 3.432, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.599926471710205, | |
| "eval_runtime": 6.0088, | |
| "eval_samples_per_second": 47.597, | |
| "eval_steps_per_second": 2.996, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 14.43, | |
| "grad_norm": 0.8243080377578735, | |
| "learning_rate": 1.7500000000000002e-05, | |
| "loss": 1.3638, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5896875858306885, | |
| "eval_runtime": 4.8752, | |
| "eval_samples_per_second": 58.664, | |
| "eval_steps_per_second": 3.692, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.26573426573426573, | |
| "eval_loss": 1.560091495513916, | |
| "eval_runtime": 5.5082, | |
| "eval_samples_per_second": 51.922, | |
| "eval_steps_per_second": 3.268, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 16.49, | |
| "grad_norm": 0.7977257370948792, | |
| "learning_rate": 1.9999999999999998e-05, | |
| "loss": 1.2951, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "eval_accuracy": 0.2937062937062937, | |
| "eval_loss": 1.5349317789077759, | |
| "eval_runtime": 4.7526, | |
| "eval_samples_per_second": 60.178, | |
| "eval_steps_per_second": 3.787, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 17.98, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.5053907632827759, | |
| "eval_runtime": 4.8638, | |
| "eval_samples_per_second": 58.801, | |
| "eval_steps_per_second": 3.701, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 18.56, | |
| "grad_norm": 0.7064552903175354, | |
| "learning_rate": 2.25e-05, | |
| "loss": 1.2369, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 18.97, | |
| "eval_accuracy": 0.3741258741258741, | |
| "eval_loss": 1.4689087867736816, | |
| "eval_runtime": 4.3712, | |
| "eval_samples_per_second": 65.428, | |
| "eval_steps_per_second": 4.118, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.4370629370629371, | |
| "eval_loss": 1.404613971710205, | |
| "eval_runtime": 4.7203, | |
| "eval_samples_per_second": 60.59, | |
| "eval_steps_per_second": 3.813, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 20.62, | |
| "grad_norm": 0.598238468170166, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.1566, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "eval_accuracy": 0.4405594405594406, | |
| "eval_loss": 1.3691043853759766, | |
| "eval_runtime": 6.6443, | |
| "eval_samples_per_second": 43.044, | |
| "eval_steps_per_second": 2.709, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 21.98, | |
| "eval_accuracy": 0.4825174825174825, | |
| "eval_loss": 1.3120107650756836, | |
| "eval_runtime": 4.9585, | |
| "eval_samples_per_second": 57.679, | |
| "eval_steps_per_second": 3.63, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 22.68, | |
| "grad_norm": 0.682925820350647, | |
| "learning_rate": 2.75e-05, | |
| "loss": 1.0676, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 22.97, | |
| "eval_accuracy": 0.486013986013986, | |
| "eval_loss": 1.2839338779449463, | |
| "eval_runtime": 4.0382, | |
| "eval_samples_per_second": 70.824, | |
| "eval_steps_per_second": 4.457, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.5104895104895105, | |
| "eval_loss": 1.2549891471862793, | |
| "eval_runtime": 5.1896, | |
| "eval_samples_per_second": 55.11, | |
| "eval_steps_per_second": 3.468, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 24.74, | |
| "grad_norm": 1.1368101835250854, | |
| "learning_rate": 3e-05, | |
| "loss": 0.992, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "eval_accuracy": 0.5209790209790209, | |
| "eval_loss": 1.2106566429138184, | |
| "eval_runtime": 6.8941, | |
| "eval_samples_per_second": 41.485, | |
| "eval_steps_per_second": 2.611, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 25.98, | |
| "eval_accuracy": 0.5384615384615384, | |
| "eval_loss": 1.1711338758468628, | |
| "eval_runtime": 4.9707, | |
| "eval_samples_per_second": 57.537, | |
| "eval_steps_per_second": 3.621, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "grad_norm": 0.9649831056594849, | |
| "learning_rate": 2.9722222222222223e-05, | |
| "loss": 0.9272, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 26.97, | |
| "eval_accuracy": 0.5594405594405595, | |
| "eval_loss": 1.1318116188049316, | |
| "eval_runtime": 5.5564, | |
| "eval_samples_per_second": 51.472, | |
| "eval_steps_per_second": 3.24, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.6153846153846154, | |
| "eval_loss": 1.0594333410263062, | |
| "eval_runtime": 4.6773, | |
| "eval_samples_per_second": 61.147, | |
| "eval_steps_per_second": 3.848, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 28.87, | |
| "grad_norm": 0.883937418460846, | |
| "learning_rate": 2.9444444444444445e-05, | |
| "loss": 0.8478, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.6013986013986014, | |
| "eval_loss": 1.054669737815857, | |
| "eval_runtime": 4.9219, | |
| "eval_samples_per_second": 58.108, | |
| "eval_steps_per_second": 3.657, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 29.98, | |
| "eval_accuracy": 0.6363636363636364, | |
| "eval_loss": 0.9822685122489929, | |
| "eval_runtime": 6.3133, | |
| "eval_samples_per_second": 45.302, | |
| "eval_steps_per_second": 2.851, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 30.93, | |
| "grad_norm": 1.3742878437042236, | |
| "learning_rate": 2.9166666666666666e-05, | |
| "loss": 0.7627, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 30.97, | |
| "eval_accuracy": 0.6398601398601399, | |
| "eval_loss": 1.00295090675354, | |
| "eval_runtime": 6.154, | |
| "eval_samples_per_second": 46.473, | |
| "eval_steps_per_second": 2.925, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.6608391608391608, | |
| "eval_loss": 0.930969774723053, | |
| "eval_runtime": 5.6747, | |
| "eval_samples_per_second": 50.399, | |
| "eval_steps_per_second": 3.172, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "grad_norm": 1.329268217086792, | |
| "learning_rate": 2.8888888888888888e-05, | |
| "loss": 0.7266, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.9228739738464355, | |
| "eval_runtime": 5.382, | |
| "eval_samples_per_second": 53.14, | |
| "eval_steps_per_second": 3.344, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 33.98, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8684509992599487, | |
| "eval_runtime": 4.8497, | |
| "eval_samples_per_second": 58.973, | |
| "eval_steps_per_second": 3.712, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 34.97, | |
| "eval_accuracy": 0.6643356643356644, | |
| "eval_loss": 0.8954732418060303, | |
| "eval_runtime": 5.2083, | |
| "eval_samples_per_second": 54.912, | |
| "eval_steps_per_second": 3.456, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 35.05, | |
| "grad_norm": 1.3892701864242554, | |
| "learning_rate": 2.8611111111111113e-05, | |
| "loss": 0.6906, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9125654101371765, | |
| "eval_runtime": 5.3068, | |
| "eval_samples_per_second": 53.894, | |
| "eval_steps_per_second": 3.392, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8543534874916077, | |
| "eval_runtime": 4.3351, | |
| "eval_samples_per_second": 65.974, | |
| "eval_steps_per_second": 4.152, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 37.11, | |
| "grad_norm": 0.836291491985321, | |
| "learning_rate": 2.8333333333333332e-05, | |
| "loss": 0.6721, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 37.98, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8480322957038879, | |
| "eval_runtime": 5.1861, | |
| "eval_samples_per_second": 55.147, | |
| "eval_steps_per_second": 3.471, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 38.97, | |
| "eval_accuracy": 0.7097902097902098, | |
| "eval_loss": 0.8354606628417969, | |
| "eval_runtime": 6.3247, | |
| "eval_samples_per_second": 45.22, | |
| "eval_steps_per_second": 2.846, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 39.18, | |
| "grad_norm": 1.6499431133270264, | |
| "learning_rate": 2.8055555555555557e-05, | |
| "loss": 0.6442, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8412452340126038, | |
| "eval_runtime": 5.2281, | |
| "eval_samples_per_second": 54.704, | |
| "eval_steps_per_second": 3.443, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 40.99, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8356389999389648, | |
| "eval_runtime": 4.8326, | |
| "eval_samples_per_second": 59.181, | |
| "eval_steps_per_second": 3.725, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 41.24, | |
| "grad_norm": 1.1766818761825562, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.6465, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 41.98, | |
| "eval_accuracy": 0.7062937062937062, | |
| "eval_loss": 0.8180016875267029, | |
| "eval_runtime": 5.7926, | |
| "eval_samples_per_second": 49.374, | |
| "eval_steps_per_second": 3.107, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 42.97, | |
| "eval_accuracy": 0.7027972027972028, | |
| "eval_loss": 0.8103991150856018, | |
| "eval_runtime": 5.5185, | |
| "eval_samples_per_second": 51.825, | |
| "eval_steps_per_second": 3.262, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 43.3, | |
| "grad_norm": 0.9722403287887573, | |
| "learning_rate": 2.75e-05, | |
| "loss": 0.6086, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8162235617637634, | |
| "eval_runtime": 4.9174, | |
| "eval_samples_per_second": 58.161, | |
| "eval_steps_per_second": 3.66, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 44.99, | |
| "eval_accuracy": 0.7027972027972028, | |
| "eval_loss": 0.7957289218902588, | |
| "eval_runtime": 4.6891, | |
| "eval_samples_per_second": 60.992, | |
| "eval_steps_per_second": 3.839, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 45.36, | |
| "grad_norm": 1.269113302230835, | |
| "learning_rate": 2.7222222222222223e-05, | |
| "loss": 0.5863, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 45.98, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8143528699874878, | |
| "eval_runtime": 6.6805, | |
| "eval_samples_per_second": 42.811, | |
| "eval_steps_per_second": 2.694, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 46.97, | |
| "eval_accuracy": 0.7027972027972028, | |
| "eval_loss": 0.78568434715271, | |
| "eval_runtime": 4.7422, | |
| "eval_samples_per_second": 60.31, | |
| "eval_steps_per_second": 3.796, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 47.42, | |
| "grad_norm": 0.9775255918502808, | |
| "learning_rate": 2.6944444444444445e-05, | |
| "loss": 0.5877, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.7132867132867133, | |
| "eval_loss": 0.7764595150947571, | |
| "eval_runtime": 5.76, | |
| "eval_samples_per_second": 49.653, | |
| "eval_steps_per_second": 3.125, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 48.99, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.7881478071212769, | |
| "eval_runtime": 5.4965, | |
| "eval_samples_per_second": 52.033, | |
| "eval_steps_per_second": 3.275, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 49.48, | |
| "grad_norm": 1.540124773979187, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.5629, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 49.98, | |
| "eval_accuracy": 0.7097902097902098, | |
| "eval_loss": 0.7658265829086304, | |
| "eval_runtime": 4.731, | |
| "eval_samples_per_second": 60.452, | |
| "eval_steps_per_second": 3.805, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 50.97, | |
| "eval_accuracy": 0.7132867132867133, | |
| "eval_loss": 0.7723098397254944, | |
| "eval_runtime": 5.8352, | |
| "eval_samples_per_second": 49.013, | |
| "eval_steps_per_second": 3.085, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 51.55, | |
| "grad_norm": 1.2498500347137451, | |
| "learning_rate": 2.6388888888888892e-05, | |
| "loss": 0.5476, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.7097902097902098, | |
| "eval_loss": 0.7603952884674072, | |
| "eval_runtime": 4.448, | |
| "eval_samples_per_second": 64.299, | |
| "eval_steps_per_second": 4.047, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 52.99, | |
| "eval_accuracy": 0.7202797202797203, | |
| "eval_loss": 0.7554137706756592, | |
| "eval_runtime": 6.4218, | |
| "eval_samples_per_second": 44.536, | |
| "eval_steps_per_second": 2.803, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 53.61, | |
| "grad_norm": 0.9919388890266418, | |
| "learning_rate": 2.6116666666666667e-05, | |
| "loss": 0.5357, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 53.98, | |
| "eval_accuracy": 0.7307692307692307, | |
| "eval_loss": 0.7458928227424622, | |
| "eval_runtime": 5.3791, | |
| "eval_samples_per_second": 53.168, | |
| "eval_steps_per_second": 3.346, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 54.97, | |
| "eval_accuracy": 0.7132867132867133, | |
| "eval_loss": 0.7632877230644226, | |
| "eval_runtime": 5.278, | |
| "eval_samples_per_second": 54.187, | |
| "eval_steps_per_second": 3.41, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 55.67, | |
| "grad_norm": 1.688183307647705, | |
| "learning_rate": 2.5838888888888892e-05, | |
| "loss": 0.5335, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.7167832167832168, | |
| "eval_loss": 0.768308162689209, | |
| "eval_runtime": 5.7022, | |
| "eval_samples_per_second": 50.156, | |
| "eval_steps_per_second": 3.157, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 56.99, | |
| "eval_accuracy": 0.7307692307692307, | |
| "eval_loss": 0.7380541563034058, | |
| "eval_runtime": 4.522, | |
| "eval_samples_per_second": 63.247, | |
| "eval_steps_per_second": 3.981, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 57.73, | |
| "grad_norm": 1.4895784854888916, | |
| "learning_rate": 2.556111111111111e-05, | |
| "loss": 0.5107, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 57.98, | |
| "eval_accuracy": 0.7377622377622378, | |
| "eval_loss": 0.7308338284492493, | |
| "eval_runtime": 4.4787, | |
| "eval_samples_per_second": 63.857, | |
| "eval_steps_per_second": 4.019, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 58.97, | |
| "eval_accuracy": 0.7237762237762237, | |
| "eval_loss": 0.7441032528877258, | |
| "eval_runtime": 5.8744, | |
| "eval_samples_per_second": 48.685, | |
| "eval_steps_per_second": 3.064, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 59.79, | |
| "grad_norm": 1.4925004243850708, | |
| "learning_rate": 2.5283333333333332e-05, | |
| "loss": 0.5105, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.7307692307692307, | |
| "eval_loss": 0.7481815218925476, | |
| "eval_runtime": 7.272, | |
| "eval_samples_per_second": 39.329, | |
| "eval_steps_per_second": 2.475, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 60.99, | |
| "eval_accuracy": 0.7342657342657343, | |
| "eval_loss": 0.733482301235199, | |
| "eval_runtime": 4.6235, | |
| "eval_samples_per_second": 61.858, | |
| "eval_steps_per_second": 3.893, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 61.86, | |
| "grad_norm": 1.3200663328170776, | |
| "learning_rate": 2.5005555555555558e-05, | |
| "loss": 0.4914, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 61.98, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.7241908311843872, | |
| "eval_runtime": 4.8198, | |
| "eval_samples_per_second": 59.338, | |
| "eval_steps_per_second": 3.735, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 62.97, | |
| "eval_accuracy": 0.7377622377622378, | |
| "eval_loss": 0.7321043014526367, | |
| "eval_runtime": 5.8929, | |
| "eval_samples_per_second": 48.533, | |
| "eval_steps_per_second": 3.055, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 63.92, | |
| "grad_norm": 1.1309747695922852, | |
| "learning_rate": 2.472777777777778e-05, | |
| "loss": 0.4839, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.7342657342657343, | |
| "eval_loss": 0.7220665216445923, | |
| "eval_runtime": 5.8635, | |
| "eval_samples_per_second": 48.776, | |
| "eval_steps_per_second": 3.07, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 64.99, | |
| "eval_accuracy": 0.7412587412587412, | |
| "eval_loss": 0.7136482000350952, | |
| "eval_runtime": 4.3102, | |
| "eval_samples_per_second": 66.354, | |
| "eval_steps_per_second": 4.176, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 65.98, | |
| "grad_norm": 1.1314157247543335, | |
| "learning_rate": 2.4449999999999998e-05, | |
| "loss": 0.4751, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 65.98, | |
| "eval_accuracy": 0.7412587412587412, | |
| "eval_loss": 0.7198111414909363, | |
| "eval_runtime": 4.7841, | |
| "eval_samples_per_second": 59.781, | |
| "eval_steps_per_second": 3.762, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 66.97, | |
| "eval_accuracy": 0.7377622377622378, | |
| "eval_loss": 0.7145721912384033, | |
| "eval_runtime": 6.347, | |
| "eval_samples_per_second": 45.061, | |
| "eval_steps_per_second": 2.836, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.6970916390419006, | |
| "eval_runtime": 5.6871, | |
| "eval_samples_per_second": 50.289, | |
| "eval_steps_per_second": 3.165, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 68.04, | |
| "grad_norm": 2.397585153579712, | |
| "learning_rate": 2.4172222222222223e-05, | |
| "loss": 0.4639, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 68.99, | |
| "eval_accuracy": 0.7272727272727273, | |
| "eval_loss": 0.7201464176177979, | |
| "eval_runtime": 4.4157, | |
| "eval_samples_per_second": 64.769, | |
| "eval_steps_per_second": 4.076, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 69.98, | |
| "eval_accuracy": 0.7307692307692307, | |
| "eval_loss": 0.7244682312011719, | |
| "eval_runtime": 5.4392, | |
| "eval_samples_per_second": 52.581, | |
| "eval_steps_per_second": 3.309, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 70.1, | |
| "grad_norm": 2.062610387802124, | |
| "learning_rate": 2.3894444444444445e-05, | |
| "loss": 0.4581, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 70.97, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.7077587842941284, | |
| "eval_runtime": 5.1002, | |
| "eval_samples_per_second": 56.076, | |
| "eval_steps_per_second": 3.529, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.7517482517482518, | |
| "eval_loss": 0.6957913637161255, | |
| "eval_runtime": 4.4485, | |
| "eval_samples_per_second": 64.291, | |
| "eval_steps_per_second": 4.046, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 72.16, | |
| "grad_norm": 2.7808456420898438, | |
| "learning_rate": 2.3616666666666667e-05, | |
| "loss": 0.4643, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 72.99, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.7036928534507751, | |
| "eval_runtime": 5.9101, | |
| "eval_samples_per_second": 48.392, | |
| "eval_steps_per_second": 3.046, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 73.98, | |
| "eval_accuracy": 0.7482517482517482, | |
| "eval_loss": 0.71629399061203, | |
| "eval_runtime": 6.0211, | |
| "eval_samples_per_second": 47.5, | |
| "eval_steps_per_second": 2.989, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 74.23, | |
| "grad_norm": 1.78495192527771, | |
| "learning_rate": 2.333888888888889e-05, | |
| "loss": 0.442, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 74.97, | |
| "eval_accuracy": 0.7377622377622378, | |
| "eval_loss": 0.6997957229614258, | |
| "eval_runtime": 4.4212, | |
| "eval_samples_per_second": 64.688, | |
| "eval_steps_per_second": 4.071, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.6946483850479126, | |
| "eval_runtime": 4.0507, | |
| "eval_samples_per_second": 70.605, | |
| "eval_steps_per_second": 4.444, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 76.29, | |
| "grad_norm": 1.7383118867874146, | |
| "learning_rate": 2.306111111111111e-05, | |
| "loss": 0.4305, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 76.99, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.6857091784477234, | |
| "eval_runtime": 4.1718, | |
| "eval_samples_per_second": 68.556, | |
| "eval_steps_per_second": 4.315, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 77.98, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.6936307549476624, | |
| "eval_runtime": 3.8781, | |
| "eval_samples_per_second": 73.747, | |
| "eval_steps_per_second": 4.641, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 78.35, | |
| "grad_norm": 1.047067403793335, | |
| "learning_rate": 2.2783333333333336e-05, | |
| "loss": 0.4416, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 78.97, | |
| "eval_accuracy": 0.7517482517482518, | |
| "eval_loss": 0.6965110301971436, | |
| "eval_runtime": 5.1318, | |
| "eval_samples_per_second": 55.731, | |
| "eval_steps_per_second": 3.508, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.7482517482517482, | |
| "eval_loss": 0.7017127871513367, | |
| "eval_runtime": 4.3418, | |
| "eval_samples_per_second": 65.871, | |
| "eval_steps_per_second": 4.146, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 80.41, | |
| "grad_norm": 1.5354928970336914, | |
| "learning_rate": 2.2505555555555554e-05, | |
| "loss": 0.428, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 80.99, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.6970596313476562, | |
| "eval_runtime": 5.973, | |
| "eval_samples_per_second": 47.882, | |
| "eval_steps_per_second": 3.014, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 81.98, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.6897542476654053, | |
| "eval_runtime": 5.0481, | |
| "eval_samples_per_second": 56.655, | |
| "eval_steps_per_second": 3.566, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 82.47, | |
| "grad_norm": 1.7141317129135132, | |
| "learning_rate": 2.2227777777777776e-05, | |
| "loss": 0.4093, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 82.97, | |
| "eval_accuracy": 0.7482517482517482, | |
| "eval_loss": 0.7004020810127258, | |
| "eval_runtime": 4.1986, | |
| "eval_samples_per_second": 68.118, | |
| "eval_steps_per_second": 4.287, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.6867479681968689, | |
| "eval_runtime": 4.6871, | |
| "eval_samples_per_second": 61.018, | |
| "eval_steps_per_second": 3.84, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 84.54, | |
| "grad_norm": 2.0219666957855225, | |
| "learning_rate": 2.195e-05, | |
| "loss": 0.4148, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 84.99, | |
| "eval_accuracy": 0.7377622377622378, | |
| "eval_loss": 0.7070020437240601, | |
| "eval_runtime": 5.9326, | |
| "eval_samples_per_second": 48.208, | |
| "eval_steps_per_second": 3.034, | |
| "step": 2061 | |
| }, | |
| { | |
| "epoch": 85.98, | |
| "eval_accuracy": 0.7447552447552448, | |
| "eval_loss": 0.7030305862426758, | |
| "eval_runtime": 5.3564, | |
| "eval_samples_per_second": 53.394, | |
| "eval_steps_per_second": 3.36, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 86.6, | |
| "grad_norm": 1.4678714275360107, | |
| "learning_rate": 2.1672222222222223e-05, | |
| "loss": 0.3923, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 86.97, | |
| "eval_accuracy": 0.7587412587412588, | |
| "eval_loss": 0.678174614906311, | |
| "eval_runtime": 3.9745, | |
| "eval_samples_per_second": 71.96, | |
| "eval_steps_per_second": 4.529, | |
| "step": 2109 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.7412587412587412, | |
| "eval_loss": 0.7166118621826172, | |
| "eval_runtime": 4.0358, | |
| "eval_samples_per_second": 70.866, | |
| "eval_steps_per_second": 4.46, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 88.66, | |
| "grad_norm": 1.589543342590332, | |
| "learning_rate": 2.1394444444444445e-05, | |
| "loss": 0.3964, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 88.99, | |
| "eval_accuracy": 0.7482517482517482, | |
| "eval_loss": 0.7075912952423096, | |
| "eval_runtime": 5.0331, | |
| "eval_samples_per_second": 56.823, | |
| "eval_steps_per_second": 3.576, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 89.98, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6867172122001648, | |
| "eval_runtime": 5.386, | |
| "eval_samples_per_second": 53.101, | |
| "eval_steps_per_second": 3.342, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 90.72, | |
| "grad_norm": 1.3886605501174927, | |
| "learning_rate": 2.1116666666666667e-05, | |
| "loss": 0.3846, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 90.97, | |
| "eval_accuracy": 0.7517482517482518, | |
| "eval_loss": 0.6913285851478577, | |
| "eval_runtime": 5.5324, | |
| "eval_samples_per_second": 51.696, | |
| "eval_steps_per_second": 3.254, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.7482517482517482, | |
| "eval_loss": 0.7160294651985168, | |
| "eval_runtime": 5.2753, | |
| "eval_samples_per_second": 54.215, | |
| "eval_steps_per_second": 3.412, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 92.78, | |
| "grad_norm": 2.4106783866882324, | |
| "learning_rate": 2.083888888888889e-05, | |
| "loss": 0.3654, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 92.99, | |
| "eval_accuracy": 0.7517482517482518, | |
| "eval_loss": 0.6765207052230835, | |
| "eval_runtime": 5.5671, | |
| "eval_samples_per_second": 51.373, | |
| "eval_steps_per_second": 3.233, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 93.98, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6881967186927795, | |
| "eval_runtime": 3.8228, | |
| "eval_samples_per_second": 74.814, | |
| "eval_steps_per_second": 4.709, | |
| "step": 2279 | |
| }, | |
| { | |
| "epoch": 94.85, | |
| "grad_norm": 0.8871183395385742, | |
| "learning_rate": 2.0561111111111114e-05, | |
| "loss": 0.3577, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 94.97, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.6852585673332214, | |
| "eval_runtime": 4.7228, | |
| "eval_samples_per_second": 60.557, | |
| "eval_steps_per_second": 3.811, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.7158808708190918, | |
| "eval_runtime": 5.6504, | |
| "eval_samples_per_second": 50.616, | |
| "eval_steps_per_second": 3.186, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 96.91, | |
| "grad_norm": 1.0019863843917847, | |
| "learning_rate": 2.0283333333333333e-05, | |
| "loss": 0.37, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 96.99, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6943120360374451, | |
| "eval_runtime": 4.8337, | |
| "eval_samples_per_second": 59.168, | |
| "eval_steps_per_second": 3.724, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 97.98, | |
| "eval_accuracy": 0.7587412587412588, | |
| "eval_loss": 0.7010317444801331, | |
| "eval_runtime": 4.6874, | |
| "eval_samples_per_second": 61.015, | |
| "eval_steps_per_second": 3.84, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 98.97, | |
| "grad_norm": 1.2908928394317627, | |
| "learning_rate": 2.0005555555555555e-05, | |
| "loss": 0.3473, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 98.97, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.693758487701416, | |
| "eval_runtime": 4.7585, | |
| "eval_samples_per_second": 60.103, | |
| "eval_steps_per_second": 3.783, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.7587412587412588, | |
| "eval_loss": 0.6918778419494629, | |
| "eval_runtime": 6.6891, | |
| "eval_samples_per_second": 42.756, | |
| "eval_steps_per_second": 2.691, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 100.99, | |
| "eval_accuracy": 0.7552447552447552, | |
| "eval_loss": 0.6849302053451538, | |
| "eval_runtime": 4.4685, | |
| "eval_samples_per_second": 64.003, | |
| "eval_steps_per_second": 4.028, | |
| "step": 2449 | |
| }, | |
| { | |
| "epoch": 101.03, | |
| "grad_norm": 1.1730871200561523, | |
| "learning_rate": 1.972777777777778e-05, | |
| "loss": 0.3587, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 101.98, | |
| "eval_accuracy": 0.7587412587412588, | |
| "eval_loss": 0.6855939030647278, | |
| "eval_runtime": 4.3434, | |
| "eval_samples_per_second": 65.847, | |
| "eval_steps_per_second": 4.144, | |
| "step": 2473 | |
| }, | |
| { | |
| "epoch": 102.97, | |
| "eval_accuracy": 0.7517482517482518, | |
| "eval_loss": 0.7046144604682922, | |
| "eval_runtime": 4.7166, | |
| "eval_samples_per_second": 60.637, | |
| "eval_steps_per_second": 3.816, | |
| "step": 2497 | |
| }, | |
| { | |
| "epoch": 103.09, | |
| "grad_norm": 1.3693217039108276, | |
| "learning_rate": 1.945e-05, | |
| "loss": 0.3429, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.6892997622489929, | |
| "eval_runtime": 5.3868, | |
| "eval_samples_per_second": 53.092, | |
| "eval_steps_per_second": 3.341, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 104.99, | |
| "eval_accuracy": 0.7622377622377622, | |
| "eval_loss": 0.6913393139839172, | |
| "eval_runtime": 5.09, | |
| "eval_samples_per_second": 56.188, | |
| "eval_steps_per_second": 3.536, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 105.15, | |
| "grad_norm": 1.923829436302185, | |
| "learning_rate": 1.9172222222222224e-05, | |
| "loss": 0.3549, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 105.98, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6880810856819153, | |
| "eval_runtime": 4.6668, | |
| "eval_samples_per_second": 61.283, | |
| "eval_steps_per_second": 3.857, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 106.97, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.7097887396812439, | |
| "eval_runtime": 6.4652, | |
| "eval_samples_per_second": 44.237, | |
| "eval_steps_per_second": 2.784, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 107.22, | |
| "grad_norm": 2.702012062072754, | |
| "learning_rate": 1.8894444444444446e-05, | |
| "loss": 0.3403, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6878336668014526, | |
| "eval_runtime": 4.6923, | |
| "eval_samples_per_second": 60.951, | |
| "eval_steps_per_second": 3.836, | |
| "step": 2619 | |
| }, | |
| { | |
| "epoch": 108.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.695954442024231, | |
| "eval_runtime": 4.4809, | |
| "eval_samples_per_second": 63.827, | |
| "eval_steps_per_second": 4.017, | |
| "step": 2643 | |
| }, | |
| { | |
| "epoch": 109.28, | |
| "grad_norm": 2.3427536487579346, | |
| "learning_rate": 1.8616666666666667e-05, | |
| "loss": 0.3253, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 109.98, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7005948424339294, | |
| "eval_runtime": 4.8882, | |
| "eval_samples_per_second": 58.508, | |
| "eval_steps_per_second": 3.682, | |
| "step": 2667 | |
| }, | |
| { | |
| "epoch": 110.97, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.6916196346282959, | |
| "eval_runtime": 5.2891, | |
| "eval_samples_per_second": 54.073, | |
| "eval_steps_per_second": 3.403, | |
| "step": 2691 | |
| }, | |
| { | |
| "epoch": 111.34, | |
| "grad_norm": 2.178089141845703, | |
| "learning_rate": 1.833888888888889e-05, | |
| "loss": 0.3332, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.7059447765350342, | |
| "eval_runtime": 4.7437, | |
| "eval_samples_per_second": 60.291, | |
| "eval_steps_per_second": 3.795, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 112.99, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.6904045939445496, | |
| "eval_runtime": 4.9942, | |
| "eval_samples_per_second": 57.267, | |
| "eval_steps_per_second": 3.604, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 113.4, | |
| "grad_norm": 1.1625444889068604, | |
| "learning_rate": 1.806111111111111e-05, | |
| "loss": 0.3188, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 113.98, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.6970774531364441, | |
| "eval_runtime": 6.4809, | |
| "eval_samples_per_second": 44.13, | |
| "eval_steps_per_second": 2.777, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 114.97, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.700820803642273, | |
| "eval_runtime": 5.2617, | |
| "eval_samples_per_second": 54.355, | |
| "eval_steps_per_second": 3.421, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 115.46, | |
| "grad_norm": 1.2394715547561646, | |
| "learning_rate": 1.7783333333333333e-05, | |
| "loss": 0.3112, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7002130150794983, | |
| "eval_runtime": 5.0937, | |
| "eval_samples_per_second": 56.147, | |
| "eval_steps_per_second": 3.534, | |
| "step": 2813 | |
| }, | |
| { | |
| "epoch": 116.99, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.6909505724906921, | |
| "eval_runtime": 4.7575, | |
| "eval_samples_per_second": 60.116, | |
| "eval_steps_per_second": 3.784, | |
| "step": 2837 | |
| }, | |
| { | |
| "epoch": 117.53, | |
| "grad_norm": 2.4334964752197266, | |
| "learning_rate": 1.7505555555555558e-05, | |
| "loss": 0.3153, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 117.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.6957750916481018, | |
| "eval_runtime": 4.8105, | |
| "eval_samples_per_second": 59.453, | |
| "eval_steps_per_second": 3.742, | |
| "step": 2861 | |
| }, | |
| { | |
| "epoch": 118.97, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6867520213127136, | |
| "eval_runtime": 4.5411, | |
| "eval_samples_per_second": 62.98, | |
| "eval_steps_per_second": 3.964, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 119.59, | |
| "grad_norm": 0.769097089767456, | |
| "learning_rate": 1.7227777777777777e-05, | |
| "loss": 0.3006, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.6890790462493896, | |
| "eval_runtime": 4.5864, | |
| "eval_samples_per_second": 62.358, | |
| "eval_steps_per_second": 3.925, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 120.99, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6889089941978455, | |
| "eval_runtime": 6.5804, | |
| "eval_samples_per_second": 43.462, | |
| "eval_steps_per_second": 2.735, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 121.65, | |
| "grad_norm": 1.8714542388916016, | |
| "learning_rate": 1.695e-05, | |
| "loss": 0.2967, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 121.98, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6935350894927979, | |
| "eval_runtime": 4.7491, | |
| "eval_samples_per_second": 60.223, | |
| "eval_steps_per_second": 3.79, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 122.97, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.7058219909667969, | |
| "eval_runtime": 4.8941, | |
| "eval_samples_per_second": 58.438, | |
| "eval_steps_per_second": 3.678, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 123.71, | |
| "grad_norm": 2.062924385070801, | |
| "learning_rate": 1.6672222222222224e-05, | |
| "loss": 0.2939, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 124.0, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.7220865488052368, | |
| "eval_runtime": 5.0487, | |
| "eval_samples_per_second": 56.648, | |
| "eval_steps_per_second": 3.565, | |
| "step": 3007 | |
| }, | |
| { | |
| "epoch": 124.99, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.6857044696807861, | |
| "eval_runtime": 5.6134, | |
| "eval_samples_per_second": 50.95, | |
| "eval_steps_per_second": 3.207, | |
| "step": 3031 | |
| }, | |
| { | |
| "epoch": 125.77, | |
| "grad_norm": 1.7039302587509155, | |
| "learning_rate": 1.6394444444444446e-05, | |
| "loss": 0.3101, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 125.98, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6742061972618103, | |
| "eval_runtime": 5.3609, | |
| "eval_samples_per_second": 53.349, | |
| "eval_steps_per_second": 3.358, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 126.97, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7029407620429993, | |
| "eval_runtime": 5.8891, | |
| "eval_samples_per_second": 48.564, | |
| "eval_steps_per_second": 3.056, | |
| "step": 3079 | |
| }, | |
| { | |
| "epoch": 127.84, | |
| "grad_norm": 1.434970736503601, | |
| "learning_rate": 1.6116666666666668e-05, | |
| "loss": 0.284, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 128.0, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.682050347328186, | |
| "eval_runtime": 5.1437, | |
| "eval_samples_per_second": 55.602, | |
| "eval_steps_per_second": 3.499, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 128.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.68370121717453, | |
| "eval_runtime": 4.2733, | |
| "eval_samples_per_second": 66.927, | |
| "eval_steps_per_second": 4.212, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 129.9, | |
| "grad_norm": 1.320789098739624, | |
| "learning_rate": 1.583888888888889e-05, | |
| "loss": 0.2902, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 129.98, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.6823462843894958, | |
| "eval_runtime": 5.7566, | |
| "eval_samples_per_second": 49.682, | |
| "eval_steps_per_second": 3.127, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 130.97, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6950440406799316, | |
| "eval_runtime": 4.9248, | |
| "eval_samples_per_second": 58.074, | |
| "eval_steps_per_second": 3.655, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 131.96, | |
| "grad_norm": 2.1280930042266846, | |
| "learning_rate": 1.556111111111111e-05, | |
| "loss": 0.301, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 132.0, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.6800761818885803, | |
| "eval_runtime": 8.1328, | |
| "eval_samples_per_second": 35.166, | |
| "eval_steps_per_second": 2.213, | |
| "step": 3201 | |
| }, | |
| { | |
| "epoch": 132.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6867505311965942, | |
| "eval_runtime": 4.2532, | |
| "eval_samples_per_second": 67.244, | |
| "eval_steps_per_second": 4.232, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 133.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7061284184455872, | |
| "eval_runtime": 5.3031, | |
| "eval_samples_per_second": 53.93, | |
| "eval_steps_per_second": 3.394, | |
| "step": 3249 | |
| }, | |
| { | |
| "epoch": 134.02, | |
| "grad_norm": 1.532638669013977, | |
| "learning_rate": 1.5283333333333333e-05, | |
| "loss": 0.2736, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 134.97, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7114368677139282, | |
| "eval_runtime": 4.6536, | |
| "eval_samples_per_second": 61.458, | |
| "eval_steps_per_second": 3.868, | |
| "step": 3273 | |
| }, | |
| { | |
| "epoch": 136.0, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6914551854133606, | |
| "eval_runtime": 4.5505, | |
| "eval_samples_per_second": 62.851, | |
| "eval_steps_per_second": 3.956, | |
| "step": 3298 | |
| }, | |
| { | |
| "epoch": 136.08, | |
| "grad_norm": 2.0108492374420166, | |
| "learning_rate": 1.5005555555555555e-05, | |
| "loss": 0.2931, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 136.99, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7055917978286743, | |
| "eval_runtime": 5.3067, | |
| "eval_samples_per_second": 53.894, | |
| "eval_steps_per_second": 3.392, | |
| "step": 3322 | |
| }, | |
| { | |
| "epoch": 137.98, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7026935815811157, | |
| "eval_runtime": 5.186, | |
| "eval_samples_per_second": 55.149, | |
| "eval_steps_per_second": 3.471, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 138.14, | |
| "grad_norm": 1.0804469585418701, | |
| "learning_rate": 1.4727777777777779e-05, | |
| "loss": 0.2864, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 138.97, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6983500719070435, | |
| "eval_runtime": 6.955, | |
| "eval_samples_per_second": 41.122, | |
| "eval_steps_per_second": 2.588, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.7168787121772766, | |
| "eval_runtime": 4.234, | |
| "eval_samples_per_second": 67.548, | |
| "eval_steps_per_second": 4.251, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 140.21, | |
| "grad_norm": 2.370694637298584, | |
| "learning_rate": 1.445e-05, | |
| "loss": 0.2765, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 140.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6960318088531494, | |
| "eval_runtime": 5.0294, | |
| "eval_samples_per_second": 56.865, | |
| "eval_steps_per_second": 3.579, | |
| "step": 3419 | |
| }, | |
| { | |
| "epoch": 141.98, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.6990492343902588, | |
| "eval_runtime": 5.2727, | |
| "eval_samples_per_second": 54.242, | |
| "eval_steps_per_second": 3.414, | |
| "step": 3443 | |
| }, | |
| { | |
| "epoch": 142.27, | |
| "grad_norm": 1.6676194667816162, | |
| "learning_rate": 1.4172222222222222e-05, | |
| "loss": 0.2808, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 142.97, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.706200897693634, | |
| "eval_runtime": 4.5273, | |
| "eval_samples_per_second": 63.173, | |
| "eval_steps_per_second": 3.976, | |
| "step": 3467 | |
| }, | |
| { | |
| "epoch": 144.0, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.6821764707565308, | |
| "eval_runtime": 5.3614, | |
| "eval_samples_per_second": 53.344, | |
| "eval_steps_per_second": 3.357, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 144.33, | |
| "grad_norm": 1.9151145219802856, | |
| "learning_rate": 1.3894444444444444e-05, | |
| "loss": 0.2712, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 144.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.7063603401184082, | |
| "eval_runtime": 4.9088, | |
| "eval_samples_per_second": 58.263, | |
| "eval_steps_per_second": 3.667, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 145.98, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.7150112390518188, | |
| "eval_runtime": 7.2044, | |
| "eval_samples_per_second": 39.698, | |
| "eval_steps_per_second": 2.498, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 146.39, | |
| "grad_norm": 1.5093848705291748, | |
| "learning_rate": 1.3622222222222223e-05, | |
| "loss": 0.2726, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 146.97, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.696849524974823, | |
| "eval_runtime": 4.9386, | |
| "eval_samples_per_second": 57.911, | |
| "eval_steps_per_second": 3.645, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 148.0, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7086759209632874, | |
| "eval_runtime": 4.4363, | |
| "eval_samples_per_second": 64.468, | |
| "eval_steps_per_second": 4.057, | |
| "step": 3589 | |
| }, | |
| { | |
| "epoch": 148.45, | |
| "grad_norm": 1.4403679370880127, | |
| "learning_rate": 1.3344444444444444e-05, | |
| "loss": 0.2607, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 148.99, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.7129560112953186, | |
| "eval_runtime": 5.3809, | |
| "eval_samples_per_second": 53.15, | |
| "eval_steps_per_second": 3.345, | |
| "step": 3613 | |
| }, | |
| { | |
| "epoch": 149.98, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7080287933349609, | |
| "eval_runtime": 5.8187, | |
| "eval_samples_per_second": 49.152, | |
| "eval_steps_per_second": 3.093, | |
| "step": 3637 | |
| }, | |
| { | |
| "epoch": 150.52, | |
| "grad_norm": 2.036515235900879, | |
| "learning_rate": 1.3066666666666666e-05, | |
| "loss": 0.2546, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 150.97, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.7088435888290405, | |
| "eval_runtime": 4.8742, | |
| "eval_samples_per_second": 58.677, | |
| "eval_steps_per_second": 3.693, | |
| "step": 3661 | |
| }, | |
| { | |
| "epoch": 152.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7030193209648132, | |
| "eval_runtime": 4.9492, | |
| "eval_samples_per_second": 57.787, | |
| "eval_steps_per_second": 3.637, | |
| "step": 3686 | |
| }, | |
| { | |
| "epoch": 152.58, | |
| "grad_norm": 1.200052261352539, | |
| "learning_rate": 1.2788888888888888e-05, | |
| "loss": 0.2563, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 152.99, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.7077969908714294, | |
| "eval_runtime": 4.614, | |
| "eval_samples_per_second": 61.985, | |
| "eval_steps_per_second": 3.901, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 153.98, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.700455904006958, | |
| "eval_runtime": 5.7657, | |
| "eval_samples_per_second": 49.604, | |
| "eval_steps_per_second": 3.122, | |
| "step": 3734 | |
| }, | |
| { | |
| "epoch": 154.64, | |
| "grad_norm": 2.2751214504241943, | |
| "learning_rate": 1.2511111111111112e-05, | |
| "loss": 0.2531, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 154.97, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7160292267799377, | |
| "eval_runtime": 5.1079, | |
| "eval_samples_per_second": 55.992, | |
| "eval_steps_per_second": 3.524, | |
| "step": 3758 | |
| }, | |
| { | |
| "epoch": 156.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7175909876823425, | |
| "eval_runtime": 5.4035, | |
| "eval_samples_per_second": 52.929, | |
| "eval_steps_per_second": 3.331, | |
| "step": 3783 | |
| }, | |
| { | |
| "epoch": 156.7, | |
| "grad_norm": 1.9024412631988525, | |
| "learning_rate": 1.2233333333333334e-05, | |
| "loss": 0.2446, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 156.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.7190600037574768, | |
| "eval_runtime": 4.3633, | |
| "eval_samples_per_second": 65.546, | |
| "eval_steps_per_second": 4.125, | |
| "step": 3807 | |
| }, | |
| { | |
| "epoch": 157.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.719641387462616, | |
| "eval_runtime": 5.0426, | |
| "eval_samples_per_second": 56.717, | |
| "eval_steps_per_second": 3.57, | |
| "step": 3831 | |
| }, | |
| { | |
| "epoch": 158.76, | |
| "grad_norm": 3.471806287765503, | |
| "learning_rate": 1.1955555555555556e-05, | |
| "loss": 0.2479, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 158.97, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7073430418968201, | |
| "eval_runtime": 3.6336, | |
| "eval_samples_per_second": 78.711, | |
| "eval_steps_per_second": 4.954, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7328661680221558, | |
| "eval_runtime": 5.2625, | |
| "eval_samples_per_second": 54.347, | |
| "eval_steps_per_second": 3.42, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 160.82, | |
| "grad_norm": 2.1171793937683105, | |
| "learning_rate": 1.1677777777777777e-05, | |
| "loss": 0.2523, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 160.99, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7158821821212769, | |
| "eval_runtime": 6.5877, | |
| "eval_samples_per_second": 43.414, | |
| "eval_steps_per_second": 2.732, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 161.98, | |
| "eval_accuracy": 0.7692307692307693, | |
| "eval_loss": 0.719171404838562, | |
| "eval_runtime": 4.5674, | |
| "eval_samples_per_second": 62.618, | |
| "eval_steps_per_second": 3.941, | |
| "step": 3928 | |
| }, | |
| { | |
| "epoch": 162.89, | |
| "grad_norm": 1.7515395879745483, | |
| "learning_rate": 1.1400000000000001e-05, | |
| "loss": 0.2523, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 162.97, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.7281435132026672, | |
| "eval_runtime": 4.4866, | |
| "eval_samples_per_second": 63.746, | |
| "eval_steps_per_second": 4.012, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 164.0, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7078841328620911, | |
| "eval_runtime": 4.4241, | |
| "eval_samples_per_second": 64.645, | |
| "eval_steps_per_second": 4.069, | |
| "step": 3977 | |
| }, | |
| { | |
| "epoch": 164.95, | |
| "grad_norm": 1.456335186958313, | |
| "learning_rate": 1.1122222222222223e-05, | |
| "loss": 0.2422, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 164.99, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.7161521911621094, | |
| "eval_runtime": 5.1239, | |
| "eval_samples_per_second": 55.817, | |
| "eval_steps_per_second": 3.513, | |
| "step": 4001 | |
| }, | |
| { | |
| "epoch": 165.98, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7190020084381104, | |
| "eval_runtime": 3.4488, | |
| "eval_samples_per_second": 82.926, | |
| "eval_steps_per_second": 5.219, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 166.97, | |
| "eval_accuracy": 0.7762237762237763, | |
| "eval_loss": 0.7311248779296875, | |
| "eval_runtime": 5.0389, | |
| "eval_samples_per_second": 56.759, | |
| "eval_steps_per_second": 3.572, | |
| "step": 4049 | |
| }, | |
| { | |
| "epoch": 167.01, | |
| "grad_norm": 1.2554075717926025, | |
| "learning_rate": 1.0844444444444445e-05, | |
| "loss": 0.242, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 168.0, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7110462188720703, | |
| "eval_runtime": 4.4612, | |
| "eval_samples_per_second": 64.108, | |
| "eval_steps_per_second": 4.035, | |
| "step": 4074 | |
| }, | |
| { | |
| "epoch": 168.99, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7028501629829407, | |
| "eval_runtime": 6.955, | |
| "eval_samples_per_second": 41.122, | |
| "eval_steps_per_second": 2.588, | |
| "step": 4098 | |
| }, | |
| { | |
| "epoch": 169.07, | |
| "grad_norm": 2.8003265857696533, | |
| "learning_rate": 1.0566666666666667e-05, | |
| "loss": 0.2392, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 169.98, | |
| "eval_accuracy": 0.7937062937062938, | |
| "eval_loss": 0.7108554840087891, | |
| "eval_runtime": 5.0033, | |
| "eval_samples_per_second": 57.162, | |
| "eval_steps_per_second": 3.598, | |
| "step": 4122 | |
| }, | |
| { | |
| "epoch": 170.97, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7106384634971619, | |
| "eval_runtime": 5.1984, | |
| "eval_samples_per_second": 55.017, | |
| "eval_steps_per_second": 3.463, | |
| "step": 4146 | |
| }, | |
| { | |
| "epoch": 171.13, | |
| "grad_norm": 2.1897969245910645, | |
| "learning_rate": 1.028888888888889e-05, | |
| "loss": 0.247, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 172.0, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7151694297790527, | |
| "eval_runtime": 5.1963, | |
| "eval_samples_per_second": 55.039, | |
| "eval_steps_per_second": 3.464, | |
| "step": 4171 | |
| }, | |
| { | |
| "epoch": 172.99, | |
| "eval_accuracy": 0.7657342657342657, | |
| "eval_loss": 0.7254167795181274, | |
| "eval_runtime": 4.4466, | |
| "eval_samples_per_second": 64.319, | |
| "eval_steps_per_second": 4.048, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 173.2, | |
| "grad_norm": 2.769357681274414, | |
| "learning_rate": 1.0011111111111112e-05, | |
| "loss": 0.2341, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 173.98, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7290962338447571, | |
| "eval_runtime": 6.2221, | |
| "eval_samples_per_second": 45.965, | |
| "eval_steps_per_second": 2.893, | |
| "step": 4219 | |
| }, | |
| { | |
| "epoch": 174.97, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7088623046875, | |
| "eval_runtime": 4.3709, | |
| "eval_samples_per_second": 65.433, | |
| "eval_steps_per_second": 4.118, | |
| "step": 4243 | |
| }, | |
| { | |
| "epoch": 175.26, | |
| "grad_norm": 2.044703483581543, | |
| "learning_rate": 9.733333333333332e-06, | |
| "loss": 0.2317, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 176.0, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7185826897621155, | |
| "eval_runtime": 5.4095, | |
| "eval_samples_per_second": 52.87, | |
| "eval_steps_per_second": 3.327, | |
| "step": 4268 | |
| }, | |
| { | |
| "epoch": 176.99, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7167823314666748, | |
| "eval_runtime": 4.9506, | |
| "eval_samples_per_second": 57.77, | |
| "eval_steps_per_second": 3.636, | |
| "step": 4292 | |
| }, | |
| { | |
| "epoch": 177.32, | |
| "grad_norm": 1.078834056854248, | |
| "learning_rate": 9.455555555555556e-06, | |
| "loss": 0.2269, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 177.98, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7237738966941833, | |
| "eval_runtime": 4.781, | |
| "eval_samples_per_second": 59.82, | |
| "eval_steps_per_second": 3.765, | |
| "step": 4316 | |
| }, | |
| { | |
| "epoch": 178.97, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7131801247596741, | |
| "eval_runtime": 4.6869, | |
| "eval_samples_per_second": 61.022, | |
| "eval_steps_per_second": 3.841, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 179.38, | |
| "grad_norm": 2.008120536804199, | |
| "learning_rate": 9.177777777777778e-06, | |
| "loss": 0.2283, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 180.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7384253144264221, | |
| "eval_runtime": 4.5879, | |
| "eval_samples_per_second": 62.338, | |
| "eval_steps_per_second": 3.923, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 180.99, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7002861499786377, | |
| "eval_runtime": 5.3238, | |
| "eval_samples_per_second": 53.721, | |
| "eval_steps_per_second": 3.381, | |
| "step": 4389 | |
| }, | |
| { | |
| "epoch": 181.44, | |
| "grad_norm": 1.9518792629241943, | |
| "learning_rate": 8.900000000000001e-06, | |
| "loss": 0.2303, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 181.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7278482913970947, | |
| "eval_runtime": 5.8358, | |
| "eval_samples_per_second": 49.008, | |
| "eval_steps_per_second": 3.084, | |
| "step": 4413 | |
| }, | |
| { | |
| "epoch": 182.97, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7143127918243408, | |
| "eval_runtime": 6.1229, | |
| "eval_samples_per_second": 46.71, | |
| "eval_steps_per_second": 2.94, | |
| "step": 4437 | |
| }, | |
| { | |
| "epoch": 183.51, | |
| "grad_norm": 1.0936890840530396, | |
| "learning_rate": 8.622222222222221e-06, | |
| "loss": 0.2109, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 184.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7406834363937378, | |
| "eval_runtime": 5.0467, | |
| "eval_samples_per_second": 56.671, | |
| "eval_steps_per_second": 3.567, | |
| "step": 4462 | |
| }, | |
| { | |
| "epoch": 184.99, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7053534388542175, | |
| "eval_runtime": 5.279, | |
| "eval_samples_per_second": 54.177, | |
| "eval_steps_per_second": 3.41, | |
| "step": 4486 | |
| }, | |
| { | |
| "epoch": 185.57, | |
| "grad_norm": 2.9350059032440186, | |
| "learning_rate": 8.344444444444445e-06, | |
| "loss": 0.2261, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 185.98, | |
| "eval_accuracy": 0.7727272727272727, | |
| "eval_loss": 0.7260809540748596, | |
| "eval_runtime": 5.4165, | |
| "eval_samples_per_second": 52.802, | |
| "eval_steps_per_second": 3.323, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 186.97, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7240064144134521, | |
| "eval_runtime": 5.4866, | |
| "eval_samples_per_second": 52.127, | |
| "eval_steps_per_second": 3.281, | |
| "step": 4534 | |
| }, | |
| { | |
| "epoch": 187.63, | |
| "grad_norm": 1.8322782516479492, | |
| "learning_rate": 8.066666666666667e-06, | |
| "loss": 0.2282, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 188.0, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7199599146842957, | |
| "eval_runtime": 4.6736, | |
| "eval_samples_per_second": 61.195, | |
| "eval_steps_per_second": 3.851, | |
| "step": 4559 | |
| }, | |
| { | |
| "epoch": 188.99, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7102844715118408, | |
| "eval_runtime": 5.4219, | |
| "eval_samples_per_second": 52.749, | |
| "eval_steps_per_second": 3.32, | |
| "step": 4583 | |
| }, | |
| { | |
| "epoch": 189.69, | |
| "grad_norm": 1.8777916431427002, | |
| "learning_rate": 7.78888888888889e-06, | |
| "loss": 0.2321, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 189.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7083376049995422, | |
| "eval_runtime": 5.9634, | |
| "eval_samples_per_second": 47.959, | |
| "eval_steps_per_second": 3.018, | |
| "step": 4607 | |
| }, | |
| { | |
| "epoch": 190.97, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7244677543640137, | |
| "eval_runtime": 5.2078, | |
| "eval_samples_per_second": 54.918, | |
| "eval_steps_per_second": 3.456, | |
| "step": 4631 | |
| }, | |
| { | |
| "epoch": 191.75, | |
| "grad_norm": 1.5277408361434937, | |
| "learning_rate": 7.5111111111111105e-06, | |
| "loss": 0.2261, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 192.0, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7124583721160889, | |
| "eval_runtime": 5.7079, | |
| "eval_samples_per_second": 50.106, | |
| "eval_steps_per_second": 3.154, | |
| "step": 4656 | |
| }, | |
| { | |
| "epoch": 192.99, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7308976054191589, | |
| "eval_runtime": 5.3404, | |
| "eval_samples_per_second": 53.554, | |
| "eval_steps_per_second": 3.371, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 193.81, | |
| "grad_norm": 2.095749616622925, | |
| "learning_rate": 7.233333333333333e-06, | |
| "loss": 0.2231, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 193.98, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7237818837165833, | |
| "eval_runtime": 4.6666, | |
| "eval_samples_per_second": 61.286, | |
| "eval_steps_per_second": 3.857, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 194.97, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7253320217132568, | |
| "eval_runtime": 5.8059, | |
| "eval_samples_per_second": 49.261, | |
| "eval_steps_per_second": 3.1, | |
| "step": 4728 | |
| }, | |
| { | |
| "epoch": 195.88, | |
| "grad_norm": 1.6955636739730835, | |
| "learning_rate": 6.955555555555556e-06, | |
| "loss": 0.2083, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 196.0, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7240011692047119, | |
| "eval_runtime": 6.0767, | |
| "eval_samples_per_second": 47.065, | |
| "eval_steps_per_second": 2.962, | |
| "step": 4753 | |
| }, | |
| { | |
| "epoch": 196.99, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7131750583648682, | |
| "eval_runtime": 5.3063, | |
| "eval_samples_per_second": 53.898, | |
| "eval_steps_per_second": 3.392, | |
| "step": 4777 | |
| }, | |
| { | |
| "epoch": 197.94, | |
| "grad_norm": 0.8933289051055908, | |
| "learning_rate": 6.677777777777778e-06, | |
| "loss": 0.2116, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 197.98, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7169559597969055, | |
| "eval_runtime": 5.5713, | |
| "eval_samples_per_second": 51.335, | |
| "eval_steps_per_second": 3.231, | |
| "step": 4801 | |
| }, | |
| { | |
| "epoch": 198.97, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7265609502792358, | |
| "eval_runtime": 4.1397, | |
| "eval_samples_per_second": 69.087, | |
| "eval_steps_per_second": 4.348, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "grad_norm": 2.175414562225342, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 0.2219, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7162622213363647, | |
| "eval_runtime": 5.2016, | |
| "eval_samples_per_second": 54.984, | |
| "eval_steps_per_second": 3.461, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 200.99, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7302048802375793, | |
| "eval_runtime": 4.9222, | |
| "eval_samples_per_second": 58.104, | |
| "eval_steps_per_second": 3.657, | |
| "step": 4874 | |
| }, | |
| { | |
| "epoch": 201.98, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7223746180534363, | |
| "eval_runtime": 4.6884, | |
| "eval_samples_per_second": 61.002, | |
| "eval_steps_per_second": 3.839, | |
| "step": 4898 | |
| }, | |
| { | |
| "epoch": 202.06, | |
| "grad_norm": 2.053739309310913, | |
| "learning_rate": 6.1222222222222224e-06, | |
| "loss": 0.2183, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 202.97, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7179226279258728, | |
| "eval_runtime": 4.5556, | |
| "eval_samples_per_second": 62.78, | |
| "eval_steps_per_second": 3.951, | |
| "step": 4922 | |
| }, | |
| { | |
| "epoch": 204.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7245286107063293, | |
| "eval_runtime": 5.7474, | |
| "eval_samples_per_second": 49.762, | |
| "eval_steps_per_second": 3.132, | |
| "step": 4947 | |
| }, | |
| { | |
| "epoch": 204.12, | |
| "grad_norm": 1.1081063747406006, | |
| "learning_rate": 5.844444444444444e-06, | |
| "loss": 0.2053, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 204.99, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7344977259635925, | |
| "eval_runtime": 5.4178, | |
| "eval_samples_per_second": 52.789, | |
| "eval_steps_per_second": 3.322, | |
| "step": 4971 | |
| }, | |
| { | |
| "epoch": 205.98, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7249557971954346, | |
| "eval_runtime": 5.6352, | |
| "eval_samples_per_second": 50.753, | |
| "eval_steps_per_second": 3.194, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 206.19, | |
| "grad_norm": 1.09213125705719, | |
| "learning_rate": 5.566666666666667e-06, | |
| "loss": 0.2113, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 206.97, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7246001958847046, | |
| "eval_runtime": 4.9071, | |
| "eval_samples_per_second": 58.283, | |
| "eval_steps_per_second": 3.668, | |
| "step": 5019 | |
| }, | |
| { | |
| "epoch": 208.0, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7270117998123169, | |
| "eval_runtime": 5.8385, | |
| "eval_samples_per_second": 48.985, | |
| "eval_steps_per_second": 3.083, | |
| "step": 5044 | |
| }, | |
| { | |
| "epoch": 208.25, | |
| "grad_norm": 1.6693130731582642, | |
| "learning_rate": 5.288888888888889e-06, | |
| "loss": 0.2152, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 208.99, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7285901308059692, | |
| "eval_runtime": 5.489, | |
| "eval_samples_per_second": 52.104, | |
| "eval_steps_per_second": 3.279, | |
| "step": 5068 | |
| }, | |
| { | |
| "epoch": 209.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7332947254180908, | |
| "eval_runtime": 5.3017, | |
| "eval_samples_per_second": 53.945, | |
| "eval_steps_per_second": 3.395, | |
| "step": 5092 | |
| }, | |
| { | |
| "epoch": 210.31, | |
| "grad_norm": 2.0511515140533447, | |
| "learning_rate": 5.011111111111112e-06, | |
| "loss": 0.2129, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 210.97, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7307863831520081, | |
| "eval_runtime": 5.2991, | |
| "eval_samples_per_second": 53.971, | |
| "eval_steps_per_second": 3.397, | |
| "step": 5116 | |
| }, | |
| { | |
| "epoch": 212.0, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7176437973976135, | |
| "eval_runtime": 4.9452, | |
| "eval_samples_per_second": 57.834, | |
| "eval_steps_per_second": 3.64, | |
| "step": 5141 | |
| }, | |
| { | |
| "epoch": 212.37, | |
| "grad_norm": 1.8491023778915405, | |
| "learning_rate": 4.7333333333333335e-06, | |
| "loss": 0.2173, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 212.99, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7334882020950317, | |
| "eval_runtime": 4.9602, | |
| "eval_samples_per_second": 57.659, | |
| "eval_steps_per_second": 3.629, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 213.98, | |
| "eval_accuracy": 0.7797202797202797, | |
| "eval_loss": 0.7268483638763428, | |
| "eval_runtime": 5.885, | |
| "eval_samples_per_second": 48.598, | |
| "eval_steps_per_second": 3.059, | |
| "step": 5189 | |
| }, | |
| { | |
| "epoch": 214.43, | |
| "grad_norm": 1.2067769765853882, | |
| "learning_rate": 4.455555555555556e-06, | |
| "loss": 0.2042, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 214.97, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7299237847328186, | |
| "eval_runtime": 5.7645, | |
| "eval_samples_per_second": 49.614, | |
| "eval_steps_per_second": 3.123, | |
| "step": 5213 | |
| }, | |
| { | |
| "epoch": 216.0, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7360625863075256, | |
| "eval_runtime": 4.7143, | |
| "eval_samples_per_second": 60.667, | |
| "eval_steps_per_second": 3.818, | |
| "step": 5238 | |
| }, | |
| { | |
| "epoch": 216.49, | |
| "grad_norm": 1.3863427639007568, | |
| "learning_rate": 4.177777777777777e-06, | |
| "loss": 0.2112, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 216.99, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.723866879940033, | |
| "eval_runtime": 5.3445, | |
| "eval_samples_per_second": 53.513, | |
| "eval_steps_per_second": 3.368, | |
| "step": 5262 | |
| }, | |
| { | |
| "epoch": 217.98, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.7252445220947266, | |
| "eval_runtime": 4.6314, | |
| "eval_samples_per_second": 61.753, | |
| "eval_steps_per_second": 3.887, | |
| "step": 5286 | |
| }, | |
| { | |
| "epoch": 218.56, | |
| "grad_norm": 1.1177924871444702, | |
| "learning_rate": 3.9e-06, | |
| "loss": 0.2007, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 218.97, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.719983696937561, | |
| "eval_runtime": 4.865, | |
| "eval_samples_per_second": 58.787, | |
| "eval_steps_per_second": 3.7, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 220.0, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7195786237716675, | |
| "eval_runtime": 5.5422, | |
| "eval_samples_per_second": 51.604, | |
| "eval_steps_per_second": 3.248, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 220.62, | |
| "grad_norm": 1.413304090499878, | |
| "learning_rate": 3.6222222222222226e-06, | |
| "loss": 0.2163, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 220.99, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.7309580445289612, | |
| "eval_runtime": 5.2512, | |
| "eval_samples_per_second": 54.463, | |
| "eval_steps_per_second": 3.428, | |
| "step": 5359 | |
| }, | |
| { | |
| "epoch": 221.98, | |
| "eval_accuracy": 0.7867132867132867, | |
| "eval_loss": 0.7313971519470215, | |
| "eval_runtime": 5.1151, | |
| "eval_samples_per_second": 55.913, | |
| "eval_steps_per_second": 3.519, | |
| "step": 5383 | |
| }, | |
| { | |
| "epoch": 222.68, | |
| "grad_norm": 3.0471901893615723, | |
| "learning_rate": 3.3444444444444445e-06, | |
| "loss": 0.2141, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 222.97, | |
| "eval_accuracy": 0.7832167832167832, | |
| "eval_loss": 0.727938175201416, | |
| "eval_runtime": 4.6405, | |
| "eval_samples_per_second": 61.631, | |
| "eval_steps_per_second": 3.879, | |
| "step": 5407 | |
| }, | |
| { | |
| "epoch": 224.0, | |
| "eval_accuracy": 0.7902097902097902, | |
| "eval_loss": 0.725923478603363, | |
| "eval_runtime": 5.0906, | |
| "eval_samples_per_second": 56.182, | |
| "eval_steps_per_second": 3.536, | |
| "step": 5432 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 6000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 250, | |
| "save_steps": 500, | |
| "total_flos": 3.037085846065152e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |