{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.99968,
  "eval_steps": 500,
  "global_step": 781,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00128,
      "grad_norm": 75.22388810122628,
      "learning_rate": 0.0,
      "loss": 6.4066,
      "step": 1
    },
    {
      "epoch": 0.00256,
      "grad_norm": 74.84035682780194,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 6.3943,
      "step": 2
    },
    {
      "epoch": 0.00384,
      "grad_norm": 71.6440734190211,
      "learning_rate": 4.166666666666667e-06,
      "loss": 6.1241,
      "step": 3
    },
    {
      "epoch": 0.00512,
      "grad_norm": 78.67458850667643,
      "learning_rate": 6.25e-06,
      "loss": 6.6531,
      "step": 4
    },
    {
      "epoch": 0.0064,
      "grad_norm": 76.43934602400999,
      "learning_rate": 8.333333333333334e-06,
      "loss": 5.7199,
      "step": 5
    },
    {
      "epoch": 0.00768,
      "grad_norm": 36.5748722242616,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 4.7397,
      "step": 6
    },
    {
      "epoch": 0.00896,
      "grad_norm": 34.08030761387554,
      "learning_rate": 1.25e-05,
      "loss": 4.18,
      "step": 7
    },
    {
      "epoch": 0.01024,
      "grad_norm": 53.96003347068579,
      "learning_rate": 1.4583333333333335e-05,
      "loss": 4.5235,
      "step": 8
    },
    {
      "epoch": 0.01152,
      "grad_norm": 35.57787525068594,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 4.1362,
      "step": 9
    },
    {
      "epoch": 0.0128,
      "grad_norm": 21.64818418320424,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 3.648,
      "step": 10
    },
    {
      "epoch": 0.01408,
      "grad_norm": 23.698731425697314,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 3.6368,
      "step": 11
    },
    {
      "epoch": 0.01536,
      "grad_norm": 25.49135474110726,
      "learning_rate": 2.2916666666666667e-05,
      "loss": 3.3574,
      "step": 12
    },
    {
      "epoch": 0.01664,
      "grad_norm": 28.186928861494014,
      "learning_rate": 2.5e-05,
      "loss": 3.2673,
      "step": 13
    },
    {
      "epoch": 0.01792,
      "grad_norm": 29.42863632854984,
      "learning_rate": 2.7083333333333332e-05,
      "loss": 3.0853,
      "step": 14
    },
    {
      "epoch": 0.0192,
      "grad_norm": 29.970359127718496,
      "learning_rate": 2.916666666666667e-05,
      "loss": 2.9262,
      "step": 15
    },
    {
      "epoch": 0.02048,
      "grad_norm": 29.849946615196654,
      "learning_rate": 3.125e-05,
      "loss": 2.7621,
      "step": 16
    },
    {
      "epoch": 0.02176,
      "grad_norm": 31.36349951125821,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.7265,
      "step": 17
    },
    {
      "epoch": 0.02304,
      "grad_norm": 32.60856621807998,
      "learning_rate": 3.541666666666667e-05,
      "loss": 2.5158,
      "step": 18
    },
    {
      "epoch": 0.02432,
      "grad_norm": 33.42203498908917,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 2.448,
      "step": 19
    },
    {
      "epoch": 0.0256,
      "grad_norm": 30.921871779344183,
      "learning_rate": 3.958333333333333e-05,
      "loss": 2.2241,
      "step": 20
    },
    {
      "epoch": 0.02688,
      "grad_norm": 31.106220456760624,
      "learning_rate": 4.166666666666667e-05,
      "loss": 2.2075,
      "step": 21
    },
    {
      "epoch": 0.02816,
      "grad_norm": 31.604306671646082,
      "learning_rate": 4.375e-05,
      "loss": 2.1254,
      "step": 22
    },
    {
      "epoch": 0.02944,
      "grad_norm": 31.64347734281831,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 1.8717,
      "step": 23
    },
    {
      "epoch": 0.03072,
      "grad_norm": 28.179839908574685,
      "learning_rate": 4.791666666666667e-05,
      "loss": 1.735,
      "step": 24
    },
    {
      "epoch": 0.032,
      "grad_norm": 29.732607095642244,
      "learning_rate": 5e-05,
      "loss": 1.5319,
      "step": 25
    },
    {
      "epoch": 0.03328,
      "grad_norm": 24.059204813518615,
      "learning_rate": 4.9999784713213104e-05,
      "loss": 1.361,
      "step": 26
    },
    {
      "epoch": 0.03456,
      "grad_norm": 21.267374659129725,
      "learning_rate": 4.999913885656027e-05,
      "loss": 1.2991,
      "step": 27
    },
    {
      "epoch": 0.03584,
      "grad_norm": 18.264005792415958,
      "learning_rate": 4.9998062441165057e-05,
      "loss": 1.1682,
      "step": 28
    },
    {
      "epoch": 0.03712,
      "grad_norm": 15.231578361958586,
      "learning_rate": 4.999655548556651e-05,
      "loss": 1.1368,
      "step": 29
    },
    {
      "epoch": 0.0384,
      "grad_norm": 12.872167605400922,
      "learning_rate": 4.999461801571883e-05,
      "loss": 1.0988,
      "step": 30
    },
    {
      "epoch": 0.03968,
      "grad_norm": 9.561878395436098,
      "learning_rate": 4.9992250064990957e-05,
      "loss": 0.9662,
      "step": 31
    },
    {
      "epoch": 0.04096,
      "grad_norm": 7.597870732428243,
      "learning_rate": 4.998945167416597e-05,
      "loss": 0.9607,
      "step": 32
    },
    {
      "epoch": 0.04224,
      "grad_norm": 6.904886757471601,
      "learning_rate": 4.998622289144039e-05,
      "loss": 0.9469,
      "step": 33
    },
    {
      "epoch": 0.04352,
      "grad_norm": 5.6775963847529045,
      "learning_rate": 4.9982563772423375e-05,
      "loss": 0.9346,
      "step": 34
    },
    {
      "epoch": 0.0448,
      "grad_norm": 5.622101580299479,
      "learning_rate": 4.99784743801357e-05,
      "loss": 0.9002,
      "step": 35
    },
    {
      "epoch": 0.04608,
      "grad_norm": 6.0802080320987075,
      "learning_rate": 4.9973954785008737e-05,
      "loss": 0.8733,
      "step": 36
    },
    {
      "epoch": 0.04736,
      "grad_norm": 6.430695410984101,
      "learning_rate": 4.9969005064883235e-05,
      "loss": 0.9273,
      "step": 37
    },
    {
      "epoch": 0.04864,
      "grad_norm": 6.249598968243061,
      "learning_rate": 4.9963625305007923e-05,
      "loss": 0.8055,
      "step": 38
    },
    {
      "epoch": 0.04992,
      "grad_norm": 7.510546436152582,
      "learning_rate": 4.9957815598038104e-05,
      "loss": 0.8535,
      "step": 39
    },
    {
      "epoch": 0.0512,
      "grad_norm": 7.167416668500387,
      "learning_rate": 4.995157604403403e-05,
      "loss": 0.9187,
      "step": 40
    },
    {
      "epoch": 0.05248,
      "grad_norm": 7.381670416770956,
      "learning_rate": 4.994490675045919e-05,
      "loss": 0.8665,
      "step": 41
    },
    {
      "epoch": 0.05376,
      "grad_norm": 7.3445024299149075,
      "learning_rate": 4.993780783217844e-05,
      "loss": 0.7841,
      "step": 42
    },
    {
      "epoch": 0.05504,
      "grad_norm": 7.478776601738135,
      "learning_rate": 4.993027941145604e-05,
      "loss": 0.8308,
      "step": 43
    },
    {
      "epoch": 0.05632,
      "grad_norm": 6.5733180494252705,
      "learning_rate": 4.992232161795356e-05,
      "loss": 0.7194,
      "step": 44
    },
    {
      "epoch": 0.0576,
      "grad_norm": 6.82101290025168,
      "learning_rate": 4.991393458872762e-05,
      "loss": 0.8695,
      "step": 45
    },
    {
      "epoch": 0.05888,
      "grad_norm": 6.644036414771558,
      "learning_rate": 4.990511846822754e-05,
      "loss": 0.7879,
      "step": 46
    },
    {
      "epoch": 0.06016,
      "grad_norm": 5.515517456959634,
      "learning_rate": 4.989587340829287e-05,
      "loss": 0.7858,
      "step": 47
    },
    {
      "epoch": 0.06144,
      "grad_norm": 5.210427012503115,
      "learning_rate": 4.9886199568150745e-05,
      "loss": 0.7486,
      "step": 48
    },
    {
      "epoch": 0.06272,
      "grad_norm": 5.407985524958646,
      "learning_rate": 4.987609711441316e-05,
      "loss": 0.758,
      "step": 49
    },
    {
      "epoch": 0.064,
      "grad_norm": 4.558397042534138,
      "learning_rate": 4.98655662210741e-05,
      "loss": 0.7788,
      "step": 50
    },
    {
      "epoch": 0.06528,
      "grad_norm": 4.218476509110454,
      "learning_rate": 4.985460706950655e-05,
      "loss": 0.682,
      "step": 51
    },
    {
      "epoch": 0.06656,
      "grad_norm": 3.5231703818153024,
      "learning_rate": 4.984321984845934e-05,
      "loss": 0.7636,
      "step": 52
    },
    {
      "epoch": 0.06784,
      "grad_norm": 3.9386668264384905,
      "learning_rate": 4.9831404754053934e-05,
      "loss": 0.714,
      "step": 53
    },
    {
      "epoch": 0.06912,
      "grad_norm": 3.5893952410017627,
      "learning_rate": 4.9819161989781024e-05,
      "loss": 0.6537,
      "step": 54
    },
    {
      "epoch": 0.0704,
      "grad_norm": 2.8681476640209818,
      "learning_rate": 4.980649176649704e-05,
      "loss": 0.6384,
      "step": 55
    },
    {
      "epoch": 0.07168,
      "grad_norm": 3.0010595459503753,
      "learning_rate": 4.979339430242053e-05,
      "loss": 0.7417,
      "step": 56
    },
    {
      "epoch": 0.07296,
      "grad_norm": 2.751660779828729,
      "learning_rate": 4.977986982312836e-05,
      "loss": 0.7008,
      "step": 57
    },
    {
      "epoch": 0.07424,
      "grad_norm": 3.194366208285437,
      "learning_rate": 4.976591856155186e-05,
      "loss": 0.7072,
      "step": 58
    },
    {
      "epoch": 0.07552,
      "grad_norm": 2.48252595313533,
      "learning_rate": 4.9751540757972816e-05,
      "loss": 0.6971,
      "step": 59
    },
    {
      "epoch": 0.0768,
      "grad_norm": 2.3495656800930402,
      "learning_rate": 4.973673666001932e-05,
      "loss": 0.6876,
      "step": 60
    },
    {
      "epoch": 0.07808,
      "grad_norm": 2.63986280783826,
      "learning_rate": 4.972150652266151e-05,
      "loss": 0.736,
      "step": 61
    },
    {
      "epoch": 0.07936,
      "grad_norm": 2.7150342194317814,
      "learning_rate": 4.9705850608207174e-05,
      "loss": 0.725,
      "step": 62
    },
    {
      "epoch": 0.08064,
      "grad_norm": 2.523989046322601,
      "learning_rate": 4.968976918629722e-05,
      "loss": 0.6565,
      "step": 63
    },
    {
      "epoch": 0.08192,
      "grad_norm": 2.3269402200989346,
      "learning_rate": 4.967326253390107e-05,
      "loss": 0.6919,
      "step": 64
    },
    {
      "epoch": 0.0832,
      "grad_norm": 1.9026859178219735,
      "learning_rate": 4.9656330935311856e-05,
      "loss": 0.6629,
      "step": 65
    },
    {
      "epoch": 0.08448,
      "grad_norm": 2.057962587453645,
      "learning_rate": 4.963897468214154e-05,
      "loss": 0.71,
      "step": 66
    },
    {
      "epoch": 0.08576,
      "grad_norm": 1.8090872467237042,
      "learning_rate": 4.962119407331587e-05,
      "loss": 0.6926,
      "step": 67
    },
    {
      "epoch": 0.08704,
      "grad_norm": 2.0716692020910643,
      "learning_rate": 4.960298941506927e-05,
      "loss": 0.6899,
      "step": 68
    },
    {
      "epoch": 0.08832,
      "grad_norm": 2.5200615138614557,
      "learning_rate": 4.958436102093951e-05,
      "loss": 0.6635,
      "step": 69
    },
    {
      "epoch": 0.0896,
      "grad_norm": 2.2621437989928395,
      "learning_rate": 4.956530921176238e-05,
      "loss": 0.6748,
      "step": 70
    },
    {
      "epoch": 0.09088,
      "grad_norm": 1.8332228174615994,
      "learning_rate": 4.954583431566609e-05,
      "loss": 0.6644,
      "step": 71
    },
    {
      "epoch": 0.09216,
      "grad_norm": 2.0863135928007983,
      "learning_rate": 4.952593666806567e-05,
      "loss": 0.6153,
      "step": 72
    },
    {
      "epoch": 0.09344,
      "grad_norm": 1.7415280790596896,
      "learning_rate": 4.950561661165717e-05,
      "loss": 0.6484,
      "step": 73
    },
    {
      "epoch": 0.09472,
      "grad_norm": 1.746981819187831,
      "learning_rate": 4.9484874496411756e-05,
      "loss": 0.636,
      "step": 74
    },
    {
      "epoch": 0.096,
      "grad_norm": 1.9432608496567476,
      "learning_rate": 4.946371067956971e-05,
      "loss": 0.6246,
      "step": 75
    },
    {
      "epoch": 0.09728,
      "grad_norm": 1.75865419988054,
      "learning_rate": 4.9442125525634224e-05,
      "loss": 0.6282,
      "step": 76
    },
    {
      "epoch": 0.09856,
      "grad_norm": 1.7287020057722782,
      "learning_rate": 4.9420119406365185e-05,
      "loss": 0.6542,
      "step": 77
    },
    {
      "epoch": 0.09984,
      "grad_norm": 1.9852157049314743,
      "learning_rate": 4.939769270077273e-05,
      "loss": 0.6735,
      "step": 78
    },
    {
      "epoch": 0.10112,
      "grad_norm": 1.9974297496082407,
      "learning_rate": 4.937484579511071e-05,
      "loss": 0.5874,
      "step": 79
    },
    {
      "epoch": 0.1024,
      "grad_norm": 1.7181017736857525,
      "learning_rate": 4.935157908287011e-05,
      "loss": 0.6768,
      "step": 80
    },
    {
      "epoch": 0.10368,
      "grad_norm": 2.0558959756383652,
      "learning_rate": 4.9327892964772164e-05,
      "loss": 0.6827,
      "step": 81
    },
    {
      "epoch": 0.10496,
      "grad_norm": 1.8300402350049234,
      "learning_rate": 4.930378784876154e-05,
      "loss": 0.6563,
      "step": 82
    },
    {
      "epoch": 0.10624,
      "grad_norm": 2.075493458043633,
      "learning_rate": 4.9279264149999285e-05,
      "loss": 0.6022,
      "step": 83
    },
    {
      "epoch": 0.10752,
      "grad_norm": 1.8272978013891923,
      "learning_rate": 4.925432229085565e-05,
      "loss": 0.676,
      "step": 84
    },
    {
      "epoch": 0.1088,
      "grad_norm": 1.8012811494453218,
      "learning_rate": 4.9228962700902845e-05,
      "loss": 0.6913,
      "step": 85
    },
    {
      "epoch": 0.11008,
      "grad_norm": 1.6881411330084826,
      "learning_rate": 4.9203185816907674e-05,
      "loss": 0.6305,
      "step": 86
    },
    {
      "epoch": 0.11136,
      "grad_norm": 1.7612874976812931,
      "learning_rate": 4.91769920828239e-05,
      "loss": 0.6284,
      "step": 87
    },
    {
      "epoch": 0.11264,
      "grad_norm": 1.7073438596596813,
      "learning_rate": 4.915038194978474e-05,
      "loss": 0.6464,
      "step": 88
    },
    {
      "epoch": 0.11392,
      "grad_norm": 1.6012318501728533,
      "learning_rate": 4.912335587609499e-05,
      "loss": 0.6265,
      "step": 89
    },
    {
      "epoch": 0.1152,
      "grad_norm": 2.0499167724620064,
      "learning_rate": 4.909591432722316e-05,
      "loss": 0.6247,
      "step": 90
    },
    {
      "epoch": 0.11648,
      "grad_norm": 1.6941550864006603,
      "learning_rate": 4.90680577757935e-05,
      "loss": 0.6579,
      "step": 91
    },
    {
      "epoch": 0.11776,
      "grad_norm": 1.731174063575679,
      "learning_rate": 4.90397867015778e-05,
      "loss": 0.6268,
      "step": 92
    },
    {
      "epoch": 0.11904,
      "grad_norm": 1.5312622049127997,
      "learning_rate": 4.901110159148715e-05,
      "loss": 0.6354,
      "step": 93
    },
    {
      "epoch": 0.12032,
      "grad_norm": 1.7108509451239904,
      "learning_rate": 4.8982002939563584e-05,
      "loss": 0.5528,
      "step": 94
    },
    {
      "epoch": 0.1216,
      "grad_norm": 2.8797239557655137,
      "learning_rate": 4.89524912469715e-05,
      "loss": 0.6825,
      "step": 95
    },
    {
      "epoch": 0.12288,
      "grad_norm": 1.9555928058700465,
      "learning_rate": 4.892256702198912e-05,
      "loss": 0.5632,
      "step": 96
    },
    {
      "epoch": 0.12416,
      "grad_norm": 2.5250801141749024,
      "learning_rate": 4.889223077999965e-05,
      "loss": 0.6744,
      "step": 97
    },
    {
      "epoch": 0.12544,
      "grad_norm": 1.8367912274560791,
      "learning_rate": 4.886148304348245e-05,
      "loss": 0.6438,
      "step": 98
    },
    {
      "epoch": 0.12672,
      "grad_norm": 1.6113654706551255,
      "learning_rate": 4.883032434200404e-05,
      "loss": 0.6178,
      "step": 99
    },
    {
      "epoch": 0.128,
      "grad_norm": 1.7010224016416655,
      "learning_rate": 4.8798755212208955e-05,
      "loss": 0.5643,
      "step": 100
    },
    {
      "epoch": 0.12928,
      "grad_norm": 2.2848886080761126,
      "learning_rate": 4.876677619781053e-05,
      "loss": 0.6643,
      "step": 101
    },
    {
      "epoch": 0.13056,
      "grad_norm": 1.6211591622673918,
      "learning_rate": 4.873438784958148e-05,
      "loss": 0.567,
      "step": 102
    },
    {
      "epoch": 0.13184,
      "grad_norm": 1.8632164017803852,
      "learning_rate": 4.87015907253445e-05,
      "loss": 0.5697,
      "step": 103
    },
    {
      "epoch": 0.13312,
      "grad_norm": 1.4746648348350377,
      "learning_rate": 4.866838538996258e-05,
      "loss": 0.6479,
      "step": 104
    },
    {
      "epoch": 0.1344,
      "grad_norm": 1.6062813223908587,
      "learning_rate": 4.8634772415329325e-05,
      "loss": 0.6025,
      "step": 105
    },
    {
      "epoch": 0.13568,
      "grad_norm": 1.817381489363038,
      "learning_rate": 4.8600752380359074e-05,
      "loss": 0.6703,
      "step": 106
    },
    {
      "epoch": 0.13696,
      "grad_norm": 1.5776320201015432,
      "learning_rate": 4.856632587097694e-05,
      "loss": 0.5594,
      "step": 107
    },
    {
      "epoch": 0.13824,
      "grad_norm": 1.3277058874360568,
      "learning_rate": 4.8531493480108746e-05,
      "loss": 0.6013,
      "step": 108
    },
    {
      "epoch": 0.13952,
      "grad_norm": 1.5526558234706886,
      "learning_rate": 4.8496255807670766e-05,
      "loss": 0.6062,
      "step": 109
    },
    {
      "epoch": 0.1408,
      "grad_norm": 1.8299718052431133,
      "learning_rate": 4.846061346055942e-05,
      "loss": 0.5114,
      "step": 110
    },
    {
      "epoch": 0.14208,
      "grad_norm": 1.731011102449401,
      "learning_rate": 4.842456705264082e-05,
      "loss": 0.592,
      "step": 111
    },
    {
      "epoch": 0.14336,
      "grad_norm": 1.3621849313565593,
      "learning_rate": 4.838811720474019e-05,
      "loss": 0.6044,
      "step": 112
    },
    {
      "epoch": 0.14464,
      "grad_norm": 1.6732978958650073,
      "learning_rate": 4.8351264544631195e-05,
      "loss": 0.7145,
      "step": 113
    },
    {
      "epoch": 0.14592,
      "grad_norm": 1.894809626357365,
      "learning_rate": 4.831400970702508e-05,
      "loss": 0.6064,
      "step": 114
    },
    {
      "epoch": 0.1472,
      "grad_norm": 1.8324635468356303,
      "learning_rate": 4.82763533335598e-05,
      "loss": 0.6553,
      "step": 115
    },
    {
      "epoch": 0.14848,
      "grad_norm": 1.72222811011011,
      "learning_rate": 4.823829607278892e-05,
      "loss": 0.6352,
      "step": 116
    },
    {
      "epoch": 0.14976,
      "grad_norm": 1.5581321731057138,
      "learning_rate": 4.819983858017048e-05,
      "loss": 0.5878,
      "step": 117
    },
    {
      "epoch": 0.15104,
      "grad_norm": 1.498096214184562,
      "learning_rate": 4.816098151805566e-05,
      "loss": 0.5796,
      "step": 118
    },
    {
      "epoch": 0.15232,
      "grad_norm": 1.7231985884714938,
      "learning_rate": 4.812172555567746e-05,
      "loss": 0.6561,
      "step": 119
    },
    {
      "epoch": 0.1536,
      "grad_norm": 1.764146809011255,
      "learning_rate": 4.808207136913904e-05,
      "loss": 0.6066,
      "step": 120
    },
    {
      "epoch": 0.15488,
      "grad_norm": 1.2964463070656347,
      "learning_rate": 4.8042019641402225e-05,
      "loss": 0.5948,
      "step": 121
    },
    {
      "epoch": 0.15616,
      "grad_norm": 1.4011097166605235,
      "learning_rate": 4.8001571062275616e-05,
      "loss": 0.6757,
      "step": 122
    },
    {
      "epoch": 0.15744,
      "grad_norm": 1.6953382417290377,
      "learning_rate": 4.796072632840279e-05,
      "loss": 0.6515,
      "step": 123
    },
    {
      "epoch": 0.15872,
      "grad_norm": 1.64419676160895,
      "learning_rate": 4.791948614325028e-05,
      "loss": 0.7206,
      "step": 124
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.396499794039069,
      "learning_rate": 4.787785121709543e-05,
      "loss": 0.5973,
      "step": 125
    },
    {
      "epoch": 0.16128,
      "grad_norm": 1.5651242884852707,
      "learning_rate": 4.78358222670142e-05,
      "loss": 0.5777,
      "step": 126
    },
    {
      "epoch": 0.16256,
      "grad_norm": 1.6705934829027111,
      "learning_rate": 4.7793400016868806e-05,
      "loss": 0.6292,
      "step": 127
    },
    {
      "epoch": 0.16384,
      "grad_norm": 1.5388606027083547,
      "learning_rate": 4.7750585197295226e-05,
      "loss": 0.6211,
      "step": 128
    },
    {
      "epoch": 0.16512,
      "grad_norm": 1.4778488204311073,
      "learning_rate": 4.770737854569067e-05,
      "loss": 0.5358,
      "step": 129
    },
    {
      "epoch": 0.1664,
      "grad_norm": 1.5140116394296845,
      "learning_rate": 4.766378080620083e-05,
      "loss": 0.593,
      "step": 130
    },
    {
      "epoch": 0.16768,
      "grad_norm": 1.4748151022018725,
      "learning_rate": 4.7619792729707086e-05,
      "loss": 0.5815,
      "step": 131
    },
    {
      "epoch": 0.16896,
      "grad_norm": 1.5941827710788068,
      "learning_rate": 4.757541507381357e-05,
      "loss": 0.6071,
      "step": 132
    },
    {
      "epoch": 0.17024,
      "grad_norm": 1.578445948877928,
      "learning_rate": 4.7530648602834114e-05,
      "loss": 0.647,
      "step": 133
    },
    {
      "epoch": 0.17152,
      "grad_norm": 1.4239357190704514,
      "learning_rate": 4.748549408777909e-05,
      "loss": 0.6559,
      "step": 134
    },
    {
      "epoch": 0.1728,
      "grad_norm": 1.4310055814028564,
      "learning_rate": 4.743995230634216e-05,
      "loss": 0.5701,
      "step": 135
    },
    {
      "epoch": 0.17408,
      "grad_norm": 1.6447664990059667,
      "learning_rate": 4.7394024042886796e-05,
      "loss": 0.607,
      "step": 136
    },
    {
      "epoch": 0.17536,
      "grad_norm": 1.269555998444542,
      "learning_rate": 4.734771008843287e-05,
      "loss": 0.5525,
      "step": 137
    },
    {
      "epoch": 0.17664,
      "grad_norm": 1.5429837855466046,
      "learning_rate": 4.730101124064299e-05,
      "loss": 0.5631,
      "step": 138
    },
    {
      "epoch": 0.17792,
      "grad_norm": 1.8277462215582432,
      "learning_rate": 4.725392830380874e-05,
      "loss": 0.5577,
      "step": 139
    },
    {
      "epoch": 0.1792,
      "grad_norm": 1.3810219315467789,
      "learning_rate": 4.720646208883684e-05,
      "loss": 0.5751,
      "step": 140
    },
    {
      "epoch": 0.18048,
      "grad_norm": 1.484273258841385,
      "learning_rate": 4.7158613413235216e-05,
      "loss": 0.6568,
      "step": 141
    },
    {
      "epoch": 0.18176,
      "grad_norm": 1.4528693876678762,
      "learning_rate": 4.711038310109889e-05,
      "loss": 0.5798,
      "step": 142
    },
    {
      "epoch": 0.18304,
      "grad_norm": 1.3888432503412995,
      "learning_rate": 4.706177198309576e-05,
      "loss": 0.5547,
      "step": 143
    },
    {
      "epoch": 0.18432,
      "grad_norm": 1.8695688648565847,
      "learning_rate": 4.7012780896452336e-05,
      "loss": 0.5129,
      "step": 144
    },
    {
      "epoch": 0.1856,
      "grad_norm": 1.4398450190784378,
      "learning_rate": 4.696341068493932e-05,
      "loss": 0.6138,
      "step": 145
    },
    {
      "epoch": 0.18688,
      "grad_norm": 1.256550030058827,
      "learning_rate": 4.6913662198857045e-05,
      "loss": 0.5698,
      "step": 146
    },
    {
      "epoch": 0.18816,
      "grad_norm": 1.3243691997802411,
      "learning_rate": 4.686353629502084e-05,
      "loss": 0.5779,
      "step": 147
    },
    {
      "epoch": 0.18944,
      "grad_norm": 1.3472490259945922,
      "learning_rate": 4.68130338367463e-05,
      "loss": 0.6057,
      "step": 148
    },
    {
      "epoch": 0.19072,
      "grad_norm": 1.423771795211049,
      "learning_rate": 4.6762155693834375e-05,
      "loss": 0.5539,
      "step": 149
    },
    {
      "epoch": 0.192,
      "grad_norm": 1.278411327909649,
      "learning_rate": 4.671090274255642e-05,
      "loss": 0.6568,
      "step": 150
    },
    {
      "epoch": 0.19328,
      "grad_norm": 1.2896944158114314,
      "learning_rate": 4.6659275865639084e-05,
      "loss": 0.6208,
      "step": 151
    },
    {
      "epoch": 0.19456,
      "grad_norm": 1.4358904823462553,
      "learning_rate": 4.660727595224913e-05,
      "loss": 0.5429,
      "step": 152
    },
    {
      "epoch": 0.19584,
      "grad_norm": 1.2259330425776525,
      "learning_rate": 4.655490389797811e-05,
      "loss": 0.5369,
      "step": 153
    },
    {
      "epoch": 0.19712,
      "grad_norm": 1.586061135702463,
      "learning_rate": 4.650216060482692e-05,
      "loss": 0.6086,
      "step": 154
    },
    {
      "epoch": 0.1984,
      "grad_norm": 1.3307781224435642,
      "learning_rate": 4.644904698119027e-05,
      "loss": 0.5632,
      "step": 155
    },
    {
      "epoch": 0.19968,
      "grad_norm": 1.344663472569419,
      "learning_rate": 4.639556394184109e-05,
      "loss": 0.5908,
      "step": 156
    },
    {
      "epoch": 0.20096,
      "grad_norm": 1.4634221552477196,
      "learning_rate": 4.634171240791472e-05,
      "loss": 0.5956,
      "step": 157
    },
    {
      "epoch": 0.20224,
      "grad_norm": 1.5213663047025157,
      "learning_rate": 4.6287493306893035e-05,
      "loss": 0.6076,
      "step": 158
    },
    {
      "epoch": 0.20352,
      "grad_norm": 1.4721238877847156,
      "learning_rate": 4.623290757258854e-05,
      "loss": 0.6018,
      "step": 159
    },
    {
      "epoch": 0.2048,
      "grad_norm": 1.3751194857287241,
      "learning_rate": 4.6177956145128217e-05,
      "loss": 0.5693,
      "step": 160
    },
    {
      "epoch": 0.20608,
      "grad_norm": 1.6426361855564613,
      "learning_rate": 4.612263997093735e-05,
      "loss": 0.5913,
      "step": 161
    },
    {
      "epoch": 0.20736,
      "grad_norm": 1.5780007570722678,
      "learning_rate": 4.606696000272328e-05,
      "loss": 0.5488,
      "step": 162
    },
    {
      "epoch": 0.20864,
      "grad_norm": 1.4677250842886291,
      "learning_rate": 4.60109171994589e-05,
      "loss": 0.6588,
      "step": 163
    },
    {
      "epoch": 0.20992,
      "grad_norm": 1.7555722653674988,
      "learning_rate": 4.595451252636623e-05,
      "loss": 0.6271,
      "step": 164
    },
    {
      "epoch": 0.2112,
      "grad_norm": 1.3894415125583848,
      "learning_rate": 4.5897746954899725e-05,
      "loss": 0.6044,
      "step": 165
    },
    {
      "epoch": 0.21248,
      "grad_norm": 1.2175225197872683,
      "learning_rate": 4.584062146272958e-05,
      "loss": 0.6158,
      "step": 166
    },
    {
      "epoch": 0.21376,
      "grad_norm": 1.629665475548213,
      "learning_rate": 4.57831370337249e-05,
      "loss": 0.6131,
      "step": 167
    },
    {
      "epoch": 0.21504,
      "grad_norm": 1.2841042637211522,
      "learning_rate": 4.572529465793672e-05,
      "loss": 0.5767,
      "step": 168
    },
    {
      "epoch": 0.21632,
      "grad_norm": 2.7853685730757394,
      "learning_rate": 4.566709533158099e-05,
      "loss": 0.5593,
      "step": 169
    },
    {
      "epoch": 0.2176,
      "grad_norm": 1.3850254106700643,
      "learning_rate": 4.560854005702137e-05,
      "loss": 0.6115,
      "step": 170
    },
    {
      "epoch": 0.21888,
      "grad_norm": 1.2913092655981235,
      "learning_rate": 4.5549629842752024e-05,
      "loss": 0.5847,
      "step": 171
    },
    {
      "epoch": 0.22016,
      "grad_norm": 1.5613887453358524,
      "learning_rate": 4.549036570338021e-05,
      "loss": 0.5964,
      "step": 172
    },
    {
      "epoch": 0.22144,
      "grad_norm": 1.7170341281067822,
      "learning_rate": 4.543074865960881e-05,
      "loss": 0.5637,
      "step": 173
    },
    {
      "epoch": 0.22272,
      "grad_norm": 2.4730532191588064,
      "learning_rate": 4.5370779738218784e-05,
      "loss": 0.6055,
      "step": 174
    },
    {
      "epoch": 0.224,
      "grad_norm": 1.2707856122137464,
      "learning_rate": 4.531045997205143e-05,
      "loss": 0.5435,
      "step": 175
    },
    {
      "epoch": 0.22528,
      "grad_norm": 1.7111229073746521,
      "learning_rate": 4.5249790399990656e-05,
      "loss": 0.6221,
      "step": 176
    },
    {
      "epoch": 0.22656,
      "grad_norm": 1.3595239843603941,
      "learning_rate": 4.5188772066945026e-05,
      "loss": 0.5717,
      "step": 177
    },
    {
      "epoch": 0.22784,
      "grad_norm": 1.3809996428880513,
      "learning_rate": 4.512740602382981e-05,
      "loss": 0.5696,
      "step": 178
    },
    {
      "epoch": 0.22912,
      "grad_norm": 1.4209829860828496,
      "learning_rate": 4.506569332754887e-05,
      "loss": 0.5613,
      "step": 179
    },
    {
      "epoch": 0.2304,
      "grad_norm": 1.6935901791883121,
      "learning_rate": 4.5003635040976465e-05,
      "loss": 0.6077,
      "step": 180
    },
    {
      "epoch": 0.23168,
      "grad_norm": 1.5105293540221678,
      "learning_rate": 4.494123223293891e-05,
      "loss": 0.6783,
      "step": 181
    },
    {
      "epoch": 0.23296,
      "grad_norm": 1.5390880129401896,
      "learning_rate": 4.48784859781962e-05,
      "loss": 0.6024,
      "step": 182
    },
    {
      "epoch": 0.23424,
      "grad_norm": 1.5313113885779375,
      "learning_rate": 4.4815397357423526e-05,
      "loss": 0.6138,
      "step": 183
    },
    {
      "epoch": 0.23552,
      "grad_norm": 1.3609896622450086,
      "learning_rate": 4.475196745719259e-05,
      "loss": 0.6729,
      "step": 184
    },
    {
      "epoch": 0.2368,
      "grad_norm": 1.479241008092771,
      "learning_rate": 4.4688197369952945e-05,
      "loss": 0.5977,
      "step": 185
    },
    {
      "epoch": 0.23808,
      "grad_norm": 1.4818189945972011,
      "learning_rate": 4.462408819401317e-05,
      "loss": 0.5882,
      "step": 186
    },
    {
      "epoch": 0.23936,
      "grad_norm": 1.5061778226183522,
      "learning_rate": 4.455964103352194e-05,
      "loss": 0.5861,
      "step": 187
    },
    {
      "epoch": 0.24064,
      "grad_norm": 1.4743770650173358,
      "learning_rate": 4.449485699844902e-05,
      "loss": 0.585,
      "step": 188
    },
    {
      "epoch": 0.24192,
      "grad_norm": 1.2302952737480486,
      "learning_rate": 4.4429737204566155e-05,
      "loss": 0.5847,
      "step": 189
    },
    {
      "epoch": 0.2432,
      "grad_norm": 1.4111193829738211,
      "learning_rate": 4.4364282773427846e-05,
      "loss": 0.5657,
      "step": 190
    },
    {
      "epoch": 0.24448,
      "grad_norm": 1.3570987894924547,
      "learning_rate": 4.429849483235202e-05,
      "loss": 0.5615,
      "step": 191
    },
    {
      "epoch": 0.24576,
      "grad_norm": 1.3822044695442701,
      "learning_rate": 4.423237451440064e-05,
      "loss": 0.5932,
      "step": 192
    },
    {
      "epoch": 0.24704,
      "grad_norm": 1.3663828327097014,
      "learning_rate": 4.4165922958360165e-05,
      "loss": 0.543,
      "step": 193
    },
    {
      "epoch": 0.24832,
      "grad_norm": 1.3993875869868275,
      "learning_rate": 4.409914130872194e-05,
      "loss": 0.5933,
      "step": 194
    },
    {
      "epoch": 0.2496,
      "grad_norm": 1.341085747475743,
      "learning_rate": 4.403203071566253e-05,
      "loss": 0.632,
      "step": 195
    },
    {
      "epoch": 0.25088,
      "grad_norm": 1.4712233726790762,
      "learning_rate": 4.396459233502383e-05,
      "loss": 0.5773,
      "step": 196
    },
    {
      "epoch": 0.25216,
      "grad_norm": 1.4565723974631613,
      "learning_rate": 4.389682732829325e-05,
      "loss": 0.5425,
      "step": 197
    },
    {
      "epoch": 0.25344,
      "grad_norm": 1.4154904282118412,
      "learning_rate": 4.382873686258361e-05,
      "loss": 0.5982,
      "step": 198
    },
    {
      "epoch": 0.25472,
      "grad_norm": 1.2950506602467453,
      "learning_rate": 4.3760322110613127e-05,
      "loss": 0.6612,
      "step": 199
    },
    {
      "epoch": 0.256,
      "grad_norm": 1.4701283831627094,
      "learning_rate": 4.369158425068517e-05,
      "loss": 0.5816,
      "step": 200
    },
    {
      "epoch": 0.25728,
      "grad_norm": 1.6738445465515808,
      "learning_rate": 4.362252446666798e-05,
      "loss": 0.5755,
      "step": 201
    },
    {
      "epoch": 0.25856,
      "grad_norm": 1.3942321590387716,
      "learning_rate": 4.3553143947974276e-05,
      "loss": 0.579,
      "step": 202
    },
    {
      "epoch": 0.25984,
      "grad_norm": 1.3386983487568793,
      "learning_rate": 4.348344388954076e-05,
      "loss": 0.5203,
      "step": 203
    },
    {
      "epoch": 0.26112,
      "grad_norm": 1.5228122463541818,
      "learning_rate": 4.341342549180759e-05,
      "loss": 0.6213,
      "step": 204
    },
    {
      "epoch": 0.2624,
      "grad_norm": 1.3887871941177943,
      "learning_rate": 4.334308996069762e-05,
      "loss": 0.6155,
      "step": 205
    },
    {
      "epoch": 0.26368,
      "grad_norm": 1.1249421238224457,
      "learning_rate": 4.3272438507595695e-05,
      "loss": 0.5611,
      "step": 206
    },
    {
      "epoch": 0.26496,
      "grad_norm": 1.3426254693359165,
      "learning_rate": 4.3201472349327744e-05,
      "loss": 0.5217,
      "step": 207
    },
    {
      "epoch": 0.26624,
      "grad_norm": 1.6983653189143417,
      "learning_rate": 4.313019270813988e-05,
      "loss": 0.5772,
      "step": 208
    },
    {
      "epoch": 0.26752,
      "grad_norm": 1.4485876342432744,
      "learning_rate": 4.30586008116773e-05,
      "loss": 0.6085,
      "step": 209
    },
    {
      "epoch": 0.2688,
      "grad_norm": 1.8497953340674216,
      "learning_rate": 4.298669789296314e-05,
      "loss": 0.5604,
      "step": 210
    },
    {
      "epoch": 0.27008,
      "grad_norm": 1.536417668038393,
      "learning_rate": 4.291448519037727e-05,
      "loss": 0.6464,
      "step": 211
    },
    {
      "epoch": 0.27136,
      "grad_norm": 1.2638757873673312,
      "learning_rate": 4.2841963947634955e-05,
      "loss": 0.6232,
      "step": 212
    },
    {
      "epoch": 0.27264,
      "grad_norm": 1.4218638281970397,
      "learning_rate": 4.2769135413765416e-05,
      "loss": 0.6244,
      "step": 213
    },
    {
      "epoch": 0.27392,
      "grad_norm": 1.6333848851818025,
      "learning_rate": 4.269600084309033e-05,
      "loss": 0.5861,
      "step": 214
    },
    {
      "epoch": 0.2752,
      "grad_norm": 1.3749041930700574,
      "learning_rate": 4.262256149520225e-05,
      "loss": 0.5846,
      "step": 215
    },
    {
      "epoch": 0.27648,
      "grad_norm": 1.2947028786311572,
      "learning_rate": 4.254881863494287e-05,
      "loss": 0.5733,
      "step": 216
    },
    {
      "epoch": 0.27776,
      "grad_norm": 1.565320016460211,
      "learning_rate": 4.247477353238125e-05,
      "loss": 0.5622,
      "step": 217
    },
    {
      "epoch": 0.27904,
      "grad_norm": 1.2131196326641547,
      "learning_rate": 4.240042746279199e-05,
      "loss": 0.6389,
      "step": 218
    },
    {
      "epoch": 0.28032,
      "grad_norm": 1.3776153163158087,
      "learning_rate": 4.232578170663319e-05,
      "loss": 0.6107,
      "step": 219
    },
    {
      "epoch": 0.2816,
      "grad_norm": 1.284248692743062,
      "learning_rate": 4.2250837549524456e-05,
      "loss": 0.5864,
      "step": 220
    },
    {
      "epoch": 0.28288,
      "grad_norm": 1.2511680982294107,
      "learning_rate": 4.2175596282224736e-05,
      "loss": 0.538,
      "step": 221
    },
    {
      "epoch": 0.28416,
      "grad_norm": 1.2460687716152292,
      "learning_rate": 4.210005920061008e-05,
      "loss": 0.6314,
      "step": 222
    },
    {
      "epoch": 0.28544,
      "grad_norm": 1.3139632724512236,
      "learning_rate": 4.2024227605651336e-05,
      "loss": 0.5563,
      "step": 223
    },
    {
      "epoch": 0.28672,
      "grad_norm": 1.2300759433052748,
      "learning_rate": 4.194810280339173e-05,
      "loss": 0.5747,
      "step": 224
    },
    {
      "epoch": 0.288,
      "grad_norm": 1.4271134870001978,
      "learning_rate": 4.187168610492439e-05,
      "loss": 0.6554,
      "step": 225
    },
    {
      "epoch": 0.28928,
      "grad_norm": 1.5722489914514965,
      "learning_rate": 4.1794978826369763e-05,
      "loss": 0.5885,
      "step": 226
    },
    {
      "epoch": 0.29056,
      "grad_norm": 1.5009714149834261,
      "learning_rate": 4.171798228885293e-05,
      "loss": 0.6113,
      "step": 227
    },
    {
      "epoch": 0.29184,
      "grad_norm": 1.8334180258983053,
      "learning_rate": 4.164069781848086e-05,
      "loss": 0.6062,
      "step": 228
    },
    {
      "epoch": 0.29312,
      "grad_norm": 1.3718388181459142,
      "learning_rate": 4.156312674631957e-05,
      "loss": 0.5865,
      "step": 229
    },
    {
      "epoch": 0.2944,
      "grad_norm": 1.3521700531014587,
      "learning_rate": 4.148527040837123e-05,
      "loss": 0.6516,
      "step": 230
    },
    {
      "epoch": 0.29568,
      "grad_norm": 1.3943729045586102,
      "learning_rate": 4.1407130145551086e-05,
      "loss": 0.5544,
      "step": 231
    },
    {
      "epoch": 0.29696,
      "grad_norm": 1.4330528205647741,
      "learning_rate": 4.132870730366445e-05,
      "loss": 0.6353,
      "step": 232
    },
    {
      "epoch": 0.29824,
      "grad_norm": 1.2076734538851226,
      "learning_rate": 4.125000323338343e-05,
      "loss": 0.5849,
      "step": 233
    },
    {
      "epoch": 0.29952,
      "grad_norm": 1.6401942949755128,
      "learning_rate": 4.117101929022376e-05,
      "loss": 0.5901,
      "step": 234
    },
    {
      "epoch": 0.3008,
      "grad_norm": 1.3400348496844277,
      "learning_rate": 4.109175683452137e-05,
      "loss": 0.5468,
      "step": 235
    },
    {
      "epoch": 0.30208,
      "grad_norm": 1.4241168627425973,
      "learning_rate": 4.101221723140902e-05,
      "loss": 0.6082,
      "step": 236
    },
    {
      "epoch": 0.30336,
      "grad_norm": 1.6561352522409785,
      "learning_rate": 4.0932401850792754e-05,
      "loss": 0.5939,
      "step": 237
    },
    {
      "epoch": 0.30464,
      "grad_norm": 1.4556867022610906,
      "learning_rate": 4.0852312067328326e-05,
      "loss": 0.6112,
      "step": 238
    },
    {
      "epoch": 0.30592,
      "grad_norm": 1.3468859989270134,
      "learning_rate": 4.07719492603975e-05,
      "loss": 0.5737,
      "step": 239
    },
    {
      "epoch": 0.3072,
      "grad_norm": 1.3259435525468157,
      "learning_rate": 4.069131481408433e-05,
      "loss": 0.6104,
      "step": 240
    },
    {
      "epoch": 0.30848,
      "grad_norm": 1.3745196369553818,
      "learning_rate": 4.061041011715126e-05,
      "loss": 0.6012,
      "step": 241
    },
    {
      "epoch": 0.30976,
      "grad_norm": 1.1609557512857687,
      "learning_rate": 4.052923656301528e-05,
      "loss": 0.554,
      "step": 242
    },
    {
      "epoch": 0.31104,
      "grad_norm": 1.41857166007364,
      "learning_rate": 4.044779554972389e-05,
      "loss": 0.5899,
      "step": 243
    },
    {
      "epoch": 0.31232,
      "grad_norm": 1.1985384897560705,
      "learning_rate": 4.036608847993101e-05,
      "loss": 0.6275,
      "step": 244
    },
    {
      "epoch": 0.3136,
      "grad_norm": 1.5579694785779994,
      "learning_rate": 4.028411676087285e-05,
      "loss": 0.548,
      "step": 245
    },
    {
      "epoch": 0.31488,
      "grad_norm": 1.3489944592031309,
      "learning_rate": 4.020188180434363e-05,
      "loss": 0.5322,
      "step": 246
    },
    {
      "epoch": 0.31616,
      "grad_norm": 1.590180669493754,
      "learning_rate": 4.011938502667134e-05,
      "loss": 0.588,
      "step": 247
    },
    {
      "epoch": 0.31744,
      "grad_norm": 1.277555025994882,
      "learning_rate": 4.003662784869326e-05,
      "loss": 0.6203,
      "step": 248
    },
    {
      "epoch": 0.31872,
      "grad_norm": 1.4952572911639632,
      "learning_rate": 3.995361169573155e-05,
      "loss": 0.5332,
      "step": 249
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2720020348705705,
      "learning_rate": 3.9870337997568684e-05,
      "loss": 0.5524,
      "step": 250
    },
    {
      "epoch": 0.32128,
      "grad_norm": 1.508655319764419,
      "learning_rate": 3.9786808188422796e-05,
      "loss": 0.513,
      "step": 251
    },
    {
      "epoch": 0.32256,
      "grad_norm": 1.874134149510889,
      "learning_rate": 3.970302370692304e-05,
      "loss": 0.6253,
      "step": 252
    },
    {
      "epoch": 0.32384,
      "grad_norm": 1.279567419708335,
      "learning_rate": 3.961898599608476e-05,
      "loss": 0.6783,
      "step": 253
    },
    {
      "epoch": 0.32512,
      "grad_norm": 1.1853548580609758,
      "learning_rate": 3.953469650328464e-05,
      "loss": 0.527,
      "step": 254
    },
    {
      "epoch": 0.3264,
      "grad_norm": 1.6800387671360835,
      "learning_rate": 3.9450156680235826e-05,
      "loss": 0.5631,
      "step": 255
    },
    {
      "epoch": 0.32768,
      "grad_norm": 1.3780997056205,
      "learning_rate": 3.9365367982962854e-05,
      "loss": 0.6016,
      "step": 256
    },
    {
      "epoch": 0.32896,
      "grad_norm": 1.5533459709454962,
      "learning_rate": 3.928033187177663e-05,
      "loss": 0.5759,
      "step": 257
    },
    {
      "epoch": 0.33024,
      "grad_norm": 1.287486096125242,
      "learning_rate": 3.919504981124924e-05,
      "loss": 0.6245,
      "step": 258
    },
    {
      "epoch": 0.33152,
      "grad_norm": 1.4774716008260305,
      "learning_rate": 3.910952327018875e-05,
      "loss": 0.5831,
      "step": 259
    },
    {
      "epoch": 0.3328,
      "grad_norm": 1.5906756360247127,
      "learning_rate": 3.9023753721613885e-05,
      "loss": 0.6272,
      "step": 260
    },
    {
      "epoch": 0.33408,
      "grad_norm": 1.353433681059385,
      "learning_rate": 3.893774264272871e-05,
      "loss": 0.5683,
      "step": 261
    },
    {
      "epoch": 0.33536,
      "grad_norm": 1.1888931801263596,
      "learning_rate": 3.8851491514897106e-05,
      "loss": 0.5601,
      "step": 262
    },
    {
      "epoch": 0.33664,
      "grad_norm": 1.5017639536043685,
      "learning_rate": 3.8765001823617353e-05,
      "loss": 0.618,
      "step": 263
    },
    {
      "epoch": 0.33792,
      "grad_norm": 1.4834089402426867,
      "learning_rate": 3.867827505849645e-05,
      "loss": 0.5042,
      "step": 264
    },
    {
      "epoch": 0.3392,
      "grad_norm": 1.3483641951738772,
      "learning_rate": 3.8591312713224534e-05,
      "loss": 0.4949,
      "step": 265
    },
    {
      "epoch": 0.34048,
      "grad_norm": 1.6008714340347994,
      "learning_rate": 3.8504116285549104e-05,
      "loss": 0.5675,
      "step": 266
    },
    {
      "epoch": 0.34176,
      "grad_norm": 1.2361194733115883,
      "learning_rate": 3.841668727724928e-05,
      "loss": 0.5661,
      "step": 267
    },
    {
      "epoch": 0.34304,
      "grad_norm": 1.2304578602405873,
      "learning_rate": 3.832902719410987e-05,
      "loss": 0.5765,
      "step": 268
    },
    {
      "epoch": 0.34432,
      "grad_norm": 1.6181265543840555,
      "learning_rate": 3.824113754589548e-05,
      "loss": 0.6493,
      "step": 269
    },
    {
      "epoch": 0.3456,
      "grad_norm": 1.319940158177043,
      "learning_rate": 3.815301984632452e-05,
      "loss": 0.6195,
      "step": 270
    },
    {
      "epoch": 0.34688,
      "grad_norm": 1.2954372238150471,
      "learning_rate": 3.806467561304311e-05,
      "loss": 0.6127,
      "step": 271
    },
    {
      "epoch": 0.34816,
      "grad_norm": 1.224648672260382,
      "learning_rate": 3.7976106367598916e-05,
      "loss": 0.5798,
      "step": 272
    },
    {
      "epoch": 0.34944,
      "grad_norm": 1.5144644968917076,
      "learning_rate": 3.7887313635415014e-05,
      "loss": 0.6112,
      "step": 273
    },
    {
      "epoch": 0.35072,
      "grad_norm": 1.215688244377943,
      "learning_rate": 3.779829894576356e-05,
      "loss": 0.5621,
      "step": 274
    },
    {
      "epoch": 0.352,
      "grad_norm": 1.2754504027490492,
      "learning_rate": 3.770906383173949e-05,
      "loss": 0.6308,
      "step": 275
    },
    {
      "epoch": 0.35328,
      "grad_norm": 1.2949946759729944,
      "learning_rate": 3.761960983023407e-05,
      "loss": 0.6139,
      "step": 276
    },
    {
      "epoch": 0.35456,
      "grad_norm": 1.221882657046218,
      "learning_rate": 3.752993848190846e-05,
      "loss": 0.5263,
      "step": 277
    },
    {
      "epoch": 0.35584,
      "grad_norm": 1.3249519620764185,
      "learning_rate": 3.744005133116718e-05,
      "loss": 0.6363,
      "step": 278
    },
    {
      "epoch": 0.35712,
      "grad_norm": 1.15652353361268,
      "learning_rate": 3.73499499261315e-05,
      "loss": 0.489,
      "step": 279
    },
    {
      "epoch": 0.3584,
      "grad_norm": 1.1679315162719004,
      "learning_rate": 3.725963581861279e-05,
      "loss": 0.5328,
      "step": 280
    },
    {
      "epoch": 0.35968,
      "grad_norm": 1.3646536938255422,
      "learning_rate": 3.716911056408575e-05,
      "loss": 0.5984,
      "step": 281
    },
    {
      "epoch": 0.36096,
      "grad_norm": 1.12669716548629,
      "learning_rate": 3.7078375721661695e-05,
      "loss": 0.5493,
      "step": 282
    },
    {
      "epoch": 0.36224,
      "grad_norm": 1.226794376671606,
      "learning_rate": 3.698743285406164e-05,
      "loss": 0.5694,
      "step": 283
    },
    {
      "epoch": 0.36352,
      "grad_norm": 1.4769643370330616,
      "learning_rate": 3.689628352758938e-05,
      "loss": 0.5734,
      "step": 284
    },
    {
      "epoch": 0.3648,
      "grad_norm": 1.215799347763785,
      "learning_rate": 3.6804929312104594e-05,
      "loss": 0.5455,
      "step": 285
    },
    {
      "epoch": 0.36608,
      "grad_norm": 1.2353495658216664,
      "learning_rate": 3.6713371780995705e-05,
      "loss": 0.5878,
      "step": 286
    },
    {
      "epoch": 0.36736,
      "grad_norm": 1.3498857433651656,
      "learning_rate": 3.6621612511152855e-05,
      "loss": 0.6253,
      "step": 287
    },
    {
      "epoch": 0.36864,
      "grad_norm": 1.3431766601701063,
      "learning_rate": 3.6529653082940716e-05,
      "loss": 0.6428,
      "step": 288
    },
    {
      "epoch": 0.36992,
      "grad_norm": 1.7218890689464375,
      "learning_rate": 3.643749508017127e-05,
      "loss": 0.5699,
      "step": 289
    },
    {
      "epoch": 0.3712,
      "grad_norm": 1.3293167573090323,
      "learning_rate": 3.6345140090076555e-05,
      "loss": 0.5328,
      "step": 290
    },
    {
      "epoch": 0.37248,
      "grad_norm": 1.4775399143679342,
      "learning_rate": 3.625258970328127e-05,
      "loss": 0.5287,
      "step": 291
    },
    {
      "epoch": 0.37376,
      "grad_norm": 1.287867200672087,
      "learning_rate": 3.6159845513775466e-05,
      "loss": 0.4338,
      "step": 292
    },
    {
      "epoch": 0.37504,
      "grad_norm": 1.527612472586449,
      "learning_rate": 3.606690911888702e-05,
      "loss": 0.4971,
      "step": 293
    },
    {
      "epoch": 0.37632,
      "grad_norm": 1.3510764637999464,
      "learning_rate": 3.5973782119254164e-05,
      "loss": 0.5972,
      "step": 294
    },
    {
      "epoch": 0.3776,
      "grad_norm": 1.3937422897967797,
      "learning_rate": 3.5880466118797906e-05,
      "loss": 0.5284,
      "step": 295
    },
    {
      "epoch": 0.37888,
      "grad_norm": 1.466811933631946,
      "learning_rate": 3.5786962724694384e-05,
      "loss": 0.5436,
      "step": 296
    },
    {
      "epoch": 0.38016,
      "grad_norm": 1.4437937361204634,
      "learning_rate": 3.569327354734723e-05,
      "loss": 0.5646,
      "step": 297
    },
    {
      "epoch": 0.38144,
      "grad_norm": 1.290090274970926,
      "learning_rate": 3.55994002003598e-05,
      "loss": 0.5543,
      "step": 298
    },
    {
      "epoch": 0.38272,
      "grad_norm": 1.0853975586084386,
      "learning_rate": 3.5505344300507395e-05,
      "loss": 0.5779,
      "step": 299
    },
    {
      "epoch": 0.384,
      "grad_norm": 1.267921834114073,
      "learning_rate": 3.5411107467709404e-05,
      "loss": 0.5645,
      "step": 300
    },
    {
      "epoch": 0.38528,
      "grad_norm": 1.6787016254059157,
      "learning_rate": 3.531669132500143e-05,
      "loss": 0.591,
      "step": 301
    },
    {
      "epoch": 0.38656,
      "grad_norm": 1.2074228758044878,
      "learning_rate": 3.522209749850731e-05,
      "loss": 0.534,
      "step": 302
    },
    {
      "epoch": 0.38784,
      "grad_norm": 1.2455711032289403,
      "learning_rate": 3.512732761741112e-05,
      "loss": 0.6168,
      "step": 303
    },
    {
      "epoch": 0.38912,
      "grad_norm": 1.1905858344499094,
      "learning_rate": 3.503238331392913e-05,
      "loss": 0.5963,
      "step": 304
    },
    {
      "epoch": 0.3904,
      "grad_norm": 1.34055529484794,
      "learning_rate": 3.493726622328164e-05,
      "loss": 0.6311,
      "step": 305
    },
    {
      "epoch": 0.39168,
      "grad_norm": 1.3045153374892182,
      "learning_rate": 3.4841977983664884e-05,
      "loss": 0.5334,
      "step": 306
    },
    {
      "epoch": 0.39296,
      "grad_norm": 1.2667062270289304,
      "learning_rate": 3.474652023622278e-05,
      "loss": 0.6376,
      "step": 307
    },
    {
      "epoch": 0.39424,
      "grad_norm": 1.2136104665777498,
      "learning_rate": 3.4650894625018674e-05,
      "loss": 0.6002,
      "step": 308
    },
    {
      "epoch": 0.39552,
      "grad_norm": 1.3229140599868134,
      "learning_rate": 3.4555102797006994e-05,
      "loss": 0.5891,
      "step": 309
    },
    {
      "epoch": 0.3968,
      "grad_norm": 1.6780158724088927,
      "learning_rate": 3.445914640200495e-05,
      "loss": 0.567,
      "step": 310
    },
    {
      "epoch": 0.39808,
      "grad_norm": 1.3797285759333664,
      "learning_rate": 3.436302709266404e-05,
      "loss": 0.5864,
      "step": 311
    },
    {
      "epoch": 0.39936,
      "grad_norm": 1.422694140642603,
      "learning_rate": 3.4266746524441656e-05,
      "loss": 0.6284,
      "step": 312
    },
    {
      "epoch": 0.40064,
      "grad_norm": 1.3635968113925354,
      "learning_rate": 3.4170306355572536e-05,
      "loss": 0.6165,
      "step": 313
    },
    {
      "epoch": 0.40192,
      "grad_norm": 1.222278706443975,
      "learning_rate": 3.4073708247040195e-05,
      "loss": 0.5796,
      "step": 314
    },
    {
      "epoch": 0.4032,
      "grad_norm": 1.345912611253027,
      "learning_rate": 3.397695386254835e-05,
      "loss": 0.5577,
      "step": 315
    },
    {
      "epoch": 0.40448,
      "grad_norm": 1.2268397815956371,
      "learning_rate": 3.3880044868492244e-05,
      "loss": 0.5911,
      "step": 316
    },
    {
      "epoch": 0.40576,
      "grad_norm": 1.2952822047284296,
      "learning_rate": 3.378298293392996e-05,
      "loss": 0.5985,
      "step": 317
    },
    {
      "epoch": 0.40704,
      "grad_norm": 1.2467909061828686,
      "learning_rate": 3.3685769730553656e-05,
      "loss": 0.5211,
      "step": 318
    },
    {
      "epoch": 0.40832,
      "grad_norm": 1.2956340009271652,
      "learning_rate": 3.358840693266079e-05,
      "loss": 0.6198,
      "step": 319
    },
    {
      "epoch": 0.4096,
      "grad_norm": 1.1943115956554835,
      "learning_rate": 3.349089621712526e-05,
      "loss": 0.5212,
      "step": 320
    },
    {
      "epoch": 0.41088,
      "grad_norm": 1.4650997633267928,
      "learning_rate": 3.339323926336858e-05,
      "loss": 0.6123,
      "step": 321
    },
    {
      "epoch": 0.41216,
      "grad_norm": 1.2534578547458006,
      "learning_rate": 3.3295437753330884e-05,
      "loss": 0.5015,
      "step": 322
    },
    {
      "epoch": 0.41344,
      "grad_norm": 1.1962435109275822,
      "learning_rate": 3.3197493371442e-05,
      "loss": 0.5301,
      "step": 323
    },
    {
      "epoch": 0.41472,
      "grad_norm": 1.1594755003785737,
      "learning_rate": 3.3099407804592425e-05,
      "loss": 0.5538,
      "step": 324
    },
    {
      "epoch": 0.416,
      "grad_norm": 1.4584834684167707,
      "learning_rate": 3.3001182742104284e-05,
      "loss": 0.5756,
      "step": 325
    },
    {
      "epoch": 0.41728,
      "grad_norm": 1.2558119616769445,
      "learning_rate": 3.290281987570223e-05,
      "loss": 0.6599,
      "step": 326
    },
    {
      "epoch": 0.41856,
      "grad_norm": 1.4278970954149597,
      "learning_rate": 3.28043208994843e-05,
      "loss": 0.5869,
      "step": 327
    },
    {
      "epoch": 0.41984,
      "grad_norm": 2.4291510496794997,
      "learning_rate": 3.270568750989274e-05,
      "loss": 0.6185,
      "step": 328
    },
    {
      "epoch": 0.42112,
      "grad_norm": 1.2291374743151258,
      "learning_rate": 3.2606921405684775e-05,
      "loss": 0.6135,
      "step": 329
    },
    {
      "epoch": 0.4224,
      "grad_norm": 1.4695882845341766,
      "learning_rate": 3.25080242879034e-05,
      "loss": 0.5721,
      "step": 330
    },
    {
      "epoch": 0.42368,
      "grad_norm": 1.1372064070452967,
      "learning_rate": 3.2408997859848054e-05,
      "loss": 0.6025,
      "step": 331
    },
    {
      "epoch": 0.42496,
      "grad_norm": 1.1634030987060509,
      "learning_rate": 3.2309843827045204e-05,
      "loss": 0.628,
      "step": 332
    },
    {
      "epoch": 0.42624,
      "grad_norm": 1.308080248548213,
      "learning_rate": 3.221056389721916e-05,
      "loss": 0.584,
      "step": 333
    },
    {
      "epoch": 0.42752,
      "grad_norm": 1.1890680841041952,
      "learning_rate": 3.2111159780262444e-05,
      "loss": 0.5607,
      "step": 334
    },
    {
      "epoch": 0.4288,
      "grad_norm": 1.9139958322844468,
      "learning_rate": 3.201163318820651e-05,
      "loss": 0.5057,
      "step": 335
    },
    {
      "epoch": 0.43008,
      "grad_norm": 1.3435376715264546,
      "learning_rate": 3.191198583519219e-05,
      "loss": 0.5569,
      "step": 336
    },
    {
      "epoch": 0.43136,
      "grad_norm": 1.1164006852074977,
      "learning_rate": 3.181221943744013e-05,
      "loss": 0.5792,
      "step": 337
    },
    {
      "epoch": 0.43264,
      "grad_norm": 1.3249853155722109,
      "learning_rate": 3.171233571322132e-05,
      "loss": 0.6264,
      "step": 338
    },
    {
      "epoch": 0.43392,
      "grad_norm": 1.3538583731743,
      "learning_rate": 3.161233638282745e-05,
      "loss": 0.5998,
      "step": 339
    },
    {
      "epoch": 0.4352,
      "grad_norm": 1.2895456992706795,
      "learning_rate": 3.151222316854128e-05,
      "loss": 0.5761,
      "step": 340
    },
    {
      "epoch": 0.43648,
      "grad_norm": 1.4005056509719636,
      "learning_rate": 3.141199779460699e-05,
      "loss": 0.5722,
      "step": 341
    },
    {
      "epoch": 0.43776,
      "grad_norm": 1.1987474984629116,
      "learning_rate": 3.131166198720047e-05,
      "loss": 0.5512,
      "step": 342
    },
    {
      "epoch": 0.43904,
      "grad_norm": 1.3932536887192866,
      "learning_rate": 3.121121747439961e-05,
      "loss": 0.5275,
      "step": 343
    },
    {
      "epoch": 0.44032,
      "grad_norm": 1.2171861052933255,
      "learning_rate": 3.111066598615452e-05,
      "loss": 0.5404,
      "step": 344
    },
    {
      "epoch": 0.4416,
      "grad_norm": 1.2629057081947703,
      "learning_rate": 3.101000925425776e-05,
      "loss": 0.5966,
      "step": 345
    },
    {
      "epoch": 0.44288,
      "grad_norm": 1.1032390597680797,
      "learning_rate": 3.0909249012314476e-05,
      "loss": 0.597,
      "step": 346
    },
    {
      "epoch": 0.44416,
      "grad_norm": 1.2527254463012014,
      "learning_rate": 3.080838699571255e-05,
      "loss": 0.5128,
      "step": 347
    },
    {
      "epoch": 0.44544,
      "grad_norm": 1.1761651462829126,
      "learning_rate": 3.070742494159277e-05,
      "loss": 0.5547,
      "step": 348
    },
    {
      "epoch": 0.44672,
      "grad_norm": 1.2544289052403295,
      "learning_rate": 3.0606364588818796e-05,
      "loss": 0.4864,
      "step": 349
    },
    {
      "epoch": 0.448,
      "grad_norm": 1.1724153058535909,
      "learning_rate": 3.0505207677947346e-05,
      "loss": 0.5751,
      "step": 350
    },
    {
      "epoch": 0.44928,
      "grad_norm": 1.1653416072287968,
      "learning_rate": 3.0403955951198128e-05,
      "loss": 0.5075,
      "step": 351
    },
    {
      "epoch": 0.45056,
      "grad_norm": 1.2231314830408664,
      "learning_rate": 3.0302611152423843e-05,
      "loss": 0.5564,
      "step": 352
    },
    {
      "epoch": 0.45184,
      "grad_norm": 1.2514358177940867,
      "learning_rate": 3.0201175027080185e-05,
      "loss": 0.6166,
      "step": 353
    },
    {
      "epoch": 0.45312,
      "grad_norm": 1.2878014912228342,
      "learning_rate": 3.0099649322195744e-05,
      "loss": 0.5545,
      "step": 354
    },
    {
      "epoch": 0.4544,
      "grad_norm": 1.1548643419585916,
      "learning_rate": 2.9998035786341955e-05,
      "loss": 0.62,
      "step": 355
    },
    {
      "epoch": 0.45568,
      "grad_norm": 1.330069185436519,
      "learning_rate": 2.9896336169602944e-05,
      "loss": 0.6524,
      "step": 356
    },
    {
      "epoch": 0.45696,
      "grad_norm": 1.2243531133738643,
      "learning_rate": 2.9794552223545415e-05,
      "loss": 0.5803,
      "step": 357
    },
    {
      "epoch": 0.45824,
      "grad_norm": 1.0989942461323519,
      "learning_rate": 2.9692685701188456e-05,
      "loss": 0.5943,
      "step": 358
    },
    {
      "epoch": 0.45952,
      "grad_norm": 1.2983997729559762,
      "learning_rate": 2.9590738356973367e-05,
      "loss": 0.5394,
      "step": 359
    },
    {
      "epoch": 0.4608,
      "grad_norm": 1.2413018726686185,
      "learning_rate": 2.9488711946733456e-05,
      "loss": 0.6067,
      "step": 360
    },
    {
      "epoch": 0.46208,
      "grad_norm": 1.252446433445436,
      "learning_rate": 2.938660822766376e-05,
      "loss": 0.5287,
      "step": 361
    },
    {
      "epoch": 0.46336,
      "grad_norm": 1.275914526368788,
      "learning_rate": 2.928442895829079e-05,
      "loss": 0.5204,
      "step": 362
    },
    {
      "epoch": 0.46464,
      "grad_norm": 1.1837419589585472,
      "learning_rate": 2.9182175898442306e-05,
      "loss": 0.4935,
      "step": 363
    },
    {
      "epoch": 0.46592,
      "grad_norm": 1.205077511167095,
      "learning_rate": 2.9079850809216896e-05,
      "loss": 0.5369,
      "step": 364
    },
    {
      "epoch": 0.4672,
      "grad_norm": 1.2688726918198103,
| "learning_rate": 2.897745545295375e-05, | |
| "loss": 0.5451, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.46848, | |
| "grad_norm": 1.1704492533966362, | |
| "learning_rate": 2.887499159320225e-05, | |
| "loss": 0.6097, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.46976, | |
| "grad_norm": 1.3235882304781694, | |
| "learning_rate": 2.877246099469159e-05, | |
| "loss": 0.556, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.47104, | |
| "grad_norm": 1.2918187012538356, | |
| "learning_rate": 2.8669865423300435e-05, | |
| "loss": 0.5205, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.47232, | |
| "grad_norm": 1.340377609108849, | |
| "learning_rate": 2.8567206646026445e-05, | |
| "loss": 0.6099, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.4736, | |
| "grad_norm": 1.2566046414113698, | |
| "learning_rate": 2.8464486430955893e-05, | |
| "loss": 0.5519, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.47488, | |
| "grad_norm": 1.2274715526437678, | |
| "learning_rate": 2.8361706547233197e-05, | |
| "loss": 0.553, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.47616, | |
| "grad_norm": 1.1289857185924714, | |
| "learning_rate": 2.8258868765030404e-05, | |
| "loss": 0.6039, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.47744, | |
| "grad_norm": 1.2701823138589687, | |
| "learning_rate": 2.815597485551678e-05, | |
| "loss": 0.5612, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.47872, | |
| "grad_norm": 1.2076034107561964, | |
| "learning_rate": 2.8053026590828268e-05, | |
| "loss": 0.5968, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.3105219035183913, | |
| "learning_rate": 2.7950025744036946e-05, | |
| "loss": 0.5626, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.48128, | |
| "grad_norm": 1.3524040967341444, | |
| "learning_rate": 2.7846974089120533e-05, | |
| "loss": 0.6152, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.48256, | |
| "grad_norm": 1.2449086491057804, | |
| "learning_rate": 2.774387340093179e-05, | |
| "loss": 0.5676, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.48384, | |
| "grad_norm": 1.2219099032202758, | |
| "learning_rate": 2.7640725455167997e-05, | |
| "loss": 0.5755, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.48512, | |
| "grad_norm": 1.056275114051859, | |
| "learning_rate": 2.7537532028340346e-05, | |
| "loss": 0.511, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4864, | |
| "grad_norm": 1.3709611873936856, | |
| "learning_rate": 2.743429489774332e-05, | |
| "loss": 0.5884, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.48768, | |
| "grad_norm": 1.3842123548791745, | |
| "learning_rate": 2.7331015841424147e-05, | |
| "loss": 0.5514, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.48896, | |
| "grad_norm": 1.2908690360305188, | |
| "learning_rate": 2.722769663815211e-05, | |
| "loss": 0.5444, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.49024, | |
| "grad_norm": 1.119809353222974, | |
| "learning_rate": 2.7124339067387967e-05, | |
| "loss": 0.5742, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.49152, | |
| "grad_norm": 1.2813035492518066, | |
| "learning_rate": 2.7020944909253254e-05, | |
| "loss": 0.56, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4928, | |
| "grad_norm": 1.4895244230953308, | |
| "learning_rate": 2.6917515944499662e-05, | |
| "loss": 0.596, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.49408, | |
| "grad_norm": 1.2423920545080436, | |
| "learning_rate": 2.681405395447834e-05, | |
| "loss": 0.5949, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.49536, | |
| "grad_norm": 1.428374674531408, | |
| "learning_rate": 2.671056072110925e-05, | |
| "loss": 0.5875, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.49664, | |
| "grad_norm": 1.2294299524141814, | |
| "learning_rate": 2.660703802685045e-05, | |
| "loss": 0.5766, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.49792, | |
| "grad_norm": 1.4334722662702042, | |
| "learning_rate": 2.6503487654667393e-05, | |
| "loss": 0.5907, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4992, | |
| "grad_norm": 1.0508584663305052, | |
| "learning_rate": 2.6399911388002223e-05, | |
| "loss": 0.5778, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.50048, | |
| "grad_norm": 1.3016693067243945, | |
| "learning_rate": 2.629631101074308e-05, | |
| "loss": 0.5601, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.50176, | |
| "grad_norm": 1.7491421602485298, | |
| "learning_rate": 2.6192688307193352e-05, | |
| "loss": 0.5255, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.50304, | |
| "grad_norm": 1.3723500990836885, | |
| "learning_rate": 2.6089045062040952e-05, | |
| "loss": 0.5982, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.50432, | |
| "grad_norm": 1.1683424370515292, | |
| "learning_rate": 2.5985383060327578e-05, | |
| "loss": 0.5914, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5056, | |
| "grad_norm": 1.170730407642251, | |
| "learning_rate": 2.588170408741796e-05, | |
| "loss": 0.5107, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.50688, | |
| "grad_norm": 1.498512014092024, | |
| "learning_rate": 2.5778009928969156e-05, | |
| "loss": 0.512, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.50816, | |
| "grad_norm": 1.1575011931583987, | |
| "learning_rate": 2.5674302370899727e-05, | |
| "loss": 0.5388, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.50944, | |
| "grad_norm": 0.9707795521955531, | |
| "learning_rate": 2.5570583199359022e-05, | |
| "loss": 0.5046, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.51072, | |
| "grad_norm": 1.1338043215270575, | |
| "learning_rate": 2.5466854200696433e-05, | |
| "loss": 0.4667, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 1.23888658446542, | |
| "learning_rate": 2.5363117161430576e-05, | |
| "loss": 0.6083, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.51328, | |
| "grad_norm": 1.3860213242038153, | |
| "learning_rate": 2.525937386821856e-05, | |
| "loss": 0.6007, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.51456, | |
| "grad_norm": 1.233464452584448, | |
| "learning_rate": 2.515562610782522e-05, | |
| "loss": 0.6071, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.51584, | |
| "grad_norm": 1.2007891316048498, | |
| "learning_rate": 2.5051875667092295e-05, | |
| "loss": 0.5747, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.51712, | |
| "grad_norm": 1.3643602667570913, | |
| "learning_rate": 2.4948124332907718e-05, | |
| "loss": 0.5934, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5184, | |
| "grad_norm": 1.256270986331548, | |
| "learning_rate": 2.4844373892174784e-05, | |
| "loss": 0.5826, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.51968, | |
| "grad_norm": 1.3160910758396223, | |
| "learning_rate": 2.4740626131781443e-05, | |
| "loss": 0.5613, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.52096, | |
| "grad_norm": 1.2973913348628476, | |
| "learning_rate": 2.4636882838569427e-05, | |
| "loss": 0.5649, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.52224, | |
| "grad_norm": 1.1537267459203224, | |
| "learning_rate": 2.4533145799303563e-05, | |
| "loss": 0.5512, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.52352, | |
| "grad_norm": 1.2468678438124798, | |
| "learning_rate": 2.4429416800640984e-05, | |
| "loss": 0.5514, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.5248, | |
| "grad_norm": 1.1791927371620639, | |
| "learning_rate": 2.432569762910028e-05, | |
| "loss": 0.5113, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.52608, | |
| "grad_norm": 1.1303674841275846, | |
| "learning_rate": 2.422199007103085e-05, | |
| "loss": 0.5143, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.52736, | |
| "grad_norm": 1.3269655646131298, | |
| "learning_rate": 2.411829591258204e-05, | |
| "loss": 0.5813, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.52864, | |
| "grad_norm": 1.4496753345160198, | |
| "learning_rate": 2.401461693967242e-05, | |
| "loss": 0.5895, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.52992, | |
| "grad_norm": 1.2990517155869246, | |
| "learning_rate": 2.3910954937959054e-05, | |
| "loss": 0.5814, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5312, | |
| "grad_norm": 1.0920219631581929, | |
| "learning_rate": 2.3807311692806647e-05, | |
| "loss": 0.4915, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.53248, | |
| "grad_norm": 1.3762916454324492, | |
| "learning_rate": 2.3703688989256927e-05, | |
| "loss": 0.6253, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.53376, | |
| "grad_norm": 1.141172451857967, | |
| "learning_rate": 2.3600088611997783e-05, | |
| "loss": 0.5151, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.53504, | |
| "grad_norm": 1.2186628074135093, | |
| "learning_rate": 2.349651234533262e-05, | |
| "loss": 0.5866, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.53632, | |
| "grad_norm": 1.32711445847315, | |
| "learning_rate": 2.3392961973149558e-05, | |
| "loss": 0.6288, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.5376, | |
| "grad_norm": 1.296431616588229, | |
| "learning_rate": 2.3289439278890745e-05, | |
| "loss": 0.5734, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.53888, | |
| "grad_norm": 1.113892769973126, | |
| "learning_rate": 2.3185946045521665e-05, | |
| "loss": 0.5948, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.54016, | |
| "grad_norm": 1.3517343904695995, | |
| "learning_rate": 2.3082484055500344e-05, | |
| "loss": 0.6031, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.54144, | |
| "grad_norm": 1.1998450250938084, | |
| "learning_rate": 2.2979055090746755e-05, | |
| "loss": 0.6453, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.54272, | |
| "grad_norm": 1.2071751768753964, | |
| "learning_rate": 2.287566093261204e-05, | |
| "loss": 0.6026, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 1.2292756137396301, | |
| "learning_rate": 2.2772303361847888e-05, | |
| "loss": 0.5829, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.54528, | |
| "grad_norm": 1.1994982795440643, | |
| "learning_rate": 2.266898415857586e-05, | |
| "loss": 0.4896, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.54656, | |
| "grad_norm": 1.2001935606378307, | |
| "learning_rate": 2.2565705102256684e-05, | |
| "loss": 0.5409, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.54784, | |
| "grad_norm": 1.2853193868753996, | |
| "learning_rate": 2.2462467971659666e-05, | |
| "loss": 0.517, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.54912, | |
| "grad_norm": 1.1830392415417872, | |
| "learning_rate": 2.2359274544832005e-05, | |
| "loss": 0.5637, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5504, | |
| "grad_norm": 1.2248270356932633, | |
| "learning_rate": 2.225612659906822e-05, | |
| "loss": 0.6132, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.55168, | |
| "grad_norm": 1.2611689524837952, | |
| "learning_rate": 2.2153025910879473e-05, | |
| "loss": 0.5341, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.55296, | |
| "grad_norm": 1.3858277268542414, | |
| "learning_rate": 2.2049974255963056e-05, | |
| "loss": 0.6227, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.55424, | |
| "grad_norm": 1.2345807696605382, | |
| "learning_rate": 2.1946973409171738e-05, | |
| "loss": 0.5345, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.55552, | |
| "grad_norm": 1.2789228285202352, | |
| "learning_rate": 2.1844025144483222e-05, | |
| "loss": 0.6043, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5568, | |
| "grad_norm": 1.2922656932659387, | |
| "learning_rate": 2.174113123496961e-05, | |
| "loss": 0.5168, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.55808, | |
| "grad_norm": 1.4040188415620778, | |
| "learning_rate": 2.163829345276681e-05, | |
| "loss": 0.6237, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.55936, | |
| "grad_norm": 1.2321616718186987, | |
| "learning_rate": 2.1535513569044103e-05, | |
| "loss": 0.5671, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.56064, | |
| "grad_norm": 1.1124330457312328, | |
| "learning_rate": 2.143279335397356e-05, | |
| "loss": 0.5446, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.56192, | |
| "grad_norm": 1.1171606133972136, | |
| "learning_rate": 2.1330134576699574e-05, | |
| "loss": 0.5846, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5632, | |
| "grad_norm": 1.1830503160889023, | |
| "learning_rate": 2.122753900530842e-05, | |
| "loss": 0.639, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.56448, | |
| "grad_norm": 1.2636418722094025, | |
| "learning_rate": 2.1125008406797758e-05, | |
| "loss": 0.6172, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.56576, | |
| "grad_norm": 1.464537035290334, | |
| "learning_rate": 2.1022544547046262e-05, | |
| "loss": 0.5114, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.56704, | |
| "grad_norm": 1.2189845380736988, | |
| "learning_rate": 2.0920149190783106e-05, | |
| "loss": 0.5498, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.56832, | |
| "grad_norm": 1.2639051517016353, | |
| "learning_rate": 2.08178241015577e-05, | |
| "loss": 0.5407, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5696, | |
| "grad_norm": 1.2455885670188174, | |
| "learning_rate": 2.0715571041709213e-05, | |
| "loss": 0.5775, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.57088, | |
| "grad_norm": 1.177282967253439, | |
| "learning_rate": 2.061339177233625e-05, | |
| "loss": 0.558, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.57216, | |
| "grad_norm": 1.1191927664939323, | |
| "learning_rate": 2.0511288053266556e-05, | |
| "loss": 0.5265, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.57344, | |
| "grad_norm": 1.372685384367634, | |
| "learning_rate": 2.0409261643026635e-05, | |
| "loss": 0.534, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.57472, | |
| "grad_norm": 1.3203175248668297, | |
| "learning_rate": 2.030731429881155e-05, | |
| "loss": 0.5108, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 1.2651180014274976, | |
| "learning_rate": 2.0205447776454594e-05, | |
| "loss": 0.5269, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.57728, | |
| "grad_norm": 1.2285045730420263, | |
| "learning_rate": 2.0103663830397055e-05, | |
| "loss": 0.5476, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.57856, | |
| "grad_norm": 1.2187872148550316, | |
| "learning_rate": 2.0001964213658055e-05, | |
| "loss": 0.6011, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.57984, | |
| "grad_norm": 1.3182223798353634, | |
| "learning_rate": 1.990035067780426e-05, | |
| "loss": 0.5286, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.58112, | |
| "grad_norm": 1.383300691712815, | |
| "learning_rate": 1.9798824972919827e-05, | |
| "loss": 0.552, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5824, | |
| "grad_norm": 1.3543660078689728, | |
| "learning_rate": 1.9697388847576166e-05, | |
| "loss": 0.5679, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.58368, | |
| "grad_norm": 1.3682056136434164, | |
| "learning_rate": 1.959604404880187e-05, | |
| "loss": 0.565, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.58496, | |
| "grad_norm": 1.292711367790853, | |
| "learning_rate": 1.9494792322052657e-05, | |
| "loss": 0.4667, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.58624, | |
| "grad_norm": 1.1099553036632586, | |
| "learning_rate": 1.9393635411181207e-05, | |
| "loss": 0.5812, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.58752, | |
| "grad_norm": 1.2996199223765448, | |
| "learning_rate": 1.929257505840725e-05, | |
| "loss": 0.5662, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5888, | |
| "grad_norm": 1.2399781722085743, | |
| "learning_rate": 1.9191613004287457e-05, | |
| "loss": 0.5591, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.59008, | |
| "grad_norm": 1.074087210982775, | |
| "learning_rate": 1.909075098768553e-05, | |
| "loss": 0.5151, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.59136, | |
| "grad_norm": 1.443150256537927, | |
| "learning_rate": 1.898999074574225e-05, | |
| "loss": 0.5322, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.59264, | |
| "grad_norm": 1.1460551754512525, | |
| "learning_rate": 1.8889334013845477e-05, | |
| "loss": 0.5756, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.59392, | |
| "grad_norm": 1.2419053035722545, | |
| "learning_rate": 1.87887825256004e-05, | |
| "loss": 0.5866, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5952, | |
| "grad_norm": 1.5391850851292794, | |
| "learning_rate": 1.8688338012799538e-05, | |
| "loss": 0.5551, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.59648, | |
| "grad_norm": 1.3477944906171562, | |
| "learning_rate": 1.8588002205393022e-05, | |
| "loss": 0.552, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.59776, | |
| "grad_norm": 1.221519019813217, | |
| "learning_rate": 1.8487776831458726e-05, | |
| "loss": 0.5789, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.59904, | |
| "grad_norm": 1.2044933407805498, | |
| "learning_rate": 1.8387663617172548e-05, | |
| "loss": 0.4332, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.60032, | |
| "grad_norm": 1.2012284237768365, | |
| "learning_rate": 1.8287664286778684e-05, | |
| "loss": 0.5544, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6016, | |
| "grad_norm": 1.2816736962799906, | |
| "learning_rate": 1.8187780562559874e-05, | |
| "loss": 0.5276, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.60288, | |
| "grad_norm": 1.495077809694399, | |
| "learning_rate": 1.8088014164807826e-05, | |
| "loss": 0.5412, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.60416, | |
| "grad_norm": 1.2154956923566738, | |
| "learning_rate": 1.7988366811793492e-05, | |
| "loss": 0.5111, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.60544, | |
| "grad_norm": 1.2057734281666543, | |
| "learning_rate": 1.7888840219737558e-05, | |
| "loss": 0.5201, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.60672, | |
| "grad_norm": 1.211236940844381, | |
| "learning_rate": 1.778943610278085e-05, | |
| "loss": 0.6024, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 1.4289234845237362, | |
| "learning_rate": 1.7690156172954792e-05, | |
| "loss": 0.5625, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.60928, | |
| "grad_norm": 1.4857543422663195, | |
| "learning_rate": 1.7591002140151962e-05, | |
| "loss": 0.5893, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.61056, | |
| "grad_norm": 2.1142689700662083, | |
| "learning_rate": 1.7491975712096597e-05, | |
| "loss": 0.577, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.61184, | |
| "grad_norm": 1.4472570725904248, | |
| "learning_rate": 1.7393078594315234e-05, | |
| "loss": 0.5714, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.61312, | |
| "grad_norm": 1.3361732477713268, | |
| "learning_rate": 1.7294312490107274e-05, | |
| "loss": 0.5747, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.6144, | |
| "grad_norm": 1.2824327680032168, | |
| "learning_rate": 1.7195679100515704e-05, | |
| "loss": 0.6522, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.61568, | |
| "grad_norm": 1.2744729944573268, | |
| "learning_rate": 1.7097180124297773e-05, | |
| "loss": 0.5317, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.61696, | |
| "grad_norm": 1.2554460971289778, | |
| "learning_rate": 1.699881725789572e-05, | |
| "loss": 0.5319, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.61824, | |
| "grad_norm": 1.1096558046798817, | |
| "learning_rate": 1.6900592195407587e-05, | |
| "loss": 0.4942, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.61952, | |
| "grad_norm": 1.3387394857782844, | |
| "learning_rate": 1.6802506628558007e-05, | |
| "loss": 0.6198, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6208, | |
| "grad_norm": 1.3711413673578072, | |
| "learning_rate": 1.670456224666912e-05, | |
| "loss": 0.4729, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.62208, | |
| "grad_norm": 1.2832549202461732, | |
| "learning_rate": 1.6606760736631424e-05, | |
| "loss": 0.5738, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.62336, | |
| "grad_norm": 1.2786064784236342, | |
| "learning_rate": 1.650910378287474e-05, | |
| "loss": 0.542, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.62464, | |
| "grad_norm": 1.2244005536975646, | |
| "learning_rate": 1.6411593067339226e-05, | |
| "loss": 0.602, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.62592, | |
| "grad_norm": 1.3670752314401449, | |
| "learning_rate": 1.631423026944635e-05, | |
| "loss": 0.5264, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.6272, | |
| "grad_norm": 1.1975869923574043, | |
| "learning_rate": 1.621701706607004e-05, | |
| "loss": 0.5417, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.62848, | |
| "grad_norm": 1.1684365097414156, | |
| "learning_rate": 1.6119955131507762e-05, | |
| "loss": 0.5134, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.62976, | |
| "grad_norm": 1.2911937928145603, | |
| "learning_rate": 1.602304613745166e-05, | |
| "loss": 0.5665, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.63104, | |
| "grad_norm": 1.1229267173367932, | |
| "learning_rate": 1.5926291752959817e-05, | |
| "loss": 0.5243, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.63232, | |
| "grad_norm": 1.4259357023269845, | |
| "learning_rate": 1.5829693644427467e-05, | |
| "loss": 0.558, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6336, | |
| "grad_norm": 1.3549968973530235, | |
| "learning_rate": 1.573325347555835e-05, | |
| "loss": 0.5275, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.63488, | |
| "grad_norm": 1.4982948541046688, | |
| "learning_rate": 1.5636972907335963e-05, | |
| "loss": 0.5374, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.63616, | |
| "grad_norm": 1.5047489082026262, | |
| "learning_rate": 1.5540853597995056e-05, | |
| "loss": 0.5652, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.63744, | |
| "grad_norm": 1.2337671955369678, | |
| "learning_rate": 1.544489720299301e-05, | |
| "loss": 0.5841, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.63872, | |
| "grad_norm": 1.263681821755804, | |
| "learning_rate": 1.534910537498133e-05, | |
| "loss": 0.5265, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1936173660190517, | |
| "learning_rate": 1.5253479763777223e-05, | |
| "loss": 0.5168, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.64128, | |
| "grad_norm": 1.2912848360288505, | |
| "learning_rate": 1.5158022016335122e-05, | |
| "loss": 0.5186, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.64256, | |
| "grad_norm": 1.3030553677924595, | |
| "learning_rate": 1.5062733776718368e-05, | |
| "loss": 0.4865, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.64384, | |
| "grad_norm": 1.1509389250895423, | |
| "learning_rate": 1.4967616686070884e-05, | |
| "loss": 0.552, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.64512, | |
| "grad_norm": 1.2370993628937543, | |
| "learning_rate": 1.4872672382588876e-05, | |
| "loss": 0.4807, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6464, | |
| "grad_norm": 1.2088652091504688, | |
| "learning_rate": 1.4777902501492697e-05, | |
| "loss": 0.5152, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.64768, | |
| "grad_norm": 1.3333807930463668, | |
| "learning_rate": 1.4683308674998574e-05, | |
| "loss": 0.5602, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.64896, | |
| "grad_norm": 1.2916598419804997, | |
| "learning_rate": 1.4588892532290605e-05, | |
| "loss": 0.5712, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.65024, | |
| "grad_norm": 1.5063705994063172, | |
| "learning_rate": 1.4494655699492616e-05, | |
| "loss": 0.5382, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.65152, | |
| "grad_norm": 1.2379817269613271, | |
| "learning_rate": 1.4400599799640208e-05, | |
| "loss": 0.5218, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.6528, | |
| "grad_norm": 1.1137098205502176, | |
| "learning_rate": 1.4306726452652775e-05, | |
| "loss": 0.5807, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.65408, | |
| "grad_norm": 1.0551157546349172, | |
| "learning_rate": 1.4213037275305619e-05, | |
| "loss": 0.526, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.65536, | |
| "grad_norm": 1.293960992094804, | |
| "learning_rate": 1.4119533881202107e-05, | |
| "loss": 0.53, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.65664, | |
| "grad_norm": 1.247185840007771, | |
| "learning_rate": 1.4026217880745834e-05, | |
| "loss": 0.5675, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.65792, | |
| "grad_norm": 1.1908808687708086, | |
| "learning_rate": 1.393309088111298e-05, | |
| "loss": 0.5955, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.6592, | |
| "grad_norm": 1.2156584477137464, | |
| "learning_rate": 1.3840154486224546e-05, | |
| "loss": 0.5274, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.66048, | |
| "grad_norm": 1.0981287668065602, | |
| "learning_rate": 1.374741029671874e-05, | |
| "loss": 0.5282, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.66176, | |
| "grad_norm": 1.107252285561255, | |
| "learning_rate": 1.365485990992346e-05, | |
| "loss": 0.5153, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.66304, | |
| "grad_norm": 1.3518707420118647, | |
| "learning_rate": 1.3562504919828733e-05, | |
| "loss": 0.5943, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.66432, | |
| "grad_norm": 1.0564427325082784, | |
| "learning_rate": 1.3470346917059285e-05, | |
| "loss": 0.5423, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6656, | |
| "grad_norm": 1.221981551343257, | |
| "learning_rate": 1.3378387488847147e-05, | |
| "loss": 0.504, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.66688, | |
| "grad_norm": 1.1511755768258065, | |
| "learning_rate": 1.3286628219004296e-05, | |
| "loss": 0.5864, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.66816, | |
| "grad_norm": 1.2192055466441367, | |
| "learning_rate": 1.3195070687895417e-05, | |
| "loss": 0.5594, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.66944, | |
| "grad_norm": 1.2585185794710985, | |
| "learning_rate": 1.3103716472410615e-05, | |
| "loss": 0.5287, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.67072, | |
| "grad_norm": 1.3934704845636057, | |
| "learning_rate": 1.3012567145938372e-05, | |
| "loss": 0.6051, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 1.177737385230067, | |
| "learning_rate": 1.2921624278338307e-05, | |
| "loss": 0.5462, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.67328, | |
| "grad_norm": 1.316866042598568, | |
| "learning_rate": 1.283088943591425e-05, | |
| "loss": 0.545, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.67456, | |
| "grad_norm": 1.208871704339945, | |
| "learning_rate": 1.2740364181387216e-05, | |
| "loss": 0.5676, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.67584, | |
| "grad_norm": 1.1626006594704577, | |
| "learning_rate": 1.2650050073868503e-05, | |
| "loss": 0.5513, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.67712, | |
| "grad_norm": 1.5968257338184035, | |
| "learning_rate": 1.2559948668832824e-05, | |
| "loss": 0.5933, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.6784, | |
| "grad_norm": 1.1356469812667038, | |
| "learning_rate": 1.2470061518091547e-05, | |
| "loss": 0.5444, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.67968, | |
| "grad_norm": 1.1885702721742741, | |
| "learning_rate": 1.2380390169765945e-05, | |
| "loss": 0.5506, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.68096, | |
| "grad_norm": 1.1013113550769662, | |
| "learning_rate": 1.229093616826052e-05, | |
| "loss": 0.5027, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.68224, | |
| "grad_norm": 1.2841292239519988, | |
| "learning_rate": 1.220170105423643e-05, | |
| "loss": 0.4487, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.68352, | |
| "grad_norm": 1.5808389931213436, | |
| "learning_rate": 1.2112686364584992e-05, | |
| "loss": 0.5696, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6848, | |
| "grad_norm": 1.3657014015225237, | |
| "learning_rate": 1.202389363240109e-05, | |
| "loss": 0.6055, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.68608, | |
| "grad_norm": 1.3127880194747064, | |
| "learning_rate": 1.1935324386956898e-05, | |
| "loss": 0.4975, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.68736, | |
| "grad_norm": 1.33213215435169, | |
| "learning_rate": 1.1846980153675477e-05, | |
| "loss": 0.501, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.68864, | |
| "grad_norm": 1.3170582348295938, | |
| "learning_rate": 1.1758862454104519e-05, | |
| "loss": 0.6154, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.68992, | |
| "grad_norm": 1.1842089275460224, | |
| "learning_rate": 1.1670972805890135e-05, | |
| "loss": 0.5439, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.6912, | |
| "grad_norm": 1.0384903459303434, | |
| "learning_rate": 1.1583312722750723e-05, | |
| "loss": 0.5628, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.69248, | |
| "grad_norm": 1.361620259028275, | |
| "learning_rate": 1.1495883714450903e-05, | |
| "loss": 0.5321, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.69376, | |
| "grad_norm": 1.2264732643979583, | |
| "learning_rate": 1.1408687286775477e-05, | |
| "loss": 0.5919, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.69504, | |
| "grad_norm": 1.1087407645182128, | |
| "learning_rate": 1.1321724941503556e-05, | |
| "loss": 0.5653, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.69632, | |
| "grad_norm": 1.1636779939198256, | |
| "learning_rate": 1.1234998176382652e-05, | |
| "loss": 0.5237, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6976, | |
| "grad_norm": 1.146421493513529, | |
| "learning_rate": 1.1148508485102891e-05, | |
| "loss": 0.5726, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.69888, | |
| "grad_norm": 1.2221004328085083, | |
| "learning_rate": 1.1062257357271294e-05, | |
| "loss": 0.5914, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.70016, | |
| "grad_norm": 1.1406098120346972, | |
| "learning_rate": 1.0976246278386112e-05, | |
| "loss": 0.5043, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.70144, | |
| "grad_norm": 1.4462017230940785, | |
| "learning_rate": 1.0890476729811264e-05, | |
| "loss": 0.607, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.70272, | |
| "grad_norm": 1.2718033186610471, | |
| "learning_rate": 1.0804950188750763e-05, | |
| "loss": 0.5798, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 1.2566061081417694, | |
| "learning_rate": 1.071966812822337e-05, | |
| "loss": 0.6509, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.70528, | |
| "grad_norm": 1.1564175127755913, | |
| "learning_rate": 1.063463201703715e-05, | |
| "loss": 0.5128, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.70656, | |
| "grad_norm": 1.1677726983961518, | |
| "learning_rate": 1.054984331976418e-05, | |
| "loss": 0.5724, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.70784, | |
| "grad_norm": 1.2454718041428254, | |
| "learning_rate": 1.0465303496715361e-05, | |
| "loss": 0.5784, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.70912, | |
| "grad_norm": 1.1415754843333104, | |
| "learning_rate": 1.0381014003915248e-05, | |
| "loss": 0.4843, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7104, | |
| "grad_norm": 1.3007429856310264, | |
| "learning_rate": 1.0296976293076972e-05, | |
| "loss": 0.5617, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.71168, | |
| "grad_norm": 1.2198414193245422, | |
| "learning_rate": 1.0213191811577205e-05, | |
| "loss": 0.5596, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.71296, | |
| "grad_norm": 1.2843612700551643, | |
| "learning_rate": 1.012966200243132e-05, | |
| "loss": 0.5725, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.71424, | |
| "grad_norm": 1.126739877746523, | |
| "learning_rate": 1.0046388304268453e-05, | |
| "loss": 0.5436, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.71552, | |
| "grad_norm": 1.1331575664204836, | |
| "learning_rate": 9.963372151306744e-06, | |
| "loss": 0.5134, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.7168, | |
| "grad_norm": 1.2490185834409133, | |
| "learning_rate": 9.880614973328665e-06, | |
| "loss": 0.542, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.71808, | |
| "grad_norm": 1.2666444161044723, | |
| "learning_rate": 9.798118195656369e-06, | |
| "loss": 0.5921, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.71936, | |
| "grad_norm": 1.1854991284497056, | |
| "learning_rate": 9.715883239127157e-06, | |
| "loss": 0.5078, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.72064, | |
| "grad_norm": 1.2993488130030606, | |
| "learning_rate": 9.633911520068991e-06, | |
| "loss": 0.5492, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.72192, | |
| "grad_norm": 1.2406863978546896, | |
| "learning_rate": 9.552204450276112e-06, | |
| "loss": 0.5422, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.7232, | |
| "grad_norm": 1.091220039419828, | |
| "learning_rate": 9.47076343698473e-06, | |
| "loss": 0.5619, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.72448, | |
| "grad_norm": 1.1308773765297762, | |
| "learning_rate": 9.389589882848745e-06, | |
| "loss": 0.5664, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.72576, | |
| "grad_norm": 1.3974585568511477, | |
| "learning_rate": 9.308685185915685e-06, | |
| "loss": 0.5757, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.72704, | |
| "grad_norm": 1.2945255145565056, | |
| "learning_rate": 9.228050739602505e-06, | |
| "loss": 0.5478, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.72832, | |
| "grad_norm": 1.3739807773870025, | |
| "learning_rate": 9.147687932671681e-06, | |
| "loss": 0.5164, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.7296, | |
| "grad_norm": 1.2202555826722916, | |
| "learning_rate": 9.067598149207248e-06, | |
| "loss": 0.6039, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.73088, | |
| "grad_norm": 1.1567728732249534, | |
| "learning_rate": 8.987782768590986e-06, | |
| "loss": 0.4751, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.73216, | |
| "grad_norm": 1.264377575201893, | |
| "learning_rate": 8.908243165478637e-06, | |
| "loss": 0.5238, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.73344, | |
| "grad_norm": 1.1585888737648737, | |
| "learning_rate": 8.828980709776244e-06, | |
| "loss": 0.5152, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.73472, | |
| "grad_norm": 1.0550868080120395, | |
| "learning_rate": 8.749996766616566e-06, | |
| "loss": 0.5902, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 1.0748447046171212, | |
| "learning_rate": 8.67129269633556e-06, | |
| "loss": 0.5812, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.73728, | |
| "grad_norm": 1.197065774883385, | |
| "learning_rate": 8.59286985444891e-06, | |
| "loss": 0.6037, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.73856, | |
| "grad_norm": 1.2540788359939605, | |
| "learning_rate": 8.514729591628779e-06, | |
| "loss": 0.5304, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.73984, | |
| "grad_norm": 1.1192563868153553, | |
| "learning_rate": 8.436873253680433e-06, | |
| "loss": 0.5166, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.74112, | |
| "grad_norm": 1.0512034630035598, | |
| "learning_rate": 8.359302181519149e-06, | |
| "loss": 0.5724, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.7424, | |
| "grad_norm": 1.155936774472116, | |
| "learning_rate": 8.282017711147078e-06, | |
| "loss": 0.522, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.74368, | |
| "grad_norm": 1.2258491855677904, | |
| "learning_rate": 8.205021173630242e-06, | |
| "loss": 0.5973, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.74496, | |
| "grad_norm": 1.101410093876387, | |
| "learning_rate": 8.128313895075613e-06, | |
| "loss": 0.493, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.74624, | |
| "grad_norm": 1.3173396562305426, | |
| "learning_rate": 8.051897196608277e-06, | |
| "loss": 0.5016, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.74752, | |
| "grad_norm": 1.1639740237204903, | |
| "learning_rate": 7.975772394348676e-06, | |
| "loss": 0.5127, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7488, | |
| "grad_norm": 1.233993867871114, | |
| "learning_rate": 7.899940799389927e-06, | |
| "loss": 0.5293, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.75008, | |
| "grad_norm": 1.2643317967727674, | |
| "learning_rate": 7.824403717775258e-06, | |
| "loss": 0.537, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.75136, | |
| "grad_norm": 1.1316684062444469, | |
| "learning_rate": 7.749162450475541e-06, | |
| "loss": 0.5274, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.75264, | |
| "grad_norm": 1.0841667173819654, | |
| "learning_rate": 7.674218293366811e-06, | |
| "loss": 0.5169, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.75392, | |
| "grad_norm": 1.1931686957781658, | |
| "learning_rate": 7.599572537208013e-06, | |
| "loss": 0.5774, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.7552, | |
| "grad_norm": 1.2805213089480427, | |
| "learning_rate": 7.525226467618752e-06, | |
| "loss": 0.536, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.75648, | |
| "grad_norm": 1.1519718416840845, | |
| "learning_rate": 7.451181365057147e-06, | |
| "loss": 0.5641, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.75776, | |
| "grad_norm": 1.0998772914524717, | |
| "learning_rate": 7.377438504797754e-06, | |
| "loss": 0.5259, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.75904, | |
| "grad_norm": 1.111291933910291, | |
| "learning_rate": 7.303999156909669e-06, | |
| "loss": 0.571, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.76032, | |
| "grad_norm": 1.1529449610866376, | |
| "learning_rate": 7.230864586234596e-06, | |
| "loss": 0.5716, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7616, | |
| "grad_norm": 1.053821335847592, | |
| "learning_rate": 7.158036052365052e-06, | |
| "loss": 0.5819, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.76288, | |
| "grad_norm": 1.0092268297919107, | |
| "learning_rate": 7.085514809622734e-06, | |
| "loss": 0.5538, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.76416, | |
| "grad_norm": 1.0055689106321928, | |
| "learning_rate": 7.013302107036865e-06, | |
| "loss": 0.5597, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.76544, | |
| "grad_norm": 1.092563741333246, | |
| "learning_rate": 6.941399188322706e-06, | |
| "loss": 0.5412, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.76672, | |
| "grad_norm": 1.4806630716838582, | |
| "learning_rate": 6.86980729186012e-06, | |
| "loss": 0.4715, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 1.1616869547894457, | |
| "learning_rate": 6.7985276506722565e-06, | |
| "loss": 0.5399, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.76928, | |
| "grad_norm": 1.0223628889532805, | |
| "learning_rate": 6.727561492404322e-06, | |
| "loss": 0.4376, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.77056, | |
| "grad_norm": 1.2933807749502593, | |
| "learning_rate": 6.65691003930238e-06, | |
| "loss": 0.4706, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.77184, | |
| "grad_norm": 1.0268455047625684, | |
| "learning_rate": 6.586574508192414e-06, | |
| "loss": 0.5267, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.77312, | |
| "grad_norm": 1.095020523638991, | |
| "learning_rate": 6.516556110459241e-06, | |
| "loss": 0.4738, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7744, | |
| "grad_norm": 1.0985186107825293, | |
| "learning_rate": 6.446856052025735e-06, | |
| "loss": 0.5491, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.77568, | |
| "grad_norm": 1.118502076639103, | |
| "learning_rate": 6.377475533332028e-06, | |
| "loss": 0.501, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.77696, | |
| "grad_norm": 1.2074341290422528, | |
| "learning_rate": 6.3084157493148336e-06, | |
| "loss": 0.4945, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.77824, | |
| "grad_norm": 1.3452903764068787, | |
| "learning_rate": 6.23967788938688e-06, | |
| "loss": 0.5413, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.77952, | |
| "grad_norm": 1.053341753373437, | |
| "learning_rate": 6.171263137416389e-06, | |
| "loss": 0.5887, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7808, | |
| "grad_norm": 1.1108775660363333, | |
| "learning_rate": 6.10317267170675e-06, | |
| "loss": 0.574, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.78208, | |
| "grad_norm": 1.1282719187508847, | |
| "learning_rate": 6.03540766497617e-06, | |
| "loss": 0.5708, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.78336, | |
| "grad_norm": 0.9849227160845696, | |
| "learning_rate": 5.967969284337471e-06, | |
| "loss": 0.488, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.78464, | |
| "grad_norm": 0.9983979598777111, | |
| "learning_rate": 5.900858691278066e-06, | |
| "loss": 0.5583, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.78592, | |
| "grad_norm": 1.3149384447699428, | |
| "learning_rate": 5.834077041639846e-06, | |
| "loss": 0.5719, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7872, | |
| "grad_norm": 1.1163064585542375, | |
| "learning_rate": 5.767625485599365e-06, | |
| "loss": 0.5835, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.78848, | |
| "grad_norm": 1.7221044361813862, | |
| "learning_rate": 5.70150516764798e-06, | |
| "loss": 0.5892, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.78976, | |
| "grad_norm": 1.1687895489075768, | |
| "learning_rate": 5.635717226572154e-06, | |
| "loss": 0.4852, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.79104, | |
| "grad_norm": 1.0568297929200168, | |
| "learning_rate": 5.570262795433851e-06, | |
| "loss": 0.5461, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.79232, | |
| "grad_norm": 1.1046789596063666, | |
| "learning_rate": 5.505143001550983e-06, | |
| "loss": 0.5175, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.7936, | |
| "grad_norm": 1.0758097500032344, | |
| "learning_rate": 5.440358966478074e-06, | |
| "loss": 0.552, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.79488, | |
| "grad_norm": 1.1450055146891784, | |
| "learning_rate": 5.375911805986838e-06, | |
| "loss": 0.5185, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.79616, | |
| "grad_norm": 1.0102193691443953, | |
| "learning_rate": 5.311802630047053e-06, | |
| "loss": 0.5264, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.79744, | |
| "grad_norm": 1.11285665818929, | |
| "learning_rate": 5.248032542807413e-06, | |
| "loss": 0.5694, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.79872, | |
| "grad_norm": 1.2033172371680183, | |
| "learning_rate": 5.184602642576475e-06, | |
| "loss": 0.5197, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.0514835178191522, | |
| "learning_rate": 5.121514021803794e-06, | |
| "loss": 0.5191, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.80128, | |
| "grad_norm": 1.0917149032038982, | |
| "learning_rate": 5.058767767061096e-06, | |
| "loss": 0.5284, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.80256, | |
| "grad_norm": 1.3407018432650684, | |
| "learning_rate": 4.996364959023541e-06, | |
| "loss": 0.5121, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.80384, | |
| "grad_norm": 1.4256027453245268, | |
| "learning_rate": 4.934306672451131e-06, | |
| "loss": 0.5673, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.80512, | |
| "grad_norm": 1.0618584748116073, | |
| "learning_rate": 4.872593976170187e-06, | |
| "loss": 0.5386, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.8064, | |
| "grad_norm": 1.0426399155093706, | |
| "learning_rate": 4.811227933054979e-06, | |
| "loss": 0.6241, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.80768, | |
| "grad_norm": 1.084924447474932, | |
| "learning_rate": 4.750209600009348e-06, | |
| "loss": 0.5522, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.80896, | |
| "grad_norm": 1.4732247401868341, | |
| "learning_rate": 4.689540027948566e-06, | |
| "loss": 0.5298, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.81024, | |
| "grad_norm": 1.166602303315911, | |
| "learning_rate": 4.629220261781217e-06, | |
| "loss": 0.5445, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.81152, | |
| "grad_norm": 1.229356014197064, | |
| "learning_rate": 4.56925134039119e-06, | |
| "loss": 0.5435, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.8128, | |
| "grad_norm": 1.0806291107561352, | |
| "learning_rate": 4.509634296619794e-06, | |
| "loss": 0.4837, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.81408, | |
| "grad_norm": 1.1359445952442202, | |
| "learning_rate": 4.450370157247976e-06, | |
| "loss": 0.4957, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.81536, | |
| "grad_norm": 1.2478790816421126, | |
| "learning_rate": 4.391459942978637e-06, | |
| "loss": 0.5444, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.81664, | |
| "grad_norm": 1.1781506211630577, | |
| "learning_rate": 4.332904668419016e-06, | |
| "loss": 0.5502, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.81792, | |
| "grad_norm": 1.2127489300441003, | |
| "learning_rate": 4.274705342063273e-06, | |
| "loss": 0.6245, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.8192, | |
| "grad_norm": 1.118910926855837, | |
| "learning_rate": 4.2168629662751035e-06, | |
| "loss": 0.5654, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.82048, | |
| "grad_norm": 1.1902793827050928, | |
| "learning_rate": 4.159378537270423e-06, | |
| "loss": 0.5591, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.82176, | |
| "grad_norm": 1.0976660557767342, | |
| "learning_rate": 4.102253045100282e-06, | |
| "loss": 0.5046, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.82304, | |
| "grad_norm": 1.19137113719625, | |
| "learning_rate": 4.045487473633777e-06, | |
| "loss": 0.5414, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.82432, | |
| "grad_norm": 1.199609836047892, | |
| "learning_rate": 3.989082800541105e-06, | |
| "loss": 0.5053, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8256, | |
| "grad_norm": 1.1437952123341775, | |
| "learning_rate": 3.933039997276722e-06, | |
| "loss": 0.5169, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.82688, | |
| "grad_norm": 1.2288056698961283, | |
| "learning_rate": 3.877360029062646e-06, | |
| "loss": 0.5448, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.82816, | |
| "grad_norm": 1.3549924552843626, | |
| "learning_rate": 3.822043854871793e-06, | |
| "loss": 0.489, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.82944, | |
| "grad_norm": 1.330530322422781, | |
| "learning_rate": 3.7670924274114645e-06, | |
| "loss": 0.4976, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.83072, | |
| "grad_norm": 1.601413126987568, | |
| "learning_rate": 3.712506693106965e-06, | |
| "loss": 0.6284, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 1.3804199737313514, | |
| "learning_rate": 3.6582875920852882e-06, | |
| "loss": 0.5737, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.83328, | |
| "grad_norm": 1.2285335413724352, | |
| "learning_rate": 3.604436058158911e-06, | |
| "loss": 0.6018, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.83456, | |
| "grad_norm": 1.089609290527028, | |
| "learning_rate": 3.550953018809733e-06, | |
| "loss": 0.5524, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.83584, | |
| "grad_norm": 1.0193410119594306, | |
| "learning_rate": 3.4978393951730887e-06, | |
| "loss": 0.5182, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.83712, | |
| "grad_norm": 1.2309014433221501, | |
| "learning_rate": 3.445096102021894e-06, | |
| "loss": 0.6176, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.8384, | |
| "grad_norm": 1.1509287956439467, | |
| "learning_rate": 3.3927240477508645e-06, | |
| "loss": 0.5477, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.83968, | |
| "grad_norm": 1.2835863999348207, | |
| "learning_rate": 3.3407241343609205e-06, | |
| "loss": 0.5318, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.84096, | |
| "grad_norm": 1.1486061721156429, | |
| "learning_rate": 3.289097257443588e-06, | |
| "loss": 0.5566, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.84224, | |
| "grad_norm": 1.096850848382564, | |
| "learning_rate": 3.2378443061656328e-06, | |
| "loss": 0.5567, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.84352, | |
| "grad_norm": 1.2359464293221607, | |
| "learning_rate": 3.1869661632537075e-06, | |
| "loss": 0.528, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.8448, | |
| "grad_norm": 1.0019416595160129, | |
| "learning_rate": 3.1364637049791616e-06, | |
| "loss": 0.4936, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.84608, | |
| "grad_norm": 1.10789194657861, | |
| "learning_rate": 3.0863378011429662e-06, | |
| "loss": 0.5343, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.84736, | |
| "grad_norm": 1.1437948141171734, | |
| "learning_rate": 3.036589315060681e-06, | |
| "loss": 0.5474, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.84864, | |
| "grad_norm": 1.2443624993715714, | |
| "learning_rate": 2.9872191035476643e-06, | |
| "loss": 0.4849, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.84992, | |
| "grad_norm": 1.2743188352209562, | |
| "learning_rate": 2.938228016904249e-06, | |
| "loss": 0.5695, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8512, | |
| "grad_norm": 1.1970996778684515, | |
| "learning_rate": 2.8896168989011096e-06, | |
| "loss": 0.5648, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.85248, | |
| "grad_norm": 1.2975613742051237, | |
| "learning_rate": 2.8413865867647825e-06, | |
| "loss": 0.5307, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.85376, | |
| "grad_norm": 1.046244950966025, | |
| "learning_rate": 2.793537911163163e-06, | |
| "loss": 0.5235, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.85504, | |
| "grad_norm": 1.2703450922830408, | |
| "learning_rate": 2.7460716961912686e-06, | |
| "loss": 0.4855, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.85632, | |
| "grad_norm": 1.135703167308996, | |
| "learning_rate": 2.698988759357013e-06, | |
| "loss": 0.5243, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.8576, | |
| "grad_norm": 1.2090902146826676, | |
| "learning_rate": 2.6522899115671266e-06, | |
| "loss": 0.5316, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.85888, | |
| "grad_norm": 1.0952078528377482, | |
| "learning_rate": 2.6059759571132102e-06, | |
| "loss": 0.6011, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.86016, | |
| "grad_norm": 1.1328211030379634, | |
| "learning_rate": 2.5600476936578436e-06, | |
| "loss": 0.5998, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.86144, | |
| "grad_norm": 1.2214658316621616, | |
| "learning_rate": 2.51450591222091e-06, | |
| "loss": 0.5633, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.86272, | |
| "grad_norm": 1.3808413521414007, | |
| "learning_rate": 2.469351397165892e-06, | |
| "loss": 0.5498, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 1.189561435957974, | |
| "learning_rate": 2.424584926186432e-06, | |
| "loss": 0.5507, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.86528, | |
| "grad_norm": 1.1035428119256585, | |
| "learning_rate": 2.3802072702929197e-06, | |
| "loss": 0.4844, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.86656, | |
| "grad_norm": 0.9933044665909531, | |
| "learning_rate": 2.3362191937991716e-06, | |
| "loss": 0.5523, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.86784, | |
| "grad_norm": 1.4577996549770615, | |
| "learning_rate": 2.292621454309332e-06, | |
| "loss": 0.538, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.86912, | |
| "grad_norm": 1.224510594886684, | |
| "learning_rate": 2.2494148027047747e-06, | |
| "loss": 0.5059, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.8704, | |
| "grad_norm": 1.0657315879566949, | |
| "learning_rate": 2.206599983131205e-06, | |
| "loss": 0.5251, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.87168, | |
| "grad_norm": 1.4123295884997187, | |
| "learning_rate": 2.1641777329858033e-06, | |
| "loss": 0.6012, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.87296, | |
| "grad_norm": 1.043966311767512, | |
| "learning_rate": 2.1221487829045678e-06, | |
| "loss": 0.5026, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.87424, | |
| "grad_norm": 1.2385598290051774, | |
| "learning_rate": 2.0805138567497217e-06, | |
| "loss": 0.5625, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.87552, | |
| "grad_norm": 1.4336778850101726, | |
| "learning_rate": 2.039273671597208e-06, | |
| "loss": 0.5688, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8768, | |
| "grad_norm": 1.06986941070996, | |
| "learning_rate": 1.9984289377243894e-06, | |
| "loss": 0.4935, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.87808, | |
| "grad_norm": 1.1182880684718772, | |
| "learning_rate": 1.9579803585977827e-06, | |
| "loss": 0.5635, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.87936, | |
| "grad_norm": 1.0388130905287614, | |
| "learning_rate": 1.917928630860963e-06, | |
| "loss": 0.5649, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.88064, | |
| "grad_norm": 1.014905443186478, | |
| "learning_rate": 1.8782744443225503e-06, | |
| "loss": 0.5906, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.88192, | |
| "grad_norm": 1.3182947811281995, | |
| "learning_rate": 1.8390184819443379e-06, | |
| "loss": 0.5083, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.8832, | |
| "grad_norm": 1.1036134272383793, | |
| "learning_rate": 1.8001614198295325e-06, | |
| "loss": 0.546, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.88448, | |
| "grad_norm": 1.1748225273559305, | |
| "learning_rate": 1.7617039272110863e-06, | |
| "loss": 0.5639, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.88576, | |
| "grad_norm": 1.1824429254941855, | |
| "learning_rate": 1.7236466664402068e-06, | |
| "loss": 0.5472, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.88704, | |
| "grad_norm": 1.1692957233756056, | |
| "learning_rate": 1.6859902929749232e-06, | |
| "loss": 0.5215, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.88832, | |
| "grad_norm": 1.45591248138988, | |
| "learning_rate": 1.6487354553688106e-06, | |
| "loss": 0.5193, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8896, | |
| "grad_norm": 1.0751195291191613, | |
| "learning_rate": 1.6118827952598114e-06, | |
| "loss": 0.5447, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.89088, | |
| "grad_norm": 1.2649637649798582, | |
| "learning_rate": 1.5754329473591845e-06, | |
| "loss": 0.4816, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.89216, | |
| "grad_norm": 1.0966615263818826, | |
| "learning_rate": 1.5393865394405903e-06, | |
| "loss": 0.5688, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.89344, | |
| "grad_norm": 1.2484202890583995, | |
| "learning_rate": 1.5037441923292401e-06, | |
| "loss": 0.545, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.89472, | |
| "grad_norm": 1.094226150451411, | |
| "learning_rate": 1.468506519891255e-06, | |
| "loss": 0.5247, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 1.1737531804888182, | |
| "learning_rate": 1.4336741290230632e-06, | |
| "loss": 0.5606, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.89728, | |
| "grad_norm": 1.2613980261409754, | |
| "learning_rate": 1.3992476196409337e-06, | |
| "loss": 0.601, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.89856, | |
| "grad_norm": 1.2355481541572428, | |
| "learning_rate": 1.3652275846706809e-06, | |
| "loss": 0.4906, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.89984, | |
| "grad_norm": 1.1276441113525533, | |
| "learning_rate": 1.331614610037421e-06, | |
| "loss": 0.5059, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.90112, | |
| "grad_norm": 1.126716292874737, | |
| "learning_rate": 1.2984092746555015e-06, | |
| "loss": 0.498, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9024, | |
| "grad_norm": 1.3631639847075514, | |
| "learning_rate": 1.2656121504185213e-06, | |
| "loss": 0.5194, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.90368, | |
| "grad_norm": 1.1982826408687786, | |
| "learning_rate": 1.233223802189476e-06, | |
| "loss": 0.5095, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.90496, | |
| "grad_norm": 1.162885206132346, | |
| "learning_rate": 1.2012447877910476e-06, | |
| "loss": 0.5254, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.90624, | |
| "grad_norm": 1.2070681253825588, | |
| "learning_rate": 1.1696756579959611e-06, | |
| "loss": 0.6134, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.90752, | |
| "grad_norm": 1.2336494911100582, | |
| "learning_rate": 1.1385169565175573e-06, | |
| "loss": 0.5358, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.9088, | |
| "grad_norm": 1.055109444787224, | |
| "learning_rate": 1.1077692200003597e-06, | |
| "loss": 0.6004, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.91008, | |
| "grad_norm": 1.1894486389194694, | |
| "learning_rate": 1.0774329780108838e-06, | |
| "loss": 0.4955, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.91136, | |
| "grad_norm": 1.2884398815666143, | |
| "learning_rate": 1.0475087530284978e-06, | |
| "loss": 0.5395, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.91264, | |
| "grad_norm": 1.2949729351162262, | |
| "learning_rate": 1.017997060436418e-06, | |
| "loss": 0.511, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.91392, | |
| "grad_norm": 1.114700286942753, | |
| "learning_rate": 9.888984085128506e-07, | |
| "loss": 0.5365, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.9152, | |
| "grad_norm": 0.9396086075330566, | |
| "learning_rate": 9.602132984222024e-07, | |
| "loss": 0.55, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.91648, | |
| "grad_norm": 1.1959488129300242, | |
| "learning_rate": 9.319422242065029e-07, | |
| "loss": 0.5276, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.91776, | |
| "grad_norm": 1.1718303327328687, | |
| "learning_rate": 9.040856727768415e-07, | |
| "loss": 0.6005, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.91904, | |
| "grad_norm": 1.1515491379164742, | |
| "learning_rate": 8.766441239050155e-07, | |
| "loss": 0.5343, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.92032, | |
| "grad_norm": 1.1951704933053644, | |
| "learning_rate": 8.496180502152618e-07, | |
| "loss": 0.5646, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.9216, | |
| "grad_norm": 1.0403038518707561, | |
| "learning_rate": 8.230079171760995e-07, | |
| "loss": 0.5951, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.92288, | |
| "grad_norm": 1.2624464378095743, | |
| "learning_rate": 7.968141830923342e-07, | |
| "loss": 0.5291, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.92416, | |
| "grad_norm": 1.304953665745907, | |
| "learning_rate": 7.71037299097152e-07, | |
| "loss": 0.5811, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.92544, | |
| "grad_norm": 1.0580072277902857, | |
| "learning_rate": 7.456777091443573e-07, | |
| "loss": 0.5262, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.92672, | |
| "grad_norm": 1.132162411329924, | |
| "learning_rate": 7.207358500007228e-07, | |
| "loss": 0.5394, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 1.0966037073899235, | |
| "learning_rate": 6.962121512384567e-07, | |
| "loss": 0.571, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.92928, | |
| "grad_norm": 1.0803803532967529, | |
| "learning_rate": 6.721070352278397e-07, | |
| "loss": 0.5281, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.93056, | |
| "grad_norm": 1.374459406022423, | |
| "learning_rate": 6.484209171298938e-07, | |
| "loss": 0.5346, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.93184, | |
| "grad_norm": 1.160363924619333, | |
| "learning_rate": 6.251542048892888e-07, | |
| "loss": 0.6027, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.93312, | |
| "grad_norm": 1.038330316122472, | |
| "learning_rate": 6.023072992272783e-07, | |
| "loss": 0.5313, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.9344, | |
| "grad_norm": 1.1561107653176907, | |
| "learning_rate": 5.798805936348184e-07, | |
| "loss": 0.5297, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.93568, | |
| "grad_norm": 1.2644499222150027, | |
| "learning_rate": 5.578744743657771e-07, | |
| "loss": 0.5581, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.93696, | |
| "grad_norm": 1.2415591193593967, | |
| "learning_rate": 5.362893204302943e-07, | |
| "loss": 0.5489, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.93824, | |
| "grad_norm": 1.0364443106003203, | |
| "learning_rate": 5.151255035882429e-07, | |
| "loss": 0.5541, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.93952, | |
| "grad_norm": 1.1349432378864015, | |
| "learning_rate": 4.943833883428339e-07, | |
| "loss": 0.5221, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.9408, | |
| "grad_norm": 1.1916828886756892, | |
| "learning_rate": 4.740633319343296e-07, | |
| "loss": 0.5024, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.94208, | |
| "grad_norm": 1.199654926497124, | |
| "learning_rate": 4.541656843339126e-07, | |
| "loss": 0.5689, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.94336, | |
| "grad_norm": 1.0772765680742296, | |
| "learning_rate": 4.346907882376239e-07, | |
| "loss": 0.5782, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.94464, | |
| "grad_norm": 1.1734100141260488, | |
| "learning_rate": 4.1563897906049254e-07, | |
| "loss": 0.5428, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.94592, | |
| "grad_norm": 1.0649206188195541, | |
| "learning_rate": 3.970105849307376e-07, | |
| "loss": 0.5142, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.9472, | |
| "grad_norm": 1.1593701629307314, | |
| "learning_rate": 3.7880592668413097e-07, | |
| "loss": 0.5855, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.94848, | |
| "grad_norm": 1.1129865180604308, | |
| "learning_rate": 3.610253178584627e-07, | |
| "loss": 0.5642, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.94976, | |
| "grad_norm": 1.041115620377996, | |
| "learning_rate": 3.4366906468814296e-07, | |
| "loss": 0.5592, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.95104, | |
| "grad_norm": 1.1265173048766302, | |
| "learning_rate": 3.2673746609893085e-07, | |
| "loss": 0.5395, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.95232, | |
| "grad_norm": 1.0800090936296318, | |
| "learning_rate": 3.1023081370278316e-07, | |
| "loss": 0.5036, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9536, | |
| "grad_norm": 1.2450519919977088, | |
| "learning_rate": 2.941493917928334e-07, | |
| "loss": 0.5548, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.95488, | |
| "grad_norm": 1.3647360259096213, | |
| "learning_rate": 2.7849347733849286e-07, | |
| "loss": 0.521, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.95616, | |
| "grad_norm": 1.1341317960241324, | |
| "learning_rate": 2.632633399806794e-07, | |
| "loss": 0.4605, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.95744, | |
| "grad_norm": 1.2402020713328101, | |
| "learning_rate": 2.484592420271853e-07, | |
| "loss": 0.6411, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.95872, | |
| "grad_norm": 1.1382506025272738, | |
| "learning_rate": 2.3408143844814434e-07, | |
| "loss": 0.4754, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.1575715904688921, | |
| "learning_rate": 2.2013017687164683e-07, | |
| "loss": 0.5752, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.96128, | |
| "grad_norm": 1.1951466005824083, | |
| "learning_rate": 2.0660569757947057e-07, | |
| "loss": 0.5305, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.96256, | |
| "grad_norm": 1.074547380237234, | |
| "learning_rate": 1.9350823350295642e-07, | |
| "loss": 0.5707, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.96384, | |
| "grad_norm": 1.171892371858253, | |
| "learning_rate": 1.8083801021898105e-07, | |
| "loss": 0.543, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.96512, | |
| "grad_norm": 1.2243547379647337, | |
| "learning_rate": 1.68595245946071e-07, | |
| "loss": 0.5566, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9664, | |
| "grad_norm": 1.2109301184588872, | |
| "learning_rate": 1.5678015154066417e-07, | |
| "loss": 0.4952, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.96768, | |
| "grad_norm": 1.1636364367891447, | |
| "learning_rate": 1.4539293049345426e-07, | |
| "loss": 0.5219, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.96896, | |
| "grad_norm": 2.081053175685527, | |
| "learning_rate": 1.3443377892589925e-07, | |
| "loss": 0.4338, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.97024, | |
| "grad_norm": 1.0047794129156076, | |
| "learning_rate": 1.2390288558684348e-07, | |
| "loss": 0.5486, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.97152, | |
| "grad_norm": 1.1014083528546708, | |
| "learning_rate": 1.1380043184925915e-07, | |
| "loss": 0.5241, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.9728, | |
| "grad_norm": 1.1137599558598383, | |
| "learning_rate": 1.041265917071349e-07, | |
| "loss": 0.5652, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.97408, | |
| "grad_norm": 1.2473110326618206, | |
| "learning_rate": 9.488153177246162e-08, | |
| "loss": 0.5903, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.97536, | |
| "grad_norm": 1.0771601825171315, | |
| "learning_rate": 8.606541127238465e-08, | |
| "loss": 0.5254, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.97664, | |
| "grad_norm": 1.1920491619718228, | |
| "learning_rate": 7.767838204644218e-08, | |
| "loss": 0.5747, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.97792, | |
| "grad_norm": 1.3979547504723078, | |
| "learning_rate": 6.972058854396168e-08, | |
| "loss": 0.5193, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9792, | |
| "grad_norm": 1.3612908714509893, | |
| "learning_rate": 6.219216782156479e-08, | |
| "loss": 0.5255, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.98048, | |
| "grad_norm": 1.346844138047549, | |
| "learning_rate": 5.509324954081074e-08, | |
| "loss": 0.4978, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.98176, | |
| "grad_norm": 1.3618047490886835, | |
| "learning_rate": 4.8423955965967715e-08, | |
| "loss": 0.4906, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.98304, | |
| "grad_norm": 1.1072215933472795, | |
| "learning_rate": 4.218440196189499e-08, | |
| "loss": 0.4888, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.98432, | |
| "grad_norm": 1.1349498434880683, | |
| "learning_rate": 3.6374694992077905e-08, | |
| "loss": 0.6076, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.9856, | |
| "grad_norm": 1.023092821919443, | |
| "learning_rate": 3.0994935116768206e-08, | |
| "loss": 0.5699, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.98688, | |
| "grad_norm": 1.2906016743158035, | |
| "learning_rate": 2.6045214991263224e-08, | |
| "loss": 0.515, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.98816, | |
| "grad_norm": 1.1749435900367358, | |
| "learning_rate": 2.1525619864307123e-08, | |
| "loss": 0.5719, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.98944, | |
| "grad_norm": 1.3052044649802965, | |
| "learning_rate": 1.7436227576633767e-08, | |
| "loss": 0.5178, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.99072, | |
| "grad_norm": 0.9887619519706906, | |
| "learning_rate": 1.3777108559606678e-08, | |
| "loss": 0.5256, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 1.2257730227628638, | |
| "learning_rate": 1.0548325834031091e-08, | |
| "loss": 0.5207, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.99328, | |
| "grad_norm": 1.0861821825699252, | |
| "learning_rate": 7.749935009046528e-09, | |
| "loss": 0.5083, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.99456, | |
| "grad_norm": 1.0230122646559745, | |
| "learning_rate": 5.381984281171981e-09, | |
| "loss": 0.5279, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.99584, | |
| "grad_norm": 1.0565200974449374, | |
| "learning_rate": 3.4445144334954715e-09, | |
| "loss": 0.5567, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.99712, | |
| "grad_norm": 1.362696721447004, | |
| "learning_rate": 1.93755883494684e-09, | |
| "loss": 0.5312, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.9984, | |
| "grad_norm": 1.5329037890797512, | |
| "learning_rate": 8.611434397343132e-10, | |
| "loss": 0.6223, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.99968, | |
| "grad_norm": 1.3285702582274823, | |
| "learning_rate": 2.1528678690041493e-10, | |
| "loss": 0.5301, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.99968, | |
| "step": 781, | |
| "total_flos": 490406510395392.0, | |
| "train_loss": 0.679141880531775, | |
| "train_runtime": 7593.0929, | |
| "train_samples_per_second": 13.17, | |
| "train_steps_per_second": 0.103 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 781, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 490406510395392.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
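
The object above matches the `trainer_state.json` checkpoint file written by the Hugging Face `transformers` `Trainer`: `log_history` holds one entry per logged step (with `logging_steps` at 1.0, all 781 steps appear), and the trailing entry plus the top-level keys summarize the run. As a minimal sketch of how one might consume this file programmatically, assuming it is saved locally as `trainer_state.json` (the filename is illustrative, not part of the original artifact):

```python
import json

# Load the serialized trainer state (the path is an assumed example).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry "loss" and "learning_rate"; the final summary
# entry (train_loss, train_runtime, ...) lacks them, so filter on the key.
per_step = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in per_step]
losses = [e["loss"] for e in per_step]

print(f"logged steps: {len(steps)} of max_steps={state['max_steps']}")
print(f"final loss: {losses[-1]:.4f} at step {steps[-1]}")
print(f"run-summary train_loss: {state['log_history'][-1]['train_loss']:.4f}")
```

Against this trace, the sketch would report 781 logged steps, a final per-step loss of 0.5301, and the run's mean `train_loss` of roughly 0.6791, consistent with the summary block above.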