| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.398950131233596, | |
| "eval_steps": 200000, | |
| "global_step": 160000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.4997000599880024e-06, | |
| "loss": 8.6813, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.999400119976005e-06, | |
| "loss": 8.0951, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.499100179964007e-06, | |
| "loss": 7.7394, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.99880023995201e-06, | |
| "loss": 7.4477, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 7.4985002999400115e-06, | |
| "loss": 7.2516, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 8.998200359928014e-06, | |
| "loss": 7.1331, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.0497900419916016e-05, | |
| "loss": 7.0447, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.199760047990402e-05, | |
| "loss": 6.9683, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.3497300539892021e-05, | |
| "loss": 6.9037, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.4997000599880023e-05, | |
| "loss": 6.8472, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.6496700659868028e-05, | |
| "loss": 6.7907, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.799640071985603e-05, | |
| "loss": 6.7407, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9496100779844032e-05, | |
| "loss": 6.7036, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.0995800839832032e-05, | |
| "loss": 6.6485, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.249550089982004e-05, | |
| "loss": 6.6153, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.399520095980804e-05, | |
| "loss": 6.5826, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.5494901019796042e-05, | |
| "loss": 6.553, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.6994601079784043e-05, | |
| "loss": 6.5222, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.8494301139772046e-05, | |
| "loss": 6.4979, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.9994001199760046e-05, | |
| "loss": 6.4695, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.1493701259748056e-05, | |
| "loss": 6.4505, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.2993401319736057e-05, | |
| "loss": 6.4254, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.449310137972406e-05, | |
| "loss": 6.412, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.599280143971206e-05, | |
| "loss": 6.3885, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.7492501499700064e-05, | |
| "loss": 6.3815, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.8992201559688064e-05, | |
| "loss": 6.3623, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.0491901619676064e-05, | |
| "loss": 6.3464, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.1991601679664064e-05, | |
| "loss": 6.3281, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.349130173965207e-05, | |
| "loss": 6.3324, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.499100179964008e-05, | |
| "loss": 6.3128, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.649070185962808e-05, | |
| "loss": 6.3033, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.799040191961608e-05, | |
| "loss": 6.3015, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.949010197960408e-05, | |
| "loss": 6.2881, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.996409765438009e-05, | |
| "loss": 6.2728, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.990970016101658e-05, | |
| "loss": 6.2617, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.9855302667653076e-05, | |
| "loss": 6.2561, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.980090517428957e-05, | |
| "loss": 6.2531, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.974650768092606e-05, | |
| "loss": 6.2222, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.9692110187562557e-05, | |
| "loss": 6.2062, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.963771269419905e-05, | |
| "loss": 6.1925, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.958331520083555e-05, | |
| "loss": 6.1704, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.952918969493886e-05, | |
| "loss": 6.1479, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.947479220157536e-05, | |
| "loss": 6.1375, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.9420394708211846e-05, | |
| "loss": 6.1155, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.936599721484834e-05, | |
| "loss": 6.0921, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.931159972148484e-05, | |
| "loss": 6.0671, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.9257202228121326e-05, | |
| "loss": 6.0437, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.920280473475782e-05, | |
| "loss": 6.0032, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.914840724139432e-05, | |
| "loss": 5.9209, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9094009748030814e-05, | |
| "loss": 5.8316, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.90396122546673e-05, | |
| "loss": 5.7568, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.89852147613038e-05, | |
| "loss": 5.6574, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.893108925540711e-05, | |
| "loss": 5.5117, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.887669176204361e-05, | |
| "loss": 5.3986, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.88222942686801e-05, | |
| "loss": 5.2336, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.87678967753166e-05, | |
| "loss": 5.0519, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.871349928195309e-05, | |
| "loss": 4.9005, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.8659101788589584e-05, | |
| "loss": 4.769, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.860470429522608e-05, | |
| "loss": 4.6484, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.855030680186257e-05, | |
| "loss": 4.5375, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.8495909308499065e-05, | |
| "loss": 4.4369, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.844151181513556e-05, | |
| "loss": 4.3437, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.8387114321772056e-05, | |
| "loss": 4.267, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.8332716828408545e-05, | |
| "loss": 4.161, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.827831933504504e-05, | |
| "loss": 4.0868, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.822392184168154e-05, | |
| "loss": 4.0029, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.8169524348318026e-05, | |
| "loss": 3.9486, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.811512685495453e-05, | |
| "loss": 3.8743, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.8060729361591025e-05, | |
| "loss": 3.8206, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.8006331868227514e-05, | |
| "loss": 3.7676, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.795193437486401e-05, | |
| "loss": 3.7225, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.7897536881500505e-05, | |
| "loss": 3.6837, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.784341137560381e-05, | |
| "loss": 3.6421, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.778901388224031e-05, | |
| "loss": 3.6167, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.77346163888768e-05, | |
| "loss": 3.5802, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.76802188955133e-05, | |
| "loss": 3.5469, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.762582140214979e-05, | |
| "loss": 3.5208, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.7571423908786284e-05, | |
| "loss": 3.494, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.7517298402889596e-05, | |
| "loss": 3.4647, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.746290090952609e-05, | |
| "loss": 3.4417, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.740850341616259e-05, | |
| "loss": 3.4267, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.735410592279908e-05, | |
| "loss": 3.3992, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.729970842943557e-05, | |
| "loss": 3.3831, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.724531093607207e-05, | |
| "loss": 3.3647, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.7190913442708564e-05, | |
| "loss": 3.3377, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.7136515949345054e-05, | |
| "loss": 3.3197, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.708211845598155e-05, | |
| "loss": 3.2985, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.7027720962618045e-05, | |
| "loss": 3.287, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.697332346925454e-05, | |
| "loss": 3.2748, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.691892597589103e-05, | |
| "loss": 3.2557, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.6864528482527526e-05, | |
| "loss": 3.2419, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.681013098916402e-05, | |
| "loss": 3.2286, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.675573349580051e-05, | |
| "loss": 3.2102, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.670133600243701e-05, | |
| "loss": 3.1987, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.664693850907351e-05, | |
| "loss": 3.1854, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.659254101571e-05, | |
| "loss": 3.1682, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.6538143522346494e-05, | |
| "loss": 3.1562, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.648374602898299e-05, | |
| "loss": 3.1366, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.6429620523086296e-05, | |
| "loss": 3.1273, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.637522302972279e-05, | |
| "loss": 3.1139, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.632082553635929e-05, | |
| "loss": 3.1045, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.6266428042995777e-05, | |
| "loss": 3.0962, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.6212302537099096e-05, | |
| "loss": 3.0913, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.615790504373559e-05, | |
| "loss": 3.0805, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.610350755037208e-05, | |
| "loss": 3.0662, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.604911005700858e-05, | |
| "loss": 3.0485, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.599471256364507e-05, | |
| "loss": 3.0438, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.594031507028156e-05, | |
| "loss": 3.0368, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.588591757691806e-05, | |
| "loss": 3.0248, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.583152008355455e-05, | |
| "loss": 3.0124, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.577712259019105e-05, | |
| "loss": 3.0025, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.572272509682754e-05, | |
| "loss": 2.9902, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.5668327603464034e-05, | |
| "loss": 2.9838, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.561393011010053e-05, | |
| "loss": 2.9701, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.555953261673702e-05, | |
| "loss": 2.9594, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.5505135123373515e-05, | |
| "loss": 2.9549, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.545073763001001e-05, | |
| "loss": 2.9462, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.5396340136646506e-05, | |
| "loss": 2.9471, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.5341942643282996e-05, | |
| "loss": 2.9271, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.5287817137386315e-05, | |
| "loss": 2.9241, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.5233419644022804e-05, | |
| "loss": 2.9156, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.51790221506593e-05, | |
| "loss": 2.9079, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.5124624657295796e-05, | |
| "loss": 2.898, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.507022716393229e-05, | |
| "loss": 2.8902, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.501582967056878e-05, | |
| "loss": 2.8921, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.4961432177205276e-05, | |
| "loss": 2.8749, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.490703468384177e-05, | |
| "loss": 2.8724, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.485263719047826e-05, | |
| "loss": 2.8649, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.479823969711476e-05, | |
| "loss": 2.8586, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.474384220375125e-05, | |
| "loss": 2.8421, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.468944471038775e-05, | |
| "loss": 2.843, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.463504721702424e-05, | |
| "loss": 2.8397, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.4580649723660734e-05, | |
| "loss": 2.8311, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.452625223029723e-05, | |
| "loss": 2.8143, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.4471854736933725e-05, | |
| "loss": 2.8106, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 4.441745724357022e-05, | |
| "loss": 2.8199, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.436305975020672e-05, | |
| "loss": 2.8039, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 4.4308662256843206e-05, | |
| "loss": 2.7975, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 4.425453675094652e-05, | |
| "loss": 2.7903, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 4.4200139257583015e-05, | |
| "loss": 2.7907, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 4.4145741764219504e-05, | |
| "loss": 2.7836, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 4.4091344270856e-05, | |
| "loss": 2.7825, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.4036946777492495e-05, | |
| "loss": 2.765, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.3982549284128984e-05, | |
| "loss": 2.773, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.392815179076548e-05, | |
| "loss": 2.7608, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.3873754297401976e-05, | |
| "loss": 2.76, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.381935680403847e-05, | |
| "loss": 2.7613, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.376495931067497e-05, | |
| "loss": 2.7319, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 4.3710561817311464e-05, | |
| "loss": 2.7377, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.365616432394796e-05, | |
| "loss": 2.736, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.360176683058445e-05, | |
| "loss": 2.7348, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.3547369337220944e-05, | |
| "loss": 2.7285, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 4.349324383132426e-05, | |
| "loss": 2.7299, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 4.3438846337960746e-05, | |
| "loss": 2.7208, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 4.338444884459724e-05, | |
| "loss": 2.7115, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 4.333005135123374e-05, | |
| "loss": 2.7033, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 4.327565385787023e-05, | |
| "loss": 2.6996, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 4.322125636450672e-05, | |
| "loss": 2.6925, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 4.316685887114322e-05, | |
| "loss": 2.6896, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 4.3112461377779714e-05, | |
| "loss": 2.6846, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.30580638844162e-05, | |
| "loss": 2.6848, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 4.3003666391052706e-05, | |
| "loss": 2.6764, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 4.29492688976892e-05, | |
| "loss": 2.6846, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.289487140432569e-05, | |
| "loss": 2.6742, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.2840745898429003e-05, | |
| "loss": 2.6667, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 4.27863484050655e-05, | |
| "loss": 2.6632, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 4.273195091170199e-05, | |
| "loss": 2.6593, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 4.2677553418338484e-05, | |
| "loss": 2.6568, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 4.262315592497498e-05, | |
| "loss": 2.6514, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.256875843161147e-05, | |
| "loss": 2.648, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 4.2514360938247965e-05, | |
| "loss": 2.6362, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 4.245996344488446e-05, | |
| "loss": 2.6468, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.240556595152096e-05, | |
| "loss": 2.6268, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.2351168458157446e-05, | |
| "loss": 2.622, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 4.2297042952260765e-05, | |
| "loss": 2.6178, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.2242645458897254e-05, | |
| "loss": 2.6212, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 4.218824796553375e-05, | |
| "loss": 2.6161, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.2133850472170246e-05, | |
| "loss": 2.6139, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 4.2079452978806735e-05, | |
| "loss": 2.6165, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 4.202505548544323e-05, | |
| "loss": 2.6044, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.1970657992079727e-05, | |
| "loss": 2.6075, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 4.191626049871622e-05, | |
| "loss": 2.604, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.186186300535271e-05, | |
| "loss": 2.6026, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 4.180746551198921e-05, | |
| "loss": 2.6024, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 4.175334000609252e-05, | |
| "loss": 2.5888, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 4.1698942512729016e-05, | |
| "loss": 2.5914, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 4.164454501936551e-05, | |
| "loss": 2.592, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.159014752600201e-05, | |
| "loss": 2.5822, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 4.1535750032638496e-05, | |
| "loss": 2.5845, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 4.148135253927499e-05, | |
| "loss": 2.5731, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.142695504591149e-05, | |
| "loss": 2.5695, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 4.137255755254798e-05, | |
| "loss": 2.5682, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 4.131816005918447e-05, | |
| "loss": 2.5654, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.126376256582097e-05, | |
| "loss": 2.5641, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 4.1209365072457465e-05, | |
| "loss": 2.554, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 4.115523956656078e-05, | |
| "loss": 2.5569, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 4.110084207319727e-05, | |
| "loss": 2.5503, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 4.104644457983376e-05, | |
| "loss": 2.5554, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 4.099204708647026e-05, | |
| "loss": 2.552, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 4.0937649593106754e-05, | |
| "loss": 2.5564, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 4.088325209974325e-05, | |
| "loss": 2.5373, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 4.082885460637974e-05, | |
| "loss": 2.5377, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 4.0774457113016235e-05, | |
| "loss": 2.5404, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 4.072005961965273e-05, | |
| "loss": 2.5369, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 4.066566212628922e-05, | |
| "loss": 2.5352, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 4.0611808607859356e-05, | |
| "loss": 2.5284, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 4.0557411114495845e-05, | |
| "loss": 2.5308, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 4.050301362113234e-05, | |
| "loss": 2.5202, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 4.0448616127768836e-05, | |
| "loss": 2.5199, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 4.0394218634405325e-05, | |
| "loss": 2.5074, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 4.033982114104182e-05, | |
| "loss": 2.5086, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 4.028542364767832e-05, | |
| "loss": 2.5125, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 4.023102615431481e-05, | |
| "loss": 2.5082, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 4.01766286609513e-05, | |
| "loss": 2.4999, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 4.01222311675878e-05, | |
| "loss": 2.5073, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 4.0067833674224294e-05, | |
| "loss": 2.4998, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 4.001343618086078e-05, | |
| "loss": 2.4994, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.995903868749728e-05, | |
| "loss": 2.4952, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.9904641194133775e-05, | |
| "loss": 2.4914, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.985024370077027e-05, | |
| "loss": 2.4919, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.9795846207406766e-05, | |
| "loss": 2.4884, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.974144871404326e-05, | |
| "loss": 2.4886, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.968705122067976e-05, | |
| "loss": 2.4902, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.963265372731625e-05, | |
| "loss": 2.4784, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.957825623395274e-05, | |
| "loss": 2.479, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.9524130728056055e-05, | |
| "loss": 2.463, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.9469733234692544e-05, | |
| "loss": 2.4778, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.941533574132904e-05, | |
| "loss": 2.4758, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.9360938247965536e-05, | |
| "loss": 2.4627, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.9306540754602025e-05, | |
| "loss": 2.456, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.925214326123852e-05, | |
| "loss": 2.4624, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.919774576787502e-05, | |
| "loss": 2.4643, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.914334827451151e-05, | |
| "loss": 2.466, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.908895078114801e-05, | |
| "loss": 2.4556, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.9034553287784504e-05, | |
| "loss": 2.4566, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.898042778188781e-05, | |
| "loss": 2.4527, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.8926030288524306e-05, | |
| "loss": 2.452, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.88716327951608e-05, | |
| "loss": 2.439, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.88172353017973e-05, | |
| "loss": 2.4507, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.876283780843379e-05, | |
| "loss": 2.4393, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.870844031507028e-05, | |
| "loss": 2.4411, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.865404282170678e-05, | |
| "loss": 2.449, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.859964532834327e-05, | |
| "loss": 2.4413, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.854524783497976e-05, | |
| "loss": 2.4304, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.849085034161626e-05, | |
| "loss": 2.4276, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.843672483571957e-05, | |
| "loss": 2.4336, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.838232734235607e-05, | |
| "loss": 2.4269, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.8327929848992563e-05, | |
| "loss": 2.4291, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.827353235562905e-05, | |
| "loss": 2.4313, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.821913486226555e-05, | |
| "loss": 2.4198, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.8164737368902044e-05, | |
| "loss": 2.4152, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.811033987553853e-05, | |
| "loss": 2.4158, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.805594238217503e-05, | |
| "loss": 2.4107, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.8001544888811525e-05, | |
| "loss": 2.4144, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.794714739544802e-05, | |
| "loss": 2.4076, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.7893021889551333e-05, | |
| "loss": 2.4076, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.783862439618783e-05, | |
| "loss": 2.4016, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.7784498890291135e-05, | |
| "loss": 2.4178, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.773010139692763e-05, | |
| "loss": 2.4059, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.767570390356413e-05, | |
| "loss": 2.3955, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.7621306410200616e-05, | |
| "loss": 2.4031, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.756690891683711e-05, | |
| "loss": 2.4019, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.751251142347361e-05, | |
| "loss": 2.3981, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.74581139301101e-05, | |
| "loss": 2.3988, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.740371643674659e-05, | |
| "loss": 2.3848, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.734931894338309e-05, | |
| "loss": 2.3876, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.729492145001959e-05, | |
| "loss": 2.3849, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.724052395665608e-05, | |
| "loss": 2.3869, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.7186126463292576e-05, | |
| "loss": 2.3827, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.713172896992907e-05, | |
| "loss": 2.379, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.707733147656556e-05, | |
| "loss": 2.3768, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 3.7022933983202057e-05, | |
| "loss": 2.3795, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 3.696853648983855e-05, | |
| "loss": 2.3738, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 3.691413899647505e-05, | |
| "loss": 2.378, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 3.685974150311154e-05, | |
| "loss": 2.3671, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 3.680534400974803e-05, | |
| "loss": 2.3694, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 3.6751218503851346e-05, | |
| "loss": 2.3796, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 3.6696821010487835e-05, | |
| "loss": 2.3653, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 3.664242351712433e-05, | |
| "loss": 2.3676, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 3.6588026023760826e-05, | |
| "loss": 2.3658, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.653362853039732e-05, | |
| "loss": 2.3721, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 3.647923103703382e-05, | |
| "loss": 2.3668, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 3.6424833543670314e-05, | |
| "loss": 2.3639, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.63704360503068e-05, | |
| "loss": 2.3628, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 3.63160385569433e-05, | |
| "loss": 2.3688, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 3.6261641063579795e-05, | |
| "loss": 2.3577, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 3.6207243570216284e-05, | |
| "loss": 2.353, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 3.615284607685278e-05, | |
| "loss": 2.3509, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 3.6098448583489275e-05, | |
| "loss": 2.3409, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 3.604405109012577e-05, | |
| "loss": 2.3402, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 3.598965359676226e-05, | |
| "loss": 2.3542, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 3.5935256103398756e-05, | |
| "loss": 2.346, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 3.588085861003525e-05, | |
| "loss": 2.349, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 3.582646111667175e-05, | |
| "loss": 2.3462, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 3.577206362330824e-05, | |
| "loss": 2.3458, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 3.571766612994473e-05, | |
| "loss": 2.3351, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 3.5663540624048045e-05, | |
| "loss": 2.3336, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 3.560914313068454e-05, | |
| "loss": 2.3458, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 3.555474563732104e-05, | |
| "loss": 2.3354, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 3.550062013142434e-05, | |
| "loss": 2.3382, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.544622263806084e-05, | |
| "loss": 2.3381, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 3.5391825144697335e-05, | |
| "loss": 2.3287, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 3.5337427651333824e-05, | |
| "loss": 2.3267, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 3.528303015797032e-05, | |
| "loss": 2.3295, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.5228632664606815e-05, | |
| "loss": 2.3224, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 3.517423517124331e-05, | |
| "loss": 2.3151, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 3.511983767787981e-05, | |
| "loss": 2.3209, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 3.50654401845163e-05, | |
| "loss": 2.3215, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 3.50110426911528e-05, | |
| "loss": 2.3163, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 3.495664519778929e-05, | |
| "loss": 2.3137, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 3.4902247704425784e-05, | |
| "loss": 2.3109, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 3.484785021106228e-05, | |
| "loss": 2.3158, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 3.4793724705165585e-05, | |
| "loss": 2.3133, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 3.473932721180208e-05, | |
| "loss": 2.3127, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.468492971843858e-05, | |
| "loss": 2.3097, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 3.4630532225075066e-05, | |
| "loss": 2.3132, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 3.457613473171156e-05, | |
| "loss": 2.3049, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 3.452173723834806e-05, | |
| "loss": 2.3067, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 3.4467339744984553e-05, | |
| "loss": 2.3126, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 3.441294225162104e-05, | |
| "loss": 2.2959, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 3.4358544758257545e-05, | |
| "loss": 2.3025, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.430414726489404e-05, | |
| "loss": 2.2955, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 3.424974977153053e-05, | |
| "loss": 2.3055, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 3.4195352278167026e-05, | |
| "loss": 2.2957, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 3.414122677227033e-05, | |
| "loss": 2.2927, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 3.408682927890683e-05, | |
| "loss": 2.2961, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 3.4032431785543323e-05, | |
| "loss": 2.2937, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 3.397803429217982e-05, | |
| "loss": 2.2915, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 3.392363679881631e-05, | |
| "loss": 2.2915, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 3.386951129291963e-05, | |
| "loss": 2.29, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 3.381511379955612e-05, | |
| "loss": 2.2855, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 3.376071630619261e-05, | |
| "loss": 2.2918, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 3.370631881282911e-05, | |
| "loss": 2.2802, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 3.3651921319465604e-05, | |
| "loss": 2.2856, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 3.359752382610209e-05, | |
| "loss": 2.2877, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 3.354312633273859e-05, | |
| "loss": 2.2856, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 3.3488728839375085e-05, | |
| "loss": 2.2875, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 3.3434331346011574e-05, | |
| "loss": 2.2791, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 3.337993385264807e-05, | |
| "loss": 2.2777, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 3.3325536359284566e-05, | |
| "loss": 2.271, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 3.327113886592106e-05, | |
| "loss": 2.2785, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 3.321674137255755e-05, | |
| "loss": 2.2684, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 3.3162343879194047e-05, | |
| "loss": 2.2863, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 3.310794638583054e-05, | |
| "loss": 2.2815, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 3.305354889246703e-05, | |
| "loss": 2.2761, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 3.299915139910353e-05, | |
| "loss": 2.2672, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 3.294475390574003e-05, | |
| "loss": 2.2587, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 3.289035641237652e-05, | |
| "loss": 2.258, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 3.2835958919013015e-05, | |
| "loss": 2.2662, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 3.278156142564951e-05, | |
| "loss": 2.2605, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 3.2727163932286006e-05, | |
| "loss": 2.2608, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.267303842638931e-05, | |
| "loss": 2.2549, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 3.261864093302581e-05, | |
| "loss": 2.2667, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 3.2564243439662304e-05, | |
| "loss": 2.2601, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 3.250984594629879e-05, | |
| "loss": 2.2547, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 3.245544845293529e-05, | |
| "loss": 2.2547, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 3.2401050959571785e-05, | |
| "loss": 2.2552, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 3.2346653466208274e-05, | |
| "loss": 2.2498, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 3.229225597284477e-05, | |
| "loss": 2.2536, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 3.2237858479481265e-05, | |
| "loss": 2.2435, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 3.218346098611776e-05, | |
| "loss": 2.2544, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.212906349275426e-05, | |
| "loss": 2.2449, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 3.207466599939075e-05, | |
| "loss": 2.2506, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 3.202026850602725e-05, | |
| "loss": 2.2483, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 3.196587101266374e-05, | |
| "loss": 2.2417, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 3.191174550676705e-05, | |
| "loss": 2.2366, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 3.1857348013403546e-05, | |
| "loss": 2.2423, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 3.1802950520040035e-05, | |
| "loss": 2.2401, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 3.1748825014143355e-05, | |
| "loss": 2.245, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 3.1694427520779844e-05, | |
| "loss": 2.2359, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 3.164003002741634e-05, | |
| "loss": 2.2409, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 3.1585632534052835e-05, | |
| "loss": 2.2402, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 3.1531235040689325e-05, | |
| "loss": 2.2379, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.147683754732582e-05, | |
| "loss": 2.2365, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3.1422440053962316e-05, | |
| "loss": 2.2368, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 3.136804256059881e-05, | |
| "loss": 2.2338, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 3.13136450672353e-05, | |
| "loss": 2.2456, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 3.12592475738718e-05, | |
| "loss": 2.2337, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 3.120485008050829e-05, | |
| "loss": 2.2337, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 3.115045258714478e-05, | |
| "loss": 2.2282, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3.109605509378128e-05, | |
| "loss": 2.2292, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 3.1041657600417774e-05, | |
| "loss": 2.2282, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 3.098726010705427e-05, | |
| "loss": 2.2299, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 3.093286261369076e-05, | |
| "loss": 2.2131, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 3.0878465120327254e-05, | |
| "loss": 2.2223, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 3.082406762696375e-05, | |
| "loss": 2.2275, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.076967013360024e-05, | |
| "loss": 2.2156, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.071527264023674e-05, | |
| "loss": 2.2252, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.0661147134340054e-05, | |
| "loss": 2.2282, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.0606749640976543e-05, | |
| "loss": 2.2214, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.055235214761304e-05, | |
| "loss": 2.2244, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 3.0497954654249532e-05, | |
| "loss": 2.2149, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 3.0443557160886028e-05, | |
| "loss": 2.2203, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.038915966752252e-05, | |
| "loss": 2.216, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 3.0334762174159016e-05, | |
| "loss": 2.2148, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.028036468079551e-05, | |
| "loss": 2.2145, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 3.0225967187432004e-05, | |
| "loss": 2.2172, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.0171569694068497e-05, | |
| "loss": 2.2165, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 3.011717220070499e-05, | |
| "loss": 2.2186, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 3.0063046694808305e-05, | |
| "loss": 2.2044, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.00086492014448e-05, | |
| "loss": 2.2032, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 2.9954251708081293e-05, | |
| "loss": 2.2066, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 2.9899854214717786e-05, | |
| "loss": 2.2019, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 2.984545672135428e-05, | |
| "loss": 2.2085, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 2.9791059227990774e-05, | |
| "loss": 2.1975, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 2.973666173462727e-05, | |
| "loss": 2.1989, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 2.9682264241263762e-05, | |
| "loss": 2.2016, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 2.9627866747900258e-05, | |
| "loss": 2.1911, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 2.957346925453675e-05, | |
| "loss": 2.2, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 2.9519343748640067e-05, | |
| "loss": 2.2038, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 2.9465218242743376e-05, | |
| "loss": 2.1882, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 2.941082074937987e-05, | |
| "loss": 2.1936, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 2.9356423256016364e-05, | |
| "loss": 2.1979, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 2.9302025762652857e-05, | |
| "loss": 2.2051, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 2.9247628269289352e-05, | |
| "loss": 2.1915, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 2.9193230775925845e-05, | |
| "loss": 2.1869, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 2.9138833282562337e-05, | |
| "loss": 2.1824, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 2.9084435789198833e-05, | |
| "loss": 2.1974, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 2.9030038295835326e-05, | |
| "loss": 2.1854, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 2.8975640802471825e-05, | |
| "loss": 2.1844, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 2.892124330910832e-05, | |
| "loss": 2.1883, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 2.886711780321163e-05, | |
| "loss": 2.1925, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 2.8812720309848122e-05, | |
| "loss": 2.186, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 2.8758322816484618e-05, | |
| "loss": 2.1865, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 2.870392532312111e-05, | |
| "loss": 2.1837, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 2.8649527829757607e-05, | |
| "loss": 2.1836, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 2.85951303363941e-05, | |
| "loss": 2.1867, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 2.8540732843030595e-05, | |
| "loss": 2.1811, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 2.8486335349667087e-05, | |
| "loss": 2.1695, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 2.843193785630358e-05, | |
| "loss": 2.187, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 2.8377540362940076e-05, | |
| "loss": 2.1767, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 2.8323142869576568e-05, | |
| "loss": 2.1827, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 2.8268745376213064e-05, | |
| "loss": 2.1835, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 2.8214347882849563e-05, | |
| "loss": 2.164, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 2.8159950389486056e-05, | |
| "loss": 2.1696, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 2.810555289612255e-05, | |
| "loss": 2.1812, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 2.8051155402759044e-05, | |
| "loss": 2.1768, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 2.7996757909395536e-05, | |
| "loss": 2.1825, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 2.794263240349885e-05, | |
| "loss": 2.1763, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 2.788823491013534e-05, | |
| "loss": 2.1698, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 2.7833837416771834e-05, | |
| "loss": 2.1745, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 2.777943992340833e-05, | |
| "loss": 2.1675, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 2.7725042430044822e-05, | |
| "loss": 2.1661, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 2.7670644936681318e-05, | |
| "loss": 2.1603, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 2.761624744331781e-05, | |
| "loss": 2.1612, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 2.7561849949954306e-05, | |
| "loss": 2.1667, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 2.7507452456590805e-05, | |
| "loss": 2.1625, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 2.7453054963227298e-05, | |
| "loss": 2.1751, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 2.7398657469863794e-05, | |
| "loss": 2.163, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 2.7344259976500286e-05, | |
| "loss": 2.1606, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.728986248313678e-05, | |
| "loss": 2.1623, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.7235464989773274e-05, | |
| "loss": 2.1648, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.7181067496409767e-05, | |
| "loss": 2.1607, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.7126670003046263e-05, | |
| "loss": 2.1561, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 2.7072272509682755e-05, | |
| "loss": 2.1591, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 2.701787501631925e-05, | |
| "loss": 2.1596, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.6963477522955744e-05, | |
| "loss": 2.1524, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 2.6909080029592236e-05, | |
| "loss": 2.1508, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.6854682536228732e-05, | |
| "loss": 2.1607, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 2.6800285042865224e-05, | |
| "loss": 2.1485, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.674615953696854e-05, | |
| "loss": 2.1467, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 2.6691762043605033e-05, | |
| "loss": 2.1526, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.663736455024153e-05, | |
| "loss": 2.1489, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 2.658296705687802e-05, | |
| "loss": 2.1519, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.6528569563514517e-05, | |
| "loss": 2.1482, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.647417207015101e-05, | |
| "loss": 2.1503, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 2.6419774576787505e-05, | |
| "loss": 2.1474, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.6365377083423998e-05, | |
| "loss": 2.1475, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.631097959006049e-05, | |
| "loss": 2.1487, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 2.6256582096696986e-05, | |
| "loss": 2.1527, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 2.620218460333348e-05, | |
| "loss": 2.1438, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.6147787109969974e-05, | |
| "loss": 2.148, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 2.6093389616606467e-05, | |
| "loss": 2.1478, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 2.6038992123242962e-05, | |
| "loss": 2.1505, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.5984866617346275e-05, | |
| "loss": 2.1491, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.593046912398277e-05, | |
| "loss": 2.143, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.5876071630619263e-05, | |
| "loss": 2.1388, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 2.582167413725576e-05, | |
| "loss": 2.1433, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 2.576727664389225e-05, | |
| "loss": 2.1269, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 2.5712879150528747e-05, | |
| "loss": 2.1401, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 2.565848165716524e-05, | |
| "loss": 2.1382, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.5604084163801732e-05, | |
| "loss": 2.1371, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.5549686670438228e-05, | |
| "loss": 2.1347, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 2.5495561164541537e-05, | |
| "loss": 2.1407, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.544116367117803e-05, | |
| "loss": 2.1365, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 2.5386766177814526e-05, | |
| "loss": 2.1354, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.5332368684451025e-05, | |
| "loss": 2.1278, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 2.5277971191087517e-05, | |
| "loss": 2.1357, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 2.5223573697724013e-05, | |
| "loss": 2.1337, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 2.5169176204360506e-05, | |
| "loss": 2.1335, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.5114778710997e-05, | |
| "loss": 2.1302, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.5060381217633494e-05, | |
| "loss": 2.1351, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 2.5005983724269986e-05, | |
| "loss": 2.1346, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.4951586230906482e-05, | |
| "loss": 2.1307, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 2.4897188737542975e-05, | |
| "loss": 2.1222, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 2.484279124417947e-05, | |
| "loss": 2.1215, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 2.4788393750815963e-05, | |
| "loss": 2.1201, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.473399625745246e-05, | |
| "loss": 2.1307, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.467959876408895e-05, | |
| "loss": 2.1311, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 2.4625201270725444e-05, | |
| "loss": 2.1161, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 2.457080377736194e-05, | |
| "loss": 2.1259, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 2.4516406283998432e-05, | |
| "loss": 2.1199, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.446200879063493e-05, | |
| "loss": 2.1216, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 2.4407611297271424e-05, | |
| "loss": 2.1156, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 2.4353213803907916e-05, | |
| "loss": 2.1243, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.4298816310544412e-05, | |
| "loss": 2.1209, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 2.4244418817180905e-05, | |
| "loss": 2.1188, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.41900213238174e-05, | |
| "loss": 2.1206, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 2.4135623830453893e-05, | |
| "loss": 2.1195, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.408122633709039e-05, | |
| "loss": 2.1218, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 2.402682884372688e-05, | |
| "loss": 2.1098, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.3972703337830194e-05, | |
| "loss": 2.1126, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 2.3918305844466686e-05, | |
| "loss": 2.1132, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 2.3863908351103182e-05, | |
| "loss": 2.1141, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 2.3809510857739674e-05, | |
| "loss": 2.1059, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 2.375511336437617e-05, | |
| "loss": 2.1057, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.3700715871012666e-05, | |
| "loss": 2.1116, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.364631837764916e-05, | |
| "loss": 2.1138, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 2.3591920884285654e-05, | |
| "loss": 2.1129, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 2.3537523390922147e-05, | |
| "loss": 2.1108, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.3483125897558643e-05, | |
| "loss": 2.1146, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.3429000391661955e-05, | |
| "loss": 2.1102, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 2.3374602898298448e-05, | |
| "loss": 2.1138, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 2.332020540493494e-05, | |
| "loss": 2.1043, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.3265807911571436e-05, | |
| "loss": 2.1063, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 2.321141041820793e-05, | |
| "loss": 2.1108, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 2.3157012924844424e-05, | |
| "loss": 2.1106, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 2.3102615431480917e-05, | |
| "loss": 2.1117, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 2.3048217938117413e-05, | |
| "loss": 2.0953, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 2.299382044475391e-05, | |
| "loss": 2.1012, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.29394229513904e-05, | |
| "loss": 2.1044, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 2.2885297445493714e-05, | |
| "loss": 2.1023, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.283089995213021e-05, | |
| "loss": 2.1068, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 2.2776502458766702e-05, | |
| "loss": 2.1076, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.2722104965403194e-05, | |
| "loss": 2.1024, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 2.266770747203969e-05, | |
| "loss": 2.1007, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 2.2613309978676183e-05, | |
| "loss": 2.0993, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.255891248531268e-05, | |
| "loss": 2.0964, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.250451499194917e-05, | |
| "loss": 2.0933, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.2450117498585667e-05, | |
| "loss": 2.0945, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 2.239572000522216e-05, | |
| "loss": 2.0974, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.2341322511858655e-05, | |
| "loss": 2.0992, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.228692501849515e-05, | |
| "loss": 2.0913, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 2.223279951259846e-05, | |
| "loss": 2.096, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 2.2178402019234952e-05, | |
| "loss": 2.094, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 2.2124004525871452e-05, | |
| "loss": 2.0944, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.2069607032507944e-05, | |
| "loss": 2.0895, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 2.2015209539144437e-05, | |
| "loss": 2.0886, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 2.1960812045780932e-05, | |
| "loss": 2.095, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.1906414552417425e-05, | |
| "loss": 2.0854, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.185201705905392e-05, | |
| "loss": 2.0868, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 2.1797619565690413e-05, | |
| "loss": 2.0803, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 2.174322207232691e-05, | |
| "loss": 2.0918, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.16888245789634e-05, | |
| "loss": 2.0931, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 2.1634427085599894e-05, | |
| "loss": 2.0881, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 2.1580301579703207e-05, | |
| "loss": 2.0856, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 2.152617607380652e-05, | |
| "loss": 2.0818, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.1471778580443015e-05, | |
| "loss": 2.0788, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 2.1417381087079507e-05, | |
| "loss": 2.0836, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 2.1362983593716003e-05, | |
| "loss": 2.0898, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.1308586100352496e-05, | |
| "loss": 2.0803, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.125418860698899e-05, | |
| "loss": 2.081, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 2.1199791113625487e-05, | |
| "loss": 2.0859, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 2.114539362026198e-05, | |
| "loss": 2.0826, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 2.1090996126898476e-05, | |
| "loss": 2.0883, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.1036598633534968e-05, | |
| "loss": 2.0802, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 2.098220114017146e-05, | |
| "loss": 2.0868, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.0927803646807956e-05, | |
| "loss": 2.0827, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 2.087340615344445e-05, | |
| "loss": 2.0842, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.0819008660080945e-05, | |
| "loss": 2.0783, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.0764611166717437e-05, | |
| "loss": 2.0809, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 2.0710213673353933e-05, | |
| "loss": 2.0844, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 2.065581617999043e-05, | |
| "loss": 2.0746, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 2.060141868662692e-05, | |
| "loss": 2.0785, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 2.0547021193263417e-05, | |
| "loss": 2.0767, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 2.049262369989991e-05, | |
| "loss": 2.0763, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.0438498194003222e-05, | |
| "loss": 2.0837, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 2.038437268810653e-05, | |
| "loss": 2.0736, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 2.0329975194743027e-05, | |
| "loss": 2.0787, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 2.0275577701379523e-05, | |
| "loss": 2.084, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 2.0221180208016016e-05, | |
| "loss": 2.0804, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 2.016678271465251e-05, | |
| "loss": 2.0657, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 2.0112385221289004e-05, | |
| "loss": 2.0731, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 2.0057987727925496e-05, | |
| "loss": 2.0769, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 2.0003590234561992e-05, | |
| "loss": 2.0734, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 1.9949192741198485e-05, | |
| "loss": 2.0751, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 1.989479524783498e-05, | |
| "loss": 2.071, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.9840397754471473e-05, | |
| "loss": 2.075, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 1.978600026110797e-05, | |
| "loss": 2.0692, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 1.9731602767744465e-05, | |
| "loss": 2.0671, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 1.9677477261847774e-05, | |
| "loss": 2.0654, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 1.962307976848427e-05, | |
| "loss": 2.0612, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 1.9568682275120765e-05, | |
| "loss": 2.0703, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 1.9514284781757258e-05, | |
| "loss": 2.0644, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.9459887288393754e-05, | |
| "loss": 2.0662, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 1.9405489795030246e-05, | |
| "loss": 2.0652, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 1.935109230166674e-05, | |
| "loss": 2.0661, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 1.9296694808303234e-05, | |
| "loss": 2.0674, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 1.9242297314939727e-05, | |
| "loss": 2.0652, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 1.9187899821576223e-05, | |
| "loss": 2.0598, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 1.9133502328212715e-05, | |
| "loss": 2.0655, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 1.907910483484921e-05, | |
| "loss": 2.0605, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.9024707341485707e-05, | |
| "loss": 2.0619, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 1.89703098481222e-05, | |
| "loss": 2.0631, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 1.8915912354758695e-05, | |
| "loss": 2.0678, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 1.8861514861395188e-05, | |
| "loss": 2.0584, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 1.8807117368031683e-05, | |
| "loss": 2.0461, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 1.8752719874668176e-05, | |
| "loss": 2.0631, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 1.869832238130467e-05, | |
| "loss": 2.0695, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.8643924887941164e-05, | |
| "loss": 2.0574, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 1.8589527394577657e-05, | |
| "loss": 2.0504, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 1.853540188868097e-05, | |
| "loss": 2.0567, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 1.8481004395317465e-05, | |
| "loss": 2.0592, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 1.8426606901953958e-05, | |
| "loss": 2.0591, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 1.837220940859045e-05, | |
| "loss": 2.0552, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 1.831781191522695e-05, | |
| "loss": 2.0593, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 1.8263414421863442e-05, | |
| "loss": 2.0537, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 1.8209016928499938e-05, | |
| "loss": 2.0523, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 1.815461943513643e-05, | |
| "loss": 2.0497, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 1.8100221941772922e-05, | |
| "loss": 2.0571, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 1.8045824448409418e-05, | |
| "loss": 2.0573, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 1.799142695504591e-05, | |
| "loss": 2.0526, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 1.7937029461682407e-05, | |
| "loss": 2.0604, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 1.78826319683189e-05, | |
| "loss": 2.0436, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 1.7828234474955395e-05, | |
| "loss": 2.0445, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 1.7774108969058707e-05, | |
| "loss": 2.0484, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 1.77197114756952e-05, | |
| "loss": 2.0561, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 1.7665585969798512e-05, | |
| "loss": 2.0471, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 1.7611188476435005e-05, | |
| "loss": 2.064, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 1.75567909830715e-05, | |
| "loss": 2.0574, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.7502393489707993e-05, | |
| "loss": 2.0487, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 1.744799599634449e-05, | |
| "loss": 2.0502, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 1.7393598502980985e-05, | |
| "loss": 2.0401, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 1.7339201009617477e-05, | |
| "loss": 2.0351, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 1.7284803516253973e-05, | |
| "loss": 2.0526, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 1.7230406022890466e-05, | |
| "loss": 2.0423, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.717600852952696e-05, | |
| "loss": 2.0438, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 1.7121611036163454e-05, | |
| "loss": 2.0423, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 1.7067213542799946e-05, | |
| "loss": 2.0511, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.7012816049436442e-05, | |
| "loss": 2.0478, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 1.6958418556072935e-05, | |
| "loss": 2.0422, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 1.6904021062709434e-05, | |
| "loss": 2.0438, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 1.6849623569345926e-05, | |
| "loss": 2.0421, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 1.679522607598242e-05, | |
| "loss": 2.0399, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 1.6740828582618915e-05, | |
| "loss": 2.0455, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 1.6686431089255407e-05, | |
| "loss": 2.0355, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 1.6632033595891903e-05, | |
| "loss": 2.04, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 1.6577636102528395e-05, | |
| "loss": 2.0416, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 1.652323860916489e-05, | |
| "loss": 2.0373, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 1.6468841115801384e-05, | |
| "loss": 2.0423, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.6414443622437876e-05, | |
| "loss": 2.0354, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 1.6360046129074372e-05, | |
| "loss": 2.0343, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 1.6305920623177685e-05, | |
| "loss": 2.0327, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 1.6251523129814177e-05, | |
| "loss": 2.0362, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 1.6197125636450673e-05, | |
| "loss": 2.036, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 1.614272814308717e-05, | |
| "loss": 2.0384, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 1.608833064972366e-05, | |
| "loss": 2.0379, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 1.6033933156360157e-05, | |
| "loss": 2.043, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 1.597953566299665e-05, | |
| "loss": 2.0316, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 1.5925138169633145e-05, | |
| "loss": 2.0292, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 1.5870740676269638e-05, | |
| "loss": 2.0309, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 1.5816343182906134e-05, | |
| "loss": 2.0392, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.5761945689542626e-05, | |
| "loss": 2.033, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 1.570754819617912e-05, | |
| "loss": 2.0329, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 1.565342269028243e-05, | |
| "loss": 2.0324, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 1.5599025196918927e-05, | |
| "loss": 2.0286, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.554462770355542e-05, | |
| "loss": 2.0356, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 1.5490502197658732e-05, | |
| "loss": 2.0315, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 1.5436104704295224e-05, | |
| "loss": 2.0332, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 1.538170721093172e-05, | |
| "loss": 2.0236, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 1.5327309717568213e-05, | |
| "loss": 2.0283, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 1.5272912224204712e-05, | |
| "loss": 2.0349, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 1.5218514730841204e-05, | |
| "loss": 2.0285, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 1.5164117237477699e-05, | |
| "loss": 2.0336, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 1.5109719744114193e-05, | |
| "loss": 2.0387, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 1.5055322250750687e-05, | |
| "loss": 2.0346, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 1.5000924757387181e-05, | |
| "loss": 2.0271, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 1.4946527264023673e-05, | |
| "loss": 2.0259, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 1.4892129770660168e-05, | |
| "loss": 2.0291, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 1.4837732277296662e-05, | |
| "loss": 2.0281, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 1.4783606771399974e-05, | |
| "loss": 2.0334, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 1.4729209278036469e-05, | |
| "loss": 2.033, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 1.4674811784672963e-05, | |
| "loss": 2.0291, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 1.4620414291309457e-05, | |
| "loss": 2.0238, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 1.456601679794595e-05, | |
| "loss": 2.021, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.4511619304582447e-05, | |
| "loss": 2.0198, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.4457221811218941e-05, | |
| "loss": 2.0177, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 1.4402824317855435e-05, | |
| "loss": 2.0238, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.434842682449193e-05, | |
| "loss": 2.0285, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 1.4294029331128422e-05, | |
| "loss": 2.0242, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.4239631837764916e-05, | |
| "loss": 2.0165, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.418523434440141e-05, | |
| "loss": 2.0177, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.4130836851037904e-05, | |
| "loss": 2.0275, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 1.4076439357674398e-05, | |
| "loss": 2.0221, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.4022041864310892e-05, | |
| "loss": 2.0234, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.3967644370947388e-05, | |
| "loss": 2.0225, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 1.3913246877583882e-05, | |
| "loss": 2.0195, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.3858849384220377e-05, | |
| "loss": 2.0142, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 1.380445189085687e-05, | |
| "loss": 2.0224, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.3750054397493365e-05, | |
| "loss": 2.0261, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 1.3695656904129859e-05, | |
| "loss": 2.019, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.3641259410766351e-05, | |
| "loss": 2.013, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.3586861917402846e-05, | |
| "loss": 2.0101, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 1.353246442403934e-05, | |
| "loss": 2.0264, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.3478066930675834e-05, | |
| "loss": 2.0021, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 1.3423941424779146e-05, | |
| "loss": 2.008, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.336954393141564e-05, | |
| "loss": 2.0192, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 1.3315146438052135e-05, | |
| "loss": 2.0169, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.326074894468863e-05, | |
| "loss": 2.0168, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 1.3206351451325125e-05, | |
| "loss": 2.0171, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.3151953957961619e-05, | |
| "loss": 2.0233, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 1.3097556464598113e-05, | |
| "loss": 2.0167, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 1.3043158971234607e-05, | |
| "loss": 2.02, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 1.29887614778711e-05, | |
| "loss": 2.0108, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 1.2934363984507594e-05, | |
| "loss": 2.0147, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 1.2879966491144088e-05, | |
| "loss": 2.0162, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 1.28258409852474e-05, | |
| "loss": 2.0203, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 1.2771443491883895e-05, | |
| "loss": 2.0186, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 1.2717045998520389e-05, | |
| "loss": 2.0121, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 1.2662648505156883e-05, | |
| "loss": 2.0089, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 1.2608251011793375e-05, | |
| "loss": 2.0045, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 1.255385351842987e-05, | |
| "loss": 2.0085, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.2499456025066365e-05, | |
| "loss": 2.0065, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 1.244505853170286e-05, | |
| "loss": 2.019, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 1.2390661038339354e-05, | |
| "loss": 2.0117, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 1.2336263544975848e-05, | |
| "loss": 2.0161, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 1.2281866051612342e-05, | |
| "loss": 2.0129, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 1.2227468558248836e-05, | |
| "loss": 2.0018, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 1.217307106488533e-05, | |
| "loss": 2.0042, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 1.2118673571521826e-05, | |
| "loss": 2.0114, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 1.206427607815832e-05, | |
| "loss": 2.0042, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 1.2009878584794813e-05, | |
| "loss": 2.0093, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 1.1955481091431307e-05, | |
| "loss": 1.9995, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 1.1901083598067801e-05, | |
| "loss": 2.0157, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 1.1846686104704295e-05, | |
| "loss": 2.0057, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 1.1792288611340791e-05, | |
| "loss": 2.0072, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 1.1738163105444102e-05, | |
| "loss": 1.9961, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 1.1683765612080596e-05, | |
| "loss": 2.012, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 1.1629640106183907e-05, | |
| "loss": 2.0102, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 1.157551460028722e-05, | |
| "loss": 2.0096, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 1.1521117106923714e-05, | |
| "loss": 1.9972, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 1.1466719613560208e-05, | |
| "loss": 2.0025, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 1.1412322120196702e-05, | |
| "loss": 1.9985, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 1.1357924626833196e-05, | |
| "loss": 2.001, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 1.130352713346969e-05, | |
| "loss": 2.0019, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 1.1249129640106184e-05, | |
| "loss": 1.9946, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 1.1194732146742679e-05, | |
| "loss": 1.9935, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 1.1140334653379173e-05, | |
| "loss": 2.002, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.1085937160015667e-05, | |
| "loss": 2.0009, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 1.1031539666652161e-05, | |
| "loss": 1.997, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 1.0977142173288655e-05, | |
| "loss": 2.0059, | |
| "step": 150200 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 1.092274467992515e-05, | |
| "loss": 2.0007, | |
| "step": 150400 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 1.0868347186561643e-05, | |
| "loss": 2.0027, | |
| "step": 150600 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 1.0813949693198138e-05, | |
| "loss": 1.9926, | |
| "step": 150800 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 1.0759552199834633e-05, | |
| "loss": 2.001, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 1.0705154706471126e-05, | |
| "loss": 2.001, | |
| "step": 151200 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 1.0651029200574437e-05, | |
| "loss": 2.001, | |
| "step": 151400 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 1.0596631707210933e-05, | |
| "loss": 1.9974, | |
| "step": 151600 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 1.0542234213847427e-05, | |
| "loss": 2.0024, | |
| "step": 151800 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 1.0487836720483921e-05, | |
| "loss": 2.0063, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 1.0433439227120415e-05, | |
| "loss": 1.9962, | |
| "step": 152200 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1.037904173375691e-05, | |
| "loss": 2.0064, | |
| "step": 152400 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 1.0324644240393403e-05, | |
| "loss": 1.9966, | |
| "step": 152600 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 1.0270246747029898e-05, | |
| "loss": 1.9983, | |
| "step": 152800 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 1.0215849253666392e-05, | |
| "loss": 1.9881, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 1.0161451760302886e-05, | |
| "loss": 1.9954, | |
| "step": 153200 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 1.010705426693938e-05, | |
| "loss": 2.0016, | |
| "step": 153400 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 1.0052656773575874e-05, | |
| "loss": 1.9971, | |
| "step": 153600 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 9.998259280212368e-06, | |
| "loss": 1.9996, | |
| "step": 153800 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.943861786848862e-06, | |
| "loss": 2.0017, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 9.889464293485357e-06, | |
| "loss": 2.0009, | |
| "step": 154200 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 9.83506680012185e-06, | |
| "loss": 1.996, | |
| "step": 154400 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 9.780669306758345e-06, | |
| "loss": 1.99, | |
| "step": 154600 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 9.726271813394839e-06, | |
| "loss": 1.9913, | |
| "step": 154800 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 9.671874320031333e-06, | |
| "loss": 1.9959, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 9.617476826667827e-06, | |
| "loss": 1.9928, | |
| "step": 155200 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 9.563079333304321e-06, | |
| "loss": 1.994, | |
| "step": 155400 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 9.508681839940816e-06, | |
| "loss": 1.9984, | |
| "step": 155600 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 9.454284346577311e-06, | |
| "loss": 1.9942, | |
| "step": 155800 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "learning_rate": 9.399886853213804e-06, | |
| "loss": 1.9934, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 9.345489359850298e-06, | |
| "loss": 1.989, | |
| "step": 156200 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 9.291091866486792e-06, | |
| "loss": 1.9921, | |
| "step": 156400 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 9.236694373123286e-06, | |
| "loss": 1.9912, | |
| "step": 156600 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 9.182296879759782e-06, | |
| "loss": 1.9819, | |
| "step": 156800 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 9.127899386396276e-06, | |
| "loss": 1.9904, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 9.073501893032769e-06, | |
| "loss": 1.9876, | |
| "step": 157200 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 9.019376387136081e-06, | |
| "loss": 1.9904, | |
| "step": 157400 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 8.964978893772575e-06, | |
| "loss": 1.9938, | |
| "step": 157600 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 8.910853387875886e-06, | |
| "loss": 1.9868, | |
| "step": 157800 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 8.856455894512382e-06, | |
| "loss": 1.9839, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 8.802058401148876e-06, | |
| "loss": 1.9929, | |
| "step": 158200 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 8.74766090778537e-06, | |
| "loss": 1.9819, | |
| "step": 158400 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 8.693263414421863e-06, | |
| "loss": 1.9854, | |
| "step": 158600 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 8.638865921058357e-06, | |
| "loss": 1.9909, | |
| "step": 158800 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 8.584468427694853e-06, | |
| "loss": 1.9958, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 8.530070934331347e-06, | |
| "loss": 1.9827, | |
| "step": 159200 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 8.475673440967841e-06, | |
| "loss": 1.9871, | |
| "step": 159400 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 8.421275947604335e-06, | |
| "loss": 1.9849, | |
| "step": 159600 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 8.366878454240828e-06, | |
| "loss": 1.9951, | |
| "step": 159800 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 8.312480960877324e-06, | |
| "loss": 1.9809, | |
| "step": 160000 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 190500, | |
| "num_train_epochs": 10, | |
| "save_steps": 40000, | |
| "total_flos": 8.08352616504361e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |