{ "best_global_step": 100, "best_metric": 0.0743941143155098, "best_model_checkpoint": "/content/models/gemma_qlora_lmh_inst/checkpoint-100", "epoch": 2.0, "eval_steps": 20, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 2.200686830282211, "epoch": 0.40404040404040403, "grad_norm": 2.258388042449951, "learning_rate": 8.1e-06, "loss": 0.1306, "mean_token_accuracy": 0.9350446417927742, "num_tokens": 237653.0, "step": 20 }, { "epoch": 0.40404040404040403, "eval_entropy": 2.201928762289194, "eval_loss": 0.0838593915104866, "eval_mean_token_accuracy": 0.9620879109089191, "eval_num_tokens": 237653.0, "eval_runtime": 5.7826, "eval_samples_per_second": 34.068, "eval_steps_per_second": 2.248, "step": 20 }, { "entropy": 2.203989064693451, "epoch": 0.8080808080808081, "grad_norm": 3.0667307376861572, "learning_rate": 6.1e-06, "loss": 0.0797, "mean_token_accuracy": 0.9622767880558968, "num_tokens": 473622.0, "step": 40 }, { "epoch": 0.8080808080808081, "eval_entropy": 2.2022548638857327, "eval_loss": 0.07604048401117325, "eval_mean_token_accuracy": 0.9581043995343722, "eval_num_tokens": 473622.0, "eval_runtime": 5.7858, "eval_samples_per_second": 34.049, "eval_steps_per_second": 2.247, "step": 40 }, { "entropy": 2.2022441717294545, "epoch": 1.202020202020202, "grad_norm": 2.590899705886841, "learning_rate": 4.1e-06, "loss": 0.0656, "mean_token_accuracy": 0.9664224661313571, "num_tokens": 702354.0, "step": 60 }, { "epoch": 1.202020202020202, "eval_entropy": 2.1788861201359677, "eval_loss": 0.08057427406311035, "eval_mean_token_accuracy": 0.9635989024088933, "eval_num_tokens": 702354.0, "eval_runtime": 5.7897, "eval_samples_per_second": 34.026, "eval_steps_per_second": 2.245, "step": 60 }, { "entropy": 2.19284747838974, "epoch": 1.606060606060606, "grad_norm": 2.1625924110412598, "learning_rate": 2.1000000000000002e-06, "loss": 0.0579, "mean_token_accuracy": 0.9754464238882065, "num_tokens": 937654.0, "step": 80 }, { "epoch": 1.606060606060606, "eval_entropy": 2.178425770539504, "eval_loss": 0.07602041214704514, "eval_mean_token_accuracy": 0.965659343279325, "eval_num_tokens": 937654.0, "eval_runtime": 5.786, "eval_samples_per_second": 34.047, "eval_steps_per_second": 2.247, "step": 80 }, { "entropy": 2.1867658969683523, "epoch": 2.0, "grad_norm": 1.2462379932403564, "learning_rate": 1.0000000000000001e-07, "loss": 0.0512, "mean_token_accuracy": 0.9764194106444334, "num_tokens": 1165538.0, "step": 100 }, { "epoch": 2.0, "eval_entropy": 2.181581442172711, "eval_loss": 0.0743941143155098, "eval_mean_token_accuracy": 0.963598906993866, "eval_num_tokens": 1165538.0, "eval_runtime": 5.7817, "eval_samples_per_second": 34.073, "eval_steps_per_second": 2.248, "step": 100 } ], "logging_steps": 20, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9940993720561664e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }