sivamuthusamy's picture
Upload folder using huggingface_hub
ce23912 verified
{
"best_global_step": 100,
"best_metric": 0.0743941143155098,
"best_model_checkpoint": "/content/models/gemma_qlora_lmh_inst/checkpoint-100",
"epoch": 2.0,
"eval_steps": 20,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 2.200686830282211,
"epoch": 0.40404040404040403,
"grad_norm": 2.258388042449951,
"learning_rate": 8.1e-06,
"loss": 0.1306,
"mean_token_accuracy": 0.9350446417927742,
"num_tokens": 237653.0,
"step": 20
},
{
"epoch": 0.40404040404040403,
"eval_entropy": 2.201928762289194,
"eval_loss": 0.0838593915104866,
"eval_mean_token_accuracy": 0.9620879109089191,
"eval_num_tokens": 237653.0,
"eval_runtime": 5.7826,
"eval_samples_per_second": 34.068,
"eval_steps_per_second": 2.248,
"step": 20
},
{
"entropy": 2.203989064693451,
"epoch": 0.8080808080808081,
"grad_norm": 3.0667307376861572,
"learning_rate": 6.1e-06,
"loss": 0.0797,
"mean_token_accuracy": 0.9622767880558968,
"num_tokens": 473622.0,
"step": 40
},
{
"epoch": 0.8080808080808081,
"eval_entropy": 2.2022548638857327,
"eval_loss": 0.07604048401117325,
"eval_mean_token_accuracy": 0.9581043995343722,
"eval_num_tokens": 473622.0,
"eval_runtime": 5.7858,
"eval_samples_per_second": 34.049,
"eval_steps_per_second": 2.247,
"step": 40
},
{
"entropy": 2.2022441717294545,
"epoch": 1.202020202020202,
"grad_norm": 2.590899705886841,
"learning_rate": 4.1e-06,
"loss": 0.0656,
"mean_token_accuracy": 0.9664224661313571,
"num_tokens": 702354.0,
"step": 60
},
{
"epoch": 1.202020202020202,
"eval_entropy": 2.1788861201359677,
"eval_loss": 0.08057427406311035,
"eval_mean_token_accuracy": 0.9635989024088933,
"eval_num_tokens": 702354.0,
"eval_runtime": 5.7897,
"eval_samples_per_second": 34.026,
"eval_steps_per_second": 2.245,
"step": 60
},
{
"entropy": 2.19284747838974,
"epoch": 1.606060606060606,
"grad_norm": 2.1625924110412598,
"learning_rate": 2.1000000000000002e-06,
"loss": 0.0579,
"mean_token_accuracy": 0.9754464238882065,
"num_tokens": 937654.0,
"step": 80
},
{
"epoch": 1.606060606060606,
"eval_entropy": 2.178425770539504,
"eval_loss": 0.07602041214704514,
"eval_mean_token_accuracy": 0.965659343279325,
"eval_num_tokens": 937654.0,
"eval_runtime": 5.786,
"eval_samples_per_second": 34.047,
"eval_steps_per_second": 2.247,
"step": 80
},
{
"entropy": 2.1867658969683523,
"epoch": 2.0,
"grad_norm": 1.2462379932403564,
"learning_rate": 1.0000000000000001e-07,
"loss": 0.0512,
"mean_token_accuracy": 0.9764194106444334,
"num_tokens": 1165538.0,
"step": 100
},
{
"epoch": 2.0,
"eval_entropy": 2.181581442172711,
"eval_loss": 0.0743941143155098,
"eval_mean_token_accuracy": 0.963598906993866,
"eval_num_tokens": 1165538.0,
"eval_runtime": 5.7817,
"eval_samples_per_second": 34.073,
"eval_steps_per_second": 2.248,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.9940993720561664e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}