sivamuthusamy committed
Commit ce23912 · verified · 1 parent: 6d5973c

Upload folder using huggingface_hub

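The commit message indicates the files were pushed with the huggingface_hub upload API. A minimal sketch of such a call is shown below; the local folder path and repo_id are assumptions for illustration, not values recorded in this commit.

# Sketch: push a checkpoint folder with huggingface_hub.
# folder_path and repo_id are assumed; substitute your own values.
from huggingface_hub import upload_folder

upload_folder(
    folder_path="/content/models/gemma_qlora_lmh_inst",   # assumed local path
    repo_id="<username>/<repo-name>",                      # assumed target repo
    commit_message="Upload folder using huggingface_hub",  # default message seen above
)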
adapter_config.json CHANGED
@@ -25,13 +25,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "k_proj",
+    "q_proj",
     "o_proj",
+    "up_proj",
+    "v_proj",
     "down_proj",
-    "q_proj",
     "gate_proj",
-    "up_proj"
+    "k_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1ee538a62ccdced6ae2ff179651eb0c5481a976ce36e4d78f01b25a5a48351a
+oid sha256:95aa29e997e65895e7438c6a5161582325b48ad40f334444dd9e8bcf1af49f7b
 size 664584480
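adapter_model.safetensors and the remaining binary artifacts in this commit are stored through Git LFS, so the diff only shows the pointer file (spec version, sha256 oid, byte size). One way to check that a downloaded copy matches the new pointer is to hash it yourself; the sketch below assumes the file sits in the current directory.

# Sketch: recompute the Git LFS oid (plain sha256 of the file contents)
# and compare it with the pointer shown in the diff above.
import hashlib

def lfs_oid(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

print(lfs_oid("adapter_model.safetensors"))  # assumed local filename
# expected: 95aa29e997e65895e7438c6a5161582325b48ad40f334444dd9e8bcf1af49f7b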
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4df917b3bdd630be19a2284b2cbf7e679189cc5cfce397f316886a3716f5ca3e
+oid sha256:9a18c2b2f06141f8d8ef924be72aecffd7eea284834e455f0e1231458243c1fd
 size 1329377575
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9da0163c3936acb3c6c189647b4d67e82c3ede0faa2bc4c2be5a6384e1d0721e
+oid sha256:a490ec3fd930ebc86317228df3023d06f035c382dedbf1c91b49dd6b00d113f2
 size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68e12ea105d1321d2f396e3113101572120c00e3b670a28d0154eb748559938c
+oid sha256:89c7e585bf25f929251599f24890acceb440d06b049a496384748f1eed552840
 size 1465
trainer_state.json CHANGED
@@ -1,97 +1,118 @@
 {
-  "best_global_step": 80,
-  "best_metric": 0.2634693384170532,
-  "best_model_checkpoint": "/content/models/gemma_qlora_lmh_inst/checkpoint-80",
-  "epoch": 1.606060606060606,
+  "best_global_step": 100,
+  "best_metric": 0.0743941143155098,
+  "best_model_checkpoint": "/content/models/gemma_qlora_lmh_inst/checkpoint-100",
+  "epoch": 2.0,
   "eval_steps": 20,
-  "global_step": 80,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 2.385788154602051,
+      "entropy": 2.200686830282211,
       "epoch": 0.40404040404040403,
-      "grad_norm": 1.4871121644973755,
+      "grad_norm": 2.258388042449951,
       "learning_rate": 8.1e-06,
-      "loss": 1.4621,
-      "mean_token_accuracy": 0.7171875,
-      "num_tokens": 234453.0,
+      "loss": 0.1306,
+      "mean_token_accuracy": 0.9350446417927742,
+      "num_tokens": 237653.0,
       "step": 20
     },
     {
       "epoch": 0.40404040404040403,
-      "eval_entropy": 2.444101461997399,
-      "eval_loss": 0.38041621446609497,
-      "eval_mean_token_accuracy": 0.7514423086093023,
-      "eval_num_tokens": 234453.0,
-      "eval_runtime": 5.7269,
-      "eval_samples_per_second": 34.399,
-      "eval_steps_per_second": 2.27,
+      "eval_entropy": 2.201928762289194,
+      "eval_loss": 0.0838593915104866,
+      "eval_mean_token_accuracy": 0.9620879109089191,
+      "eval_num_tokens": 237653.0,
+      "eval_runtime": 5.7826,
+      "eval_samples_per_second": 34.068,
+      "eval_steps_per_second": 2.248,
       "step": 20
     },
     {
-      "entropy": 2.449769389629364,
+      "entropy": 2.203989064693451,
       "epoch": 0.8080808080808081,
-      "grad_norm": 2.987805128097534,
+      "grad_norm": 3.0667307376861572,
       "learning_rate": 6.1e-06,
-      "loss": 0.3307,
-      "mean_token_accuracy": 0.8234375,
-      "num_tokens": 467222.0,
+      "loss": 0.0797,
+      "mean_token_accuracy": 0.9622767880558968,
+      "num_tokens": 473622.0,
       "step": 40
     },
     {
       "epoch": 0.8080808080808081,
-      "eval_entropy": 2.4303444899045505,
-      "eval_loss": 0.29822829365730286,
-      "eval_mean_token_accuracy": 0.8235576932246869,
-      "eval_num_tokens": 467222.0,
-      "eval_runtime": 5.7388,
-      "eval_samples_per_second": 34.328,
-      "eval_steps_per_second": 2.265,
+      "eval_entropy": 2.2022548638857327,
+      "eval_loss": 0.07604048401117325,
+      "eval_mean_token_accuracy": 0.9581043995343722,
+      "eval_num_tokens": 473622.0,
+      "eval_runtime": 5.7858,
+      "eval_samples_per_second": 34.049,
+      "eval_steps_per_second": 2.247,
       "step": 40
     },
     {
-      "entropy": 2.4164060140267396,
+      "entropy": 2.2022441717294545,
       "epoch": 1.202020202020202,
-      "grad_norm": 13.107892990112305,
+      "grad_norm": 2.590899705886841,
       "learning_rate": 4.1e-06,
-      "loss": 0.2526,
-      "mean_token_accuracy": 0.8814102564102564,
-      "num_tokens": 692884.0,
+      "loss": 0.0656,
+      "mean_token_accuracy": 0.9664224661313571,
+      "num_tokens": 702354.0,
       "step": 60
     },
     {
       "epoch": 1.202020202020202,
-      "eval_entropy": 2.394979641987727,
-      "eval_loss": 0.2977635860443115,
-      "eval_mean_token_accuracy": 0.8701923076923077,
-      "eval_num_tokens": 692884.0,
-      "eval_runtime": 5.7325,
-      "eval_samples_per_second": 34.366,
-      "eval_steps_per_second": 2.268,
+      "eval_entropy": 2.1788861201359677,
+      "eval_loss": 0.08057427406311035,
+      "eval_mean_token_accuracy": 0.9635989024088933,
+      "eval_num_tokens": 702354.0,
+      "eval_runtime": 5.7897,
+      "eval_samples_per_second": 34.026,
+      "eval_steps_per_second": 2.245,
       "step": 60
     },
     {
-      "entropy": 2.4086058020591734,
+      "entropy": 2.19284747838974,
       "epoch": 1.606060606060606,
-      "grad_norm": 5.737631797790527,
+      "grad_norm": 2.1625924110412598,
       "learning_rate": 2.1000000000000002e-06,
-      "loss": 0.2291,
-      "mean_token_accuracy": 0.8953125,
-      "num_tokens": 924984.0,
+      "loss": 0.0579,
+      "mean_token_accuracy": 0.9754464238882065,
+      "num_tokens": 937654.0,
       "step": 80
     },
     {
       "epoch": 1.606060606060606,
-      "eval_entropy": 2.3859705924987793,
-      "eval_loss": 0.2634693384170532,
-      "eval_mean_token_accuracy": 0.8725961538461539,
-      "eval_num_tokens": 924984.0,
-      "eval_runtime": 5.7372,
-      "eval_samples_per_second": 34.337,
-      "eval_steps_per_second": 2.266,
+      "eval_entropy": 2.178425770539504,
+      "eval_loss": 0.07602041214704514,
+      "eval_mean_token_accuracy": 0.965659343279325,
+      "eval_num_tokens": 937654.0,
+      "eval_runtime": 5.786,
+      "eval_samples_per_second": 34.047,
+      "eval_steps_per_second": 2.247,
       "step": 80
+    },
+    {
+      "entropy": 2.1867658969683523,
+      "epoch": 2.0,
+      "grad_norm": 1.2462379932403564,
+      "learning_rate": 1.0000000000000001e-07,
+      "loss": 0.0512,
+      "mean_token_accuracy": 0.9764194106444334,
+      "num_tokens": 1165538.0,
+      "step": 100
+    },
+    {
+      "epoch": 2.0,
+      "eval_entropy": 2.181581442172711,
+      "eval_loss": 0.0743941143155098,
+      "eval_mean_token_accuracy": 0.963598906993866,
+      "eval_num_tokens": 1165538.0,
+      "eval_runtime": 5.7817,
+      "eval_samples_per_second": 34.073,
+      "eval_steps_per_second": 2.248,
+      "step": 100
     }
   ],
   "logging_steps": 20,
@@ -106,12 +127,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.5898111053751296e+16,
+  "total_flos": 1.9940993720561664e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c58200221e48a21d7c48e6e07381e8bf36c583d36e4d40fe8b11bfda35fca12
+oid sha256:7330fa1bb4c98d5e0bead323a45e059f49e51c7dd2d39c91a75a154d4bfb59b4
 size 6289