BRlkl committed on
Commit
e01bfef
·
verified ·
1 Parent(s): c166831

full-state checkpoint 20-percent (step 6)

Browse files
ckpt-20-percent/adapter_config.json CHANGED
@@ -33,12 +33,12 @@
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
- "o_proj",
37
  "v_proj",
38
- "k_proj",
39
- "down_proj",
40
  "up_proj",
41
  "gate_proj",
 
 
 
42
  "q_proj"
43
  ],
44
  "target_parameters": null,
 
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
 
36
  "v_proj",
 
 
37
  "up_proj",
38
  "gate_proj",
39
+ "down_proj",
40
+ "k_proj",
41
+ "o_proj",
42
  "q_proj"
43
  ],
44
  "target_parameters": null,
ckpt-20-percent/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:108de6ae76b7cb285b2ba5d7828387d0370816b7bc0e8b9a1e922d04b07a1951
3
  size 528550256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2692ca6b3f1e45eb31c44f3f5b6b02decccaf67435385d8535caa166aba7307
3
  size 528550256
ckpt-20-percent/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52f4cdc201c94751cc27170763d1edb98776638548a83e5129a2986354b4a2c1
3
  size 268963141
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b2c22631994608200195e064777a0752ca44363ad40960d7da06c99e9cca13
3
  size 268963141
ckpt-20-percent/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6ae49dde72e1f83c81e947321d7f4605cf45b9335bec36d6ffb9889e0c8e3d6
3
- size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541f92c590bae4d5cfb89d8c6b04fcc0f2a4bf1ee5372e1bcc7c8d74c716c322
3
+ size 14709
ckpt-20-percent/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c311973de2c4be864df6b0845df16ca43b7130a6c3bef77ec0c6d70a1176ccb
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c84b2ab0436ef853ff8339460c19975da522fd88180c4d104ca97170d785798
3
  size 1465
ckpt-20-percent/trainer_state.json CHANGED
@@ -2,391 +2,127 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.1935483870967742,
6
  "eval_steps": 500,
7
- "global_step": 18,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 417.31876068115236,
14
  "completions/clipped_ratio": 0.0,
15
- "completions/max_length": 1219.0,
16
- "completions/max_terminated_length": 1219.0,
17
- "completions/mean_length": 418.3187255859375,
18
- "completions/mean_terminated_length": 418.3187255859375,
19
- "completions/min_length": 155.0,
20
- "completions/min_terminated_length": 155.0,
21
- "epoch": 0.010752688172043012,
22
- "frac_reward_zero_std": 0.2750000059604645,
23
- "grad_norm": 0.1586368829011917,
24
  "kl": 0.0,
25
  "learning_rate": 0.0,
26
  "loss": 0.0,
27
- "num_tokens": 589935.0,
28
- "reward": 0.9395886063575745,
29
- "reward_std": 0.6425187587738037,
30
- "rewards/multidomain_reward_func/mean": 0.9395885467529297,
31
- "rewards/multidomain_reward_func/std": 1.1834609508514404,
32
  "step": 1
33
  },
34
  {
35
- "completion_length": 447.3875045776367,
36
  "completions/clipped_ratio": 0.0,
37
- "completions/max_length": 1311.0,
38
- "completions/max_terminated_length": 1311.0,
39
- "completions/mean_length": 448.3874816894531,
40
- "completions/mean_terminated_length": 448.3874816894531,
41
- "completions/min_length": 144.0,
42
- "completions/min_terminated_length": 144.0,
43
- "epoch": 0.021505376344086023,
44
- "frac_reward_zero_std": 0.2874999940395355,
45
- "grad_norm": 0.09217020124197006,
46
  "kl": 0.0,
47
- "learning_rate": 9.999999999999999e-06,
48
  "loss": 0.0,
49
- "num_tokens": 1202245.0,
50
- "reward": 1.0898984670639038,
51
- "reward_std": 0.6939569115638733,
52
- "rewards/multidomain_reward_func/mean": 1.0898985862731934,
53
- "rewards/multidomain_reward_func/std": 1.1529914140701294,
54
  "step": 2
55
  },
56
  {
57
- "completion_length": 444.58375854492186,
58
  "completions/clipped_ratio": 0.0,
59
- "completions/max_length": 1050.0,
60
- "completions/max_terminated_length": 1050.0,
61
- "completions/mean_length": 445.583740234375,
62
- "completions/mean_terminated_length": 445.583740234375,
63
- "completions/min_length": 144.0,
64
- "completions/min_terminated_length": 144.0,
65
- "epoch": 0.03225806451612903,
66
- "frac_reward_zero_std": 0.2874999940395355,
67
- "grad_norm": 0.09536850452423096,
68
  "kl": 0.0,
69
- "learning_rate": 1.9999999999999998e-05,
70
- "loss": 0.0,
71
- "num_tokens": 1822762.0,
72
- "reward": 1.2017430067062378,
73
- "reward_std": 0.6153996586799622,
74
- "rewards/multidomain_reward_func/mean": 1.2017431259155273,
75
- "rewards/multidomain_reward_func/std": 1.1850411891937256,
76
- "step": 3
77
- },
78
- {
79
- "completion_length": 440.8112548828125,
80
- "completions/clipped_ratio": 0.0,
81
- "completions/max_length": 1121.0,
82
- "completions/max_terminated_length": 1121.0,
83
- "completions/mean_length": 441.8112487792969,
84
- "completions/mean_terminated_length": 441.8112487792969,
85
- "completions/min_length": 113.0,
86
- "completions/min_terminated_length": 113.0,
87
- "epoch": 0.043010752688172046,
88
- "frac_reward_zero_std": 0.36249998211860657,
89
- "grad_norm": 0.14304442703723907,
90
- "kl": 0.0,
91
- "learning_rate": 3e-05,
92
- "loss": -0.0,
93
- "num_tokens": 2404141.0,
94
- "reward": 0.9961501955986023,
95
- "reward_std": 0.5547618865966797,
96
- "rewards/multidomain_reward_func/mean": 0.9961501955986023,
97
- "rewards/multidomain_reward_func/std": 1.2231403589248657,
98
- "step": 4
99
- },
100
- {
101
- "completion_length": 513.616259765625,
102
- "completions/clipped_ratio": 0.0,
103
- "completions/max_length": 1674.0,
104
- "completions/max_terminated_length": 1674.0,
105
- "completions/mean_length": 514.6162109375,
106
- "completions/mean_terminated_length": 514.6162109375,
107
- "completions/min_length": 153.0,
108
- "completions/min_terminated_length": 153.0,
109
- "epoch": 0.053763440860215055,
110
- "frac_reward_zero_std": 0.21249999105930328,
111
- "grad_norm": 0.0913516953587532,
112
- "kl": 0.0,
113
- "learning_rate": 3e-05,
114
- "loss": 0.0,
115
- "num_tokens": 3070424.0,
116
- "reward": 0.9858412146568298,
117
- "reward_std": 0.6862795948982239,
118
- "rewards/multidomain_reward_func/mean": 0.9858411550521851,
119
- "rewards/multidomain_reward_func/std": 1.2488752603530884,
120
- "step": 5
121
- },
122
- {
123
- "completion_length": 501.64500885009767,
124
- "completions/clipped_ratio": 0.0,
125
- "completions/max_length": 1360.0,
126
- "completions/max_terminated_length": 1360.0,
127
- "completions/mean_length": 502.6449890136719,
128
- "completions/mean_terminated_length": 502.6449890136719,
129
- "completions/min_length": 128.0,
130
- "completions/min_terminated_length": 128.0,
131
- "epoch": 0.06451612903225806,
132
- "frac_reward_zero_std": 0.29999998211860657,
133
- "grad_norm": 0.09110506623983383,
134
- "kl": 0.0,
135
- "learning_rate": 3e-05,
136
- "loss": 0.0,
137
- "num_tokens": 3715110.0,
138
- "reward": 1.0306060314178467,
139
- "reward_std": 0.6257905960083008,
140
- "rewards/multidomain_reward_func/mean": 1.0306060314178467,
141
- "rewards/multidomain_reward_func/std": 1.1789323091506958,
142
- "step": 6
143
- },
144
- {
145
- "completion_length": 520.2025085449219,
146
- "completions/clipped_ratio": 0.0,
147
- "completions/max_length": 1468.0,
148
- "completions/max_terminated_length": 1468.0,
149
- "completions/mean_length": 521.2025146484375,
150
- "completions/mean_terminated_length": 521.2025146484375,
151
- "completions/min_length": 191.0,
152
- "completions/min_terminated_length": 191.0,
153
- "epoch": 0.07526881720430108,
154
- "frac_reward_zero_std": 0.26249998807907104,
155
- "grad_norm": 0.08829142153263092,
156
- "kl": 0.0,
157
- "learning_rate": 3e-05,
158
  "loss": -0.0,
159
- "num_tokens": 4384052.0,
160
- "reward": 1.1119601726531982,
161
- "reward_std": 0.6622768044471741,
162
- "rewards/multidomain_reward_func/mean": 1.1119602918624878,
163
- "rewards/multidomain_reward_func/std": 1.1772183179855347,
164
- "step": 7
165
- },
166
- {
167
- "completion_length": 528.4200134277344,
168
- "completions/clipped_ratio": 0.0,
169
- "completions/max_length": 1742.0,
170
- "completions/max_terminated_length": 1742.0,
171
- "completions/mean_length": 529.4199829101562,
172
- "completions/mean_terminated_length": 529.4199829101562,
173
- "completions/min_length": 164.0,
174
- "completions/min_terminated_length": 164.0,
175
- "epoch": 0.08602150537634409,
176
- "frac_reward_zero_std": 0.21249999105930328,
177
- "grad_norm": 0.09405792504549026,
178
- "kl": 0.0,
179
- "learning_rate": 3e-05,
180
- "loss": 0.0,
181
- "num_tokens": 5063198.0,
182
- "reward": 0.8146641254425049,
183
- "reward_std": 1.0251129865646362,
184
- "rewards/multidomain_reward_func/mean": 0.8146640658378601,
185
- "rewards/multidomain_reward_func/std": 1.5042277574539185,
186
- "step": 8
187
- },
188
- {
189
- "completion_length": 530.1662612915039,
190
- "completions/clipped_ratio": 0.0,
191
- "completions/max_length": 1322.0,
192
- "completions/max_terminated_length": 1322.0,
193
- "completions/mean_length": 531.166259765625,
194
- "completions/mean_terminated_length": 531.166259765625,
195
- "completions/min_length": 182.0,
196
- "completions/min_terminated_length": 182.0,
197
- "epoch": 0.0967741935483871,
198
- "frac_reward_zero_std": 0.3125,
199
- "grad_norm": 0.08664832264184952,
200
- "kl": 0.0,
201
- "learning_rate": 3e-05,
202
- "loss": 0.0,
203
- "num_tokens": 5745791.0,
204
- "reward": 1.105246663093567,
205
- "reward_std": 0.5355943441390991,
206
- "rewards/multidomain_reward_func/mean": 1.1052465438842773,
207
- "rewards/multidomain_reward_func/std": 1.1541340351104736,
208
- "step": 9
209
  },
210
  {
211
- "completion_length": 529.9687576293945,
212
  "completions/clipped_ratio": 0.0,
213
- "completions/max_length": 1527.0,
214
- "completions/max_terminated_length": 1527.0,
215
- "completions/mean_length": 530.96875,
216
- "completions/mean_terminated_length": 530.96875,
217
- "completions/min_length": 146.0,
218
- "completions/min_terminated_length": 146.0,
219
- "epoch": 0.10752688172043011,
220
- "frac_reward_zero_std": 0.23749999701976776,
221
- "grad_norm": 0.09411562234163284,
222
  "kl": 0.0,
223
- "learning_rate": 3e-05,
224
  "loss": 0.0,
225
- "num_tokens": 6448686.0,
226
- "reward": 0.5127537846565247,
227
- "reward_std": 1.1222161054611206,
228
- "rewards/multidomain_reward_func/mean": 0.5127537846565247,
229
- "rewards/multidomain_reward_func/std": 1.6059224605560303,
230
- "step": 10
231
- },
232
- {
233
- "completion_length": 506.1475082397461,
234
- "completions/clipped_ratio": 0.0,
235
- "completions/max_length": 1700.0,
236
- "completions/max_terminated_length": 1700.0,
237
- "completions/mean_length": 507.1474914550781,
238
- "completions/mean_terminated_length": 507.1474914550781,
239
- "completions/min_length": 101.0,
240
- "completions/min_terminated_length": 101.0,
241
- "epoch": 0.11827956989247312,
242
- "frac_reward_zero_std": 0.3375000059604645,
243
- "grad_norm": 0.09998169541358948,
244
- "kl": 0.0,
245
- "learning_rate": 3e-05,
246
- "loss": -0.0,
247
- "num_tokens": 7132404.0,
248
- "reward": 0.799436628818512,
249
- "reward_std": 0.71962970495224,
250
- "rewards/multidomain_reward_func/mean": 0.799436628818512,
251
- "rewards/multidomain_reward_func/std": 1.4251511096954346,
252
- "step": 11
253
  },
254
  {
255
- "completion_length": 486.396257019043,
256
- "completions/clipped_ratio": 0.0012499999720603228,
257
  "completions/max_length": 2048.0,
258
- "completions/max_terminated_length": 1110.0,
259
- "completions/mean_length": 487.3949890136719,
260
- "completions/mean_terminated_length": 485.4418029785156,
261
- "completions/min_length": 165.0,
262
- "completions/min_terminated_length": 165.0,
263
- "epoch": 0.12903225806451613,
264
- "frac_reward_zero_std": 0.29999998211860657,
265
- "grad_norm": 0.09192386269569397,
266
- "kl": 0.0,
267
- "learning_rate": 3e-05,
268
- "loss": 0.0,
269
- "num_tokens": 7759800.0,
270
- "reward": 1.1488876342773438,
271
- "reward_std": 0.5708559155464172,
272
- "rewards/multidomain_reward_func/mean": 1.1488877534866333,
273
- "rewards/multidomain_reward_func/std": 1.160998821258545,
274
- "step": 12
275
- },
276
- {
277
- "completion_length": 476.3787567138672,
278
- "completions/clipped_ratio": 0.0,
279
- "completions/max_length": 1302.0,
280
- "completions/max_terminated_length": 1302.0,
281
- "completions/mean_length": 477.3787536621094,
282
- "completions/mean_terminated_length": 477.3787536621094,
283
- "completions/min_length": 121.0,
284
- "completions/min_terminated_length": 121.0,
285
- "epoch": 0.13978494623655913,
286
- "frac_reward_zero_std": 0.4124999940395355,
287
- "grad_norm": 0.09517515450716019,
288
- "kl": 0.0,
289
- "learning_rate": 3e-05,
290
- "loss": 0.0,
291
- "num_tokens": 8412593.0,
292
- "reward": 1.0822017192840576,
293
- "reward_std": 0.47241905331611633,
294
- "rewards/multidomain_reward_func/mean": 1.082201600074768,
295
- "rewards/multidomain_reward_func/std": 1.0703767538070679,
296
- "step": 13
297
- },
298
- {
299
- "completion_length": 461.2287567138672,
300
- "completions/clipped_ratio": 0.0,
301
- "completions/max_length": 1070.0,
302
- "completions/max_terminated_length": 1070.0,
303
- "completions/mean_length": 462.2287292480469,
304
- "completions/mean_terminated_length": 462.2287292480469,
305
- "completions/min_length": 132.0,
306
- "completions/min_terminated_length": 132.0,
307
- "epoch": 0.15053763440860216,
308
- "frac_reward_zero_std": 0.4124999940395355,
309
- "grad_norm": 0.08922425657510757,
310
- "kl": 0.0,
311
- "learning_rate": 3e-05,
312
  "loss": -0.0,
313
- "num_tokens": 9027756.0,
314
- "reward": 1.1727960109710693,
315
- "reward_std": 0.5148429274559021,
316
- "rewards/multidomain_reward_func/mean": 1.1727960109710693,
317
- "rewards/multidomain_reward_func/std": 1.1102207899093628,
318
- "step": 14
319
- },
320
- {
321
- "completion_length": 447.0050079345703,
322
- "completions/clipped_ratio": 0.0,
323
- "completions/max_length": 1175.0,
324
- "completions/max_terminated_length": 1175.0,
325
- "completions/mean_length": 448.0050048828125,
326
- "completions/mean_terminated_length": 448.0050048828125,
327
- "completions/min_length": 125.0,
328
- "completions/min_terminated_length": 125.0,
329
- "epoch": 0.16129032258064516,
330
- "frac_reward_zero_std": 0.25,
331
- "grad_norm": 0.09966976195573807,
332
- "kl": 0.0,
333
- "learning_rate": 3e-05,
334
- "loss": -0.0,
335
- "num_tokens": 9647350.0,
336
- "reward": 1.1358871459960938,
337
- "reward_std": 0.6162644028663635,
338
- "rewards/multidomain_reward_func/mean": 1.1358871459960938,
339
- "rewards/multidomain_reward_func/std": 1.0770015716552734,
340
- "step": 15
341
- },
342
- {
343
- "completion_length": 453.9125045776367,
344
- "completions/clipped_ratio": 0.0,
345
- "completions/max_length": 1108.0,
346
- "completions/max_terminated_length": 1108.0,
347
- "completions/mean_length": 454.9124755859375,
348
- "completions/mean_terminated_length": 454.9124755859375,
349
- "completions/min_length": 118.0,
350
- "completions/min_terminated_length": 118.0,
351
- "epoch": 0.17204301075268819,
352
- "frac_reward_zero_std": 0.26249998807907104,
353
- "grad_norm": 0.09356285631656647,
354
- "kl": 0.0,
355
- "learning_rate": 3e-05,
356
- "loss": 0.0,
357
- "num_tokens": 10275890.0,
358
- "reward": 1.1041463613510132,
359
- "reward_std": 0.595342755317688,
360
- "rewards/multidomain_reward_func/mean": 1.1041463613510132,
361
- "rewards/multidomain_reward_func/std": 1.1405205726623535,
362
- "step": 16
363
- },
364
- {
365
- "completion_length": 448.9925048828125,
366
- "completions/clipped_ratio": 0.0,
367
- "completions/max_length": 1191.0,
368
- "completions/max_terminated_length": 1191.0,
369
- "completions/mean_length": 449.99249267578125,
370
- "completions/mean_terminated_length": 449.99249267578125,
371
- "completions/min_length": 142.0,
372
- "completions/min_terminated_length": 142.0,
373
- "epoch": 0.1827956989247312,
374
- "frac_reward_zero_std": 0.22499999403953552,
375
- "grad_norm": 0.10382834821939468,
376
- "kl": 0.0,
377
- "learning_rate": 3e-05,
378
- "loss": 0.0,
379
- "num_tokens": 10890984.0,
380
- "reward": 1.1085461378097534,
381
- "reward_std": 0.6585391163825989,
382
- "rewards/multidomain_reward_func/mean": 1.1085461378097534,
383
- "rewards/multidomain_reward_func/std": 1.2252981662750244,
384
- "step": 17
385
  }
386
  ],
387
  "logging_steps": 1,
388
- "max_steps": 93,
389
- "num_input_tokens_seen": 11513098,
390
  "num_train_epochs": 1,
391
  "save_steps": 250,
392
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.12,
6
  "eval_steps": 500,
7
+ "global_step": 6,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 512.7475112915039,
14
  "completions/clipped_ratio": 0.0,
15
+ "completions/max_length": 1610.0,
16
+ "completions/max_terminated_length": 1610.0,
17
+ "completions/mean_length": 513.7474975585938,
18
+ "completions/mean_terminated_length": 513.7474975585938,
19
+ "completions/min_length": 158.0,
20
+ "completions/min_terminated_length": 158.0,
21
+ "epoch": 0.02,
22
+ "frac_reward_zero_std": 0.5,
23
+ "grad_norm": 0.05372562259435654,
24
  "kl": 0.0,
25
  "learning_rate": 0.0,
26
  "loss": 0.0,
27
+ "num_tokens": 793218.0,
28
+ "reward": 0.7917708158493042,
29
+ "reward_std": 0.5366402268409729,
30
+ "rewards/multidomain_reward_func/mean": 0.791770875453949,
31
+ "rewards/multidomain_reward_func/std": 0.9891355037689209,
32
  "step": 1
33
  },
34
  {
35
+ "completion_length": 457.9475067138672,
36
  "completions/clipped_ratio": 0.0,
37
+ "completions/max_length": 1562.0,
38
+ "completions/max_terminated_length": 1562.0,
39
+ "completions/mean_length": 458.9474792480469,
40
+ "completions/mean_terminated_length": 458.9474792480469,
41
+ "completions/min_length": 162.0,
42
+ "completions/min_terminated_length": 162.0,
43
+ "epoch": 0.04,
44
+ "frac_reward_zero_std": 0.5625,
45
+ "grad_norm": 0.04992515221238136,
46
  "kl": 0.0,
47
+ "learning_rate": 2e-05,
48
  "loss": 0.0,
49
+ "num_tokens": 1486606.0,
50
+ "reward": 0.9500247836112976,
51
+ "reward_std": 0.3836401402950287,
52
+ "rewards/multidomain_reward_func/mean": 0.9500247836112976,
53
+ "rewards/multidomain_reward_func/std": 0.8352798819541931,
54
  "step": 2
55
  },
56
  {
57
+ "completion_length": 483.39625549316406,
58
  "completions/clipped_ratio": 0.0,
59
+ "completions/max_length": 1802.0,
60
+ "completions/max_terminated_length": 1802.0,
61
+ "completions/mean_length": 484.396240234375,
62
+ "completions/mean_terminated_length": 484.396240234375,
63
+ "completions/min_length": 154.0,
64
+ "completions/min_terminated_length": 154.0,
65
+ "epoch": 0.06,
66
+ "frac_reward_zero_std": 0.637499988079071,
67
+ "grad_norm": 0.0435759611427784,
68
  "kl": 0.0,
69
+ "learning_rate": 4e-05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  "loss": -0.0,
71
+ "num_tokens": 2215423.0,
72
+ "reward": 0.9322639107704163,
73
+ "reward_std": 0.3358458876609802,
74
+ "rewards/multidomain_reward_func/mean": 0.932263970375061,
75
+ "rewards/multidomain_reward_func/std": 0.8111148476600647,
76
+ "step": 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  },
78
  {
79
+ "completion_length": 559.6600112915039,
80
  "completions/clipped_ratio": 0.0,
81
+ "completions/max_length": 1764.0,
82
+ "completions/max_terminated_length": 1764.0,
83
+ "completions/mean_length": 560.6599731445312,
84
+ "completions/mean_terminated_length": 560.6599731445312,
85
+ "completions/min_length": 184.0,
86
+ "completions/min_terminated_length": 184.0,
87
+ "epoch": 0.08,
88
+ "frac_reward_zero_std": 0.5374999642372131,
89
+ "grad_norm": 0.06077693775296211,
90
  "kl": 0.0,
91
+ "learning_rate": 4e-05,
92
  "loss": 0.0,
93
+ "num_tokens": 3023491.0,
94
+ "reward": 0.9280069470405579,
95
+ "reward_std": 0.4681345224380493,
96
+ "rewards/multidomain_reward_func/mean": 0.9280068874359131,
97
+ "rewards/multidomain_reward_func/std": 0.956528902053833,
98
+ "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  },
100
  {
101
+ "completion_length": 600.337516784668,
102
+ "completions/clipped_ratio": 0.0037499999161809683,
103
  "completions/max_length": 2048.0,
104
+ "completions/max_terminated_length": 1804.0,
105
+ "completions/mean_length": 601.333740234375,
106
+ "completions/mean_terminated_length": 595.8883056640625,
107
+ "completions/min_length": 200.0,
108
+ "completions/min_terminated_length": 200.0,
109
+ "epoch": 0.1,
110
+ "frac_reward_zero_std": 0.5,
111
+ "grad_norm": 0.20458438992500305,
112
+ "kl": 0.0,
113
+ "learning_rate": 4e-05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  "loss": -0.0,
115
+ "num_tokens": 3866158.0,
116
+ "reward": 0.6643861532211304,
117
+ "reward_std": 0.5354571342468262,
118
+ "rewards/multidomain_reward_func/mean": 0.6643861532211304,
119
+ "rewards/multidomain_reward_func/std": 1.127986192703247,
120
+ "step": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  }
122
  ],
123
  "logging_steps": 1,
124
+ "max_steps": 50,
125
+ "num_input_tokens_seen": 4737778,
126
  "num_train_epochs": 1,
127
  "save_steps": 250,
128
  "stateful_callbacks": {
ckpt-20-percent/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b48077003af5f11ced05cb103bb98c595f8752173ade63e67e1251ccaecb2486
3
  size 7505
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eff5fd1f692ab02145ab58306b563dfe1d660a1e051bd141b45e6e2aef660fc
3
  size 7505