k0ry committed on
Commit
3892aba
·
verified ·
1 Parent(s): 47e7021

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +445 -335
config.json CHANGED
@@ -1,336 +1,446 @@
1
- {
2
- "model": "DAIR-Group/HTR-ConvText",
3
- "generated_at": "2025-12-16T09:19:08.351480",
4
- "artifacts": [
5
- {
6
- "name": "iam.pth",
7
- "path": "checkpoints\\iam.pth",
8
- "exists": true,
9
- "size_bytes": 529444565,
10
- "size_mb": 504.918,
11
- "sha256": "9b2aba60e18d922a615562e23f4079cef4ba4bd559a38d1d6e53de8d7495c122",
12
- "modified": "2025-12-16T09:16:22.604113",
13
- "created": "2025-12-16T09:16:22.379708",
14
- "dataset": "iam",
15
- "best_cer": 0.02582327045571924,
16
- "best_wer": 0.08769689229459345,
17
- "nb_iter": 54000,
18
- "args": {
19
- "out_dir": "./output",
20
- "exp_name": "iam",
21
- "seed": 123,
22
- "use_wandb": true,
23
- "wandb_project": "iam",
24
- "print_iter": 100,
25
- "eval_iter": 1000,
26
- "dataset": "iam",
27
- "data_path": "/root/ws/dataset/iam/lines/",
28
- "train_data_list": "/root/ws/dataset/iam/train.ln",
29
- "val_data_list": "/root/ws/dataset/iam/val.ln",
30
- "test_data_list": "/root/ws/dataset/iam/test.ln",
31
- "nb_cls": 80,
32
- "num_workers": 4,
33
- "img_size": [
34
- 512,
35
- 64
36
- ],
37
- "patch_size": [
38
- 4,
39
- 32
40
- ],
41
- "train_bs": 128,
42
- "accum_steps": 1,
43
- "val_bs": 16,
44
- "total_iter": 100001,
45
- "warm_up_iter": 1000,
46
- "max_lr": 0.001,
47
- "weight_decay": 0.05,
48
- "ema_decay": 0.9999,
49
- "alpha": 1.0,
50
- "model_type": "ctc",
51
- "cos_temp": 8,
52
- "proj": 8.0,
53
- "attn_mask_ratio": 0.1,
54
- "use_masking": false,
55
- "mask_ratio": 0.4,
56
- "max_span_length": 8,
57
- "spacing": 0,
58
- "r_rand": 0.6,
59
- "r_block": 0.6,
60
- "block_span": 4,
61
- "r_span": 0.4,
62
- "max_span": 8,
63
- "dpi_min_factor": 0.5,
64
- "dpi_max_factor": 1.5,
65
- "perspective_low": 0.0,
66
- "perspective_high": 0.4,
67
- "elastic_distortion_min_kernel_size": 3,
68
- "elastic_distortion_max_kernel_size": 3,
69
- "elastic_distortion_max_magnitude": 20,
70
- "elastic_distortion_min_alpha": 0.5,
71
- "elastic_distortion_max_alpha": 1,
72
- "elastic_distortion_min_sigma": 1,
73
- "elastic_distortion_max_sigma": 10,
74
- "dila_ero_max_kernel": 2,
75
- "dila_ero_iter": 1,
76
- "jitter_contrast": 0.4,
77
- "jitter_brightness": 0.4,
78
- "jitter_saturation": 0.4,
79
- "jitter_hue": 0.2,
80
- "blur_min_kernel": 3,
81
- "blur_max_kernel": 5,
82
- "blur_min_sigma": 3,
83
- "blur_max_sigma": 5,
84
- "sharpen_min_alpha": 0,
85
- "sharpen_max_alpha": 1,
86
- "sharpen_min_strength": 0,
87
- "sharpen_max_strength": 1,
88
- "zoom_min_h": 0.8,
89
- "zoom_max_h": 1,
90
- "zoom_min_w": 0.99,
91
- "zoom_max_w": 1,
92
- "proba": 0.5,
93
- "decoder_layers": 6,
94
- "decoder_heads": 8,
95
- "max_seq_len": 256,
96
- "label_smoothing": 0.1,
97
- "beam_size": 5,
98
- "generation_method": "nucleus",
99
- "generation_temperature": 0.7,
100
- "repetition_penalty": 1.3,
101
- "top_p": 0.9,
102
- "tcm_enable": true,
103
- "tcm_lambda": 1.0,
104
- "ctc_lambda": 0.1,
105
- "tcm_sub_len": 5,
106
- "tcm_warmup_iters": 0,
107
- "resume": null,
108
- "load_model": null,
109
- "load_encoder_only": false,
110
- "strict_loading": true,
111
- "save_dir": "./output/iam"
112
- },
113
- "load_error": null
114
- },
115
- {
116
- "name": "hands-vnondb.pth",
117
- "path": "checkpoints\\hands-vnondb.pth",
118
- "exists": true,
119
- "size_bytes": 530033301,
120
- "size_mb": 505.479,
121
- "sha256": "7208c41f3e58cf02bc228e29aa669836393bb1c08ae33a077e42dfa119645a61",
122
- "modified": "2025-12-16T09:16:19.392341",
123
- "created": "2025-12-16T09:16:19.160248",
124
- "dataset": "hands-vnondb",
125
- "best_cer": 0.035116817411641174,
126
- "best_wer": 0.0871608236974248,
127
- "nb_iter": 65000,
128
- "args": {
129
- "out_dir": "./output",
130
- "exp_name": "vnondb",
131
- "seed": 123,
132
- "use_wandb": true,
133
- "wandb_project": "vnondb",
134
- "print_iter": 100,
135
- "eval_iter": 1000,
136
- "dataset": "vnondb",
137
- "data_path": "/root/ws/dataset/vnondb/lines/",
138
- "train_data_list": "/root/ws/dataset/vnondb/train.ln",
139
- "val_data_list": "/root/ws/dataset/vnondb/valid.ln",
140
- "test_data_list": "/root/ws/dataset/vnondb/test.ln",
141
- "nb_cls": 162,
142
- "num_workers": 4,
143
- "img_size": [
144
- 512,
145
- 64
146
- ],
147
- "patch_size": [
148
- 4,
149
- 32
150
- ],
151
- "train_bs": 128,
152
- "accum_steps": 1,
153
- "val_bs": 16,
154
- "total_iter": 100001,
155
- "warm_up_iter": 1000,
156
- "max_lr": 0.001,
157
- "weight_decay": 0.05,
158
- "ema_decay": 0.9999,
159
- "alpha": 1.0,
160
- "model_type": "ctc",
161
- "cos_temp": 8,
162
- "proj": 8.0,
163
- "attn_mask_ratio": 0.1,
164
- "use_masking": false,
165
- "mask_ratio": 0.4,
166
- "max_span_length": 8,
167
- "spacing": 0,
168
- "r_rand": 0.6,
169
- "r_block": 0.6,
170
- "block_span": 4,
171
- "r_span": 0.4,
172
- "max_span": 8,
173
- "dpi_min_factor": 0.5,
174
- "dpi_max_factor": 1.5,
175
- "perspective_low": 0.0,
176
- "perspective_high": 0.4,
177
- "elastic_distortion_min_kernel_size": 3,
178
- "elastic_distortion_max_kernel_size": 3,
179
- "elastic_distortion_max_magnitude": 20,
180
- "elastic_distortion_min_alpha": 0.5,
181
- "elastic_distortion_max_alpha": 1,
182
- "elastic_distortion_min_sigma": 1,
183
- "elastic_distortion_max_sigma": 10,
184
- "dila_ero_max_kernel": 2,
185
- "dila_ero_iter": 1,
186
- "jitter_contrast": 0.4,
187
- "jitter_brightness": 0.4,
188
- "jitter_saturation": 0.4,
189
- "jitter_hue": 0.2,
190
- "blur_min_kernel": 3,
191
- "blur_max_kernel": 5,
192
- "blur_min_sigma": 3,
193
- "blur_max_sigma": 5,
194
- "sharpen_min_alpha": 0,
195
- "sharpen_max_alpha": 1,
196
- "sharpen_min_strength": 0,
197
- "sharpen_max_strength": 1,
198
- "zoom_min_h": 0.8,
199
- "zoom_max_h": 1,
200
- "zoom_min_w": 0.99,
201
- "zoom_max_w": 1,
202
- "proba": 0.5,
203
- "decoder_layers": 6,
204
- "decoder_heads": 8,
205
- "max_seq_len": 256,
206
- "label_smoothing": 0.1,
207
- "beam_size": 5,
208
- "generation_method": "nucleus",
209
- "generation_temperature": 0.7,
210
- "repetition_penalty": 1.3,
211
- "top_p": 0.9,
212
- "tcm_enable": true,
213
- "tcm_lambda": 1.0,
214
- "ctc_lambda": 0.1,
215
- "tcm_sub_len": 5,
216
- "tcm_warmup_iters": 0,
217
- "resume": null,
218
- "load_model": null,
219
- "load_encoder_only": false,
220
- "strict_loading": true,
221
- "save_dir": "./output/vnondb"
222
- },
223
- "load_error": null
224
- },
225
- {
226
- "name": "read.pth",
227
- "path": "checkpoints\\read.pth",
228
- "exists": true,
229
- "size_bytes": 529516245,
230
- "size_mb": 504.986,
231
- "sha256": "01d3552f42356eecb1625dfc8692306f49efab1c7a3e116d819c9cc42d317d1f",
232
- "modified": "2025-12-16T09:16:17.590814",
233
- "created": "2025-12-16T09:16:17.347489",
234
- "dataset": "read",
235
- "best_cer": 0.039916476472587774,
236
- "best_wer": 0.17772215269086358,
237
- "nb_iter": 40000,
238
- "args": {
239
- "out_dir": "./output",
240
- "exp_name": "read",
241
- "seed": 123,
242
- "use_wandb": true,
243
- "wandb_project": "read2016",
244
- "print_iter": 100,
245
- "eval_iter": 1000,
246
- "dataset": "read2016",
247
- "data_path": "/root/ws/dataset/read/lines/",
248
- "train_data_list": "/root/ws/dataset/read/train.ln",
249
- "val_data_list": "/root/ws/dataset/read/val.ln",
250
- "test_data_list": "/root/ws/dataset/read/test.ln",
251
- "nb_cls": 90,
252
- "num_workers": 4,
253
- "img_size": [
254
- 512,
255
- 64
256
- ],
257
- "patch_size": [
258
- 4,
259
- 32
260
- ],
261
- "train_bs": 128,
262
- "accum_steps": 1,
263
- "val_bs": 16,
264
- "total_iter": 100001,
265
- "warm_up_iter": 1000,
266
- "max_lr": 0.001,
267
- "weight_decay": 0.05,
268
- "ema_decay": 0.9999,
269
- "alpha": 1.0,
270
- "model_type": "ctc",
271
- "cos_temp": 8,
272
- "proj": 8.0,
273
- "attn_mask_ratio": 0.1,
274
- "use_masking": false,
275
- "mask_ratio": 0.4,
276
- "max_span_length": 8,
277
- "spacing": 0,
278
- "r_rand": 0.6,
279
- "r_block": 0.6,
280
- "block_span": 4,
281
- "r_span": 0.4,
282
- "max_span": 8,
283
- "dpi_min_factor": 0.5,
284
- "dpi_max_factor": 1.5,
285
- "perspective_low": 0.0,
286
- "perspective_high": 0.4,
287
- "elastic_distortion_min_kernel_size": 3,
288
- "elastic_distortion_max_kernel_size": 3,
289
- "elastic_distortion_max_magnitude": 20,
290
- "elastic_distortion_min_alpha": 0.5,
291
- "elastic_distortion_max_alpha": 1,
292
- "elastic_distortion_min_sigma": 1,
293
- "elastic_distortion_max_sigma": 10,
294
- "dila_ero_max_kernel": 2,
295
- "dila_ero_iter": 1,
296
- "jitter_contrast": 0.4,
297
- "jitter_brightness": 0.4,
298
- "jitter_saturation": 0.4,
299
- "jitter_hue": 0.2,
300
- "blur_min_kernel": 3,
301
- "blur_max_kernel": 5,
302
- "blur_min_sigma": 3,
303
- "blur_max_sigma": 5,
304
- "sharpen_min_alpha": 0,
305
- "sharpen_max_alpha": 1,
306
- "sharpen_min_strength": 0,
307
- "sharpen_max_strength": 1,
308
- "zoom_min_h": 0.8,
309
- "zoom_max_h": 1,
310
- "zoom_min_w": 0.99,
311
- "zoom_max_w": 1,
312
- "proba": 0.5,
313
- "decoder_layers": 6,
314
- "decoder_heads": 8,
315
- "max_seq_len": 256,
316
- "label_smoothing": 0.1,
317
- "beam_size": 5,
318
- "generation_method": "nucleus",
319
- "generation_temperature": 0.7,
320
- "repetition_penalty": 1.3,
321
- "top_p": 0.9,
322
- "tcm_enable": true,
323
- "tcm_lambda": 1.0,
324
- "ctc_lambda": 0.1,
325
- "tcm_sub_len": 5,
326
- "tcm_warmup_iters": 0,
327
- "resume": null,
328
- "load_model": null,
329
- "load_encoder_only": false,
330
- "strict_loading": true,
331
- "save_dir": "./output/read"
332
- },
333
- "load_error": null
334
- }
335
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  }
 
1
+ {
2
+ "model": "DAIR-Group/HTR-ConvText",
3
+ "generated_at": "2025-12-17T08:57:42.912579",
4
+ "artifacts": [
5
+ {
6
+ "name": "iam.pth",
7
+ "path": "checkpoints\\iam.pth",
8
+ "exists": true,
9
+ "size_bytes": 529444565,
10
+ "size_mb": 504.918,
11
+ "sha256": "9b2aba60e18d922a615562e23f4079cef4ba4bd559a38d1d6e53de8d7495c122",
12
+ "modified": "2025-12-16T07:24:08.462236",
13
+ "created": "2025-12-16T08:20:24.508913",
14
+ "dataset": "iam",
15
+ "best_cer": 0.02582327045571924,
16
+ "best_wer": 0.08769689229459345,
17
+ "nb_iter": 54000,
18
+ "args": {
19
+ "out_dir": "./output",
20
+ "exp_name": "iam",
21
+ "seed": 123,
22
+ "use_wandb": true,
23
+ "wandb_project": "iam",
24
+ "print_iter": 100,
25
+ "eval_iter": 1000,
26
+ "dataset": "iam",
27
+ "data_path": "/root/ws/dataset/iam/lines/",
28
+ "train_data_list": "/root/ws/dataset/iam/train.ln",
29
+ "val_data_list": "/root/ws/dataset/iam/val.ln",
30
+ "test_data_list": "/root/ws/dataset/iam/test.ln",
31
+ "nb_cls": 80,
32
+ "num_workers": 4,
33
+ "img_size": [
34
+ 512,
35
+ 64
36
+ ],
37
+ "patch_size": [
38
+ 4,
39
+ 32
40
+ ],
41
+ "train_bs": 128,
42
+ "accum_steps": 1,
43
+ "val_bs": 16,
44
+ "total_iter": 100001,
45
+ "warm_up_iter": 1000,
46
+ "max_lr": 0.001,
47
+ "weight_decay": 0.05,
48
+ "ema_decay": 0.9999,
49
+ "alpha": 1.0,
50
+ "model_type": "ctc",
51
+ "cos_temp": 8,
52
+ "proj": 8.0,
53
+ "attn_mask_ratio": 0.1,
54
+ "use_masking": false,
55
+ "mask_ratio": 0.4,
56
+ "max_span_length": 8,
57
+ "spacing": 0,
58
+ "r_rand": 0.6,
59
+ "r_block": 0.6,
60
+ "block_span": 4,
61
+ "r_span": 0.4,
62
+ "max_span": 8,
63
+ "dpi_min_factor": 0.5,
64
+ "dpi_max_factor": 1.5,
65
+ "perspective_low": 0.0,
66
+ "perspective_high": 0.4,
67
+ "elastic_distortion_min_kernel_size": 3,
68
+ "elastic_distortion_max_kernel_size": 3,
69
+ "elastic_distortion_max_magnitude": 20,
70
+ "elastic_distortion_min_alpha": 0.5,
71
+ "elastic_distortion_max_alpha": 1,
72
+ "elastic_distortion_min_sigma": 1,
73
+ "elastic_distortion_max_sigma": 10,
74
+ "dila_ero_max_kernel": 2,
75
+ "dila_ero_iter": 1,
76
+ "jitter_contrast": 0.4,
77
+ "jitter_brightness": 0.4,
78
+ "jitter_saturation": 0.4,
79
+ "jitter_hue": 0.2,
80
+ "blur_min_kernel": 3,
81
+ "blur_max_kernel": 5,
82
+ "blur_min_sigma": 3,
83
+ "blur_max_sigma": 5,
84
+ "sharpen_min_alpha": 0,
85
+ "sharpen_max_alpha": 1,
86
+ "sharpen_min_strength": 0,
87
+ "sharpen_max_strength": 1,
88
+ "zoom_min_h": 0.8,
89
+ "zoom_max_h": 1,
90
+ "zoom_min_w": 0.99,
91
+ "zoom_max_w": 1,
92
+ "proba": 0.5,
93
+ "decoder_layers": 6,
94
+ "decoder_heads": 8,
95
+ "max_seq_len": 256,
96
+ "label_smoothing": 0.1,
97
+ "beam_size": 5,
98
+ "generation_method": "nucleus",
99
+ "generation_temperature": 0.7,
100
+ "repetition_penalty": 1.3,
101
+ "top_p": 0.9,
102
+ "tcm_enable": true,
103
+ "tcm_lambda": 1.0,
104
+ "ctc_lambda": 0.1,
105
+ "tcm_sub_len": 5,
106
+ "tcm_warmup_iters": 0,
107
+ "resume": null,
108
+ "load_model": null,
109
+ "load_encoder_only": false,
110
+ "strict_loading": true,
111
+ "save_dir": "./output/iam"
112
+ },
113
+ "load_error": null
114
+ },
115
+ {
116
+ "name": "hands-vnondb.pth",
117
+ "path": "checkpoints\\hands-vnondb.pth",
118
+ "exists": true,
119
+ "size_bytes": 530033301,
120
+ "size_mb": 505.479,
121
+ "sha256": "7208c41f3e58cf02bc228e29aa669836393bb1c08ae33a077e42dfa119645a61",
122
+ "modified": "2025-12-16T07:25:45.907545",
123
+ "created": "2025-12-16T08:20:25.225462",
124
+ "dataset": "hands-vnondb",
125
+ "best_cer": 0.035116817411641174,
126
+ "best_wer": 0.0871608236974248,
127
+ "nb_iter": 65000,
128
+ "args": {
129
+ "out_dir": "./output",
130
+ "exp_name": "vnondb",
131
+ "seed": 123,
132
+ "use_wandb": true,
133
+ "wandb_project": "vnondb",
134
+ "print_iter": 100,
135
+ "eval_iter": 1000,
136
+ "dataset": "vnondb",
137
+ "data_path": "/root/ws/dataset/vnondb/lines/",
138
+ "train_data_list": "/root/ws/dataset/vnondb/train.ln",
139
+ "val_data_list": "/root/ws/dataset/vnondb/valid.ln",
140
+ "test_data_list": "/root/ws/dataset/vnondb/test.ln",
141
+ "nb_cls": 162,
142
+ "num_workers": 4,
143
+ "img_size": [
144
+ 512,
145
+ 64
146
+ ],
147
+ "patch_size": [
148
+ 4,
149
+ 32
150
+ ],
151
+ "train_bs": 128,
152
+ "accum_steps": 1,
153
+ "val_bs": 16,
154
+ "total_iter": 100001,
155
+ "warm_up_iter": 1000,
156
+ "max_lr": 0.001,
157
+ "weight_decay": 0.05,
158
+ "ema_decay": 0.9999,
159
+ "alpha": 1.0,
160
+ "model_type": "ctc",
161
+ "cos_temp": 8,
162
+ "proj": 8.0,
163
+ "attn_mask_ratio": 0.1,
164
+ "use_masking": false,
165
+ "mask_ratio": 0.4,
166
+ "max_span_length": 8,
167
+ "spacing": 0,
168
+ "r_rand": 0.6,
169
+ "r_block": 0.6,
170
+ "block_span": 4,
171
+ "r_span": 0.4,
172
+ "max_span": 8,
173
+ "dpi_min_factor": 0.5,
174
+ "dpi_max_factor": 1.5,
175
+ "perspective_low": 0.0,
176
+ "perspective_high": 0.4,
177
+ "elastic_distortion_min_kernel_size": 3,
178
+ "elastic_distortion_max_kernel_size": 3,
179
+ "elastic_distortion_max_magnitude": 20,
180
+ "elastic_distortion_min_alpha": 0.5,
181
+ "elastic_distortion_max_alpha": 1,
182
+ "elastic_distortion_min_sigma": 1,
183
+ "elastic_distortion_max_sigma": 10,
184
+ "dila_ero_max_kernel": 2,
185
+ "dila_ero_iter": 1,
186
+ "jitter_contrast": 0.4,
187
+ "jitter_brightness": 0.4,
188
+ "jitter_saturation": 0.4,
189
+ "jitter_hue": 0.2,
190
+ "blur_min_kernel": 3,
191
+ "blur_max_kernel": 5,
192
+ "blur_min_sigma": 3,
193
+ "blur_max_sigma": 5,
194
+ "sharpen_min_alpha": 0,
195
+ "sharpen_max_alpha": 1,
196
+ "sharpen_min_strength": 0,
197
+ "sharpen_max_strength": 1,
198
+ "zoom_min_h": 0.8,
199
+ "zoom_max_h": 1,
200
+ "zoom_min_w": 0.99,
201
+ "zoom_max_w": 1,
202
+ "proba": 0.5,
203
+ "decoder_layers": 6,
204
+ "decoder_heads": 8,
205
+ "max_seq_len": 256,
206
+ "label_smoothing": 0.1,
207
+ "beam_size": 5,
208
+ "generation_method": "nucleus",
209
+ "generation_temperature": 0.7,
210
+ "repetition_penalty": 1.3,
211
+ "top_p": 0.9,
212
+ "tcm_enable": true,
213
+ "tcm_lambda": 1.0,
214
+ "ctc_lambda": 0.1,
215
+ "tcm_sub_len": 5,
216
+ "tcm_warmup_iters": 0,
217
+ "resume": null,
218
+ "load_model": null,
219
+ "load_encoder_only": false,
220
+ "strict_loading": true,
221
+ "save_dir": "./output/vnondb"
222
+ },
223
+ "load_error": null
224
+ },
225
+ {
226
+ "name": "read.pth",
227
+ "path": "checkpoints\\read.pth",
228
+ "exists": true,
229
+ "size_bytes": 529516245,
230
+ "size_mb": 504.986,
231
+ "sha256": "01d3552f42356eecb1625dfc8692306f49efab1c7a3e116d819c9cc42d317d1f",
232
+ "modified": "2025-12-16T07:24:15.937379",
233
+ "created": "2025-12-16T08:20:24.959519",
234
+ "dataset": "read",
235
+ "best_cer": 0.039916476472587774,
236
+ "best_wer": 0.17772215269086358,
237
+ "nb_iter": 40000,
238
+ "args": {
239
+ "out_dir": "./output",
240
+ "exp_name": "read",
241
+ "seed": 123,
242
+ "use_wandb": true,
243
+ "wandb_project": "read2016",
244
+ "print_iter": 100,
245
+ "eval_iter": 1000,
246
+ "dataset": "read2016",
247
+ "data_path": "/root/ws/dataset/read/lines/",
248
+ "train_data_list": "/root/ws/dataset/read/train.ln",
249
+ "val_data_list": "/root/ws/dataset/read/val.ln",
250
+ "test_data_list": "/root/ws/dataset/read/test.ln",
251
+ "nb_cls": 90,
252
+ "num_workers": 4,
253
+ "img_size": [
254
+ 512,
255
+ 64
256
+ ],
257
+ "patch_size": [
258
+ 4,
259
+ 32
260
+ ],
261
+ "train_bs": 128,
262
+ "accum_steps": 1,
263
+ "val_bs": 16,
264
+ "total_iter": 100001,
265
+ "warm_up_iter": 1000,
266
+ "max_lr": 0.001,
267
+ "weight_decay": 0.05,
268
+ "ema_decay": 0.9999,
269
+ "alpha": 1.0,
270
+ "model_type": "ctc",
271
+ "cos_temp": 8,
272
+ "proj": 8.0,
273
+ "attn_mask_ratio": 0.1,
274
+ "use_masking": false,
275
+ "mask_ratio": 0.4,
276
+ "max_span_length": 8,
277
+ "spacing": 0,
278
+ "r_rand": 0.6,
279
+ "r_block": 0.6,
280
+ "block_span": 4,
281
+ "r_span": 0.4,
282
+ "max_span": 8,
283
+ "dpi_min_factor": 0.5,
284
+ "dpi_max_factor": 1.5,
285
+ "perspective_low": 0.0,
286
+ "perspective_high": 0.4,
287
+ "elastic_distortion_min_kernel_size": 3,
288
+ "elastic_distortion_max_kernel_size": 3,
289
+ "elastic_distortion_max_magnitude": 20,
290
+ "elastic_distortion_min_alpha": 0.5,
291
+ "elastic_distortion_max_alpha": 1,
292
+ "elastic_distortion_min_sigma": 1,
293
+ "elastic_distortion_max_sigma": 10,
294
+ "dila_ero_max_kernel": 2,
295
+ "dila_ero_iter": 1,
296
+ "jitter_contrast": 0.4,
297
+ "jitter_brightness": 0.4,
298
+ "jitter_saturation": 0.4,
299
+ "jitter_hue": 0.2,
300
+ "blur_min_kernel": 3,
301
+ "blur_max_kernel": 5,
302
+ "blur_min_sigma": 3,
303
+ "blur_max_sigma": 5,
304
+ "sharpen_min_alpha": 0,
305
+ "sharpen_max_alpha": 1,
306
+ "sharpen_min_strength": 0,
307
+ "sharpen_max_strength": 1,
308
+ "zoom_min_h": 0.8,
309
+ "zoom_max_h": 1,
310
+ "zoom_min_w": 0.99,
311
+ "zoom_max_w": 1,
312
+ "proba": 0.5,
313
+ "decoder_layers": 6,
314
+ "decoder_heads": 8,
315
+ "max_seq_len": 256,
316
+ "label_smoothing": 0.1,
317
+ "beam_size": 5,
318
+ "generation_method": "nucleus",
319
+ "generation_temperature": 0.7,
320
+ "repetition_penalty": 1.3,
321
+ "top_p": 0.9,
322
+ "tcm_enable": true,
323
+ "tcm_lambda": 1.0,
324
+ "ctc_lambda": 0.1,
325
+ "tcm_sub_len": 5,
326
+ "tcm_warmup_iters": 0,
327
+ "resume": null,
328
+ "load_model": null,
329
+ "load_encoder_only": false,
330
+ "strict_loading": true,
331
+ "save_dir": "./output/read"
332
+ },
333
+ "load_error": null
334
+ },
335
+ {
336
+ "name": "lam.pth",
337
+ "path": "checkpoints\\lam.pth",
338
+ "exists": true,
339
+ "size_bytes": 529523413,
340
+ "size_mb": 504.993,
341
+ "sha256": "ac1117ff37ba4ffc4282c5ba31574472be63cb2344f6b59e16f264df02d24973",
342
+ "modified": "2025-12-17T08:55:03.999301",
343
+ "created": "2025-12-17T08:54:55.089015",
344
+ "dataset": "lam",
345
+ "best_cer": 0.024121379310344828,
346
+ "best_wer": 0.06445365282406991,
347
+ "nb_iter": 62000,
348
+ "args": {
349
+ "out_dir": "./output",
350
+ "exp_name": "lam",
351
+ "seed": 123,
352
+ "use_wandb": true,
353
+ "wandb_project": "lam",
354
+ "print_iter": 100,
355
+ "eval_iter": 1000,
356
+ "dataset": "lam",
357
+ "data_path": "/root/ws/dataset/lam/lines/",
358
+ "train_data_list": "/root/ws/dataset/lam/train.ln",
359
+ "val_data_list": "/root/ws/dataset/lam/val.ln",
360
+ "test_data_list": "/root/ws/dataset/lam/test.ln",
361
+ "nb_cls": 91,
362
+ "num_workers": 4,
363
+ "img_size": [
364
+ 512,
365
+ 64
366
+ ],
367
+ "patch_size": [
368
+ 4,
369
+ 32
370
+ ],
371
+ "train_bs": 96,
372
+ "accum_steps": 1,
373
+ "val_bs": 16,
374
+ "total_iter": 100001,
375
+ "warm_up_iter": 1000,
376
+ "max_lr": 0.001,
377
+ "weight_decay": 0.05,
378
+ "ema_decay": 0.9999,
379
+ "alpha": 1.0,
380
+ "model_type": "ctc",
381
+ "cos_temp": 8,
382
+ "proj": 8.0,
383
+ "attn_mask_ratio": 0.1,
384
+ "use_masking": false,
385
+ "mask_ratio": 0.4,
386
+ "max_span_length": 8,
387
+ "spacing": 0,
388
+ "r_rand": 0.6,
389
+ "r_block": 0.6,
390
+ "block_span": 4,
391
+ "r_span": 0.4,
392
+ "max_span": 8,
393
+ "dpi_min_factor": 0.5,
394
+ "dpi_max_factor": 1.5,
395
+ "perspective_low": 0.0,
396
+ "perspective_high": 0.4,
397
+ "elastic_distortion_min_kernel_size": 3,
398
+ "elastic_distortion_max_kernel_size": 3,
399
+ "elastic_distortion_max_magnitude": 20,
400
+ "elastic_distortion_min_alpha": 0.5,
401
+ "elastic_distortion_max_alpha": 1,
402
+ "elastic_distortion_min_sigma": 1,
403
+ "elastic_distortion_max_sigma": 10,
404
+ "dila_ero_max_kernel": 2,
405
+ "dila_ero_iter": 1,
406
+ "jitter_contrast": 0.4,
407
+ "jitter_brightness": 0.4,
408
+ "jitter_saturation": 0.4,
409
+ "jitter_hue": 0.2,
410
+ "blur_min_kernel": 3,
411
+ "blur_max_kernel": 5,
412
+ "blur_min_sigma": 3,
413
+ "blur_max_sigma": 5,
414
+ "sharpen_min_alpha": 0,
415
+ "sharpen_max_alpha": 1,
416
+ "sharpen_min_strength": 0,
417
+ "sharpen_max_strength": 1,
418
+ "zoom_min_h": 0.8,
419
+ "zoom_max_h": 1,
420
+ "zoom_min_w": 0.99,
421
+ "zoom_max_w": 1,
422
+ "proba": 0.5,
423
+ "decoder_layers": 6,
424
+ "decoder_heads": 8,
425
+ "max_seq_len": 256,
426
+ "label_smoothing": 0.1,
427
+ "beam_size": 5,
428
+ "generation_method": "nucleus",
429
+ "generation_temperature": 0.7,
430
+ "repetition_penalty": 1.3,
431
+ "top_p": 0.9,
432
+ "tcm_enable": true,
433
+ "tcm_lambda": 1.0,
434
+ "ctc_lambda": 0.1,
435
+ "tcm_sub_len": 5,
436
+ "tcm_warmup_iters": 0,
437
+ "resume": null,
438
+ "load_model": null,
439
+ "load_encoder_only": false,
440
+ "strict_loading": true,
441
+ "save_dir": "./output/lam"
442
+ },
443
+ "load_error": null
444
+ }
445
+ ]
446
  }