acayir64 committed on
Commit
59eec0c
·
verified ·
1 Parent(s): 93c82e0

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,1075 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "mlx-community/Qwen3.5-0.8B-MLX-4bit",
3
+ "base_model_revision": "5d894f8cc4ef3e6c88537bf3746ed262f549da6a",
4
+ "base_model_commit_hash": "5d894f8cc4ef3e6c88537bf3746ed262f549da6a",
5
+ "base_quantization_config": {
6
+ "group_size": 64,
7
+ "bits": 4,
8
+ "mode": "affine"
9
+ },
10
+ "base_quantization_policy": {
11
+ "enabled": true,
12
+ "bits": 4,
13
+ "group_size": 64,
14
+ "mode": "affine",
15
+ "source": "load_in_4bit",
16
+ "quantize_modules": null,
17
+ "has_callable_predicate": false,
18
+ "force_requantize": false
19
+ },
20
+ "base_quantized_source": "mlx_config",
21
+ "base_resolved_quantization_map": {
22
+ "language_model.model.layers.23.mlp.up_proj": {
23
+ "bits": 4,
24
+ "group_size": 64,
25
+ "mode": "affine"
26
+ },
27
+ "language_model.model.layers.23.mlp.down_proj": {
28
+ "bits": 4,
29
+ "group_size": 64,
30
+ "mode": "affine"
31
+ },
32
+ "language_model.model.layers.23.mlp.gate_proj": {
33
+ "bits": 4,
34
+ "group_size": 64,
35
+ "mode": "affine"
36
+ },
37
+ "language_model.model.layers.23.self_attn.o_proj": {
38
+ "bits": 4,
39
+ "group_size": 64,
40
+ "mode": "affine"
41
+ },
42
+ "language_model.model.layers.23.self_attn.v_proj": {
43
+ "bits": 4,
44
+ "group_size": 64,
45
+ "mode": "affine"
46
+ },
47
+ "language_model.model.layers.23.self_attn.k_proj": {
48
+ "bits": 4,
49
+ "group_size": 64,
50
+ "mode": "affine"
51
+ },
52
+ "language_model.model.layers.23.self_attn.q_proj": {
53
+ "bits": 4,
54
+ "group_size": 64,
55
+ "mode": "affine"
56
+ },
57
+ "language_model.model.layers.22.mlp.up_proj": {
58
+ "bits": 4,
59
+ "group_size": 64,
60
+ "mode": "affine"
61
+ },
62
+ "language_model.model.layers.22.mlp.down_proj": {
63
+ "bits": 4,
64
+ "group_size": 64,
65
+ "mode": "affine"
66
+ },
67
+ "language_model.model.layers.22.mlp.gate_proj": {
68
+ "bits": 4,
69
+ "group_size": 64,
70
+ "mode": "affine"
71
+ },
72
+ "language_model.model.layers.22.linear_attn.out_proj": {
73
+ "bits": 4,
74
+ "group_size": 64,
75
+ "mode": "affine"
76
+ },
77
+ "language_model.model.layers.22.linear_attn.in_proj_a": {
78
+ "bits": 4,
79
+ "group_size": 64,
80
+ "mode": "affine"
81
+ },
82
+ "language_model.model.layers.22.linear_attn.in_proj_b": {
83
+ "bits": 4,
84
+ "group_size": 64,
85
+ "mode": "affine"
86
+ },
87
+ "language_model.model.layers.22.linear_attn.in_proj_z": {
88
+ "bits": 4,
89
+ "group_size": 64,
90
+ "mode": "affine"
91
+ },
92
+ "language_model.model.layers.22.linear_attn.in_proj_qkv": {
93
+ "bits": 4,
94
+ "group_size": 64,
95
+ "mode": "affine"
96
+ },
97
+ "language_model.model.layers.21.mlp.up_proj": {
98
+ "bits": 4,
99
+ "group_size": 64,
100
+ "mode": "affine"
101
+ },
102
+ "language_model.model.layers.21.mlp.down_proj": {
103
+ "bits": 4,
104
+ "group_size": 64,
105
+ "mode": "affine"
106
+ },
107
+ "language_model.model.layers.21.mlp.gate_proj": {
108
+ "bits": 4,
109
+ "group_size": 64,
110
+ "mode": "affine"
111
+ },
112
+ "language_model.model.layers.21.linear_attn.out_proj": {
113
+ "bits": 4,
114
+ "group_size": 64,
115
+ "mode": "affine"
116
+ },
117
+ "language_model.model.layers.21.linear_attn.in_proj_a": {
118
+ "bits": 4,
119
+ "group_size": 64,
120
+ "mode": "affine"
121
+ },
122
+ "language_model.model.layers.21.linear_attn.in_proj_b": {
123
+ "bits": 4,
124
+ "group_size": 64,
125
+ "mode": "affine"
126
+ },
127
+ "language_model.model.layers.21.linear_attn.in_proj_z": {
128
+ "bits": 4,
129
+ "group_size": 64,
130
+ "mode": "affine"
131
+ },
132
+ "language_model.model.layers.21.linear_attn.in_proj_qkv": {
133
+ "bits": 4,
134
+ "group_size": 64,
135
+ "mode": "affine"
136
+ },
137
+ "language_model.model.layers.20.mlp.up_proj": {
138
+ "bits": 4,
139
+ "group_size": 64,
140
+ "mode": "affine"
141
+ },
142
+ "language_model.model.layers.20.mlp.down_proj": {
143
+ "bits": 4,
144
+ "group_size": 64,
145
+ "mode": "affine"
146
+ },
147
+ "language_model.model.layers.20.mlp.gate_proj": {
148
+ "bits": 4,
149
+ "group_size": 64,
150
+ "mode": "affine"
151
+ },
152
+ "language_model.model.layers.20.linear_attn.out_proj": {
153
+ "bits": 4,
154
+ "group_size": 64,
155
+ "mode": "affine"
156
+ },
157
+ "language_model.model.layers.20.linear_attn.in_proj_a": {
158
+ "bits": 4,
159
+ "group_size": 64,
160
+ "mode": "affine"
161
+ },
162
+ "language_model.model.layers.20.linear_attn.in_proj_b": {
163
+ "bits": 4,
164
+ "group_size": 64,
165
+ "mode": "affine"
166
+ },
167
+ "language_model.model.layers.20.linear_attn.in_proj_z": {
168
+ "bits": 4,
169
+ "group_size": 64,
170
+ "mode": "affine"
171
+ },
172
+ "language_model.model.layers.20.linear_attn.in_proj_qkv": {
173
+ "bits": 4,
174
+ "group_size": 64,
175
+ "mode": "affine"
176
+ },
177
+ "language_model.model.layers.19.mlp.up_proj": {
178
+ "bits": 4,
179
+ "group_size": 64,
180
+ "mode": "affine"
181
+ },
182
+ "language_model.model.layers.19.mlp.down_proj": {
183
+ "bits": 4,
184
+ "group_size": 64,
185
+ "mode": "affine"
186
+ },
187
+ "language_model.model.layers.19.mlp.gate_proj": {
188
+ "bits": 4,
189
+ "group_size": 64,
190
+ "mode": "affine"
191
+ },
192
+ "language_model.model.layers.19.self_attn.o_proj": {
193
+ "bits": 4,
194
+ "group_size": 64,
195
+ "mode": "affine"
196
+ },
197
+ "language_model.model.layers.19.self_attn.v_proj": {
198
+ "bits": 4,
199
+ "group_size": 64,
200
+ "mode": "affine"
201
+ },
202
+ "language_model.model.layers.19.self_attn.k_proj": {
203
+ "bits": 4,
204
+ "group_size": 64,
205
+ "mode": "affine"
206
+ },
207
+ "language_model.model.layers.19.self_attn.q_proj": {
208
+ "bits": 4,
209
+ "group_size": 64,
210
+ "mode": "affine"
211
+ },
212
+ "language_model.model.layers.18.mlp.up_proj": {
213
+ "bits": 4,
214
+ "group_size": 64,
215
+ "mode": "affine"
216
+ },
217
+ "language_model.model.layers.18.mlp.down_proj": {
218
+ "bits": 4,
219
+ "group_size": 64,
220
+ "mode": "affine"
221
+ },
222
+ "language_model.model.layers.18.mlp.gate_proj": {
223
+ "bits": 4,
224
+ "group_size": 64,
225
+ "mode": "affine"
226
+ },
227
+ "language_model.model.layers.18.linear_attn.out_proj": {
228
+ "bits": 4,
229
+ "group_size": 64,
230
+ "mode": "affine"
231
+ },
232
+ "language_model.model.layers.18.linear_attn.in_proj_a": {
233
+ "bits": 4,
234
+ "group_size": 64,
235
+ "mode": "affine"
236
+ },
237
+ "language_model.model.layers.18.linear_attn.in_proj_b": {
238
+ "bits": 4,
239
+ "group_size": 64,
240
+ "mode": "affine"
241
+ },
242
+ "language_model.model.layers.18.linear_attn.in_proj_z": {
243
+ "bits": 4,
244
+ "group_size": 64,
245
+ "mode": "affine"
246
+ },
247
+ "language_model.model.layers.18.linear_attn.in_proj_qkv": {
248
+ "bits": 4,
249
+ "group_size": 64,
250
+ "mode": "affine"
251
+ },
252
+ "language_model.model.layers.17.mlp.up_proj": {
253
+ "bits": 4,
254
+ "group_size": 64,
255
+ "mode": "affine"
256
+ },
257
+ "language_model.model.layers.17.mlp.down_proj": {
258
+ "bits": 4,
259
+ "group_size": 64,
260
+ "mode": "affine"
261
+ },
262
+ "language_model.model.layers.17.mlp.gate_proj": {
263
+ "bits": 4,
264
+ "group_size": 64,
265
+ "mode": "affine"
266
+ },
267
+ "language_model.model.layers.17.linear_attn.out_proj": {
268
+ "bits": 4,
269
+ "group_size": 64,
270
+ "mode": "affine"
271
+ },
272
+ "language_model.model.layers.17.linear_attn.in_proj_a": {
273
+ "bits": 4,
274
+ "group_size": 64,
275
+ "mode": "affine"
276
+ },
277
+ "language_model.model.layers.17.linear_attn.in_proj_b": {
278
+ "bits": 4,
279
+ "group_size": 64,
280
+ "mode": "affine"
281
+ },
282
+ "language_model.model.layers.17.linear_attn.in_proj_z": {
283
+ "bits": 4,
284
+ "group_size": 64,
285
+ "mode": "affine"
286
+ },
287
+ "language_model.model.layers.17.linear_attn.in_proj_qkv": {
288
+ "bits": 4,
289
+ "group_size": 64,
290
+ "mode": "affine"
291
+ },
292
+ "language_model.model.layers.16.mlp.up_proj": {
293
+ "bits": 4,
294
+ "group_size": 64,
295
+ "mode": "affine"
296
+ },
297
+ "language_model.model.layers.16.mlp.down_proj": {
298
+ "bits": 4,
299
+ "group_size": 64,
300
+ "mode": "affine"
301
+ },
302
+ "language_model.model.layers.16.mlp.gate_proj": {
303
+ "bits": 4,
304
+ "group_size": 64,
305
+ "mode": "affine"
306
+ },
307
+ "language_model.model.layers.16.linear_attn.out_proj": {
308
+ "bits": 4,
309
+ "group_size": 64,
310
+ "mode": "affine"
311
+ },
312
+ "language_model.model.layers.16.linear_attn.in_proj_a": {
313
+ "bits": 4,
314
+ "group_size": 64,
315
+ "mode": "affine"
316
+ },
317
+ "language_model.model.layers.16.linear_attn.in_proj_b": {
318
+ "bits": 4,
319
+ "group_size": 64,
320
+ "mode": "affine"
321
+ },
322
+ "language_model.model.layers.16.linear_attn.in_proj_z": {
323
+ "bits": 4,
324
+ "group_size": 64,
325
+ "mode": "affine"
326
+ },
327
+ "language_model.model.layers.16.linear_attn.in_proj_qkv": {
328
+ "bits": 4,
329
+ "group_size": 64,
330
+ "mode": "affine"
331
+ },
332
+ "language_model.model.layers.15.mlp.up_proj": {
333
+ "bits": 4,
334
+ "group_size": 64,
335
+ "mode": "affine"
336
+ },
337
+ "language_model.model.layers.15.mlp.down_proj": {
338
+ "bits": 4,
339
+ "group_size": 64,
340
+ "mode": "affine"
341
+ },
342
+ "language_model.model.layers.15.mlp.gate_proj": {
343
+ "bits": 4,
344
+ "group_size": 64,
345
+ "mode": "affine"
346
+ },
347
+ "language_model.model.layers.15.self_attn.o_proj": {
348
+ "bits": 4,
349
+ "group_size": 64,
350
+ "mode": "affine"
351
+ },
352
+ "language_model.model.layers.15.self_attn.v_proj": {
353
+ "bits": 4,
354
+ "group_size": 64,
355
+ "mode": "affine"
356
+ },
357
+ "language_model.model.layers.15.self_attn.k_proj": {
358
+ "bits": 4,
359
+ "group_size": 64,
360
+ "mode": "affine"
361
+ },
362
+ "language_model.model.layers.15.self_attn.q_proj": {
363
+ "bits": 4,
364
+ "group_size": 64,
365
+ "mode": "affine"
366
+ },
367
+ "language_model.model.layers.14.mlp.up_proj": {
368
+ "bits": 4,
369
+ "group_size": 64,
370
+ "mode": "affine"
371
+ },
372
+ "language_model.model.layers.14.mlp.down_proj": {
373
+ "bits": 4,
374
+ "group_size": 64,
375
+ "mode": "affine"
376
+ },
377
+ "language_model.model.layers.14.mlp.gate_proj": {
378
+ "bits": 4,
379
+ "group_size": 64,
380
+ "mode": "affine"
381
+ },
382
+ "language_model.model.layers.14.linear_attn.out_proj": {
383
+ "bits": 4,
384
+ "group_size": 64,
385
+ "mode": "affine"
386
+ },
387
+ "language_model.model.layers.14.linear_attn.in_proj_a": {
388
+ "bits": 4,
389
+ "group_size": 64,
390
+ "mode": "affine"
391
+ },
392
+ "language_model.model.layers.14.linear_attn.in_proj_b": {
393
+ "bits": 4,
394
+ "group_size": 64,
395
+ "mode": "affine"
396
+ },
397
+ "language_model.model.layers.14.linear_attn.in_proj_z": {
398
+ "bits": 4,
399
+ "group_size": 64,
400
+ "mode": "affine"
401
+ },
402
+ "language_model.model.layers.14.linear_attn.in_proj_qkv": {
403
+ "bits": 4,
404
+ "group_size": 64,
405
+ "mode": "affine"
406
+ },
407
+ "language_model.model.layers.13.mlp.up_proj": {
408
+ "bits": 4,
409
+ "group_size": 64,
410
+ "mode": "affine"
411
+ },
412
+ "language_model.model.layers.13.mlp.down_proj": {
413
+ "bits": 4,
414
+ "group_size": 64,
415
+ "mode": "affine"
416
+ },
417
+ "language_model.model.layers.13.mlp.gate_proj": {
418
+ "bits": 4,
419
+ "group_size": 64,
420
+ "mode": "affine"
421
+ },
422
+ "language_model.model.layers.13.linear_attn.out_proj": {
423
+ "bits": 4,
424
+ "group_size": 64,
425
+ "mode": "affine"
426
+ },
427
+ "language_model.model.layers.13.linear_attn.in_proj_a": {
428
+ "bits": 4,
429
+ "group_size": 64,
430
+ "mode": "affine"
431
+ },
432
+ "language_model.model.layers.13.linear_attn.in_proj_b": {
433
+ "bits": 4,
434
+ "group_size": 64,
435
+ "mode": "affine"
436
+ },
437
+ "language_model.model.layers.13.linear_attn.in_proj_z": {
438
+ "bits": 4,
439
+ "group_size": 64,
440
+ "mode": "affine"
441
+ },
442
+ "language_model.model.layers.13.linear_attn.in_proj_qkv": {
443
+ "bits": 4,
444
+ "group_size": 64,
445
+ "mode": "affine"
446
+ },
447
+ "language_model.model.layers.12.mlp.up_proj": {
448
+ "bits": 4,
449
+ "group_size": 64,
450
+ "mode": "affine"
451
+ },
452
+ "language_model.model.layers.12.mlp.down_proj": {
453
+ "bits": 4,
454
+ "group_size": 64,
455
+ "mode": "affine"
456
+ },
457
+ "language_model.model.layers.12.mlp.gate_proj": {
458
+ "bits": 4,
459
+ "group_size": 64,
460
+ "mode": "affine"
461
+ },
462
+ "language_model.model.layers.12.linear_attn.out_proj": {
463
+ "bits": 4,
464
+ "group_size": 64,
465
+ "mode": "affine"
466
+ },
467
+ "language_model.model.layers.12.linear_attn.in_proj_a": {
468
+ "bits": 4,
469
+ "group_size": 64,
470
+ "mode": "affine"
471
+ },
472
+ "language_model.model.layers.12.linear_attn.in_proj_b": {
473
+ "bits": 4,
474
+ "group_size": 64,
475
+ "mode": "affine"
476
+ },
477
+ "language_model.model.layers.12.linear_attn.in_proj_z": {
478
+ "bits": 4,
479
+ "group_size": 64,
480
+ "mode": "affine"
481
+ },
482
+ "language_model.model.layers.12.linear_attn.in_proj_qkv": {
483
+ "bits": 4,
484
+ "group_size": 64,
485
+ "mode": "affine"
486
+ },
487
+ "language_model.model.layers.11.mlp.up_proj": {
488
+ "bits": 4,
489
+ "group_size": 64,
490
+ "mode": "affine"
491
+ },
492
+ "language_model.model.layers.11.mlp.down_proj": {
493
+ "bits": 4,
494
+ "group_size": 64,
495
+ "mode": "affine"
496
+ },
497
+ "language_model.model.layers.11.mlp.gate_proj": {
498
+ "bits": 4,
499
+ "group_size": 64,
500
+ "mode": "affine"
501
+ },
502
+ "language_model.model.layers.11.self_attn.o_proj": {
503
+ "bits": 4,
504
+ "group_size": 64,
505
+ "mode": "affine"
506
+ },
507
+ "language_model.model.layers.11.self_attn.v_proj": {
508
+ "bits": 4,
509
+ "group_size": 64,
510
+ "mode": "affine"
511
+ },
512
+ "language_model.model.layers.11.self_attn.k_proj": {
513
+ "bits": 4,
514
+ "group_size": 64,
515
+ "mode": "affine"
516
+ },
517
+ "language_model.model.layers.11.self_attn.q_proj": {
518
+ "bits": 4,
519
+ "group_size": 64,
520
+ "mode": "affine"
521
+ },
522
+ "language_model.model.layers.10.mlp.up_proj": {
523
+ "bits": 4,
524
+ "group_size": 64,
525
+ "mode": "affine"
526
+ },
527
+ "language_model.model.layers.10.mlp.down_proj": {
528
+ "bits": 4,
529
+ "group_size": 64,
530
+ "mode": "affine"
531
+ },
532
+ "language_model.model.layers.10.mlp.gate_proj": {
533
+ "bits": 4,
534
+ "group_size": 64,
535
+ "mode": "affine"
536
+ },
537
+ "language_model.model.layers.10.linear_attn.out_proj": {
538
+ "bits": 4,
539
+ "group_size": 64,
540
+ "mode": "affine"
541
+ },
542
+ "language_model.model.layers.10.linear_attn.in_proj_a": {
543
+ "bits": 4,
544
+ "group_size": 64,
545
+ "mode": "affine"
546
+ },
547
+ "language_model.model.layers.10.linear_attn.in_proj_b": {
548
+ "bits": 4,
549
+ "group_size": 64,
550
+ "mode": "affine"
551
+ },
552
+ "language_model.model.layers.10.linear_attn.in_proj_z": {
553
+ "bits": 4,
554
+ "group_size": 64,
555
+ "mode": "affine"
556
+ },
557
+ "language_model.model.layers.10.linear_attn.in_proj_qkv": {
558
+ "bits": 4,
559
+ "group_size": 64,
560
+ "mode": "affine"
561
+ },
562
+ "language_model.model.layers.9.mlp.up_proj": {
563
+ "bits": 4,
564
+ "group_size": 64,
565
+ "mode": "affine"
566
+ },
567
+ "language_model.model.layers.9.mlp.down_proj": {
568
+ "bits": 4,
569
+ "group_size": 64,
570
+ "mode": "affine"
571
+ },
572
+ "language_model.model.layers.9.mlp.gate_proj": {
573
+ "bits": 4,
574
+ "group_size": 64,
575
+ "mode": "affine"
576
+ },
577
+ "language_model.model.layers.9.linear_attn.out_proj": {
578
+ "bits": 4,
579
+ "group_size": 64,
580
+ "mode": "affine"
581
+ },
582
+ "language_model.model.layers.9.linear_attn.in_proj_a": {
583
+ "bits": 4,
584
+ "group_size": 64,
585
+ "mode": "affine"
586
+ },
587
+ "language_model.model.layers.9.linear_attn.in_proj_b": {
588
+ "bits": 4,
589
+ "group_size": 64,
590
+ "mode": "affine"
591
+ },
592
+ "language_model.model.layers.9.linear_attn.in_proj_z": {
593
+ "bits": 4,
594
+ "group_size": 64,
595
+ "mode": "affine"
596
+ },
597
+ "language_model.model.layers.9.linear_attn.in_proj_qkv": {
598
+ "bits": 4,
599
+ "group_size": 64,
600
+ "mode": "affine"
601
+ },
602
+ "language_model.model.layers.8.mlp.up_proj": {
603
+ "bits": 4,
604
+ "group_size": 64,
605
+ "mode": "affine"
606
+ },
607
+ "language_model.model.layers.8.mlp.down_proj": {
608
+ "bits": 4,
609
+ "group_size": 64,
610
+ "mode": "affine"
611
+ },
612
+ "language_model.model.layers.8.mlp.gate_proj": {
613
+ "bits": 4,
614
+ "group_size": 64,
615
+ "mode": "affine"
616
+ },
617
+ "language_model.model.layers.8.linear_attn.out_proj": {
618
+ "bits": 4,
619
+ "group_size": 64,
620
+ "mode": "affine"
621
+ },
622
+ "language_model.model.layers.8.linear_attn.in_proj_a": {
623
+ "bits": 4,
624
+ "group_size": 64,
625
+ "mode": "affine"
626
+ },
627
+ "language_model.model.layers.8.linear_attn.in_proj_b": {
628
+ "bits": 4,
629
+ "group_size": 64,
630
+ "mode": "affine"
631
+ },
632
+ "language_model.model.layers.8.linear_attn.in_proj_z": {
633
+ "bits": 4,
634
+ "group_size": 64,
635
+ "mode": "affine"
636
+ },
637
+ "language_model.model.layers.8.linear_attn.in_proj_qkv": {
638
+ "bits": 4,
639
+ "group_size": 64,
640
+ "mode": "affine"
641
+ },
642
+ "language_model.model.layers.7.mlp.up_proj": {
643
+ "bits": 4,
644
+ "group_size": 64,
645
+ "mode": "affine"
646
+ },
647
+ "language_model.model.layers.7.mlp.down_proj": {
648
+ "bits": 4,
649
+ "group_size": 64,
650
+ "mode": "affine"
651
+ },
652
+ "language_model.model.layers.7.mlp.gate_proj": {
653
+ "bits": 4,
654
+ "group_size": 64,
655
+ "mode": "affine"
656
+ },
657
+ "language_model.model.layers.7.self_attn.o_proj": {
658
+ "bits": 4,
659
+ "group_size": 64,
660
+ "mode": "affine"
661
+ },
662
+ "language_model.model.layers.7.self_attn.v_proj": {
663
+ "bits": 4,
664
+ "group_size": 64,
665
+ "mode": "affine"
666
+ },
667
+ "language_model.model.layers.7.self_attn.k_proj": {
668
+ "bits": 4,
669
+ "group_size": 64,
670
+ "mode": "affine"
671
+ },
672
+ "language_model.model.layers.7.self_attn.q_proj": {
673
+ "bits": 4,
674
+ "group_size": 64,
675
+ "mode": "affine"
676
+ },
677
+ "language_model.model.layers.6.mlp.up_proj": {
678
+ "bits": 4,
679
+ "group_size": 64,
680
+ "mode": "affine"
681
+ },
682
+ "language_model.model.layers.6.mlp.down_proj": {
683
+ "bits": 4,
684
+ "group_size": 64,
685
+ "mode": "affine"
686
+ },
687
+ "language_model.model.layers.6.mlp.gate_proj": {
688
+ "bits": 4,
689
+ "group_size": 64,
690
+ "mode": "affine"
691
+ },
692
+ "language_model.model.layers.6.linear_attn.out_proj": {
693
+ "bits": 4,
694
+ "group_size": 64,
695
+ "mode": "affine"
696
+ },
697
+ "language_model.model.layers.6.linear_attn.in_proj_a": {
698
+ "bits": 4,
699
+ "group_size": 64,
700
+ "mode": "affine"
701
+ },
702
+ "language_model.model.layers.6.linear_attn.in_proj_b": {
703
+ "bits": 4,
704
+ "group_size": 64,
705
+ "mode": "affine"
706
+ },
707
+ "language_model.model.layers.6.linear_attn.in_proj_z": {
708
+ "bits": 4,
709
+ "group_size": 64,
710
+ "mode": "affine"
711
+ },
712
+ "language_model.model.layers.6.linear_attn.in_proj_qkv": {
713
+ "bits": 4,
714
+ "group_size": 64,
715
+ "mode": "affine"
716
+ },
717
+ "language_model.model.layers.5.mlp.up_proj": {
718
+ "bits": 4,
719
+ "group_size": 64,
720
+ "mode": "affine"
721
+ },
722
+ "language_model.model.layers.5.mlp.down_proj": {
723
+ "bits": 4,
724
+ "group_size": 64,
725
+ "mode": "affine"
726
+ },
727
+ "language_model.model.layers.5.mlp.gate_proj": {
728
+ "bits": 4,
729
+ "group_size": 64,
730
+ "mode": "affine"
731
+ },
732
+ "language_model.model.layers.5.linear_attn.out_proj": {
733
+ "bits": 4,
734
+ "group_size": 64,
735
+ "mode": "affine"
736
+ },
737
+ "language_model.model.layers.5.linear_attn.in_proj_a": {
738
+ "bits": 4,
739
+ "group_size": 64,
740
+ "mode": "affine"
741
+ },
742
+ "language_model.model.layers.5.linear_attn.in_proj_b": {
743
+ "bits": 4,
744
+ "group_size": 64,
745
+ "mode": "affine"
746
+ },
747
+ "language_model.model.layers.5.linear_attn.in_proj_z": {
748
+ "bits": 4,
749
+ "group_size": 64,
750
+ "mode": "affine"
751
+ },
752
+ "language_model.model.layers.5.linear_attn.in_proj_qkv": {
753
+ "bits": 4,
754
+ "group_size": 64,
755
+ "mode": "affine"
756
+ },
757
+ "language_model.model.layers.4.mlp.up_proj": {
758
+ "bits": 4,
759
+ "group_size": 64,
760
+ "mode": "affine"
761
+ },
762
+ "language_model.model.layers.4.mlp.down_proj": {
763
+ "bits": 4,
764
+ "group_size": 64,
765
+ "mode": "affine"
766
+ },
767
+ "language_model.model.layers.4.mlp.gate_proj": {
768
+ "bits": 4,
769
+ "group_size": 64,
770
+ "mode": "affine"
771
+ },
772
+ "language_model.model.layers.4.linear_attn.out_proj": {
773
+ "bits": 4,
774
+ "group_size": 64,
775
+ "mode": "affine"
776
+ },
777
+ "language_model.model.layers.4.linear_attn.in_proj_a": {
778
+ "bits": 4,
779
+ "group_size": 64,
780
+ "mode": "affine"
781
+ },
782
+ "language_model.model.layers.4.linear_attn.in_proj_b": {
783
+ "bits": 4,
784
+ "group_size": 64,
785
+ "mode": "affine"
786
+ },
787
+ "language_model.model.layers.4.linear_attn.in_proj_z": {
788
+ "bits": 4,
789
+ "group_size": 64,
790
+ "mode": "affine"
791
+ },
792
+ "language_model.model.layers.4.linear_attn.in_proj_qkv": {
793
+ "bits": 4,
794
+ "group_size": 64,
795
+ "mode": "affine"
796
+ },
797
+ "language_model.model.layers.3.mlp.up_proj": {
798
+ "bits": 4,
799
+ "group_size": 64,
800
+ "mode": "affine"
801
+ },
802
+ "language_model.model.layers.3.mlp.down_proj": {
803
+ "bits": 4,
804
+ "group_size": 64,
805
+ "mode": "affine"
806
+ },
807
+ "language_model.model.layers.3.mlp.gate_proj": {
808
+ "bits": 4,
809
+ "group_size": 64,
810
+ "mode": "affine"
811
+ },
812
+ "language_model.model.layers.3.self_attn.o_proj": {
813
+ "bits": 4,
814
+ "group_size": 64,
815
+ "mode": "affine"
816
+ },
817
+ "language_model.model.layers.3.self_attn.v_proj": {
818
+ "bits": 4,
819
+ "group_size": 64,
820
+ "mode": "affine"
821
+ },
822
+ "language_model.model.layers.3.self_attn.k_proj": {
823
+ "bits": 4,
824
+ "group_size": 64,
825
+ "mode": "affine"
826
+ },
827
+ "language_model.model.layers.3.self_attn.q_proj": {
828
+ "bits": 4,
829
+ "group_size": 64,
830
+ "mode": "affine"
831
+ },
832
+ "language_model.model.layers.2.mlp.up_proj": {
833
+ "bits": 4,
834
+ "group_size": 64,
835
+ "mode": "affine"
836
+ },
837
+ "language_model.model.layers.2.mlp.down_proj": {
838
+ "bits": 4,
839
+ "group_size": 64,
840
+ "mode": "affine"
841
+ },
842
+ "language_model.model.layers.2.mlp.gate_proj": {
843
+ "bits": 4,
844
+ "group_size": 64,
845
+ "mode": "affine"
846
+ },
847
+ "language_model.model.layers.2.linear_attn.out_proj": {
848
+ "bits": 4,
849
+ "group_size": 64,
850
+ "mode": "affine"
851
+ },
852
+ "language_model.model.layers.2.linear_attn.in_proj_a": {
853
+ "bits": 4,
854
+ "group_size": 64,
855
+ "mode": "affine"
856
+ },
857
+ "language_model.model.layers.2.linear_attn.in_proj_b": {
858
+ "bits": 4,
859
+ "group_size": 64,
860
+ "mode": "affine"
861
+ },
862
+ "language_model.model.layers.2.linear_attn.in_proj_z": {
863
+ "bits": 4,
864
+ "group_size": 64,
865
+ "mode": "affine"
866
+ },
867
+ "language_model.model.layers.2.linear_attn.in_proj_qkv": {
868
+ "bits": 4,
869
+ "group_size": 64,
870
+ "mode": "affine"
871
+ },
872
+ "language_model.model.layers.1.mlp.up_proj": {
873
+ "bits": 4,
874
+ "group_size": 64,
875
+ "mode": "affine"
876
+ },
877
+ "language_model.model.layers.1.mlp.down_proj": {
878
+ "bits": 4,
879
+ "group_size": 64,
880
+ "mode": "affine"
881
+ },
882
+ "language_model.model.layers.1.mlp.gate_proj": {
883
+ "bits": 4,
884
+ "group_size": 64,
885
+ "mode": "affine"
886
+ },
887
+ "language_model.model.layers.1.linear_attn.out_proj": {
888
+ "bits": 4,
889
+ "group_size": 64,
890
+ "mode": "affine"
891
+ },
892
+ "language_model.model.layers.1.linear_attn.in_proj_a": {
893
+ "bits": 4,
894
+ "group_size": 64,
895
+ "mode": "affine"
896
+ },
897
+ "language_model.model.layers.1.linear_attn.in_proj_b": {
898
+ "bits": 4,
899
+ "group_size": 64,
900
+ "mode": "affine"
901
+ },
902
+ "language_model.model.layers.1.linear_attn.in_proj_z": {
903
+ "bits": 4,
904
+ "group_size": 64,
905
+ "mode": "affine"
906
+ },
907
+ "language_model.model.layers.1.linear_attn.in_proj_qkv": {
908
+ "bits": 4,
909
+ "group_size": 64,
910
+ "mode": "affine"
911
+ },
912
+ "language_model.model.layers.0.mlp.up_proj": {
913
+ "bits": 4,
914
+ "group_size": 64,
915
+ "mode": "affine"
916
+ },
917
+ "language_model.model.layers.0.mlp.down_proj": {
918
+ "bits": 4,
919
+ "group_size": 64,
920
+ "mode": "affine"
921
+ },
922
+ "language_model.model.layers.0.mlp.gate_proj": {
923
+ "bits": 4,
924
+ "group_size": 64,
925
+ "mode": "affine"
926
+ },
927
+ "language_model.model.layers.0.linear_attn.out_proj": {
928
+ "bits": 4,
929
+ "group_size": 64,
930
+ "mode": "affine"
931
+ },
932
+ "language_model.model.layers.0.linear_attn.in_proj_a": {
933
+ "bits": 4,
934
+ "group_size": 64,
935
+ "mode": "affine"
936
+ },
937
+ "language_model.model.layers.0.linear_attn.in_proj_b": {
938
+ "bits": 4,
939
+ "group_size": 64,
940
+ "mode": "affine"
941
+ },
942
+ "language_model.model.layers.0.linear_attn.in_proj_z": {
943
+ "bits": 4,
944
+ "group_size": 64,
945
+ "mode": "affine"
946
+ },
947
+ "language_model.model.layers.0.linear_attn.in_proj_qkv": {
948
+ "bits": 4,
949
+ "group_size": 64,
950
+ "mode": "affine"
951
+ },
952
+ "language_model.model.embed_tokens": {
953
+ "bits": 4,
954
+ "group_size": 64,
955
+ "mode": "affine"
956
+ }
957
+ },
958
+ "requires_unsloth_mlx_runtime_quantization": false,
959
+ "unsloth_mlx_lora_module_paths": [
960
+ "language_model.model.layers.23.mlp.up_proj",
961
+ "language_model.model.layers.23.mlp.down_proj",
962
+ "language_model.model.layers.23.mlp.gate_proj",
963
+ "language_model.model.layers.23.self_attn.o_proj",
964
+ "language_model.model.layers.23.self_attn.v_proj",
965
+ "language_model.model.layers.23.self_attn.k_proj",
966
+ "language_model.model.layers.23.self_attn.q_proj",
967
+ "language_model.model.layers.22.mlp.up_proj",
968
+ "language_model.model.layers.22.mlp.down_proj",
969
+ "language_model.model.layers.22.mlp.gate_proj",
970
+ "language_model.model.layers.22.linear_attn.out_proj",
971
+ "language_model.model.layers.21.mlp.up_proj",
972
+ "language_model.model.layers.21.mlp.down_proj",
973
+ "language_model.model.layers.21.mlp.gate_proj",
974
+ "language_model.model.layers.21.linear_attn.out_proj",
975
+ "language_model.model.layers.20.mlp.up_proj",
976
+ "language_model.model.layers.20.mlp.down_proj",
977
+ "language_model.model.layers.20.mlp.gate_proj",
978
+ "language_model.model.layers.20.linear_attn.out_proj",
979
+ "language_model.model.layers.19.mlp.up_proj",
980
+ "language_model.model.layers.19.mlp.down_proj",
981
+ "language_model.model.layers.19.mlp.gate_proj",
982
+ "language_model.model.layers.19.self_attn.o_proj",
983
+ "language_model.model.layers.19.self_attn.v_proj",
984
+ "language_model.model.layers.19.self_attn.k_proj",
985
+ "language_model.model.layers.19.self_attn.q_proj",
986
+ "language_model.model.layers.18.mlp.up_proj",
987
+ "language_model.model.layers.18.mlp.down_proj",
988
+ "language_model.model.layers.18.mlp.gate_proj",
989
+ "language_model.model.layers.18.linear_attn.out_proj",
990
+ "language_model.model.layers.17.mlp.up_proj",
991
+ "language_model.model.layers.17.mlp.down_proj",
992
+ "language_model.model.layers.17.mlp.gate_proj",
993
+ "language_model.model.layers.17.linear_attn.out_proj",
994
+ "language_model.model.layers.16.mlp.up_proj",
995
+ "language_model.model.layers.16.mlp.down_proj",
996
+ "language_model.model.layers.16.mlp.gate_proj",
997
+ "language_model.model.layers.16.linear_attn.out_proj",
998
+ "language_model.model.layers.15.mlp.up_proj",
999
+ "language_model.model.layers.15.mlp.down_proj",
1000
+ "language_model.model.layers.15.mlp.gate_proj",
1001
+ "language_model.model.layers.15.self_attn.o_proj",
1002
+ "language_model.model.layers.15.self_attn.v_proj",
1003
+ "language_model.model.layers.15.self_attn.k_proj",
1004
+ "language_model.model.layers.15.self_attn.q_proj",
1005
+ "language_model.model.layers.14.mlp.up_proj",
1006
+ "language_model.model.layers.14.mlp.down_proj",
1007
+ "language_model.model.layers.14.mlp.gate_proj",
1008
+ "language_model.model.layers.14.linear_attn.out_proj",
1009
+ "language_model.model.layers.13.mlp.up_proj",
1010
+ "language_model.model.layers.13.mlp.down_proj",
1011
+ "language_model.model.layers.13.mlp.gate_proj",
1012
+ "language_model.model.layers.13.linear_attn.out_proj",
1013
+ "language_model.model.layers.12.mlp.up_proj",
1014
+ "language_model.model.layers.12.mlp.down_proj",
1015
+ "language_model.model.layers.12.mlp.gate_proj",
1016
+ "language_model.model.layers.12.linear_attn.out_proj",
1017
+ "language_model.model.layers.11.mlp.up_proj",
1018
+ "language_model.model.layers.11.mlp.down_proj",
1019
+ "language_model.model.layers.11.mlp.gate_proj",
1020
+ "language_model.model.layers.11.self_attn.o_proj",
1021
+ "language_model.model.layers.11.self_attn.v_proj",
1022
+ "language_model.model.layers.11.self_attn.k_proj",
1023
+ "language_model.model.layers.11.self_attn.q_proj",
1024
+ "language_model.model.layers.10.mlp.up_proj",
1025
+ "language_model.model.layers.10.mlp.down_proj",
1026
+ "language_model.model.layers.10.mlp.gate_proj",
1027
+ "language_model.model.layers.10.linear_attn.out_proj",
1028
+ "language_model.model.layers.9.mlp.up_proj",
1029
+ "language_model.model.layers.9.mlp.down_proj",
1030
+ "language_model.model.layers.9.mlp.gate_proj",
1031
+ "language_model.model.layers.9.linear_attn.out_proj",
1032
+ "language_model.model.layers.8.mlp.up_proj",
1033
+ "language_model.model.layers.8.mlp.down_proj",
1034
+ "language_model.model.layers.8.mlp.gate_proj",
1035
+ "language_model.model.layers.8.linear_attn.out_proj",
1036
+ "language_model.model.layers.7.mlp.up_proj",
1037
+ "language_model.model.layers.7.mlp.down_proj",
1038
+ "language_model.model.layers.7.mlp.gate_proj",
1039
+ "language_model.model.layers.7.self_attn.o_proj",
1040
+ "language_model.model.layers.7.self_attn.v_proj",
1041
+ "language_model.model.layers.7.self_attn.k_proj",
1042
+ "language_model.model.layers.7.self_attn.q_proj",
1043
+ "language_model.model.layers.6.mlp.up_proj",
1044
+ "language_model.model.layers.6.mlp.down_proj",
1045
+ "language_model.model.layers.6.mlp.gate_proj",
1046
+ "language_model.model.layers.6.linear_attn.out_proj",
1047
+ "language_model.model.layers.5.mlp.up_proj",
1048
+ "language_model.model.layers.5.mlp.down_proj",
1049
+ "language_model.model.layers.5.mlp.gate_proj",
1050
+ "language_model.model.layers.5.linear_attn.out_proj",
1051
+ "language_model.model.layers.4.mlp.up_proj",
1052
+ "language_model.model.layers.4.mlp.down_proj",
1053
+ "language_model.model.layers.4.mlp.gate_proj",
1054
+ "language_model.model.layers.4.linear_attn.out_proj",
1055
+ "language_model.model.layers.3.mlp.up_proj",
1056
+ "language_model.model.layers.3.mlp.down_proj",
1057
+ "language_model.model.layers.3.mlp.gate_proj",
1058
+ "language_model.model.layers.3.self_attn.o_proj",
1059
+ "language_model.model.layers.3.self_attn.v_proj",
1060
+ "language_model.model.layers.3.self_attn.k_proj",
1061
+ "language_model.model.layers.3.self_attn.q_proj",
1062
+ "language_model.model.layers.2.mlp.up_proj",
1063
+ "language_model.model.layers.2.mlp.down_proj",
1064
+ "language_model.model.layers.2.mlp.gate_proj",
1065
+ "language_model.model.layers.2.linear_attn.out_proj",
1066
+ "language_model.model.layers.1.mlp.up_proj",
1067
+ "language_model.model.layers.1.mlp.down_proj",
1068
+ "language_model.model.layers.1.mlp.gate_proj",
1069
+ "language_model.model.layers.1.linear_attn.out_proj",
1070
+ "language_model.model.layers.0.mlp.up_proj",
1071
+ "language_model.model.layers.0.mlp.down_proj",
1072
+ "language_model.model.layers.0.mlp.gate_proj",
1073
+ "language_model.model.layers.0.linear_attn.out_proj"
1074
+ ]
1075
+ }
adapters.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6609861b4d83647fd005e9b593f9e1aabd1f5252b8c9f8a672e2938071bdaa67
3
+ size 231342498
chat_template.jinja ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {%- if add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- if not messages %}
43
+ {{- raise_exception('No messages provided.') }}
44
+ {%- endif %}
45
+ {%- if tools and tools is iterable and tools is not mapping %}
46
+ {{- '<|im_start|>system\n' }}
47
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
48
+ {%- for tool in tools %}
49
+ {{- "\n" }}
50
+ {{- tool | tojson }}
51
+ {%- endfor %}
52
+ {{- "\n</tools>" }}
53
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
54
+ {%- if messages[0].role == 'system' %}
55
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
56
+ {%- if content %}
57
+ {{- '\n\n' + content }}
58
+ {%- endif %}
59
+ {%- endif %}
60
+ {{- '<|im_end|>\n' }}
61
+ {%- else %}
62
+ {%- if messages[0].role == 'system' %}
63
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
64
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
68
+ {%- for message in messages[::-1] %}
69
+ {%- set index = (messages|length - 1) - loop.index0 %}
70
+ {%- if ns.multi_step_tool and message.role == "user" %}
71
+ {%- set content = render_content(message.content, false)|trim %}
72
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
73
+ {%- set ns.multi_step_tool = false %}
74
+ {%- set ns.last_query_index = index %}
75
+ {%- endif %}
76
+ {%- endif %}
77
+ {%- endfor %}
78
+ {%- if ns.multi_step_tool %}
79
+ {{- raise_exception('No user query found in messages.') }}
80
+ {%- endif %}
81
+ {%- for message in messages %}
82
+ {%- set content = render_content(message.content, true)|trim %}
83
+ {%- if message.role == "system" %}
84
+ {%- if not loop.first %}
85
+ {{- raise_exception('System message must be at the beginning.') }}
86
+ {%- endif %}
87
+ {%- elif message.role == "user" %}
88
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
89
+ {%- elif message.role == "assistant" %}
90
+ {%- set reasoning_content = '' %}
91
+ {%- if message.reasoning_content is string %}
92
+ {%- set reasoning_content = message.reasoning_content %}
93
+ {%- else %}
94
+ {%- if '</think>' in content %}
95
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
96
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
97
+ {%- endif %}
98
+ {%- endif %}
99
+ {%- set reasoning_content = reasoning_content|trim %}
100
+ {%- if loop.index0 > ns.last_query_index %}
101
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
102
+ {%- else %}
103
+ {{- '<|im_start|>' + message.role + '\n' + content }}
104
+ {%- endif %}
105
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
106
+ {%- for tool_call in message.tool_calls %}
107
+ {%- if tool_call.function is defined %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {%- if loop.first %}
111
+ {%- if content|trim %}
112
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
113
+ {%- else %}
114
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- endif %}
116
+ {%- else %}
117
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
118
+ {%- endif %}
119
+ {%- if tool_call.arguments is defined %}
120
+ {%- for args_name, args_value in tool_call.arguments|items %}
121
+ {{- '<parameter=' + args_name + '>\n' }}
122
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
123
+ {{- args_value }}
124
+ {{- '\n</parameter>\n' }}
125
+ {%- endfor %}
126
+ {%- endif %}
127
+ {{- '</function>\n</tool_call>' }}
128
+ {%- endfor %}
129
+ {%- endif %}
130
+ {{- '<|im_end|>\n' }}
131
+ {%- elif message.role == "tool" %}
132
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
133
+ {{- '<|im_start|>user' }}
134
+ {%- endif %}
135
+ {{- '\n<tool_response>\n' }}
136
+ {{- content }}
137
+ {{- '\n</tool_response>' }}
138
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
139
+ {{- '<|im_end|>\n' }}
140
+ {%- elif loop.last %}
141
+ {{- '<|im_end|>\n' }}
142
+ {%- endif %}
143
+ {%- else %}
144
+ {{- raise_exception('Unexpected message role.') }}
145
+ {%- endif %}
146
+ {%- endfor %}
147
+ {%- if add_generation_prompt %}
148
+ {{- '<|im_start|>assistant\n' }}
149
+ {%- if enable_thinking is defined and enable_thinking is true %}
150
+ {{- '<think>\n' }}
151
+ {%- else %}
152
+ {{- '<think>\n\n</think>\n\n' }}
153
+ {%- endif %}
154
+ {%- endif %}
processor_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "do_convert_rgb": true,
4
+ "do_normalize": true,
5
+ "do_rescale": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "Qwen3VLImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "max_pixels": 16777216,
18
+ "merge_size": 2,
19
+ "min_pixels": 65536,
20
+ "patch_size": 16,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "temporal_patch_size": 2
23
+ },
24
+ "processor_class": "Qwen3VLProcessor",
25
+ "video_processor": {
26
+ "do_convert_rgb": true,
27
+ "do_normalize": true,
28
+ "do_rescale": true,
29
+ "fps": 2.0,
30
+ "image_mean": [
31
+ 0.5,
32
+ 0.5,
33
+ 0.5
34
+ ],
35
+ "image_std": [
36
+ 0.5,
37
+ 0.5,
38
+ 0.5
39
+ ],
40
+ "max_frames": 768,
41
+ "max_pixels": 25165824,
42
+ "merge_size": 2,
43
+ "min_frames": 4,
44
+ "min_pixels": 4096,
45
+ "patch_size": 16,
46
+ "rescale_factor": 0.00392156862745098,
47
+ "temporal_patch_size": 2,
48
+ "video_processor_type": "Qwen3VLVideoProcessor"
49
+ }
50
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
3
+ size 19989343
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": true,
13
+ "model_max_length": 262144,
14
+ "model_specific_special_tokens": {
15
+ "audio_bos_token": "<|audio_start|>",
16
+ "audio_eos_token": "<|audio_end|>",
17
+ "audio_token": "<|audio_pad|>",
18
+ "image_token": "<|image_pad|>",
19
+ "video_token": "<|video_pad|>",
20
+ "vision_bos_token": "<|vision_start|>",
21
+ "vision_eos_token": "<|vision_end|>"
22
+ },
23
+ "pad_token": "<|endoftext|>",
24
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
25
+ "processor_class": "Qwen3VLProcessor",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "TokenizersBackend",
28
+ "unk_token": null,
29
+ "video_token": "<|video_pad|>",
30
+ "vision_bos_token": "<|vision_start|>",
31
+ "vision_eos_token": "<|vision_end|>"
32
+ }