Firworks committed on
Commit
f164a87
·
verified ·
1 Parent(s): 386025b

Add NVFP4 quantized checkpoint

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - camel-ai/loong
4
+ base_model:
5
+ - khazarai/Chemistry-R1
6
+ tags:
7
+ - nvfp4
8
+ - fp4
9
+ - quantized
10
+ ---
11
+ # Chemistry-R1-nvfp4
12
+
13
+ **Format:** NVFP4 — weights & activations quantized to FP4 with two-level scaling (an FP8 E4M3 scale per group of 16 values, plus a per-tensor global scale).
14
+ **Base model:** `khazarai/Chemistry-R1`
15
+ **How it was made:** One-shot calibration with LLM Compressor (NVFP4 recipe), using long-sequence calibration data from `camel-ai/loong` (256 samples at a sequence length of 4096 tokens).
16
+
17
+ > Notes: Keep `lm_head` in high precision; calibrate on long, domain-relevant sequences.
18
+
19
+ See the original model card for `khazarai/Chemistry-R1` for details about the base model (training data, intended use, and limitations).
20
+
21
+ # Running the model with vLLM in Docker
22
+ ```sh
23
+ sudo docker run --runtime nvidia --gpus all -p 8000:8000 --ipc=host vllm/vllm-openai:nightly --model Firworks/Chemistry-R1-nvfp4 --dtype auto --max-model-len 32768
24
+ ```
25
+
26
+ # Running the model on the DGX Spark with vLLM in Docker
27
+ ```sh
28
+ sudo docker run --gpus all --network host --ipc=host nvcr.io/nvidia/vllm:26.02-py3 vllm serve Firworks/Chemistry-R1-nvfp4 --dtype auto --max-model-len 32768
29
+ ```
30
+
31
+ This was tested on a DGX Spark (GB10 Grace Blackwell Superchip, 128GB unified memory).
32
+
33
+ If there are other models you're interested in seeing quantized to NVFP4 for use on the DGX Spark or other modern Blackwell (or newer) cards, let me know. I'm trying to make more NVFP4 models available so that more people can try them out.
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
chat_template.jinja ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for forward_message in messages %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- set message = messages[index] %}
21
+ {%- set current_content = message.content if message.content is defined and message.content is not none else '' %}
22
+ {%- set tool_start = '<tool_response>' %}
23
+ {%- set tool_start_length = tool_start|length %}
24
+ {%- set start_of_message = current_content[:tool_start_length] %}
25
+ {%- set tool_end = '</tool_response>' %}
26
+ {%- set tool_end_length = tool_end|length %}
27
+ {%- set start_pos = (current_content|length) - tool_end_length %}
28
+ {%- if start_pos < 0 %}
29
+ {%- set start_pos = 0 %}
30
+ {%- endif %}
31
+ {%- set end_of_message = current_content[start_pos:] %}
32
+ {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
33
+ {%- set ns.multi_step_tool = false %}
34
+ {%- set ns.last_query_index = index %}
35
+ {%- endif %}
36
+ {%- endfor %}
37
+ {%- for message in messages %}
38
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
39
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
40
+ {%- elif message.role == "assistant" %}
41
+ {%- set m_content = message.content if message.content is defined and message.content is not none else '' %}
42
+ {%- set content = m_content %}
43
+ {%- set reasoning_content = '' %}
44
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
45
+ {%- set reasoning_content = message.reasoning_content %}
46
+ {%- else %}
47
+ {%- if '</think>' in m_content %}
48
+ {%- set content = (m_content.split('</think>')|last).lstrip('\n') %}
49
+ {%- set reasoning_content = (m_content.split('</think>')|first).rstrip('\n') %}
50
+ {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
51
+ {%- endif %}
52
+ {%- endif %}
53
+ {%- if loop.index0 > ns.last_query_index %}
54
+ {%- if loop.last or (not loop.last and (not reasoning_content.strip() == '')) %}
55
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
56
+ {%- else %}
57
+ {{- '<|im_start|>' + message.role + '\n' + content }}
58
+ {%- endif %}
59
+ {%- else %}
60
+ {{- '<|im_start|>' + message.role + '\n' + content }}
61
+ {%- endif %}
62
+ {%- if message.tool_calls %}
63
+ {%- for tool_call in message.tool_calls %}
64
+ {%- if (loop.first and content) or (not loop.first) %}
65
+ {{- '\n' }}
66
+ {%- endif %}
67
+ {%- if tool_call.function %}
68
+ {%- set tool_call = tool_call.function %}
69
+ {%- endif %}
70
+ {{- '<tool_call>\n{"name": "' }}
71
+ {{- tool_call.name }}
72
+ {{- '", "arguments": ' }}
73
+ {%- if tool_call.arguments is string %}
74
+ {{- tool_call.arguments }}
75
+ {%- else %}
76
+ {{- tool_call.arguments | tojson }}
77
+ {%- endif %}
78
+ {{- '}\n</tool_call>' }}
79
+ {%- endfor %}
80
+ {%- endif %}
81
+ {{- '<|im_end|>\n' }}
82
+ {%- elif message.role == "tool" %}
83
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
84
+ {{- '<|im_start|>user' }}
85
+ {%- endif %}
86
+ {{- '\n<tool_response>\n' }}
87
+ {{- message.content }}
88
+ {{- '\n</tool_response>' }}
89
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
90
+ {{- '<|im_end|>\n' }}
91
+ {%- endif %}
92
+ {%- endif %}
93
+ {%- endfor %}
94
+ {%- if add_generation_prompt %}
95
+ {{- '<|im_start|>assistant\n' }}
96
+ {%- if enable_thinking is defined and enable_thinking is false %}
97
+ {{- '<think>\n\n</think>\n\n' }}
98
+ {%- endif %}
99
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention"
43
+ ],
44
+ "max_position_embeddings": 40960,
45
+ "max_window_layers": 28,
46
+ "model_type": "qwen3",
47
+ "num_attention_heads": 16,
48
+ "num_hidden_layers": 28,
49
+ "num_key_value_heads": 8,
50
+ "pad_token_id": 151654,
51
+ "quantization_config": {
52
+ "config_groups": {
53
+ "group_0": {
54
+ "format": "nvfp4-pack-quantized",
55
+ "input_activations": {
56
+ "actorder": null,
57
+ "block_structure": null,
58
+ "dynamic": "local",
59
+ "group_size": 16,
60
+ "num_bits": 4,
61
+ "observer": "static_minmax",
62
+ "observer_kwargs": {},
63
+ "scale_dtype": "torch.float8_e4m3fn",
64
+ "strategy": "tensor_group",
65
+ "symmetric": true,
66
+ "type": "float",
67
+ "zp_dtype": null
68
+ },
69
+ "output_activations": null,
70
+ "targets": [
71
+ "Linear",
72
+ "MoELinear"
73
+ ],
74
+ "weights": {
75
+ "actorder": null,
76
+ "block_structure": null,
77
+ "dynamic": false,
78
+ "group_size": 16,
79
+ "num_bits": 4,
80
+ "observer": "memoryless_minmax",
81
+ "observer_kwargs": {},
82
+ "scale_dtype": "torch.float8_e4m3fn",
83
+ "strategy": "tensor_group",
84
+ "symmetric": true,
85
+ "type": "float",
86
+ "zp_dtype": null
87
+ }
88
+ }
89
+ },
90
+ "format": "nvfp4-pack-quantized",
91
+ "global_compression_ratio": null,
92
+ "ignore": [
93
+ "lm_head"
94
+ ],
95
+ "kv_cache_scheme": null,
96
+ "quant_method": "compressed-tensors",
97
+ "quantization_status": "compressed",
98
+ "sparsity_config": {},
99
+ "transform_config": {},
100
+ "version": "0.14.1.a20260326"
101
+ },
102
+ "rms_norm_eps": 1e-06,
103
+ "rope_parameters": {
104
+ "rope_theta": 1000000,
105
+ "rope_type": "default"
106
+ },
107
+ "rope_scaling": null,
108
+ "rope_theta": 1000000.0,
109
+ "sliding_window": null,
110
+ "tie_word_embeddings": true,
111
+ "transformers_version": "4.57.6",
112
+ "unsloth_fixed": true,
113
+ "use_cache": true,
114
+ "use_sliding_window": false,
115
+ "vocab_size": 151936
116
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "max_length": 40960,
9
+ "pad_token_id": 151654,
10
+ "temperature": 0.6,
11
+ "top_k": 20,
12
+ "top_p": 0.95,
13
+ "transformers_version": "4.57.6"
14
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40a12243248a6d4c4b7dd59eb2df9abd95553745d890155ad2269b0396a63527
3
+ size 559125008
recipe.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ default_stage:
2
+ default_modifiers:
3
+ QuantizationModifier:
4
+ targets: [Linear, MoELinear]
5
+ ignore: [lm_head, 're:visual.*', 're:.*vision_tower.*', 're:.*video_tower.*', 're:.*audio_tower.*',
6
+ 're:.*multi_modal_projector.*']
7
+ scheme: NVFP4
8
+ bypass_divisibility_checks: false
special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "<|im_end|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<|vision_pad|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ }
16
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9015c7b9bcc5ac57906c55460048b82b9dfcdef34eb1f781c079da50de99f3d0
3
+ size 11422749
tokenizer_config.json ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|object_ref_start|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|object_ref_end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|box_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_start|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_end|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|image_pad|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|video_pad|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "151657": {
117
+ "content": "<tool_call>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "151658": {
125
+ "content": "</tool_call>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "151659": {
133
+ "content": "<|fim_prefix|>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "151660": {
141
+ "content": "<|fim_middle|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "151661": {
149
+ "content": "<|fim_suffix|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "151662": {
157
+ "content": "<|fim_pad|>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "151663": {
165
+ "content": "<|repo_name|>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "151664": {
173
+ "content": "<|file_sep|>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "151665": {
181
+ "content": "<tool_response>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "151666": {
189
+ "content": "</tool_response>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "151667": {
197
+ "content": "<think>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "151668": {
205
+ "content": "</think>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ }
212
+ },
213
+ "backend": "tokenizers",
214
+ "bos_token": null,
215
+ "clean_up_tokenization_spaces": false,
216
+ "eos_token": "<|im_end|>",
217
+ "errors": "replace",
218
+ "extra_special_tokens": {},
219
+ "is_local": false,
220
+ "model_max_length": 40960,
221
+ "model_specific_special_tokens": {},
222
+ "pad_token": "<|vision_pad|>",
223
+ "padding_side": "left",
224
+ "split_special_tokens": false,
225
+ "tokenizer_class": "Qwen2Tokenizer",
226
+ "unk_token": null
227
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff