Text Generation
Transformers
Safetensors
PyTorch
nvidia
nemotron-h
conversational

Fix: Support loading dt_bias and other trained-model parameters in modeling_nemotron_h.py

#7
Files changed (1)
  1. modeling_nemotron_h.py +2 -0
modeling_nemotron_h.py CHANGED
@@ -1114,6 +1114,8 @@ class NemotronHPreTrainedModel(PreTrainedModel):
1114
  def _init_weights(self, module):
1115
  """Initialize the weights."""
1116
  if isinstance(module, NemotronHMamba2Mixer):
 
 
1117
  module.A_log._no_weight_decay = True
1118
  module.D._no_weight_decay = True
1119
 
 
1114
  def _init_weights(self, module):
1115
  """Initialize the weights."""
1116
  if isinstance(module, NemotronHMamba2Mixer):
1117
+ if getattr(module.dt_bias, "_is_hf_initialized", False):
1118
+ return
1119
  module.A_log._no_weight_decay = True
1120
  module.D._no_weight_decay = True
1121