transformers 4.57.3__py3-none-any.whl → 4.57.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. transformers/__init__.py +1 -1
  2. transformers/generation/utils.py +4 -2
  3. transformers/models/apertus/modeling_apertus.py +1 -1
  4. transformers/models/arcee/modeling_arcee.py +1 -1
  5. transformers/models/aria/modeling_aria.py +1 -1
  6. transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +1 -1
  7. transformers/models/aya_vision/modeling_aya_vision.py +1 -1
  8. transformers/models/aya_vision/modular_aya_vision.py +1 -1
  9. transformers/models/bitnet/modeling_bitnet.py +1 -1
  10. transformers/models/blip_2/modeling_blip_2.py +1 -1
  11. transformers/models/blt/modeling_blt.py +2 -2
  12. transformers/models/blt/modular_blt.py +2 -2
  13. transformers/models/cohere/modeling_cohere.py +1 -1
  14. transformers/models/cohere2/modeling_cohere2.py +1 -1
  15. transformers/models/cohere2_vision/modeling_cohere2_vision.py +2 -2
  16. transformers/models/cohere2_vision/modular_cohere2_vision.py +2 -2
  17. transformers/models/csm/modeling_csm.py +2 -2
  18. transformers/models/csm/modular_csm.py +2 -2
  19. transformers/models/deepseek_v2/modeling_deepseek_v2.py +1 -1
  20. transformers/models/deepseek_v3/modeling_deepseek_v3.py +1 -1
  21. transformers/models/diffllama/modeling_diffllama.py +1 -1
  22. transformers/models/dinov2/modeling_dinov2.py +1 -1
  23. transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +1 -1
  24. transformers/models/doge/modeling_doge.py +1 -1
  25. transformers/models/dots1/modeling_dots1.py +1 -1
  26. transformers/models/edgetam/modeling_edgetam.py +2 -2
  27. transformers/models/edgetam/modular_edgetam.py +1 -1
  28. transformers/models/efficientloftr/modeling_efficientloftr.py +1 -1
  29. transformers/models/emu3/modeling_emu3.py +1 -1
  30. transformers/models/eomt/modeling_eomt.py +1 -1
  31. transformers/models/eomt/modular_eomt.py +1 -1
  32. transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
  33. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +1 -1
  34. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +1 -1
  35. transformers/models/esm/modeling_esm.py +1 -1
  36. transformers/models/evolla/modeling_evolla.py +2 -2
  37. transformers/models/evolla/modular_evolla.py +2 -2
  38. transformers/models/exaone4/modeling_exaone4.py +1 -1
  39. transformers/models/exaone4/modular_exaone4.py +1 -1
  40. transformers/models/flex_olmo/modeling_flex_olmo.py +1 -1
  41. transformers/models/flex_olmo/modular_flex_olmo.py +1 -1
  42. transformers/models/gemma/modeling_gemma.py +1 -1
  43. transformers/models/gemma2/modeling_gemma2.py +1 -1
  44. transformers/models/gemma3/modeling_gemma3.py +1 -1
  45. transformers/models/glm/modeling_glm.py +1 -1
  46. transformers/models/glm4/modeling_glm4.py +1 -1
  47. transformers/models/glm4_moe/modeling_glm4_moe.py +1 -1
  48. transformers/models/glm4v/modeling_glm4v.py +1 -1
  49. transformers/models/glm4v/modular_glm4v.py +1 -1
  50. transformers/models/glm4v_moe/modeling_glm4v_moe.py +1 -1
  51. transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
  52. transformers/models/gpt_oss/modeling_gpt_oss.py +1 -1
  53. transformers/models/gpt_oss/modular_gpt_oss.py +1 -1
  54. transformers/models/granite/modeling_granite.py +1 -1
  55. transformers/models/helium/modeling_helium.py +1 -1
  56. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
  57. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +1 -1
  58. transformers/models/idefics/modeling_idefics.py +1 -1
  59. transformers/models/instructblip/modeling_instructblip.py +1 -1
  60. transformers/models/instructblipvideo/modeling_instructblipvideo.py +1 -1
  61. transformers/models/lfm2/modeling_lfm2.py +1 -1
  62. transformers/models/llama/modeling_llama.py +1 -1
  63. transformers/models/llama4/modeling_llama4.py +1 -1
  64. transformers/models/longcat_flash/modeling_longcat_flash.py +1 -1
  65. transformers/models/minimax/modeling_minimax.py +1 -1
  66. transformers/models/minimax/modular_minimax.py +1 -1
  67. transformers/models/ministral/modeling_ministral.py +1 -1
  68. transformers/models/ministral/modular_ministral.py +1 -1
  69. transformers/models/mistral/modeling_mistral.py +1 -1
  70. transformers/models/mistral/modular_mistral.py +1 -1
  71. transformers/models/mixtral/modeling_mixtral.py +1 -1
  72. transformers/models/mllama/modeling_mllama.py +3 -3
  73. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +1 -1
  74. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +1 -1
  75. transformers/models/moonshine/modeling_moonshine.py +2 -2
  76. transformers/models/moonshine/modular_moonshine.py +2 -2
  77. transformers/models/olmo/modeling_olmo.py +1 -1
  78. transformers/models/olmo2/modeling_olmo2.py +1 -1
  79. transformers/models/olmo3/modeling_olmo3.py +1 -1
  80. transformers/models/parakeet/modeling_parakeet.py +1 -1
  81. transformers/models/parakeet/modular_parakeet.py +1 -1
  82. transformers/models/phi/modeling_phi.py +1 -1
  83. transformers/models/phi3/modeling_phi3.py +1 -1
  84. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +1 -1
  85. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +1 -1
  86. transformers/models/qwen2/modeling_qwen2.py +1 -1
  87. transformers/models/qwen2/modular_qwen2.py +1 -1
  88. transformers/models/qwen3/modeling_qwen3.py +1 -1
  89. transformers/models/qwen3_moe/modeling_qwen3_moe.py +1 -1
  90. transformers/models/qwen3_next/modeling_qwen3_next.py +1 -1
  91. transformers/models/qwen3_next/modular_qwen3_next.py +1 -1
  92. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +4 -4
  93. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +1 -1
  94. transformers/models/qwen3_vl/modeling_qwen3_vl.py +3 -3
  95. transformers/models/qwen3_vl/modular_qwen3_vl.py +3 -3
  96. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +3 -3
  97. transformers/models/sam/modeling_sam.py +1 -1
  98. transformers/models/sam2/modeling_sam2.py +3 -3
  99. transformers/models/sam2/modular_sam2.py +3 -3
  100. transformers/models/sam_hq/modeling_sam_hq.py +1 -1
  101. transformers/models/seed_oss/modeling_seed_oss.py +1 -1
  102. transformers/models/siglip/modeling_siglip.py +1 -1
  103. transformers/models/siglip2/modeling_siglip2.py +1 -1
  104. transformers/models/smollm3/modeling_smollm3.py +1 -1
  105. transformers/models/starcoder2/modeling_starcoder2.py +1 -1
  106. transformers/models/starcoder2/modular_starcoder2.py +1 -1
  107. transformers/models/t5gemma/modeling_t5gemma.py +2 -2
  108. transformers/models/t5gemma/modular_t5gemma.py +2 -2
  109. transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
  110. transformers/models/voxtral/modeling_voxtral.py +1 -1
  111. transformers/models/voxtral/modular_voxtral.py +1 -1
  112. transformers/tokenization_utils_base.py +6 -1
  113. transformers/utils/generic.py +3 -1
  114. {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/METADATA +1 -1
  115. {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/RECORD +119 -119
  116. {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/WHEEL +0 -0
  117. {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/entry_points.txt +0 -0
  118. {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/licenses/LICENSE +0 -0
  119. {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/top_level.txt +0 -0
@@ -150,7 +150,7 @@ class Starcoder2Model(MistralModel):
150
150
  self.norm = nn.LayerNorm(config.hidden_size, eps=config.norm_epsilon)
151
151
  self.embedding_dropout = config.embedding_dropout
152
152
 
153
- @check_model_inputs()
153
+ @check_model_inputs
154
154
  def forward(
155
155
  self,
156
156
  input_ids: Optional[torch.LongTensor] = None,
@@ -707,7 +707,7 @@ class T5GemmaEncoder(T5GemmaPreTrainedModel):
707
707
  # Initialize weights and apply final processing
708
708
  self.post_init()
709
709
 
710
- @check_model_inputs()
710
+ @check_model_inputs
711
711
  def forward(
712
712
  self,
713
713
  input_ids: Optional[torch.LongTensor] = None,
@@ -791,7 +791,7 @@ class T5GemmaDecoder(T5GemmaEncoder):
791
791
 
792
792
  self.post_init()
793
793
 
794
- @check_model_inputs()
794
+ @check_model_inputs
795
795
  def forward(
796
796
  self,
797
797
  input_ids: Optional[torch.LongTensor] = None,
@@ -559,7 +559,7 @@ class T5GemmaEncoder(T5GemmaPreTrainedModel):
559
559
  # Initialize weights and apply final processing
560
560
  self.post_init()
561
561
 
562
- @check_model_inputs()
562
+ @check_model_inputs
563
563
  def forward(
564
564
  self,
565
565
  input_ids: Optional[torch.LongTensor] = None,
@@ -643,7 +643,7 @@ class T5GemmaDecoder(T5GemmaEncoder):
643
643
 
644
644
  self.post_init()
645
645
 
646
- @check_model_inputs()
646
+ @check_model_inputs
647
647
  def forward(
648
648
  self,
649
649
  input_ids: Optional[torch.LongTensor] = None,
@@ -368,7 +368,7 @@ class VaultGemmaModel(VaultGemmaPreTrainedModel):
368
368
  # Initialize weights and apply final processing
369
369
  self.post_init()
370
370
 
371
- @check_model_inputs()
371
+ @check_model_inputs
372
372
  @auto_docstring
373
373
  def forward(
374
374
  self,
@@ -321,7 +321,7 @@ class VoxtralEncoder(VoxtralPreTrainedModel):
321
321
  def set_input_embeddings(self, value: nn.Module):
322
322
  self.conv1 = value
323
323
 
324
- @check_model_inputs()
324
+ @check_model_inputs
325
325
  def forward(
326
326
  self,
327
327
  input_features,
@@ -65,7 +65,7 @@ class VoxtralEncoder(Qwen2AudioEncoder):
65
65
  "hidden_states": VoxtralEncoderLayer,
66
66
  }
67
67
 
68
- @check_model_inputs()
68
+ @check_model_inputs
69
69
  def forward(
70
70
  self,
71
71
  input_features,
@@ -2435,7 +2435,12 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
2435
2435
  return True
2436
2436
  return False
2437
2437
 
2438
- if _is_local or is_base_mistral(pretrained_model_name_or_path):
2438
+ if is_offline_mode():
2439
+ _is_local = True
2440
+
2441
+ if pretrained_model_name_or_path is not None and (
2442
+ _is_local or (not _is_local and is_base_mistral(pretrained_model_name_or_path))
2443
+ ):
2439
2444
  _config_file = cached_file(
2440
2445
  pretrained_model_name_or_path,
2441
2446
  "config.json",
@@ -944,7 +944,7 @@ class OutputRecorder:
944
944
  class_name: Optional[str] = None
945
945
 
946
946
 
947
- def check_model_inputs(tie_last_hidden_states=True):
947
+ def check_model_inputs(func=None, *, tie_last_hidden_states=True):
948
948
  """
949
949
  Decorator to intercept specific layer outputs without using hooks.
950
950
  Compatible with torch.compile (Dynamo tracing).
@@ -1115,6 +1115,8 @@ def check_model_inputs(tie_last_hidden_states=True):
1115
1115
 
1116
1116
  return wrapper
1117
1117
 
1118
+ if func is not None:
1119
+ return wrapped_fn(func)
1118
1120
  return wrapped_fn
1119
1121
 
1120
1122
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: transformers
3
- Version: 4.57.3
3
+ Version: 4.57.4
4
4
  Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
5
5
  Home-page: https://github.com/huggingface/transformers
6
6
  Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)