transformers 4.57.3__py3-none-any.whl → 4.57.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +1 -1
- transformers/generation/utils.py +4 -2
- transformers/models/apertus/modeling_apertus.py +1 -1
- transformers/models/arcee/modeling_arcee.py +1 -1
- transformers/models/aria/modeling_aria.py +1 -1
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +1 -1
- transformers/models/aya_vision/modeling_aya_vision.py +1 -1
- transformers/models/aya_vision/modular_aya_vision.py +1 -1
- transformers/models/bitnet/modeling_bitnet.py +1 -1
- transformers/models/blip_2/modeling_blip_2.py +1 -1
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/cohere/modeling_cohere.py +1 -1
- transformers/models/cohere2/modeling_cohere2.py +1 -1
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +2 -2
- transformers/models/cohere2_vision/modular_cohere2_vision.py +2 -2
- transformers/models/csm/modeling_csm.py +2 -2
- transformers/models/csm/modular_csm.py +2 -2
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +1 -1
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +1 -1
- transformers/models/dinov2/modeling_dinov2.py +1 -1
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +1 -1
- transformers/models/doge/modeling_doge.py +1 -1
- transformers/models/dots1/modeling_dots1.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +2 -2
- transformers/models/edgetam/modular_edgetam.py +1 -1
- transformers/models/efficientloftr/modeling_efficientloftr.py +1 -1
- transformers/models/emu3/modeling_emu3.py +1 -1
- transformers/models/eomt/modeling_eomt.py +1 -1
- transformers/models/eomt/modular_eomt.py +1 -1
- transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +1 -1
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +1 -1
- transformers/models/esm/modeling_esm.py +1 -1
- transformers/models/evolla/modeling_evolla.py +2 -2
- transformers/models/evolla/modular_evolla.py +2 -2
- transformers/models/exaone4/modeling_exaone4.py +1 -1
- transformers/models/exaone4/modular_exaone4.py +1 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +1 -1
- transformers/models/flex_olmo/modular_flex_olmo.py +1 -1
- transformers/models/gemma/modeling_gemma.py +1 -1
- transformers/models/gemma2/modeling_gemma2.py +1 -1
- transformers/models/gemma3/modeling_gemma3.py +1 -1
- transformers/models/glm/modeling_glm.py +1 -1
- transformers/models/glm4/modeling_glm4.py +1 -1
- transformers/models/glm4_moe/modeling_glm4_moe.py +1 -1
- transformers/models/glm4v/modeling_glm4v.py +1 -1
- transformers/models/glm4v/modular_glm4v.py +1 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +1 -1
- transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +1 -1
- transformers/models/gpt_oss/modular_gpt_oss.py +1 -1
- transformers/models/granite/modeling_granite.py +1 -1
- transformers/models/helium/modeling_helium.py +1 -1
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +1 -1
- transformers/models/idefics/modeling_idefics.py +1 -1
- transformers/models/instructblip/modeling_instructblip.py +1 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +1 -1
- transformers/models/lfm2/modeling_lfm2.py +1 -1
- transformers/models/llama/modeling_llama.py +1 -1
- transformers/models/llama4/modeling_llama4.py +1 -1
- transformers/models/longcat_flash/modeling_longcat_flash.py +1 -1
- transformers/models/minimax/modeling_minimax.py +1 -1
- transformers/models/minimax/modular_minimax.py +1 -1
- transformers/models/ministral/modeling_ministral.py +1 -1
- transformers/models/ministral/modular_ministral.py +1 -1
- transformers/models/mistral/modeling_mistral.py +1 -1
- transformers/models/mistral/modular_mistral.py +1 -1
- transformers/models/mixtral/modeling_mixtral.py +1 -1
- transformers/models/mllama/modeling_mllama.py +3 -3
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +1 -1
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +1 -1
- transformers/models/moonshine/modeling_moonshine.py +2 -2
- transformers/models/moonshine/modular_moonshine.py +2 -2
- transformers/models/olmo/modeling_olmo.py +1 -1
- transformers/models/olmo2/modeling_olmo2.py +1 -1
- transformers/models/olmo3/modeling_olmo3.py +1 -1
- transformers/models/parakeet/modeling_parakeet.py +1 -1
- transformers/models/parakeet/modular_parakeet.py +1 -1
- transformers/models/phi/modeling_phi.py +1 -1
- transformers/models/phi3/modeling_phi3.py +1 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +1 -1
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +1 -1
- transformers/models/qwen2/modeling_qwen2.py +1 -1
- transformers/models/qwen2/modular_qwen2.py +1 -1
- transformers/models/qwen3/modeling_qwen3.py +1 -1
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +1 -1
- transformers/models/qwen3_next/modeling_qwen3_next.py +1 -1
- transformers/models/qwen3_next/modular_qwen3_next.py +1 -1
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +4 -4
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +1 -1
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl/modular_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +3 -3
- transformers/models/sam/modeling_sam.py +1 -1
- transformers/models/sam2/modeling_sam2.py +3 -3
- transformers/models/sam2/modular_sam2.py +3 -3
- transformers/models/sam_hq/modeling_sam_hq.py +1 -1
- transformers/models/seed_oss/modeling_seed_oss.py +1 -1
- transformers/models/siglip/modeling_siglip.py +1 -1
- transformers/models/siglip2/modeling_siglip2.py +1 -1
- transformers/models/smollm3/modeling_smollm3.py +1 -1
- transformers/models/starcoder2/modeling_starcoder2.py +1 -1
- transformers/models/starcoder2/modular_starcoder2.py +1 -1
- transformers/models/t5gemma/modeling_t5gemma.py +2 -2
- transformers/models/t5gemma/modular_t5gemma.py +2 -2
- transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
- transformers/models/voxtral/modeling_voxtral.py +1 -1
- transformers/models/voxtral/modular_voxtral.py +1 -1
- transformers/tokenization_utils_base.py +6 -1
- transformers/utils/generic.py +3 -1
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/METADATA +1 -1
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/RECORD +119 -119
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/WHEEL +0 -0
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/entry_points.txt +0 -0
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/licenses/LICENSE +0 -0
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/top_level.txt +0 -0
transformers/models/starcoder2/modular_starcoder2.py CHANGED

```diff
@@ -150,7 +150,7 @@ class Starcoder2Model(MistralModel):
         self.norm = nn.LayerNorm(config.hidden_size, eps=config.norm_epsilon)
         self.embedding_dropout = config.embedding_dropout
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
```
transformers/models/t5gemma/modeling_t5gemma.py CHANGED

```diff
@@ -707,7 +707,7 @@ class T5GemmaEncoder(T5GemmaPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -791,7 +791,7 @@ class T5GemmaDecoder(T5GemmaEncoder):
 
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
```
transformers/models/t5gemma/modular_t5gemma.py CHANGED

```diff
@@ -559,7 +559,7 @@ class T5GemmaEncoder(T5GemmaPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -643,7 +643,7 @@ class T5GemmaDecoder(T5GemmaEncoder):
 
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
```
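The model-file hunks above all center on the `@check_model_inputs` decorator line that sits directly above `forward`. As orientation, here is a simplified stand-in for a decorator in that position (not the implementation in `transformers/utils/generic.py`, which additionally records intermediate layer outputs without forward hooks): it wraps `forward` and normalizes recording flags such as `output_hidden_states` before the call.

```python
# Simplified stand-in for a check_model_inputs-style decorator; the real one
# in transformers/utils/generic.py also intercepts layer outputs without
# registering forward hooks.
import functools

def intercept_inputs(forward):
    @functools.wraps(forward)
    def wrapper(self, *args, **kwargs):
        # Fall back to the model config when the caller did not pass the flag,
        # so the wrapped forward() always sees an explicit value.
        flag = kwargs.pop("output_hidden_states", None)
        if flag is None:
            flag = getattr(self.config, "output_hidden_states", False)
        return forward(self, *args, output_hidden_states=flag, **kwargs)

    return wrapper

class ToyModel:
    class config:
        output_hidden_states = True

    @intercept_inputs  # bare decorator usage, as in the hunks above
    def forward(self, x, output_hidden_states=False):
        return {"last_hidden_state": x, "output_hidden_states": output_hidden_states}

print(ToyModel().forward("hi"))  # flag filled in from the config
```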
transformers/tokenization_utils_base.py CHANGED

```diff
@@ -2435,7 +2435,12 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
                 return True
             return False
 
-        if
+        if is_offline_mode():
+            _is_local = True
+
+        if pretrained_model_name_or_path is not None and (
+            _is_local or (not _is_local and is_base_mistral(pretrained_model_name_or_path))
+        ):
             _config_file = cached_file(
                 pretrained_model_name_or_path,
                 "config.json",
```
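For context, `is_offline_mode()` is an existing helper in `transformers.utils` that reports whether offline mode was requested through the `HF_HUB_OFFLINE` (or legacy `TRANSFORMERS_OFFLINE`) environment variable; the new lines use it to short-circuit the `is_base_mistral(...)` check when no network access is expected. A minimal sketch of that guard pattern, assuming the flag is set before the library is imported (it is read at import time):

```python
# Minimal sketch of the offline-mode guard added above. Set the environment
# variable before importing transformers: the flag is captured at import time.
import os

os.environ["HF_HUB_OFFLINE"] = "1"

from transformers.utils import is_offline_mode

_is_local = False
if is_offline_mode():
    # Offline: treat the target as local so no hub lookup is attempted and
    # resolution falls back to files already on disk or in the local cache.
    _is_local = True

print(_is_local)  # True under HF_HUB_OFFLINE=1
```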
transformers/utils/generic.py CHANGED

```diff
@@ -944,7 +944,7 @@ class OutputRecorder:
     class_name: Optional[str] = None
 
 
-def check_model_inputs(tie_last_hidden_states=True):
+def check_model_inputs(func=None, *, tie_last_hidden_states=True):
     """
     Decorator to intercept specific layer outputs without using hooks.
     Compatible with torch.compile (Dynamo tracing).
@@ -1115,6 +1115,8 @@ def check_model_inputs(tie_last_hidden_states=True):
 
         return wrapper
 
+    if func is not None:
+        return wrapped_fn(func)
     return wrapped_fn
 
 
```
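This signature change is the standard optional-argument decorator pattern: taking the decorated function as an optional first positional argument lets the same decorator work both bare (`@check_model_inputs`, as in the modeling hunks above) and with keyword arguments (e.g. `@check_model_inputs(tie_last_hidden_states=False)`). A self-contained sketch of the pattern with toy names:

```python
# Generic sketch of the func=None decorator pattern from the fix above;
# `traced` and `label` are toy names, not transformers code.
import functools

def traced(func=None, *, label="call"):
    def wrapped_fn(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            print(f"{label}: entering {f.__name__}")
            return f(*args, **kwargs)
        return wrapper

    # Bare usage (@traced): Python passes the function itself as `func`,
    # so apply the real decorator immediately.
    if func is not None:
        return wrapped_fn(func)
    # Parenthesized usage (@traced(...)): return the decorator to be applied.
    return wrapped_fn

@traced
def f():
    return 1

@traced(label="timed")
def g():
    return 2

f()  # prints "call: entering f"
g()  # prints "timed: entering g"
```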
{transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: transformers
-Version: 4.57.3
+Version: 4.57.4
 Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
 Home-page: https://github.com/huggingface/transformers
 Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
```