transformers 4.57.3__py3-none-any.whl → 4.57.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +1 -1
- transformers/generation/utils.py +4 -2
- transformers/models/apertus/modeling_apertus.py +1 -1
- transformers/models/arcee/modeling_arcee.py +1 -1
- transformers/models/aria/modeling_aria.py +1 -1
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +1 -1
- transformers/models/aya_vision/modeling_aya_vision.py +1 -1
- transformers/models/aya_vision/modular_aya_vision.py +1 -1
- transformers/models/bitnet/modeling_bitnet.py +1 -1
- transformers/models/blip_2/modeling_blip_2.py +1 -1
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/cohere/modeling_cohere.py +1 -1
- transformers/models/cohere2/modeling_cohere2.py +1 -1
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +2 -2
- transformers/models/cohere2_vision/modular_cohere2_vision.py +2 -2
- transformers/models/csm/modeling_csm.py +2 -2
- transformers/models/csm/modular_csm.py +2 -2
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +1 -1
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +1 -1
- transformers/models/dinov2/modeling_dinov2.py +1 -1
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +1 -1
- transformers/models/doge/modeling_doge.py +1 -1
- transformers/models/dots1/modeling_dots1.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +2 -2
- transformers/models/edgetam/modular_edgetam.py +1 -1
- transformers/models/efficientloftr/modeling_efficientloftr.py +1 -1
- transformers/models/emu3/modeling_emu3.py +1 -1
- transformers/models/eomt/modeling_eomt.py +1 -1
- transformers/models/eomt/modular_eomt.py +1 -1
- transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +1 -1
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +1 -1
- transformers/models/esm/modeling_esm.py +1 -1
- transformers/models/evolla/modeling_evolla.py +2 -2
- transformers/models/evolla/modular_evolla.py +2 -2
- transformers/models/exaone4/modeling_exaone4.py +1 -1
- transformers/models/exaone4/modular_exaone4.py +1 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +1 -1
- transformers/models/flex_olmo/modular_flex_olmo.py +1 -1
- transformers/models/gemma/modeling_gemma.py +1 -1
- transformers/models/gemma2/modeling_gemma2.py +1 -1
- transformers/models/gemma3/modeling_gemma3.py +1 -1
- transformers/models/glm/modeling_glm.py +1 -1
- transformers/models/glm4/modeling_glm4.py +1 -1
- transformers/models/glm4_moe/modeling_glm4_moe.py +1 -1
- transformers/models/glm4v/modeling_glm4v.py +1 -1
- transformers/models/glm4v/modular_glm4v.py +1 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +1 -1
- transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +1 -1
- transformers/models/gpt_oss/modular_gpt_oss.py +1 -1
- transformers/models/granite/modeling_granite.py +1 -1
- transformers/models/helium/modeling_helium.py +1 -1
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +1 -1
- transformers/models/idefics/modeling_idefics.py +1 -1
- transformers/models/instructblip/modeling_instructblip.py +1 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +1 -1
- transformers/models/lfm2/modeling_lfm2.py +1 -1
- transformers/models/llama/modeling_llama.py +1 -1
- transformers/models/llama4/modeling_llama4.py +1 -1
- transformers/models/longcat_flash/modeling_longcat_flash.py +1 -1
- transformers/models/minimax/modeling_minimax.py +1 -1
- transformers/models/minimax/modular_minimax.py +1 -1
- transformers/models/ministral/modeling_ministral.py +1 -1
- transformers/models/ministral/modular_ministral.py +1 -1
- transformers/models/mistral/modeling_mistral.py +1 -1
- transformers/models/mistral/modular_mistral.py +1 -1
- transformers/models/mixtral/modeling_mixtral.py +1 -1
- transformers/models/mllama/modeling_mllama.py +3 -3
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +1 -1
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +1 -1
- transformers/models/moonshine/modeling_moonshine.py +2 -2
- transformers/models/moonshine/modular_moonshine.py +2 -2
- transformers/models/olmo/modeling_olmo.py +1 -1
- transformers/models/olmo2/modeling_olmo2.py +1 -1
- transformers/models/olmo3/modeling_olmo3.py +1 -1
- transformers/models/parakeet/modeling_parakeet.py +1 -1
- transformers/models/parakeet/modular_parakeet.py +1 -1
- transformers/models/phi/modeling_phi.py +1 -1
- transformers/models/phi3/modeling_phi3.py +1 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +1 -1
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +1 -1
- transformers/models/qwen2/modeling_qwen2.py +1 -1
- transformers/models/qwen2/modular_qwen2.py +1 -1
- transformers/models/qwen3/modeling_qwen3.py +1 -1
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +1 -1
- transformers/models/qwen3_next/modeling_qwen3_next.py +1 -1
- transformers/models/qwen3_next/modular_qwen3_next.py +1 -1
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +4 -4
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +1 -1
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl/modular_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +3 -3
- transformers/models/sam/modeling_sam.py +1 -1
- transformers/models/sam2/modeling_sam2.py +3 -3
- transformers/models/sam2/modular_sam2.py +3 -3
- transformers/models/sam_hq/modeling_sam_hq.py +1 -1
- transformers/models/seed_oss/modeling_seed_oss.py +1 -1
- transformers/models/siglip/modeling_siglip.py +1 -1
- transformers/models/siglip2/modeling_siglip2.py +1 -1
- transformers/models/smollm3/modeling_smollm3.py +1 -1
- transformers/models/starcoder2/modeling_starcoder2.py +1 -1
- transformers/models/starcoder2/modular_starcoder2.py +1 -1
- transformers/models/t5gemma/modeling_t5gemma.py +2 -2
- transformers/models/t5gemma/modular_t5gemma.py +2 -2
- transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
- transformers/models/voxtral/modeling_voxtral.py +1 -1
- transformers/models/voxtral/modular_voxtral.py +1 -1
- transformers/tokenization_utils_base.py +6 -1
- transformers/utils/generic.py +3 -1
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/METADATA +1 -1
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/RECORD +119 -119
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/WHEEL +0 -0
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/entry_points.txt +0 -0
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/licenses/LICENSE +0 -0
- {transformers-4.57.3.dist-info → transformers-4.57.4.dist-info}/top_level.txt +0 -0
transformers/__init__.py
CHANGED
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.57.3"
+__version__ = "4.57.4"
 
 from pathlib import Path
 from typing import TYPE_CHECKING
transformers/generation/utils.py
CHANGED
@@ -2379,9 +2379,11 @@ class GenerationMixin(ContinuousMixin):
             generation_config, use_model_defaults, **kwargs
         )
         generation_mode = generation_config.get_generation_mode(assistant_model)
+        deprecated_mode_repo = self._get_deprecated_gen_repo(generation_mode, trust_remote_code, custom_generate)
+
         if isinstance(custom_generate, Callable):
             decoding_method = custom_generate
-        else:
+        elif deprecated_mode_repo is None:
             # type() required to access the unbound class-level method
             decoding_method = getattr(type(self), GENERATION_MODES_MAPPING[generation_mode])
 

@@ -2392,7 +2394,7 @@ class GenerationMixin(ContinuousMixin):
         # NOTE: This must come after initializing generation_config, since we need it to determine if this is a deprecated mode.
         # It must also be before any preparation steps, since Hub repos expect to be loaded before preparation steps.
         # TODO joao, manuel: remove this in v4.62.0
-        if deprecated_mode_repo
+        if deprecated_mode_repo is not None:
             return GenerationMixin.generate(
                 self,
                 inputs=inputs,
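These two hunks compute the deprecated-mode Hub repo once, before the decoding method is chosen: a user-supplied `custom_generate` callable still wins, built-in methods are only selected when the mode has not been moved out of the library, and deprecated modes fall back to `GenerationMixin.generate`. Below is a minimal sketch of that dispatch order only; `MODE_TO_METHOD`, `DEPRECATED_MODE_REPOS`, `get_deprecated_repo`, and `pick_decoding_method` are hypothetical stand-ins, not the transformers API.

from typing import Callable, Optional

# Hypothetical stand-ins for GENERATION_MODES_MAPPING and _get_deprecated_gen_repo.
MODE_TO_METHOD = {"sample": "_sample", "beam_search": "_beam_search"}
DEPRECATED_MODE_REPOS = {"contrastive_search": "example-org/contrastive-search"}

def get_deprecated_repo(mode: str) -> Optional[str]:
    # Returns a Hub repo id if the generation mode was moved out of the library.
    return DEPRECATED_MODE_REPOS.get(mode)

def pick_decoding_method(mode: str, custom_generate: Optional[Callable] = None):
    deprecated_repo = get_deprecated_repo(mode)   # computed up front, as in the new line 2382
    if callable(custom_generate):                 # explicit user override always wins
        return custom_generate
    if deprecated_repo is None:                   # normal case: built-in decoding method
        return MODE_TO_METHOD[mode]
    return f"delegate to Hub repo {deprecated_repo}"  # deprecated mode: the `is not None` branch

print(pick_decoding_method("sample"))              # -> "_sample"
print(pick_decoding_method("contrastive_search"))  # -> "delegate to Hub repo example-org/contrastive-search"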
@@ -1007,7 +1007,7 @@ class Blip2QFormerModel(Blip2PreTrainedModel):
         extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
         return extended_attention_mask
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -577,7 +577,7 @@ class BltLocalDecoder(BltPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -1047,7 +1047,7 @@ class BltModel(BltPreTrainedModel):
         self.patcher = None
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -536,7 +536,7 @@ class BltLocalDecoder(BltPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -799,7 +799,7 @@ class BltModel(BltPreTrainedModel):
         self.patcher = None
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -213,7 +213,7 @@ class Cohere2VisionModel(Cohere2VisionPreTrainedModel):
         )
         return special_image_mask
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -306,7 +306,7 @@ class Cohere2VisionForConditionalGeneration(Cohere2VisionPreTrainedModel, Genera
     def multi_modal_projector(self):
         return self.model.multi_modal_projector
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -107,7 +107,7 @@ class Cohere2VisionModel(AyaVisionModel):
         image_features = self.multi_modal_projector(selected_image_feature)
         return image_features
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -160,7 +160,7 @@ class Cohere2VisionForConditionalGeneration(AyaVisionForConditionalGeneration):
     def get_image_features(self, pixel_values: torch.FloatTensor):
         return self.model.get_image_features(pixel_values=pixel_values)
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -409,7 +409,7 @@ class CsmDepthDecoderModel(CsmPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -662,7 +662,7 @@ class CsmBackboneModel(CsmPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -156,7 +156,7 @@ class CsmDepthDecoderModel(LlamaModel, CsmPreTrainedModel):
         self.embed_tokens = nn.Embedding((config.num_codebooks * config.vocab_size), config.backbone_hidden_size)
         self.inputs_embeds_projector = nn.Linear(config.backbone_hidden_size, config.hidden_size, bias=False)
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -395,7 +395,7 @@ class CsmBackboneModel(LlamaModel):
         super().__init__(config)
         self.embed_tokens = CsmBackboneModelEmbeddings(config)
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(self, **super_kwargs):
         r"""

@@ -624,7 +624,7 @@ class Dinov2Backbone(Dinov2PreTrainedModel, BackboneMixin):
     def get_input_embeddings(self) -> Dinov2PatchEmbeddings:
         return self.embeddings.patch_embeddings
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self, pixel_values: torch.Tensor, output_hidden_states: Optional[bool] = None, **kwargs

@@ -644,7 +644,7 @@ class Dinov2WithRegistersBackbone(Dinov2WithRegistersPreTrainedModel, BackboneMi
     def get_input_embeddings(self) -> Dinov2WithRegistersPatchEmbeddings:
         return self.embeddings.patch_embeddings
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -444,7 +444,7 @@ class EdgeTamVisionModel(EdgeTamPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,

@@ -1028,7 +1028,7 @@ class EdgeTamModel(EdgeTamPreTrainedModel):
         )
         return prompt_output
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -208,7 +208,7 @@ class EdgeTamVisionModel(Sam2VisionModel):
     def get_input_embeddings(self):
         raise NotImplementedError("Can't get input embeddings from timm wrapper model")
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,

@@ -1087,7 +1087,7 @@ class EomtForUniversalSegmentation(EomtPreTrainedModel):
     def get_loss(self, loss_dict: dict[str, Tensor]) -> Tensor:
         return sum(loss_dict.values())
 
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,

@@ -610,7 +610,7 @@ class EvollaSaProtProteinEncoder(EvollaSaProtPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.Tensor],

@@ -1397,7 +1397,7 @@ class EvollaModel(EvollaPreTrainedModel):
         self.embed_tokens = value
 
     @auto_docstring
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -241,7 +241,7 @@ class EvollaSaProtProteinEncoder(EvollaSaProtPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.Tensor],

@@ -835,7 +835,7 @@ class EvollaModel(EvollaPreTrainedModel):
         self.embed_tokens = value
 
     @auto_docstring
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -364,7 +364,7 @@ class Exaone4Model(Exaone4PreTrainedModel, LlamaModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,

@@ -277,7 +277,7 @@ class FlexOlmoPreTrainedModel(MixtralPreTrainedModel):
 # FlexOlmo model is identical to Mixtral model except:
 # - FlexOlmo does not use sliding window attention.
 class FlexOlmoModel(MixtralModel):
-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
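The model-file hunks above all adjust the `@check_model_inputs` decorator applied to `forward`. For orientation only, here is a rough sketch of what an input-checking decorator of this kind does; it is a simplified stand-in under assumed behavior, not the implementation shipped in `transformers.utils.generic`, and `DemoConfig`/`DemoModel` are made-up names.

from functools import wraps

def check_model_inputs(func):
    # Simplified stand-in: fill in common forward() flags from the model config
    # when the caller leaves them unset, then run the wrapped method.
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        for flag in ("output_attentions", "output_hidden_states"):
            if kwargs.get(flag) is None:
                kwargs[flag] = getattr(self.config, flag, False)
        return func(self, *args, **kwargs)
    return wrapper

class DemoConfig:
    output_attentions = False
    output_hidden_states = True

class DemoModel:
    config = DemoConfig()

    @check_model_inputs
    def forward(self, input_ids, output_attentions=None, output_hidden_states=None):
        return {"output_attentions": output_attentions, "output_hidden_states": output_hidden_states}

print(DemoModel().forward([1, 2, 3]))
# -> {'output_attentions': False, 'output_hidden_states': True}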