transformers 4.57.2__py3-none-any.whl → 4.57.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. transformers/__init__.py +1 -1
  2. transformers/generation/utils.py +4 -2
  3. transformers/models/apertus/modeling_apertus.py +1 -1
  4. transformers/models/arcee/modeling_arcee.py +1 -1
  5. transformers/models/aria/modeling_aria.py +1 -1
  6. transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +1 -1
  7. transformers/models/aya_vision/modeling_aya_vision.py +1 -1
  8. transformers/models/aya_vision/modular_aya_vision.py +1 -1
  9. transformers/models/bitnet/modeling_bitnet.py +1 -1
  10. transformers/models/blip_2/modeling_blip_2.py +1 -1
  11. transformers/models/blt/modeling_blt.py +2 -2
  12. transformers/models/blt/modular_blt.py +2 -2
  13. transformers/models/cohere/modeling_cohere.py +1 -1
  14. transformers/models/cohere2/modeling_cohere2.py +1 -1
  15. transformers/models/cohere2_vision/modeling_cohere2_vision.py +2 -2
  16. transformers/models/cohere2_vision/modular_cohere2_vision.py +2 -2
  17. transformers/models/csm/modeling_csm.py +2 -2
  18. transformers/models/csm/modular_csm.py +2 -2
  19. transformers/models/deepseek_v2/modeling_deepseek_v2.py +1 -1
  20. transformers/models/deepseek_v3/modeling_deepseek_v3.py +1 -1
  21. transformers/models/diffllama/modeling_diffllama.py +1 -1
  22. transformers/models/dinov2/modeling_dinov2.py +1 -1
  23. transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +1 -1
  24. transformers/models/doge/modeling_doge.py +1 -1
  25. transformers/models/dots1/modeling_dots1.py +1 -1
  26. transformers/models/edgetam/modeling_edgetam.py +2 -2
  27. transformers/models/edgetam/modular_edgetam.py +1 -1
  28. transformers/models/efficientloftr/modeling_efficientloftr.py +1 -1
  29. transformers/models/emu3/modeling_emu3.py +1 -1
  30. transformers/models/eomt/modeling_eomt.py +1 -1
  31. transformers/models/eomt/modular_eomt.py +1 -1
  32. transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
  33. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +1 -1
  34. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +1 -1
  35. transformers/models/esm/modeling_esm.py +1 -1
  36. transformers/models/evolla/modeling_evolla.py +2 -2
  37. transformers/models/evolla/modular_evolla.py +2 -2
  38. transformers/models/exaone4/modeling_exaone4.py +1 -1
  39. transformers/models/exaone4/modular_exaone4.py +1 -1
  40. transformers/models/flex_olmo/modeling_flex_olmo.py +1 -1
  41. transformers/models/flex_olmo/modular_flex_olmo.py +1 -1
  42. transformers/models/gemma/modeling_gemma.py +1 -1
  43. transformers/models/gemma2/modeling_gemma2.py +1 -1
  44. transformers/models/gemma3/modeling_gemma3.py +1 -1
  45. transformers/models/glm/modeling_glm.py +1 -1
  46. transformers/models/glm4/modeling_glm4.py +1 -1
  47. transformers/models/glm4_moe/modeling_glm4_moe.py +1 -1
  48. transformers/models/glm4v/modeling_glm4v.py +1 -1
  49. transformers/models/glm4v/modular_glm4v.py +1 -1
  50. transformers/models/glm4v_moe/modeling_glm4v_moe.py +1 -1
  51. transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
  52. transformers/models/gpt_oss/modeling_gpt_oss.py +1 -1
  53. transformers/models/gpt_oss/modular_gpt_oss.py +1 -1
  54. transformers/models/granite/modeling_granite.py +1 -1
  55. transformers/models/helium/modeling_helium.py +1 -1
  56. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
  57. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +1 -1
  58. transformers/models/idefics/modeling_idefics.py +1 -1
  59. transformers/models/instructblip/modeling_instructblip.py +1 -1
  60. transformers/models/instructblipvideo/modeling_instructblipvideo.py +1 -1
  61. transformers/models/lfm2/modeling_lfm2.py +1 -1
  62. transformers/models/llama/modeling_llama.py +1 -1
  63. transformers/models/llama4/modeling_llama4.py +1 -1
  64. transformers/models/longcat_flash/modeling_longcat_flash.py +1 -1
  65. transformers/models/minimax/modeling_minimax.py +1 -1
  66. transformers/models/minimax/modular_minimax.py +1 -1
  67. transformers/models/ministral/modeling_ministral.py +1 -1
  68. transformers/models/ministral/modular_ministral.py +1 -1
  69. transformers/models/mistral/modeling_mistral.py +1 -1
  70. transformers/models/mistral/modular_mistral.py +1 -1
  71. transformers/models/mixtral/modeling_mixtral.py +1 -1
  72. transformers/models/mllama/modeling_mllama.py +3 -3
  73. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +1 -1
  74. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +1 -1
  75. transformers/models/moonshine/modeling_moonshine.py +2 -2
  76. transformers/models/moonshine/modular_moonshine.py +2 -2
  77. transformers/models/olmo/modeling_olmo.py +1 -1
  78. transformers/models/olmo2/modeling_olmo2.py +1 -1
  79. transformers/models/olmo3/modeling_olmo3.py +1 -1
  80. transformers/models/parakeet/modeling_parakeet.py +1 -1
  81. transformers/models/parakeet/modular_parakeet.py +1 -1
  82. transformers/models/phi/modeling_phi.py +1 -1
  83. transformers/models/phi3/modeling_phi3.py +1 -1
  84. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +1 -1
  85. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +1 -1
  86. transformers/models/qwen2/modeling_qwen2.py +1 -1
  87. transformers/models/qwen2/modular_qwen2.py +1 -1
  88. transformers/models/qwen3/modeling_qwen3.py +1 -1
  89. transformers/models/qwen3_moe/modeling_qwen3_moe.py +1 -1
  90. transformers/models/qwen3_next/modeling_qwen3_next.py +1 -1
  91. transformers/models/qwen3_next/modular_qwen3_next.py +1 -1
  92. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +4 -4
  93. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +1 -1
  94. transformers/models/qwen3_vl/modeling_qwen3_vl.py +3 -3
  95. transformers/models/qwen3_vl/modular_qwen3_vl.py +3 -3
  96. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +3 -3
  97. transformers/models/sam/modeling_sam.py +1 -1
  98. transformers/models/sam2/modeling_sam2.py +3 -3
  99. transformers/models/sam2/modular_sam2.py +3 -3
  100. transformers/models/sam_hq/modeling_sam_hq.py +1 -1
  101. transformers/models/seed_oss/modeling_seed_oss.py +1 -1
  102. transformers/models/siglip/modeling_siglip.py +1 -1
  103. transformers/models/siglip2/modeling_siglip2.py +1 -1
  104. transformers/models/smollm3/modeling_smollm3.py +1 -1
  105. transformers/models/starcoder2/modeling_starcoder2.py +1 -1
  106. transformers/models/starcoder2/modular_starcoder2.py +1 -1
  107. transformers/models/t5gemma/modeling_t5gemma.py +2 -2
  108. transformers/models/t5gemma/modular_t5gemma.py +2 -2
  109. transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
  110. transformers/models/voxtral/modeling_voxtral.py +1 -1
  111. transformers/models/voxtral/modular_voxtral.py +1 -1
  112. transformers/tokenization_utils_base.py +88 -32
  113. transformers/utils/generic.py +3 -1
  114. {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/METADATA +1 -1
  115. {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/RECORD +119 -119
  116. {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/WHEEL +0 -0
  117. {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/entry_points.txt +0 -0
  118. {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/licenses/LICENSE +0 -0
  119. {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/top_level.txt +0 -0
transformers/__init__.py CHANGED
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.57.2"
+__version__ = "4.57.4"
 
 from pathlib import Path
 from typing import TYPE_CHECKING
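The only change to `transformers/__init__.py` is the version string. After upgrading, a one-line check (a minimal sketch using only the public `__version__` attribute) confirms which release is installed:

```python
import transformers

# Wheel metadata and __init__.py should both report the new version.
assert transformers.__version__ == "4.57.4", transformers.__version__
```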
transformers/generation/utils.py CHANGED
@@ -2379,9 +2379,11 @@ class GenerationMixin(ContinuousMixin):
             generation_config, use_model_defaults, **kwargs
         )
         generation_mode = generation_config.get_generation_mode(assistant_model)
+        deprecated_mode_repo = self._get_deprecated_gen_repo(generation_mode, trust_remote_code, custom_generate)
+
         if isinstance(custom_generate, Callable):
             decoding_method = custom_generate
-        else:
+        elif deprecated_mode_repo is None:
             # type() required to access the unbound class-level method
             decoding_method = getattr(type(self), GENERATION_MODES_MAPPING[generation_mode])
 
@@ -2392,7 +2394,7 @@ class GenerationMixin(ContinuousMixin):
         # NOTE: This must come after initializing generation_config, since we need it to determine if this is a deprecated mode.
         # It must also be before any preparation steps, since Hub repos expect to be loaded before preparation steps.
         # TODO joao, manuel: remove this in v4.62.0
-        if deprecated_mode_repo := self._get_deprecated_gen_repo(generation_mode, trust_remote_code, custom_generate):
+        if deprecated_mode_repo is not None:
             return GenerationMixin.generate(
                 self,
                 inputs=inputs,
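Context for the two hunks above: previously `deprecated_mode_repo` was computed by a walrus expression inside the later `if`, after `decoding_method` had already been resolved through `GENERATION_MODES_MAPPING[generation_mode]`; for a deprecated mode absent from that mapping (presumably the case for modes moved to Hub repos), that lookup would fail before the fallback could run. The patch hoists the check so the mapping lookup is skipped whenever a deprecated-mode repo is found. A minimal sketch of the reordered control flow (all names below are simplified stand-ins, not the real `GenerationMixin` signature):

```python
GENERATION_MODES_MAPPING = {"greedy_search": "greedy_decoding"}  # deprecated modes absent

class SketchMixin:
    def _get_deprecated_gen_repo(self, mode):
        # Hypothetical lookup: a Hub repo name for deprecated modes, else None.
        return {"legacy_mode": "hypothetical-org/legacy-mode"}.get(mode)

    def greedy_decoding(self):
        return "ran built-in decoding"

    def generate(self, mode, custom_generate=None):
        # Computed up front, so the mapping lookup below is skipped for deprecated modes.
        deprecated_mode_repo = self._get_deprecated_gen_repo(mode)
        if callable(custom_generate):
            decoding_method = custom_generate
        elif deprecated_mode_repo is None:
            decoding_method = getattr(type(self), GENERATION_MODES_MAPPING[mode])
        if deprecated_mode_repo is not None:
            return f"deferring to Hub repo {deprecated_mode_repo}"
        return decoding_method(self)
```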
transformers/models/apertus/modeling_apertus.py CHANGED
@@ -339,7 +339,7 @@ class ApertusModel(ApertusPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
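Nearly every remaining model-file hunk in this release is the same one-liner shown above for Apertus: the decorator is now applied bare (`@check_model_inputs`) rather than called as a factory (`@check_model_inputs()`). For both spellings to be interchangeable, the decorator must detect whether it received the function directly; a minimal sketch of that dual-mode pattern (an illustration only, not the actual `check_model_inputs` source — though the `transformers/utils/generic.py` change in this diff is plausibly related):

```python
import functools

def check_model_inputs(func=None, **flags):
    """Sketch of a decorator usable as @check_model_inputs or @check_model_inputs(...)."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(self, *args, **kwargs):
            # ... input validation / output recording would happen here ...
            return fn(self, *args, **kwargs)
        return wrapper
    # Bare use passes the decorated function itself as the first positional argument.
    return decorator(func) if callable(func) else decorator
```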
transformers/models/arcee/modeling_arcee.py CHANGED
@@ -344,7 +344,7 @@ class ArceeModel(ArceePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/aria/modeling_aria.py CHANGED
@@ -721,7 +721,7 @@ class AriaTextModel(AriaTextPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py CHANGED
@@ -366,7 +366,7 @@ class ASTModel(ASTPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/aya_vision/modeling_aya_vision.py CHANGED
@@ -263,7 +263,7 @@ class AyaVisionModel(AyaVisionPreTrainedModel):
         )
         return special_image_mask
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/aya_vision/modular_aya_vision.py CHANGED
@@ -162,7 +162,7 @@ class AyaVisionModel(LlavaModel):
         image_features = self.multi_modal_projector(selected_image_feature)
         return image_features
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/bitnet/modeling_bitnet.py CHANGED
@@ -343,7 +343,7 @@ class BitNetModel(BitNetPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/blip_2/modeling_blip_2.py CHANGED
@@ -1007,7 +1007,7 @@ class Blip2QFormerModel(Blip2PreTrainedModel):
         extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
         return extended_attention_mask
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/blt/modeling_blt.py CHANGED
@@ -577,7 +577,7 @@ class BltLocalDecoder(BltPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -1047,7 +1047,7 @@ class BltModel(BltPreTrainedModel):
         self.patcher = None
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/blt/modular_blt.py CHANGED
@@ -536,7 +536,7 @@ class BltLocalDecoder(BltPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -799,7 +799,7 @@ class BltModel(BltPreTrainedModel):
         self.patcher = None
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/cohere/modeling_cohere.py CHANGED
@@ -376,7 +376,7 @@ class CohereModel(CoherePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/cohere2/modeling_cohere2.py CHANGED
@@ -351,7 +351,7 @@ class Cohere2Model(Cohere2PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/cohere2_vision/modeling_cohere2_vision.py CHANGED
@@ -213,7 +213,7 @@ class Cohere2VisionModel(Cohere2VisionPreTrainedModel):
         )
         return special_image_mask
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -306,7 +306,7 @@ class Cohere2VisionForConditionalGeneration(Cohere2VisionPreTrainedModel, Genera
     def multi_modal_projector(self):
         return self.model.multi_modal_projector
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/cohere2_vision/modular_cohere2_vision.py CHANGED
@@ -107,7 +107,7 @@ class Cohere2VisionModel(AyaVisionModel):
         image_features = self.multi_modal_projector(selected_image_feature)
         return image_features
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -160,7 +160,7 @@ class Cohere2VisionForConditionalGeneration(AyaVisionForConditionalGeneration):
     def get_image_features(self, pixel_values: torch.FloatTensor):
         return self.model.get_image_features(pixel_values=pixel_values)
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/csm/modeling_csm.py CHANGED
@@ -409,7 +409,7 @@ class CsmDepthDecoderModel(CsmPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -662,7 +662,7 @@ class CsmBackboneModel(CsmPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/csm/modular_csm.py CHANGED
@@ -156,7 +156,7 @@ class CsmDepthDecoderModel(LlamaModel, CsmPreTrainedModel):
         self.embed_tokens = nn.Embedding((config.num_codebooks * config.vocab_size), config.backbone_hidden_size)
         self.inputs_embeds_projector = nn.Linear(config.backbone_hidden_size, config.hidden_size, bias=False)
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -395,7 +395,7 @@ class CsmBackboneModel(LlamaModel):
         super().__init__(config)
         self.embed_tokens = CsmBackboneModelEmbeddings(config)
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(self, **super_kwargs):
         r"""
transformers/models/deepseek_v2/modeling_deepseek_v2.py CHANGED
@@ -491,7 +491,7 @@ class DeepseekV2Model(DeepseekV2PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/deepseek_v3/modeling_deepseek_v3.py CHANGED
@@ -539,7 +539,7 @@ class DeepseekV3Model(DeepseekV3PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/diffllama/modeling_diffllama.py CHANGED
@@ -608,7 +608,7 @@ class DiffLlamaModel(DiffLlamaPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/dinov2/modeling_dinov2.py CHANGED
@@ -624,7 +624,7 @@ class Dinov2Backbone(Dinov2PreTrainedModel, BackboneMixin):
     def get_input_embeddings(self) -> Dinov2PatchEmbeddings:
         return self.embeddings.patch_embeddings
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self, pixel_values: torch.Tensor, output_hidden_states: Optional[bool] = None, **kwargs
transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py CHANGED
@@ -644,7 +644,7 @@ class Dinov2WithRegistersBackbone(Dinov2WithRegistersPreTrainedModel, BackboneMi
     def get_input_embeddings(self) -> Dinov2WithRegistersPatchEmbeddings:
         return self.embeddings.patch_embeddings
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/doge/modeling_doge.py CHANGED
@@ -530,7 +530,7 @@ class DogeModel(DogePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/dots1/modeling_dots1.py CHANGED
@@ -454,7 +454,7 @@ class Dots1Model(Dots1PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/edgetam/modeling_edgetam.py CHANGED
@@ -444,7 +444,7 @@ class EdgeTamVisionModel(EdgeTamPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
@@ -1028,7 +1028,7 @@ class EdgeTamModel(EdgeTamPreTrainedModel):
         )
         return prompt_output
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/edgetam/modular_edgetam.py CHANGED
@@ -208,7 +208,7 @@ class EdgeTamVisionModel(Sam2VisionModel):
     def get_input_embeddings(self):
         raise NotImplementedError("Can't get input embeddings from timm wrapper model")
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
transformers/models/efficientloftr/modeling_efficientloftr.py CHANGED
@@ -680,7 +680,7 @@ class EfficientLoFTRModel(EfficientLoFTRPreTrainedModel):
 
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/emu3/modeling_emu3.py CHANGED
@@ -1166,7 +1166,7 @@ class Emu3TextModel(Emu3PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/eomt/modeling_eomt.py CHANGED
@@ -1087,7 +1087,7 @@ class EomtForUniversalSegmentation(EomtPreTrainedModel):
     def get_loss(self, loss_dict: dict[str, Tensor]) -> Tensor:
         return sum(loss_dict.values())
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/eomt/modular_eomt.py CHANGED
@@ -492,7 +492,7 @@ class EomtForUniversalSegmentation(Mask2FormerForUniversalSegmentation):
 
         return attn_mask
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/ernie4_5/modeling_ernie4_5.py CHANGED
@@ -342,7 +342,7 @@ class Ernie4_5Model(Ernie4_5PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py CHANGED
@@ -510,7 +510,7 @@ class Ernie4_5_MoeModel(Ernie4_5_MoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py CHANGED
@@ -248,7 +248,7 @@ class Ernie4_5_MoeModel(Ernie4_5_MoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/esm/modeling_esm.py CHANGED
@@ -678,7 +678,7 @@ class EsmModel(EsmPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/evolla/modeling_evolla.py CHANGED
@@ -610,7 +610,7 @@ class EvollaSaProtProteinEncoder(EvollaSaProtPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.Tensor],
@@ -1397,7 +1397,7 @@ class EvollaModel(EvollaPreTrainedModel):
         self.embed_tokens = value
 
     @auto_docstring
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/evolla/modular_evolla.py CHANGED
@@ -241,7 +241,7 @@ class EvollaSaProtProteinEncoder(EvollaSaProtPreTrainedModel):
         for layer, heads in heads_to_prune.items():
             self.encoder.layer[layer].attention.prune_heads(heads)
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.Tensor],
@@ -835,7 +835,7 @@ class EvollaModel(EvollaPreTrainedModel):
         self.embed_tokens = value
 
     @auto_docstring
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/exaone4/modeling_exaone4.py CHANGED
@@ -352,7 +352,7 @@ class Exaone4Model(Exaone4PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/exaone4/modular_exaone4.py CHANGED
@@ -364,7 +364,7 @@ class Exaone4Model(Exaone4PreTrainedModel, LlamaModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/flex_olmo/modeling_flex_olmo.py CHANGED
@@ -417,7 +417,7 @@ class FlexOlmoModel(FlexOlmoPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/flex_olmo/modular_flex_olmo.py CHANGED
@@ -277,7 +277,7 @@ class FlexOlmoPreTrainedModel(MixtralPreTrainedModel):
 # FlexOlmo model is identical to Mixtral model except:
 # - FlexOlmo does not use sliding window attention.
 class FlexOlmoModel(MixtralModel):
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/gemma/modeling_gemma.py CHANGED
@@ -348,7 +348,7 @@ class GemmaModel(GemmaPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/gemma2/modeling_gemma2.py CHANGED
@@ -379,7 +379,7 @@ class Gemma2Model(Gemma2PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/gemma3/modeling_gemma3.py CHANGED
@@ -482,7 +482,7 @@ class Gemma3TextModel(Gemma3PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/glm/modeling_glm.py CHANGED
@@ -358,7 +358,7 @@ class GlmModel(GlmPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/glm4/modeling_glm4.py CHANGED
@@ -362,7 +362,7 @@ class Glm4Model(Glm4PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/glm4_moe/modeling_glm4_moe.py CHANGED
@@ -476,7 +476,7 @@ class Glm4MoeModel(Glm4MoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/glm4v/modeling_glm4v.py CHANGED
@@ -806,7 +806,7 @@ class Glm4vTextModel(Glm4vPreTrainedModel):
         self.post_init()
 
     @auto_docstring
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/glm4v/modular_glm4v.py CHANGED
@@ -872,7 +872,7 @@ class Glm4vTextModel(Qwen2_5_VLTextModel):
         del self.has_sliding_layers
 
     @auto_docstring
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/glm4v_moe/modeling_glm4v_moe.py CHANGED
@@ -922,7 +922,7 @@ class Glm4vMoeTextModel(Glm4vMoePreTrainedModel):
         self.post_init()
 
     @auto_docstring
-    @check_model_inputs()
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
transformers/models/gpt_neox/modeling_gpt_neox.py CHANGED
@@ -393,7 +393,7 @@ class GPTNeoXModel(GPTNeoXPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/gpt_oss/modeling_gpt_oss.py CHANGED
@@ -456,7 +456,7 @@ class GptOssModel(GptOssPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/gpt_oss/modular_gpt_oss.py CHANGED
@@ -386,7 +386,7 @@ class GptOssPreTrainedModel(LlamaPreTrainedModel):
 class GptOssModel(MixtralModel):
     _no_split_modules = ["GptOssDecoderLayer"]
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/granite/modeling_granite.py CHANGED
@@ -375,7 +375,7 @@ class GraniteModel(GranitePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
transformers/models/helium/modeling_helium.py CHANGED
@@ -343,7 +343,7 @@ class HeliumModel(HeliumPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
 
-    @check_model_inputs()
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,