transformers 4.57.2-py3-none-any.whl → 4.57.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +1 -1
- transformers/generation/utils.py +4 -2
- transformers/models/apertus/modeling_apertus.py +1 -1
- transformers/models/arcee/modeling_arcee.py +1 -1
- transformers/models/aria/modeling_aria.py +1 -1
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +1 -1
- transformers/models/aya_vision/modeling_aya_vision.py +1 -1
- transformers/models/aya_vision/modular_aya_vision.py +1 -1
- transformers/models/bitnet/modeling_bitnet.py +1 -1
- transformers/models/blip_2/modeling_blip_2.py +1 -1
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/cohere/modeling_cohere.py +1 -1
- transformers/models/cohere2/modeling_cohere2.py +1 -1
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +2 -2
- transformers/models/cohere2_vision/modular_cohere2_vision.py +2 -2
- transformers/models/csm/modeling_csm.py +2 -2
- transformers/models/csm/modular_csm.py +2 -2
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +1 -1
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +1 -1
- transformers/models/dinov2/modeling_dinov2.py +1 -1
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +1 -1
- transformers/models/doge/modeling_doge.py +1 -1
- transformers/models/dots1/modeling_dots1.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +2 -2
- transformers/models/edgetam/modular_edgetam.py +1 -1
- transformers/models/efficientloftr/modeling_efficientloftr.py +1 -1
- transformers/models/emu3/modeling_emu3.py +1 -1
- transformers/models/eomt/modeling_eomt.py +1 -1
- transformers/models/eomt/modular_eomt.py +1 -1
- transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +1 -1
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +1 -1
- transformers/models/esm/modeling_esm.py +1 -1
- transformers/models/evolla/modeling_evolla.py +2 -2
- transformers/models/evolla/modular_evolla.py +2 -2
- transformers/models/exaone4/modeling_exaone4.py +1 -1
- transformers/models/exaone4/modular_exaone4.py +1 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +1 -1
- transformers/models/flex_olmo/modular_flex_olmo.py +1 -1
- transformers/models/gemma/modeling_gemma.py +1 -1
- transformers/models/gemma2/modeling_gemma2.py +1 -1
- transformers/models/gemma3/modeling_gemma3.py +1 -1
- transformers/models/glm/modeling_glm.py +1 -1
- transformers/models/glm4/modeling_glm4.py +1 -1
- transformers/models/glm4_moe/modeling_glm4_moe.py +1 -1
- transformers/models/glm4v/modeling_glm4v.py +1 -1
- transformers/models/glm4v/modular_glm4v.py +1 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +1 -1
- transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +1 -1
- transformers/models/gpt_oss/modular_gpt_oss.py +1 -1
- transformers/models/granite/modeling_granite.py +1 -1
- transformers/models/helium/modeling_helium.py +1 -1
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +1 -1
- transformers/models/idefics/modeling_idefics.py +1 -1
- transformers/models/instructblip/modeling_instructblip.py +1 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +1 -1
- transformers/models/lfm2/modeling_lfm2.py +1 -1
- transformers/models/llama/modeling_llama.py +1 -1
- transformers/models/llama4/modeling_llama4.py +1 -1
- transformers/models/longcat_flash/modeling_longcat_flash.py +1 -1
- transformers/models/minimax/modeling_minimax.py +1 -1
- transformers/models/minimax/modular_minimax.py +1 -1
- transformers/models/ministral/modeling_ministral.py +1 -1
- transformers/models/ministral/modular_ministral.py +1 -1
- transformers/models/mistral/modeling_mistral.py +1 -1
- transformers/models/mistral/modular_mistral.py +1 -1
- transformers/models/mixtral/modeling_mixtral.py +1 -1
- transformers/models/mllama/modeling_mllama.py +3 -3
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +1 -1
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +1 -1
- transformers/models/moonshine/modeling_moonshine.py +2 -2
- transformers/models/moonshine/modular_moonshine.py +2 -2
- transformers/models/olmo/modeling_olmo.py +1 -1
- transformers/models/olmo2/modeling_olmo2.py +1 -1
- transformers/models/olmo3/modeling_olmo3.py +1 -1
- transformers/models/parakeet/modeling_parakeet.py +1 -1
- transformers/models/parakeet/modular_parakeet.py +1 -1
- transformers/models/phi/modeling_phi.py +1 -1
- transformers/models/phi3/modeling_phi3.py +1 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +1 -1
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +1 -1
- transformers/models/qwen2/modeling_qwen2.py +1 -1
- transformers/models/qwen2/modular_qwen2.py +1 -1
- transformers/models/qwen3/modeling_qwen3.py +1 -1
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +1 -1
- transformers/models/qwen3_next/modeling_qwen3_next.py +1 -1
- transformers/models/qwen3_next/modular_qwen3_next.py +1 -1
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +4 -4
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +1 -1
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl/modular_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +3 -3
- transformers/models/sam/modeling_sam.py +1 -1
- transformers/models/sam2/modeling_sam2.py +3 -3
- transformers/models/sam2/modular_sam2.py +3 -3
- transformers/models/sam_hq/modeling_sam_hq.py +1 -1
- transformers/models/seed_oss/modeling_seed_oss.py +1 -1
- transformers/models/siglip/modeling_siglip.py +1 -1
- transformers/models/siglip2/modeling_siglip2.py +1 -1
- transformers/models/smollm3/modeling_smollm3.py +1 -1
- transformers/models/starcoder2/modeling_starcoder2.py +1 -1
- transformers/models/starcoder2/modular_starcoder2.py +1 -1
- transformers/models/t5gemma/modeling_t5gemma.py +2 -2
- transformers/models/t5gemma/modular_t5gemma.py +2 -2
- transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
- transformers/models/voxtral/modeling_voxtral.py +1 -1
- transformers/models/voxtral/modular_voxtral.py +1 -1
- transformers/tokenization_utils_base.py +88 -32
- transformers/utils/generic.py +3 -1
- {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/METADATA +1 -1
- {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/RECORD +119 -119
- {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/WHEEL +0 -0
- {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/entry_points.txt +0 -0
- {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/licenses/LICENSE +0 -0
- {transformers-4.57.2.dist-info → transformers-4.57.4.dist-info}/top_level.txt +0 -0
@@ -943,7 +943,7 @@ class IdeficsModel(IdeficsPreTrainedModel):
     def freeze_vision_layers(self, module_exceptions=[]):
         freeze_model(self.vision_model, module_exceptions=module_exceptions)

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -915,7 +915,7 @@ class InstructBlipQFormerModel(InstructBlipPreTrainedModel):
         extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
         return extended_attention_mask

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -877,7 +877,7 @@ class InstructBlipVideoQFormerModel(InstructBlipVideoPreTrainedModel):
         extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
         return extended_attention_mask

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -993,7 +993,7 @@ class MllamaVisionModel(MllamaPreTrainedModel):
         hidden_state = torch.cat([class_embedding, hidden_state], dim=1)
         return hidden_state

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self, pixel_values: torch.Tensor, aspect_ratio_ids: torch.Tensor, aspect_ratio_mask: torch.Tensor, **kwargs
@@ -1161,7 +1161,7 @@ class MllamaTextModel(MllamaPreTrainedModel):
         self.gradient_checkpointing = False
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @can_return_tuple
     @auto_docstring
     def forward(
@@ -1429,7 +1429,7 @@ class MllamaModel(MllamaPreTrainedModel):
     def get_decoder(self):
         return self.language_model

-    @check_model_inputs
+    @check_model_inputs
     @can_return_tuple
     @auto_docstring
     def forward(
@@ -520,7 +520,7 @@ class MoonshineEncoder(MoonshinePreTrainedModel):
     def set_input_embeddings(self, value: nn.Module):
         self.conv1 = value

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_values: torch.FloatTensor,
@@ -605,7 +605,7 @@ class MoonshineDecoder(MoonshinePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -552,7 +552,7 @@ class MoonshineEncoder(MoonshinePreTrainedModel):
     def set_input_embeddings(self, value: nn.Module):
         self.conv1 = value

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_values: torch.FloatTensor,
@@ -627,7 +627,7 @@ class MoonshineDecoder(LlamaModel):
             [MoonshineDecoderLayer(config, idx) for idx in range(config.decoder_num_hidden_layers)]
         )

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -1562,7 +1562,7 @@ class Phi4MultimodalModel(Phi4MultimodalPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -1472,7 +1472,7 @@ class Phi4MultimodalModel(Phi3Model):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -1628,7 +1628,7 @@ class Qwen3OmniMoeThinkerTextModel(Qwen3OmniMoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -2480,7 +2480,7 @@ class Qwen3OmniMoeTalkerCodePredictorModel(Qwen3OmniMoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -2852,7 +2852,7 @@ class Qwen3OmniMoeTalkerModel(Qwen3OmniMoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -3542,7 +3542,7 @@ class Qwen3OmniMoeCode2WavTransformerModel(Qwen3OmniMoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -779,7 +779,7 @@ class Qwen3VLTextModel(Qwen3VLPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -1104,7 +1104,7 @@ class Qwen3VLModel(Qwen3VLPreTrainedModel):
         return special_image_mask, special_video_mask

     @auto_docstring
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -1311,7 +1311,7 @@ class Qwen3VLForConditionalGeneration(Qwen3VLPreTrainedModel, GenerationMixin):
     def visual(self):
         return self.model.visual

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -749,7 +749,7 @@ class Qwen3VLTextModel(Qwen3VLPreTrainedModel, Qwen3Model):
         hidden_states[visual_pos_masks, :] = local_this
         return hidden_states

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -1006,7 +1006,7 @@ class Qwen3VLModel(Qwen2_5_VLModel):
         return self.get_image_features(pixel_values_videos, video_grid_thw)

     @auto_docstring
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -1149,7 +1149,7 @@ class Qwen3VLForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
     config: Qwen3VLConfig
     _checkpoint_conversion_mapping = {}

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -891,7 +891,7 @@ class Qwen3VLMoeTextModel(Qwen3VLMoePreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -1270,7 +1270,7 @@ class Qwen3VLMoeModel(Qwen3VLMoePreTrainedModel):
         return special_image_mask, special_video_mask

     @auto_docstring
-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -1530,7 +1530,7 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLMoePreTrainedModel, GenerationMixin):
     def visual(self):
         return self.model.visual

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -618,7 +618,7 @@ class Sam2HieraDetModel(Sam2PreTrainedModel):
         pos_embed = pos_embed.permute(0, 2, 3, 1)
         return pos_embed

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
@@ -670,7 +670,7 @@ class Sam2VisionModel(Sam2PreTrainedModel):
     def get_input_embeddings(self):
         return self.backbone.get_input_embeddings()

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
@@ -1387,7 +1387,7 @@ class Sam2Model(Sam2PreTrainedModel):
         )
         return prompt_output

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -726,7 +726,7 @@ class Sam2HieraDetModel(Sam2PreTrainedModel):
         pos_embed = pos_embed.permute(0, 2, 3, 1)
         return pos_embed

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
@@ -778,7 +778,7 @@ class Sam2VisionModel(Sam2PreTrainedModel):
     def get_input_embeddings(self):
         return self.backbone.get_input_embeddings()

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
@@ -1280,7 +1280,7 @@ class Sam2Model(SamModel):

         return feature_maps, feature_maps_position_embeddings, vision_outputs.hidden_states, vision_outputs.attentions

-    @check_model_inputs
+    @check_model_inputs
     @auto_docstring
     def forward(
         self,
@@ -325,7 +325,7 @@ class Starcoder2Model(Starcoder2PreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()

-    @check_model_inputs
+    @check_model_inputs
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,