transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
--- a/transformers/models/bitnet/modeling_bitnet.py
+++ b/transformers/models/bitnet/modeling_bitnet.py
@@ -27,7 +27,7 @@ from torch import nn
 from ...activations import ACT2FN
 from ...cache_utils import Cache, DynamicCache
 from ...generation import GenerationMixin
-from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
+from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
 from ...masking_utils import create_causal_mask
 from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import GradientCheckpointingLayer
@@ -36,7 +36,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import check_model_inputs
+from ...utils.generic import check_model_inputs, maybe_autocast
 from .configuration_bitnet import BitNetConfig
 
 
@@ -151,6 +151,7 @@ def eager_attention_forward(
     return attn_output, attn_weights
 
 
+@use_kernelized_func(apply_rotary_pos_emb)
 class BitNetAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""
 
@@ -176,7 +177,6 @@ class BitNetAttention(nn.Module):
         self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
-        self.rotary_fn = apply_rotary_pos_emb
         self.attn_sub_norm = BitNetRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
 
     def forward(
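The two hunks above replace the per-instance `self.rotary_fn = apply_rotary_pos_emb` assignment with a class-level `@use_kernelized_func(apply_rotary_pos_emb)` decorator. The decorator's real implementation lives in `transformers.integrations` and is not shown in this diff; as a rough, assumed mental model only, it registers the eager function on the class so a faster hub kernel can be swapped in when one is available:

```python
# Hypothetical sketch only -- NOT the actual transformers implementation.
# Assumed behavior: register an eager fallback on the class; a kernelized
# variant may replace it at load time when a matching hub kernel exists.
def use_kernelized_func(default_fn):
    def decorator(cls):
        cls.rotary_fn = staticmethod(default_fn)  # eager fallback
        return cls
    return decorator
```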
@@ -326,7 +326,7 @@ class BitNetRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()
 
         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
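`maybe_autocast` replaces a bare `torch.autocast` context here and again in the BLT and Chameleon rotary embeddings further down. Its actual definition is in `transformers.utils.generic`; a plausible sketch (an assumption, not the verbatim code) is a thin wrapper that degrades to a no-op context on device types without autocast support:

```python
# Assumed shape of the helper, for orientation only.
import contextlib
import torch

def maybe_autocast(device_type: str, enabled: bool = True, **kwargs):
    try:
        return torch.autocast(device_type=device_type, enabled=enabled, **kwargs)
    except RuntimeError:  # device type without autocast support
        return contextlib.nullcontext()
```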
--- a/transformers/models/blenderbot/modeling_blenderbot.py
+++ b/transformers/models/blenderbot/modeling_blenderbot.py
@@ -493,6 +493,7 @@ class BlenderbotEncoder(BlenderbotPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
+        **kwargs,
     ):
         r"""
         Args:
@@ -643,6 +644,7 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
         output_hidden_states=None,
         return_dict=None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ):
         r"""
         Args:
@@ -885,6 +887,7 @@ class BlenderbotModel(BlenderbotPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.FloatTensor], Seq2SeqModelOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
@@ -1039,6 +1042,7 @@ class BlenderbotForConditionalGeneration(BlenderbotPreTrainedModel, GenerationMi
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.FloatTensor], Seq2SeqLMOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
@@ -1196,6 +1200,7 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel, GenerationMixin):
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
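The `+ **kwargs,` additions above recur across most model `forward` signatures in this release (Blenderbot, BlenderbotSmall, BLIP, BLOOM, BridgeTower, Bros, CANINE, Chinese-CLIP, CLAP, and many files in the list at the top). The practical effect visible in the hunks: extra keyword arguments passed by generic callers are absorbed instead of raising `TypeError`. A minimal illustration with toy functions (not library code):

```python
def forward_old(input_ids, attention_mask=None):
    return input_ids

def forward_new(input_ids, attention_mask=None, **kwargs):
    return input_ids  # unrecognized keywords are silently absorbed

forward_new(input_ids=[1], output_attentions=False)   # OK
# forward_old(input_ids=[1], output_attentions=False) # would raise TypeError
```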
--- a/transformers/models/blenderbot/tokenization_blenderbot.py
+++ b/transformers/models/blenderbot/tokenization_blenderbot.py
@@ -102,14 +102,15 @@ class BlenderbotTokenizer(TokenizersBackend):
         add_prefix_space (`bool`, *optional*, defaults to `True`):
             Whether or not to add an initial space to the input. This allows to treat the leading word just as any
             other word. (Blenderbot tokenizer detect beginning of words by the preceding space).
-        vocab (`dict`, *optional*):
-            Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file
-        merges (`list`, *optional*):
-            Custom merges list. If not provided, merges are loaded from merges_file
+        vocab (`str` or `dict[str, int]`, *optional*):
+            Custom vocabulary dictionary. If not provided, vocabulary is loaded from `vocab_file`.
+        merges (`str` or `list[str]`, *optional*):
+            Custom merges list. If not provided, merges are loaded from `merges_file`.
     """
 
     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask"]
+    model = BPE
 
     def __init__(
         self,
@@ -132,22 +133,20 @@ class BlenderbotTokenizer(TokenizersBackend):
             else mask_token
         )
 
-
-
-
-
-
-        else:
-            # Initialize with minimal vocab
-            self._vocab = {
+        # Initialize vocab and merges; when not provided fall back to minimal vocab
+        self._vocab = (
+            vocab
+            if vocab is not None
+            else {
                 str(bos_token): 0,
                 str(pad_token): 1,
                 str(eos_token): 2,
                 str(unk_token): 3,
                 str(mask_token): 4,
             }
-
+        )
 
+        self._merges = merges or []
         self._tokenizer = Tokenizer(
             BPE(
                 vocab=self._vocab,
@@ -168,10 +167,7 @@ class BlenderbotTokenizer(TokenizersBackend):
             trim_offsets=True,
         )
 
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
            bos_token=bos_token,
            eos_token=eos_token,
            sep_token=sep_token,
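Under the refactor above, explicit `vocab`/`merges` are honored when given, and the tokenizer now falls back to a minimal five-token vocabulary instead of requiring them. A sketch of the resulting constructor behavior (assuming the rc1 signature shown in the hunks; real checkpoints still go through `from_pretrained`):

```python
from transformers import BlenderbotTokenizer

# Explicit vocab/merges are used as-is:
tok = BlenderbotTokenizer(
    vocab={"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "<mask>": 4},
    merges=[],
)

# Omitting both now yields the minimal special-token vocab:
tok_minimal = BlenderbotTokenizer()
```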
--- a/transformers/models/blenderbot_small/modeling_blenderbot_small.py
+++ b/transformers/models/blenderbot_small/modeling_blenderbot_small.py
@@ -484,6 +484,7 @@ class BlenderbotSmallEncoder(BlenderbotSmallPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
+        **kwargs,
     ):
         r"""
         Args:
@@ -630,6 +631,7 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
         output_hidden_states=None,
         return_dict=None,
         cache_position=None,
+        **kwargs,
     ):
         r"""
         Args:
@@ -858,6 +860,7 @@ class BlenderbotSmallModel(BlenderbotSmallPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.FloatTensor], Seq2SeqModelOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
@@ -999,6 +1002,7 @@ class BlenderbotSmallForConditionalGeneration(BlenderbotSmallPreTrainedModel, Ge
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.FloatTensor], Seq2SeqLMOutput]:
         r"""
         decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
@@ -1156,6 +1160,7 @@ class BlenderbotSmallForCausalLM(BlenderbotSmallPreTrainedModel, GenerationMixin
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
--- a/transformers/models/blip/modeling_blip_text.py
+++ b/transformers/models/blip/modeling_blip_text.py
@@ -609,6 +609,7 @@ class BlipTextModel(BlipTextPreTrainedModel):
         return_dict: Optional[bool] = None,
         is_decoder: Optional[bool] = False,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
         r"""
         encoder_hidden_states (`torch.FloatTensor`, *optional*):
@@ -771,6 +772,7 @@ class BlipTextLMHeadModel(BlipTextPreTrainedModel, GenerationMixin):
         reduction: Optional[str] = "mean",
         cache_position: Optional[torch.Tensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
         r"""
         encoder_hidden_states (`torch.FloatTensor`, *optional*): Sequence of
--- a/transformers/models/blip_2/modeling_blip_2.py
+++ b/transformers/models/blip_2/modeling_blip_2.py
@@ -603,7 +603,7 @@ class Blip2QFormerMultiHeadAttention(nn.Module):
 
         # This is actually dropping out entire tokens to attend to, which might
         # seem a bit unusual, but is taken from the original Transformer paper.
-        attention_probs_dropped = self.dropout(attention_probs)
+        attention_probs_dropped = self.dropout(attention_probs).to(value_layer.dtype)
 
         context_layer = torch.matmul(attention_probs_dropped, value_layer)
 
@@ -1948,6 +1948,7 @@ class Blip2ForImageTextRetrieval(Blip2PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Blip2ImageTextMatchingModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
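The `.to(value_layer.dtype)` cast in the Q-Former attention matters under mixed precision: `torch.matmul` requires matching dtypes, so attention probabilities computed in float32 cannot be multiplied against half-precision values. A standalone repro of the failure mode the cast avoids (assumed motivation; the diff itself only shows the one-line fix):

```python
import torch

# fp32 softmax probabilities vs. fp16 values, as can happen under autocast
probs = torch.softmax(torch.randn(2, 4, 4, dtype=torch.float32), dim=-1)
values = torch.randn(2, 4, 8, dtype=torch.float16)

# torch.matmul(probs, values)  # RuntimeError: dtypes must match
out = torch.matmul(probs.to(values.dtype), values)  # works, stays in fp16
```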
--- a/transformers/models/bloom/modeling_bloom.py
+++ b/transformers/models/bloom/modeling_bloom.py
@@ -465,6 +465,7 @@ class BloomModel(BloomPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor, ...], BaseModelOutputWithPastAndCrossAttentions]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -883,6 +884,7 @@ class BloomForSequenceClassification(BloomPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], SequenceClassifierOutputWithPast]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -1006,6 +1008,7 @@ class BloomForTokenClassification(BloomPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], TokenClassifierOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -1084,6 +1087,7 @@ class BloomForQuestionAnswering(BloomPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, QuestionAnsweringModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
--- a/transformers/models/blt/modeling_blt.py
+++ b/transformers/models/blt/modeling_blt.py
@@ -38,7 +38,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import OutputRecorder, check_model_inputs
+from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
 from .configuration_blt import (
     BltConfig,
     BltGlobalTransformerConfig,
@@ -141,7 +141,7 @@ class BltRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()
 
         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.repeat_interleave(freqs, 2, dim=-1)  # diff from Llama: we interleave() instead of cat()
             cos = emb.cos() * self.attention_scaling
--- a/transformers/models/blt/modular_blt.py
+++ b/transformers/models/blt/modular_blt.py
@@ -29,7 +29,7 @@ from ...modeling_rope_utils import dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, logging
-from ...utils.generic import OutputRecorder, check_model_inputs
+from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
 from ..cohere2.modeling_cohere2 import rotate_half  # noqa: F401
 from ..llama.modeling_llama import LlamaRotaryEmbedding
 from ..mllama.modeling_mllama import (
@@ -277,7 +277,7 @@ class BltRotaryEmbedding(LlamaRotaryEmbedding):
         position_ids_expanded = position_ids[:, None, :].float()
 
         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.repeat_interleave(freqs, 2, dim=-1)  # diff from Llama: we interleave() instead of cat()
             cos = emb.cos() * self.attention_scaling
--- a/transformers/models/bridgetower/modeling_bridgetower.py
+++ b/transformers/models/bridgetower/modeling_bridgetower.py
@@ -960,7 +960,7 @@ class BridgeTowerVisionModel(BridgeTowerPreTrainedModel):
     def dtype(self):
         return self.visual.embeddings.patch_embedding.weight.dtype
 
-    def forward(self, image, image_mask=None, interpolate_pos_encoding=False):
+    def forward(self, image, image_mask=None, interpolate_pos_encoding=False, **kwargs):
         return self.visual(image.type(self.dtype), image_mask, interpolate_pos_encoding)
 
 
@@ -1223,6 +1223,7 @@ class BridgeTowerModel(BridgeTowerPreTrainedModel):
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
         interpolate_pos_encoding: bool = False,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BridgeTowerModelOutput]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
@@ -1530,6 +1531,7 @@ class BridgeTowerForMaskedLM(BridgeTowerPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[MaskedLMOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
@@ -1630,6 +1632,7 @@ class BridgeTowerForImageAndTextRetrieval(BridgeTowerPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[SequenceClassifierOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
@@ -1742,6 +1745,7 @@ class BridgeTowerForContrastiveLearning(BridgeTowerPreTrainedModel):
         output_hidden_states: Optional[bool] = True,
         return_dict: Optional[bool] = None,
         return_loss: Optional[bool] = None,
+        **kwargs,
     ) -> Union[BridgeTowerContrastiveOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):
--- a/transformers/models/bros/modeling_bros.py
+++ b/transformers/models/bros/modeling_bros.py
@@ -563,6 +563,7 @@ class BrosModel(BrosPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
         r"""
         bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
@@ -701,6 +702,7 @@ class BrosForTokenClassification(BrosPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], TokenClassifierOutput]:
         r"""
         bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
@@ -821,6 +823,7 @@ class BrosSpadeEEForTokenClassification(BrosPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BrosSpadeOutput]:
         r"""
         bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
@@ -957,6 +960,7 @@ class BrosSpadeELForTokenClassification(BrosPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], TokenClassifierOutput]:
         r"""
         bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
--- a/transformers/models/camembert/tokenization_camembert.py
+++ b/transformers/models/camembert/tokenization_camembert.py
@@ -14,6 +14,8 @@
 # limitations under the License
 """Tokenization classes for Camembert model."""
 
+from typing import Optional, Union
+
 from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import Unigram
 
@@ -83,7 +85,7 @@ class CamembertTokenizer(TokenizersBackend):
         vocab_file (`str`, *optional*):
             [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
             contains the vocabulary necessary to instantiate a tokenizer.
-        vocab (`dict`, *optional*):
+        vocab (`str`, `dict` or `list`, *optional*):
             Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file.
     """
 
@@ -103,7 +105,7 @@ class CamembertTokenizer(TokenizersBackend):
         additional_special_tokens=None,
         add_prefix_space=True,
         vocab_file=None,
-        vocab=None,
+        vocab: Optional[Union[str, dict, list]] = None,
         **kwargs,
     ):
         self.vocab_file = vocab_file
@@ -114,9 +116,9 @@ class CamembertTokenizer(TokenizersBackend):
         if additional_special_tokens is None:
             additional_special_tokens = ["<s>NOTUSED", "</s>NOTUSED", "<unk>NOTUSED"]
 
-        if vocab is not None
-        self._vocab =
-            unk_index = next(i for i, (tok, _) in enumerate(self._vocab) if tok == str(unk_token))
+        if vocab is not None:
+            self._vocab = vocab
+            unk_index = next((i for i, (tok, _) in enumerate(self._vocab) if tok == str(unk_token)), 0)
             self._tokenizer = Tokenizer(Unigram(self._vocab, unk_id=unk_index, byte_fallback=False))
         else:
             self._vocab = [
@@ -131,11 +133,8 @@ class CamembertTokenizer(TokenizersBackend):
 
         self._tokenizer.normalizer = normalizers.Sequence(
             [
-                normalizers.Replace("\n", " "),
-                normalizers.Replace("\r", " "),
-                normalizers.Replace("\t", " "),
+                normalizers.Replace(Regex(r"\s{2,}|[\n\r\t]"), " "),
                 normalizers.Strip(left=False, right=True),
-                normalizers.Replace(Regex(" {2,}"), "▁"),
             ]
         )
 
@@ -143,10 +142,7 @@ class CamembertTokenizer(TokenizersBackend):
         self._tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
         self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
 
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
             bos_token=bos_token,
             eos_token=eos_token,
             sep_token=sep_token,
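The CamemBERT normalizer change collapses the three per-character `Replace` rules plus the trailing `Replace(Regex(" {2,}"), "▁")` into a single rule that rewrites any whitespace run or control character to one plain space, leaving metaspace handling to the `Metaspace` pre-tokenizer. A quick check of the consolidated pipeline (mirrors the rc1 hunk above, using the `tokenizers` library directly):

```python
from tokenizers import Regex, normalizers

norm = normalizers.Sequence(
    [
        normalizers.Replace(Regex(r"\s{2,}|[\n\r\t]"), " "),
        normalizers.Strip(left=False, right=True),
    ]
)
print(norm.normalize_str("bonjour\t\tle  monde \n"))  # -> "bonjour le monde"
```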
--- a/transformers/models/canine/modeling_canine.py
+++ b/transformers/models/canine/modeling_canine.py
@@ -836,6 +836,7 @@ class CanineModel(CaninePreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, CanineModelOutputWithPooling]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1006,6 +1007,7 @@ class CanineForSequenceClassification(CaninePreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
@@ -1089,6 +1091,7 @@ class CanineForMultipleChoice(CaninePreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MultipleChoiceModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
@@ -1192,6 +1195,7 @@ class CanineForTokenClassification(CaninePreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1287,6 +1291,7 @@ class CanineForQuestionAnswering(CaninePreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, QuestionAnsweringModelOutput]:
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
--- a/transformers/models/chameleon/modeling_chameleon.py
+++ b/transformers/models/chameleon/modeling_chameleon.py
@@ -38,6 +38,7 @@ from ...utils import (
     can_return_tuple,
     logging,
 )
+from ...utils.generic import maybe_autocast
 from .configuration_chameleon import ChameleonConfig, ChameleonVQVAEConfig
 
 
@@ -122,7 +123,7 @@ class ChameleonRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()
 
         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
--- a/transformers/models/chinese_clip/modeling_chinese_clip.py
+++ b/transformers/models/chinese_clip/modeling_chinese_clip.py
@@ -839,6 +839,7 @@ class ChineseCLIPTextModel(ChineseCLIPPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPooling]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -926,6 +927,7 @@ class ChineseCLIPVisionModel(ChineseCLIPPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPooling]:
         r"""
         Examples:
@@ -1091,6 +1093,7 @@ class ChineseCLIPModel(ChineseCLIPPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, ChineseCLIPOutput]:
         r"""
         return_loss (`bool`, *optional*):
--- a/transformers/models/clap/modeling_clap.py
+++ b/transformers/models/clap/modeling_clap.py
@@ -1356,6 +1356,7 @@ class ClapAudioModel(ClapPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPooling]:
         r"""
         is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
@@ -1446,6 +1447,7 @@ class ClapTextModel(ClapPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1627,6 +1629,7 @@ class ClapModel(ClapPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, ClapOutput]:
         r"""
         is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
@@ -1740,6 +1743,7 @@ class ClapTextModelWithProjection(ClapPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, ClapTextModelOutput]:
         r"""
         Examples:
@@ -1803,6 +1807,7 @@ class ClapAudioModelWithProjection(ClapPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, ClapAudioModelOutput]:
         r"""
         is_longer (`torch.FloatTensor`, of shape `(batch_size, 1)`, *optional*):
|