transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -903,6 +903,7 @@ class AutoformerEncoder(AutoformerPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutput]:
         r"""
         Args:
@@ -1024,6 +1025,7 @@ class AutoformerDecoder(AutoformerPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, AutoFormerDecoderOutput]:
         r"""
         Args:
@@ -1360,6 +1362,7 @@ class AutoformerModel(AutoformerPreTrainedModel):
|
|
|
1360
1362
|
use_cache: Optional[bool] = None,
|
|
1361
1363
|
return_dict: Optional[bool] = None,
|
|
1362
1364
|
cache_position: Optional[torch.Tensor] = None,
|
|
1365
|
+
**kwargs,
|
|
1363
1366
|
) -> Union[AutoformerModelOutput, tuple]:
|
|
1364
1367
|
r"""
|
|
1365
1368
|
past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
|
|
@@ -1610,6 +1613,7 @@ class AutoformerForPrediction(AutoformerPreTrainedModel):
|
|
|
1610
1613
|
output_attentions: Optional[bool] = None,
|
|
1611
1614
|
use_cache: Optional[bool] = None,
|
|
1612
1615
|
return_dict: Optional[bool] = None,
|
|
1616
|
+
**kwargs,
|
|
1613
1617
|
) -> Union[Seq2SeqTSPredictionOutput, tuple]:
|
|
1614
1618
|
r"""
|
|
1615
1619
|
past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
|
|
@@ -35,7 +35,7 @@ from transformers.activations import ACT2FN
|
|
|
35
35
|
from ... import initialization as init
|
|
36
36
|
from ...cache_utils import Cache
|
|
37
37
|
from ...generation import GenerationMixin
|
|
38
|
-
from ...integrations import use_kernel_forward_from_hub
|
|
38
|
+
from ...integrations import use_kernel_forward_from_hub, use_kernelized_func
|
|
39
39
|
from ...modeling_attn_mask_utils import AttentionMaskConverter
|
|
40
40
|
from ...modeling_layers import GradientCheckpointingLayer
|
|
41
41
|
from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
|
|
@@ -43,6 +43,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
|
|
|
43
43
|
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
44
44
|
from ...processing_utils import Unpack
|
|
45
45
|
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
|
|
46
|
+
from ...utils.generic import maybe_autocast
|
|
46
47
|
from ...utils.import_utils import is_causal_conv1d_available, is_mamba_2_ssm_available
|
|
47
48
|
from .configuration_bamba import BambaConfig
|
|
48
49
|
|
|
@@ -250,7 +251,7 @@ class BambaRotaryEmbedding(nn.Module):
|
|
|
250
251
|
position_ids_expanded = position_ids[:, None, :].float()
|
|
251
252
|
|
|
252
253
|
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
|
|
253
|
-
with
|
|
254
|
+
with maybe_autocast(device_type=device_type, enabled=False): # Force float32
|
|
254
255
|
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
|
|
255
256
|
emb = torch.cat((freqs, freqs), dim=-1)
|
|
256
257
|
cos = emb.cos() * self.attention_scaling
|
|
@@ -345,6 +346,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
|
|
|
345
346
|
return q_embed, k_embed
|
|
346
347
|
|
|
347
348
|
|
|
349
|
+
@use_kernelized_func(apply_rotary_pos_emb)
|
|
348
350
|
class BambaAttention(nn.Module):
|
|
349
351
|
"""Multi-headed attention from 'Attention Is All You Need' paper"""
|
|
350
352
|
|
|
@@ -370,7 +372,6 @@ class BambaAttention(nn.Module):
|
|
|
370
372
|
self.o_proj = nn.Linear(
|
|
371
373
|
config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
|
|
372
374
|
)
|
|
373
|
-
self.rotary_fn = apply_rotary_pos_emb
|
|
374
375
|
|
|
375
376
|
def forward(
|
|
376
377
|
self,
|
|
@@ -426,6 +426,7 @@ class BarkCausalModel(BarkPreTrainedModel, GenerationMixin):
|
|
|
426
426
|
output_hidden_states: Optional[bool] = None,
|
|
427
427
|
return_dict: Optional[bool] = None,
|
|
428
428
|
cache_position: Optional[torch.Tensor] = None,
|
|
429
|
+
**kwargs,
|
|
429
430
|
) -> Union[tuple[torch.Tensor], CausalLMOutputWithPast]:
|
|
430
431
|
r"""
|
|
431
432
|
input_embeds (`torch.FloatTensor` of shape `(batch_size, input_sequence_length, hidden_size)`, *optional*):
|
|
@@ -1028,6 +1029,7 @@ class BarkFineModel(BarkPreTrainedModel):
|
|
|
1028
1029
|
output_attentions: Optional[bool] = None,
|
|
1029
1030
|
output_hidden_states: Optional[bool] = None,
|
|
1030
1031
|
return_dict: Optional[bool] = None,
|
|
1032
|
+
**kwargs,
|
|
1031
1033
|
) -> Union[tuple[torch.Tensor], MaskedLMOutput]:
|
|
1032
1034
|
r"""
|
|
1033
1035
|
codebook_idx (`int`):
|
|
@@ -547,6 +547,7 @@ class BartEncoder(BartPreTrainedModel):
|
|
|
547
547
|
output_attentions: Optional[bool] = None,
|
|
548
548
|
output_hidden_states: Optional[bool] = None,
|
|
549
549
|
return_dict: Optional[bool] = None,
|
|
550
|
+
**kwargs,
|
|
550
551
|
) -> Union[tuple, BaseModelOutput]:
|
|
551
552
|
r"""
|
|
552
553
|
Args:
|
|
@@ -694,6 +695,7 @@ class BartDecoder(BartPreTrainedModel):
|
|
|
694
695
|
output_hidden_states: Optional[bool] = None,
|
|
695
696
|
return_dict: Optional[bool] = None,
|
|
696
697
|
cache_position: Optional[torch.LongTensor] = None,
|
|
698
|
+
**kwargs,
|
|
697
699
|
) -> Union[tuple, BaseModelOutputWithPastAndCrossAttentions]:
|
|
698
700
|
r"""
|
|
699
701
|
Args:
|
|
@@ -921,6 +923,7 @@ class BartModel(BartPreTrainedModel):
|
|
|
921
923
|
output_hidden_states: Optional[bool] = None,
|
|
922
924
|
return_dict: Optional[bool] = None,
|
|
923
925
|
cache_position: Optional[torch.LongTensor] = None,
|
|
926
|
+
**kwargs,
|
|
924
927
|
) -> Union[tuple, Seq2SeqModelOutput]:
|
|
925
928
|
r"""
|
|
926
929
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -1067,6 +1070,7 @@ class BartForConditionalGeneration(BartPreTrainedModel, GenerationMixin):
|
|
|
1067
1070
|
output_hidden_states: Optional[bool] = None,
|
|
1068
1071
|
return_dict: Optional[bool] = None,
|
|
1069
1072
|
cache_position: Optional[torch.LongTensor] = None,
|
|
1073
|
+
**kwargs,
|
|
1070
1074
|
) -> Union[tuple, Seq2SeqLMOutput]:
|
|
1071
1075
|
r"""
|
|
1072
1076
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -1228,6 +1232,7 @@ class BartForSequenceClassification(BartPreTrainedModel):
|
|
|
1228
1232
|
output_hidden_states: Optional[bool] = None,
|
|
1229
1233
|
return_dict: Optional[bool] = None,
|
|
1230
1234
|
cache_position: Optional[torch.LongTensor] = None,
|
|
1235
|
+
**kwargs,
|
|
1231
1236
|
) -> Union[tuple, Seq2SeqSequenceClassifierOutput]:
|
|
1232
1237
|
r"""
|
|
1233
1238
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -1360,6 +1365,7 @@ class BartForQuestionAnswering(BartPreTrainedModel):
|
|
|
1360
1365
|
output_hidden_states: Optional[bool] = None,
|
|
1361
1366
|
return_dict: Optional[bool] = None,
|
|
1362
1367
|
cache_position: Optional[torch.LongTensor] = None,
|
|
1368
|
+
**kwargs,
|
|
1363
1369
|
) -> Union[tuple, Seq2SeqQuestionAnsweringModelOutput]:
|
|
1364
1370
|
r"""
|
|
1365
1371
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -1505,6 +1511,7 @@ class BartForCausalLM(BartPreTrainedModel, GenerationMixin):
|
|
|
1505
1511
|
return_dict: Optional[bool] = None,
|
|
1506
1512
|
cache_position: Optional[torch.LongTensor] = None,
|
|
1507
1513
|
logits_to_keep: Union[int, torch.Tensor] = 0,
|
|
1514
|
+
**kwargs,
|
|
1508
1515
|
) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
|
|
1509
1516
|
r"""
|
|
1510
1517
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
# limitations under the License
|
|
15
15
|
"""Tokenization classes for the BARThez model."""
|
|
16
16
|
|
|
17
|
+
from typing import Optional, Union
|
|
18
|
+
|
|
17
19
|
from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers
|
|
18
20
|
from tokenizers.models import Unigram
|
|
19
21
|
|
|
@@ -77,7 +79,7 @@ class BarthezTokenizer(TokenizersBackend):
|
|
|
77
79
|
vocab_file (`str`, *optional*):
|
|
78
80
|
[SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
|
|
79
81
|
contains the vocabulary necessary to instantiate a tokenizer.
|
|
80
|
-
vocab (`dict`, *optional*):
|
|
82
|
+
vocab (`str`, `dict` or `list`, *optional*):
|
|
81
83
|
Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file.
|
|
82
84
|
add_prefix_space (`bool`, *optional*, defaults to `True`):
|
|
83
85
|
Whether or not to add an initial space to the input. This allows to treat the leading word just as any
|
|
@@ -90,6 +92,7 @@ class BarthezTokenizer(TokenizersBackend):
|
|
|
90
92
|
|
|
91
93
|
def __init__(
|
|
92
94
|
self,
|
|
95
|
+
vocab: Optional[Union[str, dict, list]] = None,
|
|
93
96
|
bos_token="<s>",
|
|
94
97
|
eos_token="</s>",
|
|
95
98
|
sep_token="</s>",
|
|
@@ -97,15 +100,12 @@ class BarthezTokenizer(TokenizersBackend):
|
|
|
97
100
|
unk_token="<unk>",
|
|
98
101
|
pad_token="<pad>",
|
|
99
102
|
mask_token="<mask>",
|
|
100
|
-
vocab_file=None,
|
|
101
|
-
vocab=None,
|
|
102
103
|
add_prefix_space=True,
|
|
103
104
|
**kwargs,
|
|
104
105
|
):
|
|
105
106
|
# Mask token behave like a normal word, i.e. include the space before it
|
|
106
107
|
mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
|
|
107
108
|
self.add_prefix_space = add_prefix_space
|
|
108
|
-
self.vocab_file = vocab_file
|
|
109
109
|
|
|
110
110
|
if vocab is not None:
|
|
111
111
|
self._vocab = vocab
|
|
@@ -122,10 +122,7 @@ class BarthezTokenizer(TokenizersBackend):
|
|
|
122
122
|
|
|
123
123
|
self._tokenizer.normalizer = normalizers.Sequence(
|
|
124
124
|
[
|
|
125
|
-
normalizers.Replace("\n", " "),
|
|
126
|
-
normalizers.Replace("\r", " "),
|
|
127
|
-
normalizers.Replace("\t", " "),
|
|
128
|
-
normalizers.Replace(Regex(r" {2,}"), " "),
|
|
125
|
+
normalizers.Replace(Regex(r"\s{2,}|[\n\r\t]"), " "),
|
|
129
126
|
normalizers.NFC(),
|
|
130
127
|
normalizers.Strip(left=False, right=True),
|
|
131
128
|
]
|
|
@@ -134,9 +131,7 @@ class BarthezTokenizer(TokenizersBackend):
|
|
|
134
131
|
self._tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
|
|
135
132
|
self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
|
|
136
133
|
|
|
137
|
-
tokenizer_object = self._tokenizer
|
|
138
134
|
super().__init__(
|
|
139
|
-
tokenizer_object=tokenizer_object,
|
|
140
135
|
bos_token=bos_token,
|
|
141
136
|
eos_token=eos_token,
|
|
142
137
|
unk_token=unk_token,
|
|
@@ -216,7 +216,7 @@ class BeitPatchEmbeddings(nn.Module):
|
|
|
216
216
|
"Make sure that the channel dimension of the pixel values match with the one set in the configuration."
|
|
217
217
|
)
|
|
218
218
|
|
|
219
|
-
embeddings = self.projection(pixel_values)
|
|
219
|
+
embeddings = self.projection(pixel_values.to(self.projection.weight.dtype))
|
|
220
220
|
patch_height, patch_width = embeddings.shape[2], embeddings.shape[3]
|
|
221
221
|
embeddings = embeddings.flatten(2).transpose(1, 2)
|
|
222
222
|
|
|
@@ -726,6 +726,7 @@ class BeitModel(BeitPreTrainedModel):
|
|
|
726
726
|
output_hidden_states: Optional[bool] = None,
|
|
727
727
|
interpolate_pos_encoding: bool = False,
|
|
728
728
|
return_dict: Optional[bool] = None,
|
|
729
|
+
**kwargs,
|
|
729
730
|
) -> Union[tuple, BeitModelOutputWithPooling]:
|
|
730
731
|
r"""
|
|
731
732
|
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
|
|
@@ -818,6 +819,7 @@ class BeitForMaskedImageModeling(BeitPreTrainedModel):
|
|
|
818
819
|
output_hidden_states: Optional[bool] = None,
|
|
819
820
|
interpolate_pos_encoding: bool = False,
|
|
820
821
|
return_dict: Optional[bool] = None,
|
|
822
|
+
**kwargs,
|
|
821
823
|
) -> Union[tuple, MaskedLMOutput]:
|
|
822
824
|
r"""
|
|
823
825
|
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`):
|
|
@@ -911,6 +913,7 @@ class BeitForImageClassification(BeitPreTrainedModel):
|
|
|
911
913
|
output_hidden_states: Optional[bool] = None,
|
|
912
914
|
interpolate_pos_encoding: bool = False,
|
|
913
915
|
return_dict: Optional[bool] = None,
|
|
916
|
+
**kwargs,
|
|
914
917
|
) -> Union[tuple, ImageClassifierOutput]:
|
|
915
918
|
r"""
|
|
916
919
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -1244,6 +1247,7 @@ class BeitForSemanticSegmentation(BeitPreTrainedModel):
|
|
|
1244
1247
|
output_hidden_states: Optional[bool] = None,
|
|
1245
1248
|
interpolate_pos_encoding: bool = False,
|
|
1246
1249
|
return_dict: Optional[bool] = None,
|
|
1250
|
+
**kwargs,
|
|
1247
1251
|
) -> Union[tuple, SemanticSegmenterOutput]:
|
|
1248
1252
|
r"""
|
|
1249
1253
|
labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
|
|
@@ -1371,6 +1375,7 @@ class BeitBackbone(BeitPreTrainedModel, BackboneMixin):
|
|
|
1371
1375
|
output_hidden_states: Optional[bool] = None,
|
|
1372
1376
|
output_attentions: Optional[bool] = None,
|
|
1373
1377
|
return_dict: Optional[bool] = None,
|
|
1378
|
+
**kwargs,
|
|
1374
1379
|
) -> BackboneOutput:
|
|
1375
1380
|
r"""
|
|
1376
1381
|
Examples:
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"""Tokenization classes for Bert."""
|
|
16
16
|
|
|
17
17
|
import collections
|
|
18
|
-
from typing import Optional
|
|
18
|
+
from typing import Optional, Union
|
|
19
19
|
|
|
20
20
|
from tokenizers import Tokenizer, decoders, normalizers, pre_tokenizers, processors
|
|
21
21
|
from tokenizers.models import WordPiece
|
|
@@ -48,8 +48,8 @@ class BertTokenizer(TokenizersBackend):
|
|
|
48
48
|
this superclass for more information regarding those methods.
|
|
49
49
|
|
|
50
50
|
Args:
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
vocab (`str` or `dict[str, int]`, *optional*):
|
|
52
|
+
Custom vocabulary dictionary. If not provided, vocabulary is loaded from `vocab_file`.
|
|
53
53
|
do_lower_case (`bool`, *optional*, defaults to `False`):
|
|
54
54
|
Whether or not to lowercase the input when tokenizing.
|
|
55
55
|
unk_token (`str`, *optional*, defaults to `"[UNK]"`):
|
|
@@ -72,17 +72,15 @@ class BertTokenizer(TokenizersBackend):
|
|
|
72
72
|
strip_accents (`bool`, *optional*):
|
|
73
73
|
Whether or not to strip all accents. If this option is not specified, then it will be determined by the
|
|
74
74
|
value for `lowercase` (as in the original BERT).
|
|
75
|
-
vocab (`dict`, *optional*):
|
|
76
|
-
Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file.
|
|
77
75
|
"""
|
|
78
76
|
|
|
79
77
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
80
78
|
model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
|
|
81
|
-
|
|
79
|
+
model = WordPiece
|
|
82
80
|
|
|
83
81
|
def __init__(
|
|
84
82
|
self,
|
|
85
|
-
|
|
83
|
+
vocab: Optional[Union[str, dict[str, int]]] = None,
|
|
86
84
|
do_lower_case: bool = False,
|
|
87
85
|
unk_token: str = "[UNK]",
|
|
88
86
|
sep_token: str = "[SEP]",
|
|
@@ -91,28 +89,21 @@ class BertTokenizer(TokenizersBackend):
|
|
|
91
89
|
mask_token: str = "[MASK]",
|
|
92
90
|
tokenize_chinese_chars: bool = True,
|
|
93
91
|
strip_accents: Optional[bool] = None,
|
|
94
|
-
vocab: Optional[dict] = None,
|
|
95
92
|
**kwargs,
|
|
96
93
|
):
|
|
97
94
|
self.do_lower_case = do_lower_case
|
|
98
95
|
self.tokenize_chinese_chars = tokenize_chinese_chars
|
|
99
96
|
self.strip_accents = strip_accents
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
self._vocab = (
|
|
103
|
-
{token: idx for idx, (token, _score) in enumerate(vocab)} if isinstance(vocab, list) else vocab
|
|
104
|
-
)
|
|
105
|
-
else:
|
|
106
|
-
self._vocab = {
|
|
97
|
+
if vocab is None:
|
|
98
|
+
vocab = {
|
|
107
99
|
str(pad_token): 0,
|
|
108
100
|
str(unk_token): 1,
|
|
109
101
|
str(cls_token): 2,
|
|
110
102
|
str(sep_token): 3,
|
|
111
103
|
str(mask_token): 4,
|
|
112
104
|
}
|
|
113
|
-
|
|
105
|
+
self._vocab = vocab
|
|
114
106
|
self._tokenizer = Tokenizer(WordPiece(self._vocab, unk_token=str(unk_token)))
|
|
115
|
-
|
|
116
107
|
self._tokenizer.normalizer = normalizers.BertNormalizer(
|
|
117
108
|
clean_text=True,
|
|
118
109
|
handle_chinese_chars=tokenize_chinese_chars,
|
|
@@ -121,11 +112,7 @@ class BertTokenizer(TokenizersBackend):
|
|
|
121
112
|
)
|
|
122
113
|
self._tokenizer.pre_tokenizer = pre_tokenizers.BertPreTokenizer()
|
|
123
114
|
self._tokenizer.decoder = decoders.WordPiece(prefix="##")
|
|
124
|
-
|
|
125
|
-
tokenizer_object = self._tokenizer
|
|
126
|
-
|
|
127
115
|
super().__init__(
|
|
128
|
-
tokenizer_object=tokenizer_object,
|
|
129
116
|
do_lower_case=do_lower_case,
|
|
130
117
|
unk_token=unk_token,
|
|
131
118
|
sep_token=sep_token,
|
|
@@ -1918,6 +1918,7 @@ class BigBirdForPreTraining(BigBirdPreTrainedModel):
|
|
|
1918
1918
|
output_attentions: Optional[bool] = None,
|
|
1919
1919
|
output_hidden_states: Optional[bool] = None,
|
|
1920
1920
|
return_dict: Optional[bool] = None,
|
|
1921
|
+
**kwargs,
|
|
1921
1922
|
) -> Union[BigBirdForPreTrainingOutput, tuple[torch.FloatTensor]]:
|
|
1922
1923
|
r"""
|
|
1923
1924
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -2028,6 +2029,7 @@ class BigBirdForMaskedLM(BigBirdPreTrainedModel):
|
|
|
2028
2029
|
output_attentions: Optional[bool] = None,
|
|
2029
2030
|
output_hidden_states: Optional[bool] = None,
|
|
2030
2031
|
return_dict: Optional[bool] = None,
|
|
2032
|
+
**kwargs,
|
|
2031
2033
|
) -> Union[MaskedLMOutput, tuple[torch.FloatTensor]]:
|
|
2032
2034
|
r"""
|
|
2033
2035
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -2277,6 +2279,7 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel):
|
|
|
2277
2279
|
output_attentions: Optional[bool] = None,
|
|
2278
2280
|
output_hidden_states: Optional[bool] = None,
|
|
2279
2281
|
return_dict: Optional[bool] = None,
|
|
2282
|
+
**kwargs,
|
|
2280
2283
|
) -> Union[SequenceClassifierOutput, tuple[torch.FloatTensor]]:
|
|
2281
2284
|
r"""
|
|
2282
2285
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -2394,6 +2397,7 @@ class BigBirdForMultipleChoice(BigBirdPreTrainedModel):
|
|
|
2394
2397
|
output_attentions: Optional[bool] = None,
|
|
2395
2398
|
output_hidden_states: Optional[bool] = None,
|
|
2396
2399
|
return_dict: Optional[bool] = None,
|
|
2400
|
+
**kwargs,
|
|
2397
2401
|
) -> Union[MultipleChoiceModelOutput, tuple[torch.FloatTensor]]:
|
|
2398
2402
|
r"""
|
|
2399
2403
|
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
|
|
@@ -2500,6 +2504,7 @@ class BigBirdForTokenClassification(BigBirdPreTrainedModel):
|
|
|
2500
2504
|
output_attentions: Optional[bool] = None,
|
|
2501
2505
|
output_hidden_states: Optional[bool] = None,
|
|
2502
2506
|
return_dict: Optional[bool] = None,
|
|
2507
|
+
**kwargs,
|
|
2503
2508
|
) -> Union[TokenClassifierOutput, tuple[torch.FloatTensor]]:
|
|
2504
2509
|
r"""
|
|
2505
2510
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -2591,6 +2596,7 @@ class BigBirdForQuestionAnswering(BigBirdPreTrainedModel):
|
|
|
2591
2596
|
output_attentions: Optional[bool] = None,
|
|
2592
2597
|
output_hidden_states: Optional[bool] = None,
|
|
2593
2598
|
return_dict: Optional[bool] = None,
|
|
2599
|
+
**kwargs,
|
|
2594
2600
|
) -> Union[BigBirdForQuestionAnsweringModelOutput, tuple[torch.FloatTensor]]:
|
|
2595
2601
|
r"""
|
|
2596
2602
|
question_lengths (`torch.LongTensor` of shape `(batch_size, 1)`, *optional*):
|
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Tokenization classes for Big Bird model."""
|
|
16
16
|
|
|
17
|
+
from typing import Optional, Union
|
|
18
|
+
|
|
17
19
|
from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
|
|
18
20
|
from tokenizers.models import Unigram
|
|
19
21
|
|
|
@@ -37,7 +39,7 @@ class BigBirdTokenizer(TokenizersBackend):
|
|
|
37
39
|
this superclass for more information regarding those methods
|
|
38
40
|
|
|
39
41
|
Args:
|
|
40
|
-
vocab (`dict`, *optional*):
|
|
42
|
+
vocab (`str`, `dict` or `list`, *optional*):
|
|
41
43
|
Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file.
|
|
42
44
|
unk_token (`str`, *optional*, defaults to `"<unk>"`):
|
|
43
45
|
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
|
|
@@ -80,10 +82,11 @@ class BigBirdTokenizer(TokenizersBackend):
|
|
|
80
82
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
81
83
|
model_input_names = ["input_ids", "attention_mask"]
|
|
82
84
|
prefix_tokens: list[int] = []
|
|
85
|
+
model = Unigram
|
|
83
86
|
|
|
84
87
|
def __init__(
|
|
85
88
|
self,
|
|
86
|
-
vocab=None,
|
|
89
|
+
vocab: Optional[Union[str, dict, list]] = None,
|
|
87
90
|
unk_token="<unk>",
|
|
88
91
|
bos_token="<s>",
|
|
89
92
|
eos_token="</s>",
|
|
@@ -92,8 +95,6 @@ class BigBirdTokenizer(TokenizersBackend):
|
|
|
92
95
|
mask_token="[MASK]",
|
|
93
96
|
cls_token="[CLS]",
|
|
94
97
|
add_prefix_space=True,
|
|
95
|
-
vocab_file=None,
|
|
96
|
-
tokenizer_file=None,
|
|
97
98
|
**kwargs,
|
|
98
99
|
):
|
|
99
100
|
bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
|
|
@@ -105,47 +106,18 @@ class BigBirdTokenizer(TokenizersBackend):
|
|
|
105
106
|
mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
|
|
106
107
|
|
|
107
108
|
self.add_prefix_space = add_prefix_space
|
|
108
|
-
self.vocab_file = vocab_file
|
|
109
109
|
|
|
110
110
|
# Convert vocab to list of (token, score) tuples
|
|
111
111
|
if vocab is None:
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
vocab_scores = [(str(token), float(score)) for token, score in vocab]
|
|
118
|
-
else:
|
|
119
|
-
vocab_scores = [(str(token), 0.0) for token in vocab]
|
|
120
|
-
else:
|
|
121
|
-
vocab_scores = [(str(pad_token), 0.0), (str(eos_token), 0.0), (str(bos_token), 0.0)]
|
|
122
|
-
|
|
123
|
-
# Find unk_id in vocab
|
|
124
|
-
unk_token_content = str(unk_token)
|
|
125
|
-
unk_id = next((idx for idx, (token, _) in enumerate(vocab_scores) if token == unk_token_content), None)
|
|
126
|
-
if unk_id is None:
|
|
127
|
-
unk_id = min(len(vocab_scores), 100)
|
|
128
|
-
if len(vocab_scores) > 100:
|
|
129
|
-
vocab_scores.insert(100, (unk_token_content, 0.0))
|
|
130
|
-
else:
|
|
131
|
-
vocab_scores.append((unk_token_content, 0.0))
|
|
132
|
-
|
|
133
|
-
# Ensure cls_token and sep_token are in vocab
|
|
134
|
-
cls_token_str = str(cls_token)
|
|
135
|
-
sep_token_str = str(sep_token)
|
|
136
|
-
cls_token_id = next((idx for idx, (token, _) in enumerate(vocab_scores) if token == cls_token_str), None)
|
|
137
|
-
sep_token_id = next((idx for idx, (token, _) in enumerate(vocab_scores) if token == sep_token_str), None)
|
|
112
|
+
vocab = [(str(pad_token), 0.0), (str(eos_token), 0.0), (str(bos_token), 0.0), (str(unk_token), 0.0)]
|
|
113
|
+
unk_id = 3
|
|
114
|
+
elif isinstance(vocab, list):
|
|
115
|
+
# vocab.insert(100, (str(unk_token), 0.0)) # Ensure unk_token is in vocab at index 100
|
|
116
|
+
unk_id = vocab.index((str(unk_token), 0.0)) if (str(unk_token), 0.0) in vocab else 100
|
|
138
117
|
|
|
139
|
-
|
|
140
|
-
cls_token_id = len(vocab_scores)
|
|
141
|
-
vocab_scores.append((cls_token_str, 0.0))
|
|
142
|
-
if sep_token_id is None:
|
|
143
|
-
sep_token_id = len(vocab_scores)
|
|
144
|
-
vocab_scores.append((sep_token_str, 0.0))
|
|
145
|
-
|
|
146
|
-
self._tokenizer = Tokenizer(Unigram(vocab_scores, unk_id=unk_id, byte_fallback=False))
|
|
118
|
+
self._tokenizer = Tokenizer(Unigram(vocab, unk_id=unk_id, byte_fallback=False))
|
|
147
119
|
self._tokenizer.normalizer = normalizers.Sequence(
|
|
148
|
-
[normalizers.Strip(left=False, right=
|
|
120
|
+
[normalizers.Strip(left=False, right=False), normalizers.Replace(Regex(r" {2,}"), SPIECE_UNDERLINE)]
|
|
149
121
|
)
|
|
150
122
|
|
|
151
123
|
prepend_scheme = "always" if add_prefix_space else "never"
|
|
@@ -155,7 +127,6 @@ class BigBirdTokenizer(TokenizersBackend):
|
|
|
155
127
|
self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme, split=True)
|
|
156
128
|
|
|
157
129
|
super().__init__(
|
|
158
|
-
tokenizer_object=self._tokenizer,
|
|
159
130
|
bos_token=bos_token,
|
|
160
131
|
eos_token=eos_token,
|
|
161
132
|
unk_token=unk_token,
|
|
@@ -163,10 +134,15 @@ class BigBirdTokenizer(TokenizersBackend):
|
|
|
163
134
|
mask_token=mask_token,
|
|
164
135
|
cls_token=cls_token,
|
|
165
136
|
sep_token=sep_token,
|
|
137
|
+
add_prefix_space=add_prefix_space,
|
|
166
138
|
**kwargs,
|
|
167
139
|
)
|
|
168
140
|
|
|
169
|
-
|
|
141
|
+
# Ensure cls_token and sep_token are in vocab
|
|
142
|
+
cls_token_str = str(cls_token)
|
|
143
|
+
sep_token_str = str(sep_token)
|
|
144
|
+
cls_token_id = self.cls_token_id
|
|
145
|
+
sep_token_id = self.sep_token_id
|
|
170
146
|
|
|
171
147
|
self._tokenizer.post_processor = processors.TemplateProcessing(
|
|
172
148
|
single=f"{cls_token_str}:0 $A:0 {sep_token_str}:0",
|
|
@@ -1154,7 +1154,6 @@ class BigBirdPegasusEncoderAttention(nn.Module):
|
|
|
1154
1154
|
return outputs
|
|
1155
1155
|
|
|
1156
1156
|
|
|
1157
|
-
# Copied from transformers.models.bert.modeling_bert.eager_attention_forward
|
|
1158
1157
|
def eager_attention_forward(
|
|
1159
1158
|
module: nn.Module,
|
|
1160
1159
|
query: torch.Tensor,
|
|
@@ -1178,7 +1177,7 @@ def eager_attention_forward(
|
|
|
1178
1177
|
attn_weights = nn.functional.softmax(attn_weights, dim=-1)
|
|
1179
1178
|
attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
|
|
1180
1179
|
|
|
1181
|
-
attn_output = torch.matmul(attn_weights, value)
|
|
1180
|
+
attn_output = torch.matmul(attn_weights.to(value.dtype), value)
|
|
1182
1181
|
attn_output = attn_output.transpose(1, 2).contiguous()
|
|
1183
1182
|
|
|
1184
1183
|
return attn_output, attn_weights
|
|
@@ -1595,6 +1594,7 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
|
|
|
1595
1594
|
output_attentions: Optional[bool] = None,
|
|
1596
1595
|
output_hidden_states: Optional[bool] = None,
|
|
1597
1596
|
return_dict: Optional[bool] = None,
|
|
1597
|
+
**kwargs,
|
|
1598
1598
|
):
|
|
1599
1599
|
r"""
|
|
1600
1600
|
Args:
|
|
@@ -1868,6 +1868,7 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
|
|
|
1868
1868
|
output_hidden_states: Optional[bool] = None,
|
|
1869
1869
|
return_dict: Optional[bool] = None,
|
|
1870
1870
|
cache_position: Optional[torch.Tensor] = None,
|
|
1871
|
+
**kwargs,
|
|
1871
1872
|
):
|
|
1872
1873
|
r"""
|
|
1873
1874
|
Args:
|
|
@@ -2097,6 +2098,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
|
|
|
2097
2098
|
output_hidden_states: Optional[bool] = None,
|
|
2098
2099
|
return_dict: Optional[bool] = None,
|
|
2099
2100
|
cache_position: Optional[torch.LongTensor] = None,
|
|
2101
|
+
**kwargs,
|
|
2100
2102
|
) -> Union[tuple, Seq2SeqModelOutput]:
|
|
2101
2103
|
r"""
|
|
2102
2104
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -2235,6 +2237,7 @@ class BigBirdPegasusForConditionalGeneration(BigBirdPegasusPreTrainedModel, Gene
|
|
|
2235
2237
|
output_hidden_states: Optional[bool] = None,
|
|
2236
2238
|
return_dict: Optional[bool] = None,
|
|
2237
2239
|
cache_position: Optional[torch.LongTensor] = None,
|
|
2240
|
+
**kwargs,
|
|
2238
2241
|
) -> Union[tuple, Seq2SeqLMOutput]:
|
|
2239
2242
|
r"""
|
|
2240
2243
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -2369,6 +2372,7 @@ class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel):
|
|
|
2369
2372
|
output_hidden_states: Optional[bool] = None,
|
|
2370
2373
|
return_dict: Optional[bool] = None,
|
|
2371
2374
|
cache_position: Optional[torch.LongTensor] = None,
|
|
2375
|
+
**kwargs,
|
|
2372
2376
|
) -> Union[tuple, Seq2SeqSequenceClassifierOutput]:
|
|
2373
2377
|
r"""
|
|
2374
2378
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -2490,6 +2494,7 @@ class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel):
|
|
|
2490
2494
|
output_hidden_states: Optional[bool] = None,
|
|
2491
2495
|
return_dict: Optional[bool] = None,
|
|
2492
2496
|
cache_position: Optional[torch.LongTensor] = None,
|
|
2497
|
+
**kwargs,
|
|
2493
2498
|
) -> Union[tuple, Seq2SeqQuestionAnsweringModelOutput]:
|
|
2494
2499
|
r"""
|
|
2495
2500
|
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
|
|
@@ -2616,6 +2621,7 @@ class BigBirdPegasusForCausalLM(BigBirdPegasusPreTrainedModel, GenerationMixin):
|
|
|
2616
2621
|
return_dict: Optional[bool] = None,
|
|
2617
2622
|
cache_position: Optional[torch.LongTensor] = None,
|
|
2618
2623
|
logits_to_keep: Union[int, torch.Tensor] = 0,
|
|
2624
|
+
**kwargs,
|
|
2619
2625
|
) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
|
|
2620
2626
|
r"""
|
|
2621
2627
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -620,6 +620,7 @@ class BioGptForTokenClassification(BioGptPreTrainedModel):
|
|
|
620
620
|
output_hidden_states: Optional[bool] = None,
|
|
621
621
|
return_dict: Optional[bool] = None,
|
|
622
622
|
cache_position: Optional[torch.Tensor] = None,
|
|
623
|
+
**kwargs,
|
|
623
624
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
624
625
|
r"""
|
|
625
626
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -711,6 +712,7 @@ class BioGptForSequenceClassification(BioGptPreTrainedModel):
|
|
|
711
712
|
return_dict: Optional[bool] = None,
|
|
712
713
|
cache_position: Optional[torch.Tensor] = None,
|
|
713
714
|
logits_to_keep: Union[int, torch.Tensor] = 0,
|
|
715
|
+
**kwargs,
|
|
714
716
|
) -> Union[tuple, SequenceClassifierOutputWithPast]:
|
|
715
717
|
r"""
|
|
716
718
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -442,6 +442,7 @@ class BioGptForTokenClassification(BioGptPreTrainedModel):
|
|
|
442
442
|
output_hidden_states: Optional[bool] = None,
|
|
443
443
|
return_dict: Optional[bool] = None,
|
|
444
444
|
cache_position: Optional[torch.Tensor] = None,
|
|
445
|
+
**kwargs,
|
|
445
446
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
446
447
|
r"""
|
|
447
448
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -533,6 +534,7 @@ class BioGptForSequenceClassification(BioGptPreTrainedModel):
|
|
|
533
534
|
return_dict: Optional[bool] = None,
|
|
534
535
|
cache_position: Optional[torch.Tensor] = None,
|
|
535
536
|
logits_to_keep: Union[int, torch.Tensor] = 0,
|
|
537
|
+
**kwargs,
|
|
536
538
|
) -> Union[tuple, SequenceClassifierOutputWithPast]:
|
|
537
539
|
r"""
|
|
538
540
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -666,7 +666,11 @@ class BitModel(BitPreTrainedModel):
|
|
|
666
666
|
|
|
667
667
|
@auto_docstring
|
|
668
668
|
def forward(
|
|
669
|
-
self,
|
|
669
|
+
self,
|
|
670
|
+
pixel_values: Tensor,
|
|
671
|
+
output_hidden_states: Optional[bool] = None,
|
|
672
|
+
return_dict: Optional[bool] = None,
|
|
673
|
+
**kwargs,
|
|
670
674
|
) -> BaseModelOutputWithPoolingAndNoAttention:
|
|
671
675
|
output_hidden_states = (
|
|
672
676
|
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
|
@@ -721,6 +725,7 @@ class BitForImageClassification(BitPreTrainedModel):
|
|
|
721
725
|
labels: Optional[torch.LongTensor] = None,
|
|
722
726
|
output_hidden_states: Optional[bool] = None,
|
|
723
727
|
return_dict: Optional[bool] = None,
|
|
728
|
+
**kwargs,
|
|
724
729
|
) -> ImageClassifierOutputWithNoAttention:
|
|
725
730
|
r"""
|
|
726
731
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -767,7 +772,11 @@ class BitBackbone(BitPreTrainedModel, BackboneMixin):
|
|
|
767
772
|
|
|
768
773
|
@auto_docstring
|
|
769
774
|
def forward(
|
|
770
|
-
self,
|
|
775
|
+
self,
|
|
776
|
+
pixel_values: Tensor,
|
|
777
|
+
output_hidden_states: Optional[bool] = None,
|
|
778
|
+
return_dict: Optional[bool] = None,
|
|
779
|
+
**kwargs,
|
|
771
780
|
) -> BackboneOutput:
|
|
772
781
|
r"""
|
|
773
782
|
Examples:
|