transformers 5.0.0rc0-py3-none-any.whl → 5.0.0rc2-py3-none-any.whl
This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -54,6 +54,112 @@ from .configuration_xlm_roberta import XLMRobertaConfig
|
|
|
54
54
|
logger = logging.get_logger(__name__)
|
|
55
55
|
|
|
56
56
|
|
|
57
|
+
class XLMRobertaEmbeddings(nn.Module):
|
|
58
|
+
"""Construct the embeddings from word, position and token_type embeddings."""
|
|
59
|
+
|
|
60
|
+
def __init__(self, config):
|
|
61
|
+
super().__init__()
|
|
62
|
+
self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
|
|
63
|
+
self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
|
|
64
|
+
|
|
65
|
+
self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
|
66
|
+
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
|
67
|
+
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
|
|
68
|
+
self.register_buffer(
|
|
69
|
+
"position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
|
|
70
|
+
)
|
|
71
|
+
self.register_buffer(
|
|
72
|
+
"token_type_ids", torch.zeros(self.position_ids.size(), dtype=torch.long), persistent=False
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
self.padding_idx = config.pad_token_id
|
|
76
|
+
self.position_embeddings = nn.Embedding(
|
|
77
|
+
config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
def forward(
|
|
81
|
+
self,
|
|
82
|
+
input_ids: Optional[torch.LongTensor] = None,
|
|
83
|
+
token_type_ids: Optional[torch.LongTensor] = None,
|
|
84
|
+
position_ids: Optional[torch.LongTensor] = None,
|
|
85
|
+
inputs_embeds: Optional[torch.FloatTensor] = None,
|
|
86
|
+
past_key_values_length: int = 0,
|
|
87
|
+
) -> torch.Tensor:
|
|
88
|
+
if position_ids is None:
|
|
89
|
+
if input_ids is not None:
|
|
90
|
+
# Create the position ids from the input token ids. Any padded tokens remain padded.
|
|
91
|
+
position_ids = self.create_position_ids_from_input_ids(
|
|
92
|
+
input_ids, self.padding_idx, past_key_values_length
|
|
93
|
+
)
|
|
94
|
+
else:
|
|
95
|
+
position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds, self.padding_idx)
|
|
96
|
+
|
|
97
|
+
if input_ids is not None:
|
|
98
|
+
input_shape = input_ids.size()
|
|
99
|
+
else:
|
|
100
|
+
input_shape = inputs_embeds.size()[:-1]
|
|
101
|
+
|
|
102
|
+
batch_size, seq_length = input_shape
|
|
103
|
+
|
|
104
|
+
# Setting the token_type_ids to the registered buffer in constructor where it is all zeros, which usually occurs
|
|
105
|
+
# when its auto-generated, registered buffer helps users when tracing the model without passing token_type_ids, solves
|
|
106
|
+
# issue #5664
|
|
107
|
+
if token_type_ids is None:
|
|
108
|
+
if hasattr(self, "token_type_ids"):
|
|
109
|
+
# NOTE: We assume either pos ids to have bsz == 1 (broadcastable) or bsz == effective bsz (input_shape[0])
|
|
110
|
+
buffered_token_type_ids = self.token_type_ids.expand(position_ids.shape[0], -1)
|
|
111
|
+
buffered_token_type_ids = torch.gather(buffered_token_type_ids, dim=1, index=position_ids)
|
|
112
|
+
token_type_ids = buffered_token_type_ids.expand(batch_size, seq_length)
|
|
113
|
+
else:
|
|
114
|
+
token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
|
|
115
|
+
|
|
116
|
+
if inputs_embeds is None:
|
|
117
|
+
inputs_embeds = self.word_embeddings(input_ids)
|
|
118
|
+
token_type_embeddings = self.token_type_embeddings(token_type_ids)
|
|
119
|
+
embeddings = inputs_embeds + token_type_embeddings
|
|
120
|
+
|
|
121
|
+
position_embeddings = self.position_embeddings(position_ids)
|
|
122
|
+
embeddings = embeddings + position_embeddings
|
|
123
|
+
|
|
124
|
+
embeddings = self.LayerNorm(embeddings)
|
|
125
|
+
embeddings = self.dropout(embeddings)
|
|
126
|
+
return embeddings
|
|
127
|
+
|
|
128
|
+
@staticmethod
|
|
129
|
+
def create_position_ids_from_inputs_embeds(inputs_embeds, padding_idx):
|
|
130
|
+
"""
|
|
131
|
+
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
inputs_embeds: torch.Tensor
|
|
135
|
+
|
|
136
|
+
Returns: torch.Tensor
|
|
137
|
+
"""
|
|
138
|
+
input_shape = inputs_embeds.size()[:-1]
|
|
139
|
+
sequence_length = input_shape[1]
|
|
140
|
+
|
|
141
|
+
position_ids = torch.arange(
|
|
142
|
+
padding_idx + 1, sequence_length + padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
|
|
143
|
+
)
|
|
144
|
+
return position_ids.unsqueeze(0).expand(input_shape)
|
|
145
|
+
|
|
146
|
+
@staticmethod
|
|
147
|
+
def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
|
|
148
|
+
"""
|
|
149
|
+
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
|
|
150
|
+
are ignored. This is modified from fairseq's `utils.make_positions`.
|
|
151
|
+
|
|
152
|
+
Args:
|
|
153
|
+
x: torch.Tensor x:
|
|
154
|
+
|
|
155
|
+
Returns: torch.Tensor
|
|
156
|
+
"""
|
|
157
|
+
# The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
|
|
158
|
+
mask = input_ids.ne(padding_idx).int()
|
|
159
|
+
incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
|
|
160
|
+
return incremental_indices.long() + padding_idx
|
|
161
|
+
|
|
162
|
+
|
|
57
163
|
def eager_attention_forward(
|
|
58
164
|
module: nn.Module,
|
|
59
165
|
query: torch.Tensor,
|
|
@@ -417,112 +523,9 @@ class XLMRobertaPreTrainedModel(PreTrainedModel):
         super()._init_weights(module)
         if isinstance(module, XLMRobertaLMHead):
             init.zeros_(module.bias)
-
-
-
-    """Construct the embeddings from word, position and token_type embeddings."""
-
-    def __init__(self, config):
-        super().__init__()
-        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
-        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
-
-        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
-        self.dropout = nn.Dropout(config.hidden_dropout_prob)
-        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
-        self.register_buffer(
-            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
-        )
-        self.register_buffer(
-            "token_type_ids", torch.zeros(self.position_ids.size(), dtype=torch.long), persistent=False
-        )
-
-        self.padding_idx = config.pad_token_id
-        self.position_embeddings = nn.Embedding(
-            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
-        )
-
-    def forward(
-        self,
-        input_ids: Optional[torch.LongTensor] = None,
-        token_type_ids: Optional[torch.LongTensor] = None,
-        position_ids: Optional[torch.LongTensor] = None,
-        inputs_embeds: Optional[torch.FloatTensor] = None,
-        past_key_values_length: int = 0,
-    ) -> torch.Tensor:
-        if position_ids is None:
-            if input_ids is not None:
-                # Create the position ids from the input token ids. Any padded tokens remain padded.
-                position_ids = self.create_position_ids_from_input_ids(
-                    input_ids, self.padding_idx, past_key_values_length
-                )
-            else:
-                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds, self.padding_idx)
-
-        if input_ids is not None:
-            input_shape = input_ids.size()
-        else:
-            input_shape = inputs_embeds.size()[:-1]
-
-        batch_size, seq_length = input_shape
-
-        # Setting the token_type_ids to the registered buffer in constructor where it is all zeros, which usually occurs
-        # when its auto-generated, registered buffer helps users when tracing the model without passing token_type_ids, solves
-        # issue #5664
-        if token_type_ids is None:
-            if hasattr(self, "token_type_ids"):
-                # NOTE: We assume either pos ids to have bsz == 1 (broadcastable) or bsz == effective bsz (input_shape[0])
-                buffered_token_type_ids = self.token_type_ids.expand(position_ids.shape[0], -1)
-                buffered_token_type_ids = torch.gather(buffered_token_type_ids, dim=1, index=position_ids)
-                token_type_ids = buffered_token_type_ids.expand(batch_size, seq_length)
-            else:
-                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
-
-        if inputs_embeds is None:
-            inputs_embeds = self.word_embeddings(input_ids)
-        token_type_embeddings = self.token_type_embeddings(token_type_ids)
-        embeddings = inputs_embeds + token_type_embeddings
-
-        position_embeddings = self.position_embeddings(position_ids)
-        embeddings = embeddings + position_embeddings
-
-        embeddings = self.LayerNorm(embeddings)
-        embeddings = self.dropout(embeddings)
-        return embeddings
-
-    @staticmethod
-    def create_position_ids_from_inputs_embeds(inputs_embeds, padding_idx):
-        """
-        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.
-
-        Args:
-            inputs_embeds: torch.Tensor
-
-        Returns: torch.Tensor
-        """
-        input_shape = inputs_embeds.size()[:-1]
-        sequence_length = input_shape[1]
-
-        position_ids = torch.arange(
-            padding_idx + 1, sequence_length + padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
-        )
-        return position_ids.unsqueeze(0).expand(input_shape)
-
-    @staticmethod
-    def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
-        """
-        Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
-        are ignored. This is modified from fairseq's `utils.make_positions`.
-
-        Args:
-            x: torch.Tensor x:
-
-        Returns: torch.Tensor
-        """
-        # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
-        mask = input_ids.ne(padding_idx).int()
-        incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
-        return incremental_indices.long() + padding_idx
+        elif isinstance(module, XLMRobertaEmbeddings):
+            init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
+            init.zeros_(module.token_type_ids)


 class XLMRobertaEncoder(nn.Module):
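The hunk above moves the `position_ids` / `token_type_ids` buffer contents out of the constructor and re-creates them in `_init_weights` through the `initialization` helpers (`init.copy_`, `init.zeros_`). A rough sketch of what that amounts to, written with plain in-place tensor operations rather than the library's `init` module (the toy module below is invented for illustration):

    import torch
    from torch import nn

    class ToyEmbeddings(nn.Module):
        def __init__(self, max_position_embeddings: int = 8):
            super().__init__()
            # Buffers are registered empty here and filled in at weight-init time.
            self.register_buffer("position_ids", torch.empty(1, max_position_embeddings, dtype=torch.long), persistent=False)
            self.register_buffer("token_type_ids", torch.empty(1, max_position_embeddings, dtype=torch.long), persistent=False)

    def init_weights(module):
        # Roughly what the new `_init_weights` branch does for embeddings modules.
        if isinstance(module, ToyEmbeddings):
            module.position_ids.copy_(torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
            module.token_type_ids.zero_()

    m = ToyEmbeddings()
    m.apply(init_weights)
    print(m.position_ids)    # tensor([[0, 1, 2, 3, 4, 5, 6, 7]])
    print(m.token_type_ids)  # tensor([[0, 0, 0, 0, 0, 0, 0, 0]])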
@@ -14,7 +14,7 @@
 # limitations under the License
 """Tokenization classes for XLM-RoBERTa model (Tokenizers backend)."""

-from typing import Optional
+from typing import Optional, Union

 from tokenizers import Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import Unigram
@@ -47,16 +47,17 @@ class XLMRobertaTokenizer(TokenizersBackend):
         pad_token (`str`, optional, defaults to `"<pad>"`): The padding token.
         mask_token (`str`, optional, defaults to `"<mask>"`): The mask token.
         add_prefix_space (`bool`, optional, defaults to `True`): Whether to add an initial space.
-        vocab (`dict`, optional): Custom vocabulary dictionary.
-        merges (`list`, optional): Custom merges list.
+        vocab (`str`, `dict` or `list`, optional): Custom vocabulary dictionary.
     """

     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask"]
-
+    model = Unigram

     def __init__(
         self,
+        vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
+        add_prefix_space: bool = True,
         bos_token: str = "<s>",
         eos_token: str = "</s>",
         sep_token: str = "</s>",
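With `model = Unigram` and `vocab` accepted either as a file path or as a list of `(piece, score)` tuples, the tokenizer can be rebuilt directly from a SentencePiece-style vocabulary. A toy sketch using the `tokenizers` library (the pieces and scores are invented; a real XLM-R vocabulary has roughly 250k entries):

    from tokenizers import Tokenizer
    from tokenizers.models import Unigram

    vocab = [("<unk>", 0.0), ("▁Hello", -2.0), ("▁world", -2.5), ("!", -3.0)]  # (piece, log probability)
    tokenizer = Tokenizer(Unigram(vocab, unk_id=0))
    print(tokenizer.encode("▁Hello▁world!").tokens)
    # expected to segment into ['▁Hello', '▁world', '!']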
@@ -64,9 +65,6 @@ class XLMRobertaTokenizer(TokenizersBackend):
         unk_token: str = "<unk>",
         pad_token: str = "<pad>",
         mask_token: str = "<mask>",
-        add_prefix_space: bool = True,
-        vocab: Optional[dict] = None,
-        vocab_file: Optional[str] = None,
         **kwargs,
     ):
         self.add_prefix_space = add_prefix_space
@@ -99,11 +97,7 @@ class XLMRobertaTokenizer(TokenizersBackend):
             ]
         )
         self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
-
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
             bos_token=bos_token,
             eos_token=eos_token,
             sep_token=sep_token,
@@ -116,14 +110,13 @@ class XLMRobertaTokenizer(TokenizersBackend):
         )

         self._tokenizer.post_processor = processors.TemplateProcessing(
-            single=["$A",
-            pair=["$A",
+            single=[str(bos_token), "$A", str(eos_token)],
+            pair=[str(bos_token), "$A", str(eos_token), "$B", str(eos_token)],
             special_tokens=[
-                (
+                (str(bos_token), self.bos_token_id),
+                (str(eos_token), self.eos_token_id),
             ],
         )

-        self.vocab_file = vocab_file
-

 __all__ = ["XLMRobertaTokenizer"]
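The rewritten post-processor builds the template from the actual `bos`/`eos` strings, so a single sequence is wrapped as `<s> … </s>` and a pair as `<s> A </s> B </s>`. A self-contained sketch of the same `TemplateProcessing` usage on a toy word-level tokenizer (vocabulary and ids are invented for the example):

    from tokenizers import Tokenizer
    from tokenizers.models import WordLevel
    from tokenizers.pre_tokenizers import Whitespace
    from tokenizers.processors import TemplateProcessing

    vocab = {"<s>": 0, "</s>": 2, "<unk>": 3, "hello": 4, "world": 5}
    tok = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tok.pre_tokenizer = Whitespace()
    tok.post_processor = TemplateProcessing(
        single=["<s>", "$A", "</s>"],
        pair=["<s>", "$A", "</s>", "$B", "</s>"],
        special_tokens=[("<s>", 0), ("</s>", 2)],
    )
    print(tok.encode("hello", "world").tokens)
    # ['<s>', 'hello', '</s>', 'world', '</s>']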
@@ -542,6 +542,9 @@ class XLMRobertaXLPreTrainedModel(PreTrainedModel):
         super()._init_weights(module)
         if isinstance(module, XLMRobertaXLLMHead):
             init.zeros_(module.bias)
+        elif isinstance(module, XLMRobertaXLEmbeddings):
+            init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
+            init.zeros_(module.token_type_ids)


 class XLMRobertaXLPooler(nn.Module):
@@ -1244,7 +1244,9 @@ class XLNetLMHeadModel(XLNetPreTrainedModel, GenerationMixin):
     def set_output_embeddings(self, new_embeddings):
         self.lm_loss = new_embeddings

-    def prepare_inputs_for_generation(
+    def prepare_inputs_for_generation(
+        self, input_ids, past_key_values=None, use_mems=None, is_first_iteration=False, **kwargs
+    ):
         # Overwritten -- this model has unique input preparation

         # Add dummy token at the end (no attention on this one)
@@ -14,7 +14,7 @@
 # limitations under the License.
 """Tokenization classes for XLNet model."""

-from typing import Optional
+from typing import Optional, Union

 from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import Unigram
@@ -98,10 +98,11 @@ class XLNetTokenizer(TokenizersBackend):

     vocab_files_names = VOCAB_FILES_NAMES
     padding_side = "left"
+    model = Unigram

     def __init__(
         self,
-        vocab: Optional[list] = None,
+        vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
         unk_id: int = 0,
         do_lower_case=False,
         remove_space=True,
@@ -159,13 +160,8 @@ class XLNetTokenizer(TokenizersBackend):
         self.do_lower_case = do_lower_case
         self.remove_space = remove_space
         self.keep_accents = keep_accents
-
         mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
-
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
             unk_id=unk_id,
             do_lower_case=do_lower_case,
             remove_space=remove_space,
@@ -634,6 +634,9 @@ class XmodPreTrainedModel(PreTrainedModel):
         super()._init_weights(module)
         if isinstance(module, XmodLMHead):
             init.zeros_(module.bias)
+        elif isinstance(module, XmodEmbeddings):
+            init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
+            init.zeros_(module.token_type_ids)

     def set_default_language(self, language: str):
         """
@@ -54,7 +54,7 @@ def load_cuda_kernels():
     global lsh_cumulation
     if not is_kernels_available():
         raise ImportError("kernels is not installed, please install it with `pip install kernels`")
-    from
+    from ...integrations.hub_kernels import get_kernel

     yoso = get_kernel("kernels-community/yoso")
     lsh_cumulation = yoso.lsh_cumulation
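`load_cuda_kernels` now goes through the library's `integrations.hub_kernels` wrapper and fetches the compiled LSH kernels from the `kernels-community/yoso` repository on first use. A hedged sketch of the lazy-loading pattern; apart from the names visible in the hunk, the structure below is illustrative rather than the library's implementation:

    # Load an optional compiled kernel once and cache it at module level.
    lsh_cumulation = None

    def load_cuda_kernels():
        global lsh_cumulation
        if lsh_cumulation is not None:  # already loaded
            return
        try:
            from kernels import get_kernel  # assumes the standalone `kernels` package is installed
        except ImportError as exc:
            raise ImportError("kernels is not installed, please install it with `pip install kernels`") from exc
        yoso = get_kernel("kernels-community/yoso")  # downloads / caches the kernel from the Hub
        lsh_cumulation = yoso.lsh_cumulation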
@@ -611,6 +611,9 @@ class YosoPreTrainedModel(PreTrainedModel):
         super()._init_weights(module)
         if isinstance(module, YosoLMPredictionHead):
             init.zeros_(module.bias)
+        elif isinstance(module, YosoEmbeddings):
+            init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)) + 2)
+            init.zeros_(module.token_type_ids)


 @auto_docstring
@@ -642,6 +645,7 @@ class YosoModel(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithCrossAttentions]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -734,6 +738,7 @@ class YosoForMaskedLM(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MaskedLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -823,6 +828,7 @@ class YosoForSequenceClassification(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
@@ -904,6 +910,7 @@ class YosoForMultipleChoice(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MultipleChoiceModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
@@ -1009,6 +1016,7 @@ class YosoForTokenClassification(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1085,6 +1093,7 @@ class YosoForQuestionAnswering(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, QuestionAnsweringModelOutput]:
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -870,6 +870,7 @@ class ZambaModel(ZambaPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1098,6 +1099,7 @@ class ZambaForCausalLM(ZambaPreTrainedModel, GenerationMixin):
         cache_position=None,
         position_ids=None,
         use_cache=True,
+        is_first_iteration=False,
         **kwargs,
     ):
         # Overwritten -- has a unique cache type, `ZambaHybridDynamicCache`
@@ -1131,7 +1133,7 @@ class ZambaForCausalLM(ZambaPreTrainedModel, GenerationMixin):
                 position_ids = position_ids[:, -input_ids.shape[1] :]

         # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
-        if inputs_embeds is not None and
+        if inputs_embeds is not None and is_first_iteration:
             model_inputs = {"inputs_embeds": inputs_embeds}
         else:
             model_inputs = {"input_ids": input_ids.contiguous()}  # `contiguous()` needed for compilation use cases
@@ -1192,6 +1194,7 @@ class ZambaForSequenceClassification(ZambaPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutputWithPast]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
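Both Zamba and (below) Zamba2 now receive an explicit `is_first_iteration` flag in `prepare_inputs_for_generation` and use it to decide when `inputs_embeds` may still be fed to the model; after the first step, generation continues from `input_ids` alone. A minimal sketch of that branching outside any real model class (the helper name is invented):

    def select_model_inputs(input_ids, inputs_embeds=None, is_first_iteration=False):
        # `inputs_embeds` only cover the prompt, so they are valid for the first
        # forward pass; newly generated tokens exist only as ids.
        if inputs_embeds is not None and is_first_iteration:
            return {"inputs_embeds": inputs_embeds}
        return {"input_ids": input_ids.contiguous()}  # contiguous() helps compiled paths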
@@ -41,6 +41,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import auto_docstring, logging
+from ...utils.generic import maybe_autocast
 from ...utils.import_utils import is_causal_conv1d_available, is_mamba_ssm_available
 from .configuration_zamba2 import Zamba2Config

@@ -224,7 +225,7 @@ class Zamba2RotaryEmbedding(nn.Module):
         inv_freq, self.attention_scaling = rope_init_fn(self.config, device)

         self.register_buffer("inv_freq", inv_freq, persistent=False)
-        self.original_inv_freq =
+        self.register_buffer("original_inv_freq", inv_freq.clone(), persistent=False)

     @staticmethod
     def compute_default_rope_parameters(
@@ -263,7 +264,7 @@ class Zamba2RotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
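The rotary embedding now keeps a cloned `original_inv_freq` buffer and computes the frequency product under `maybe_autocast(..., enabled=False)` so the cos/sin tables stay in float32 even when mixed precision is active. A small sketch of the same idea using plain `torch.autocast` (toy sizes; `maybe_autocast` itself is the library wrapper shown in the hunk above):

    import torch

    def rope_cos_sin(inv_freq: torch.Tensor, position_ids: torch.Tensor, attention_scaling: float = 1.0):
        # inv_freq: (head_dim // 2,), position_ids: (batch, seq_len)
        inv_freq_expanded = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1)
        position_ids_expanded = position_ids[:, None, :].float()
        device_type = position_ids.device.type if position_ids.device.type != "mps" else "cpu"
        with torch.autocast(device_type=device_type, enabled=False):  # force float32 math
            freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
            emb = torch.cat((freqs, freqs), dim=-1)
        return emb.cos() * attention_scaling, emb.sin() * attention_scaling

    inv_freq = 1.0 / (10000 ** (torch.arange(0, 8, 2).float() / 8))
    cos, sin = rope_cos_sin(inv_freq, torch.arange(16)[None, :])
    print(cos.shape)  # torch.Size([1, 16, 8])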
@@ -424,7 +425,6 @@ class Zamba2Attention(nn.Module):
         attention_mask: Optional[torch.Tensor] = None,
         past_key_values: Optional[Zamba2HybridDynamicCache] = None,
         position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
-        position_ids: Optional[torch.Tensor] = None,
         **kwargs: Unpack[FlashAttentionKwargs],
     ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
@@ -1294,6 +1294,7 @@ class Zamba2Model(Zamba2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1544,6 +1545,7 @@ class Zamba2ForCausalLM(Zamba2PreTrainedModel, GenerationMixin):
         cache_position=None,
         position_ids=None,
         use_cache=True,
+        is_first_iteration=False,
         **kwargs,
     ):
         # Overwritten -- has a unique cache type, `Zamba2HybridDynamicCache`
@@ -1577,7 +1579,7 @@ class Zamba2ForCausalLM(Zamba2PreTrainedModel, GenerationMixin):
                 position_ids = position_ids[:, -input_ids.shape[1] :]

         # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
-        if inputs_embeds is not None and
+        if inputs_embeds is not None and is_first_iteration:
             model_inputs = {"inputs_embeds": inputs_embeds}
         else:
             model_inputs = {"input_ids": input_ids.contiguous()}  # `contiguous()` needed for compilation use cases
@@ -1638,6 +1640,7 @@ class Zamba2ForSequenceClassification(Zamba2PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutputWithPast]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
@@ -232,7 +232,6 @@ class Zamba2Attention(ZambaAttention):
         attention_mask: Optional[torch.Tensor] = None,
         past_key_values: Optional[Zamba2HybridDynamicCache] = None,
         position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
-        position_ids: Optional[torch.Tensor] = None,
         **kwargs: Unpack[FlashAttentionKwargs],
     ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
@@ -993,6 +992,7 @@ class Zamba2Model(ZambaModel, Zamba2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -37,7 +37,7 @@ class ZoeDepthConfig(PreTrainedConfig):
     documentation from [`PreTrainedConfig`] for more information.

     Args:
-        backbone_config (`Union[dict
+        backbone_config (`Union[dict, "PreTrainedConfig"]`, *optional*, defaults to `BeitConfig()`):
             The configuration of the backbone model.
         backbone (`str`, *optional*):
             Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this
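The corrected docstring line documents `backbone_config` as either a dict or a `PreTrainedConfig`, defaulting to `BeitConfig()`. A hedged usage sketch (argument values are illustrative and not taken from any checkpoint):

    from transformers import BeitConfig, ZoeDepthConfig

    # Pass a config object directly...
    config = ZoeDepthConfig(backbone_config=BeitConfig(image_size=384))
    # ...or an equivalent dict keyed by `model_type`; both describe the backbone.
    config_from_dict = ZoeDepthConfig(backbone_config={"model_type": "beit", "image_size": 384})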
@@ -171,9 +171,7 @@ class ZoeDepthImageProcessorFast(BaseImageProcessorFast):
             if do_normalize:
                 stacked_images = self.normalize(stacked_images, image_mean, image_std)
             resized_images_grouped[shape] = stacked_images
-
-
-        processed_images = torch.stack(resized_images, dim=0) if return_tensors else resized_images
+        processed_images = reorder_images(resized_images_grouped, grouped_images_index)

         return BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)

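The fast image processor now restores the original order with `reorder_images` after processing images that were grouped by shape, instead of stacking whatever ordering the grouping produced. A toy sketch of the group-by-shape / process / reorder pattern in plain PyTorch (the helper names here are local to the example, not the library's):

    import torch

    def group_by_shape(images):
        # Map shape -> stacked batch, and remember where each image came from.
        grouped, index = {}, []
        for img in images:
            grouped.setdefault(img.shape, []).append(img)
            index.append((img.shape, len(grouped[img.shape]) - 1))
        return {s: torch.stack(v) for s, v in grouped.items()}, index

    def reorder(processed_grouped, index):
        # Undo the grouping so outputs line up with the original input order.
        return [processed_grouped[shape][pos] for shape, pos in index]

    images = [torch.rand(3, 4, 4), torch.rand(3, 2, 2), torch.rand(3, 4, 4)]
    grouped, index = group_by_shape(images)
    processed = {shape: batch * 2.0 for shape, batch in grouped.items()}  # stand-in for resize/normalize
    restored = reorder(processed, index)
    assert [t.shape for t in restored] == [t.shape for t in images]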
@@ -21,6 +21,7 @@ from typing import Optional, Union
 import torch
 from torch import nn

+from ... import initialization as init
 from ...activations import ACT2FN
 from ...modeling_outputs import DepthEstimatorOutput
 from ...modeling_utils import PreTrainedModel
@@ -1211,6 +1212,12 @@ class ZoeDepthPreTrainedModel(PreTrainedModel):
     input_modalities = ("image",)
     supports_gradient_checkpointing = True

+    def _init_weights(self, module):
+        super()._init_weights(module)
+        if isinstance(module, LogBinomialSoftmax):
+            init.copy_(module.k_idx, torch.arange(0, module.k).view(1, -1, 1, 1))
+            init.copy_(module.k_minus_1, torch.tensor([module.k - 1]).view(1, -1, 1, 1))
+

 @auto_docstring(
     custom_intro="""
@@ -1251,6 +1258,7 @@ class ZoeDepthForDepthEstimation(ZoeDepthPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], DepthEstimatorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
@@ -18,11 +18,11 @@ import warnings
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional, Union

-from huggingface_hub import model_info
+from huggingface_hub import is_offline_mode, model_info

 from ..configuration_utils import PreTrainedConfig
 from ..dynamic_module_utils import get_class_from_dynamic_module
-from ..feature_extraction_utils import PreTrainedFeatureExtractor
+from ..feature_extraction_utils import FeatureExtractionMixin, PreTrainedFeatureExtractor
 from ..image_processing_utils import BaseImageProcessor
 from ..models.auto.configuration_auto import AutoConfig
 from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
@@ -38,7 +38,6 @@ from ..utils import (
     extract_commit_hash,
     find_adapter_config_file,
     is_kenlm_available,
-    is_offline_mode,
     is_peft_available,
     is_pyctcdecode_available,
     is_torch_available,
@@ -278,7 +277,7 @@ SUPPORTED_TASKS = {
     "image-to-text": {
         "impl": ImageToTextPipeline,
         "pt": (AutoModelForImageTextToText,) if is_torch_available() else (),
-        "default": {"model": ("ydshieh/vit-gpt2-coco-en", "
+        "default": {"model": ("ydshieh/vit-gpt2-coco-en", "e460201")},
         "type": "multimodal",
     },
     "image-text-to-text": {
@@ -701,12 +700,14 @@ def pipeline(

     code_revision = kwargs.pop("code_revision", None)
     commit_hash = kwargs.pop("_commit_hash", None)
+    local_files_only = kwargs.get("local_files_only", False)

     hub_kwargs = {
         "revision": revision,
         "token": token,
         "trust_remote_code": trust_remote_code,
         "_commit_hash": commit_hash,
+        "local_files_only": local_files_only,
     }

     if task is None and model is None:
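`pipeline()` now picks `local_files_only` out of its kwargs and forwards it in `hub_kwargs`, so every downstream `from_pretrained` call respects offline usage. A hedged usage sketch (the checkpoint name is only an example and must already be present in the local cache):

    from transformers import pipeline

    # With local_files_only=True the pipeline should resolve everything from the
    # local cache and never contact the Hub.
    classifier = pipeline(
        "text-classification",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        local_files_only=True,
    )
    print(classifier("transformers 5.0 release candidates look promising")[0])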
@@ -987,12 +988,13 @@ def pipeline(
         feature_extractor = AutoFeatureExtractor.from_pretrained(
             feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
         )
+        config_dict, _ = FeatureExtractionMixin.get_feature_extractor_dict(
+            pretrained_model_name_or_path or model_name,
+            **hub_kwargs,
+        )
+        processor_class = config_dict.get("processor_class", None)

-        if (
-            feature_extractor._processor_class
-            and feature_extractor._processor_class.endswith("WithLM")
-            and isinstance(model_name, str)
-        ):
+        if processor_class is not None and processor_class.endswith("WithLM") and isinstance(model_name, str):
             try:
                 import kenlm  # to trigger `ImportError` if not installed
                 from pyctcdecode import BeamSearchDecoderCTC