transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/data/data_collator.py

@@ -711,9 +711,6 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
         if self.random_replace_prob < 0 or self.random_replace_prob > 1:
             raise ValueError("random_replace_prob should be between 0 and 1.")
 
-        self.mask_replace_prob = float(self.mask_replace_prob)
-        self.random_replace_prob = float(self.random_replace_prob)
-
         if self.whole_word_mask:
             if not self.tokenizer.is_fast:
                 warnings.warn(
@@ -729,6 +726,9 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
             self.mask_replace_prob = 1
             self.random_replace_prob = 0
 
+        self.mask_replace_prob = float(self.mask_replace_prob)
+        self.random_replace_prob = float(self.random_replace_prob)
+
         self.generator = None
 
     def get_generator(self, seed):
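Net effect of the two hunks above: the `float()` casts now run after the `whole_word_mask` branch, so the `1`/`0` overrides that branch assigns are normalized to floats as well. A minimal sketch of the observable change, assuming a fast tokenizer checkpoint such as "bert-base-uncased" is available:

# Sketch (not part of the diff); parameter names follow the hunks above.
from transformers import AutoTokenizer, DataCollatorForLanguageModeling

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# whole_word_mask forces mask_replace_prob = 1 and random_replace_prob = 0 in
# __post_init__; with the casts moved below that override, both attributes are
# normalized to floats (1.0 / 0.0) instead of staying plain ints.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, whole_word_mask=True)
print(type(collator.mask_replace_prob), collator.mask_replace_prob)      # <class 'float'> 1.0
print(type(collator.random_replace_prob), collator.random_replace_prob)  # <class 'float'> 0.0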
@@ -1413,9 +1413,17 @@ class DataCollatorWithFlattening(DefaultDataCollator):
         max_length = 0
         for seq_idx, sample in enumerate(features):
             input_ids = sample["input_ids"]
+            # Convert to list if tensor
+            if hasattr(input_ids, "tolist"):
+                input_ids = input_ids.tolist()
             batch["input_ids"] += input_ids
+
             if is_labels_provided:
-                batch["labels"] += [separator_id] + sample["labels"][1:]
+                labels = sample["labels"]
+                # Convert to list if tensor
+                if hasattr(labels, "tolist"):
+                    labels = labels.tolist()
+                batch["labels"] += [separator_id] + labels[1:]
             else:
                 batch["labels"] += [separator_id] + input_ids[1:]
             if self.return_position_ids:
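With the `tolist()` guards, the flattening collator now accepts per-sample tensors as well as plain lists. A minimal usage sketch; the token values are arbitrary, and the printed results assume the default `return_tensors="pt"`:

import torch
from transformers import DataCollatorWithFlattening

collator = DataCollatorWithFlattening(return_position_ids=True)
features = [
    {"input_ids": torch.tensor([101, 7592, 102])},  # tensor inputs now go through .tolist()
    {"input_ids": [101, 2088, 102]},                # plain lists keep working
]
batch = collator(features)
print(batch["input_ids"])     # tensor([[101, 7592, 102, 101, 2088, 102]]) - one packed row
print(batch["position_ids"])  # tensor([[0, 1, 2, 0, 1, 2]]) - restarts at each packed sequence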
transformers/dependency_versions_table.py

@@ -9,7 +9,6 @@ deps = {
     "blobfile": "blobfile",
     "codecarbon": "codecarbon>=2.8.1",
     "cookiecutter": "cookiecutter==1.7.3",
-    "dataclasses": "dataclasses",
     "datasets": "datasets>=2.15.0",
     "deepspeed": "deepspeed>=0.9.3",
     "diffusers": "diffusers",
@@ -23,7 +22,7 @@ deps = {
     "GitPython": "GitPython<3.1.19",
     "hf-doc-builder": "hf-doc-builder>=0.3.0",
     "hf_xet": "hf_xet",
-    "huggingface-hub": "huggingface-hub>=1.
+    "huggingface-hub": "huggingface-hub>=1.2.1,<2.0",
     "importlib_metadata": "importlib_metadata",
     "ipadic": "ipadic>=1.0.0,<2.0",
     "jinja2": "jinja2>=3.1.0",
@@ -76,7 +75,7 @@ deps = {
     "tensorboard": "tensorboard",
     "timeout-decorator": "timeout-decorator",
     "tiktoken": "tiktoken",
-    "timm": "timm
+    "timm": "timm>=1.0.23",
     "tokenizers": "tokenizers>=0.22.0,<=0.23.0",
     "torch": "torch>=2.2",
     "torchaudio": "torchaudio",
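The pin table is importable at runtime, which is a quick way to confirm what an installed build expects; the values shown are the 5.0.0rc2 side of the hunks above:

from transformers.dependency_versions_table import deps

print(deps["huggingface-hub"])  # "huggingface-hub>=1.2.1,<2.0"
print(deps["timm"])             # "timm>=1.0.23"
print("dataclasses" in deps)    # False - the backport pin was dropped (stdlib since Python 3.7)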
transformers/dynamic_module_utils.py

@@ -30,7 +30,7 @@ from pathlib import Path
 from types import ModuleType
 from typing import Any, Optional, Union
 
-from huggingface_hub import try_to_load_from_cache
+from huggingface_hub import is_offline_mode, try_to_load_from_cache
 from packaging import version
 
 from .utils import (
@@ -38,7 +38,6 @@ from .utils import (
     TRANSFORMERS_DYNAMIC_MODULE_NAME,
     cached_file,
     extract_commit_hash,
-    is_offline_mode,
     logging,
 )
 from .utils.import_utils import VersionComparison, split_package_version
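Both this file and feature_extraction_utils.py below make the same swap: `is_offline_mode` is now imported from `huggingface_hub` instead of `transformers.utils`. Code that used the old location can follow suit; a sketch (the exact environment-variable handling lives in huggingface_hub and is not shown in this diff):

from huggingface_hub import is_offline_mode

# Reflects the hub's offline setting (e.g. HF_HUB_OFFLINE).
if is_offline_mode():
    print("Offline mode: only local caches will be used.")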
transformers/feature_extraction_utils.py

@@ -22,7 +22,7 @@ from collections import UserDict
 from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
 
 import numpy as np
-from huggingface_hub import create_repo
+from huggingface_hub import create_repo, is_offline_mode
 
 from .dynamic_module_utils import custom_object_save
 from .utils import (
@@ -30,9 +30,9 @@ from .utils import (
     PROCESSOR_NAME,
     PushToHubMixin,
     TensorType,
+    _is_tensor_or_array_like,
     copy_func,
     is_numpy_array,
-    is_offline_mode,
     is_torch_available,
     is_torch_device,
     is_torch_dtype,
@@ -68,11 +68,18 @@ class BatchFeature(UserDict):
         tensor_type (`Union[None, str, TensorType]`, *optional*):
             You can give a tensor_type here to convert the lists of integers in PyTorch/Numpy Tensors at
             initialization.
+        skip_tensor_conversion (`list[str]` or `set[str]`, *optional*):
+            List or set of keys that should NOT be converted to tensors, even when `tensor_type` is specified.
     """
 
-    def __init__(self, data: Optional[dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
+    def __init__(
+        self,
+        data: Optional[dict[str, Any]] = None,
+        tensor_type: Union[None, str, TensorType] = None,
+        skip_tensor_conversion: Optional[Union[list[str], set[str]]] = None,
+    ):
         super().__init__(data)
-        self.convert_to_tensors(tensor_type=tensor_type)
+        self.convert_to_tensors(tensor_type=tensor_type, skip_tensor_conversion=skip_tensor_conversion)
 
     def __getitem__(self, item: str) -> Any:
         """
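Usage sketch for the new argument; the key names here are made up for illustration:

from transformers import BatchFeature

data = {
    "input_values": [[0.1, 0.2, 0.3]],
    "frame_offsets": [[0, 40, 80]],  # hypothetical key we want left as a plain list
}
features = BatchFeature(data, tensor_type="np", skip_tensor_conversion=["frame_offsets"])
print(type(features["input_values"]))  # <class 'numpy.ndarray'>
print(features["frame_offsets"])       # [[0, 40, 80]] - untouched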
@@ -111,6 +118,14 @@ class BatchFeature(UserDict):
             import torch
 
             def as_tensor(value):
+                if torch.is_tensor(value):
+                    return value
+
+                # stack list of tensors if tensor_type is PyTorch (torch.tensor() does not support list of tensors)
+                if isinstance(value, (list, tuple)) and len(value) > 0 and torch.is_tensor(value[0]):
+                    return torch.stack(value)
+
+                # convert list of numpy arrays to numpy array (stack) if tensor_type is Numpy
                 if isinstance(value, (list, tuple)) and len(value) > 0:
                     if isinstance(value[0], np.ndarray):
                         value = np.array(value)
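One thing the new fast paths fix: `torch.tensor()` cannot consume a list of multi-element tensors, so batching pre-tensorized features used to fail; they are now stacked. A minimal sketch:

import torch
from transformers import BatchFeature

pixel_list = [torch.zeros(3, 4), torch.ones(3, 4)]  # hypothetical per-image tensors
features = BatchFeature({"pixel_values": pixel_list}, tensor_type="pt")
print(features["pixel_values"].shape)  # torch.Size([2, 3, 4]) - stacked, not re-created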
@@ -139,7 +154,11 @@
             is_tensor = is_numpy_array
         return is_tensor, as_tensor
 
-    def convert_to_tensors(self, tensor_type: Optional[Union[str, TensorType]] = None):
+    def convert_to_tensors(
+        self,
+        tensor_type: Optional[Union[str, TensorType]] = None,
+        skip_tensor_conversion: Optional[Union[list[str], set[str]]] = None,
+    ):
         """
         Convert the inner content to tensors.
 
@@ -147,6 +166,13 @@
             tensor_type (`str` or [`~utils.TensorType`], *optional*):
                 The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
                 `None`, no modification is done.
+            skip_tensor_conversion (`list[str]` or `set[str]`, *optional*):
+                List or set of keys that should NOT be converted to tensors, even when `tensor_type` is specified.
+
+        Note:
+            Values that don't have an array-like structure (e.g., strings, dicts, lists of strings) are
+            automatically skipped and won't be converted to tensors. Ragged arrays (lists of arrays with
+            different lengths) are still attempted, though they may raise errors during conversion.
         """
         if tensor_type is None:
             return self
@@ -155,18 +181,30 @@ class BatchFeature(UserDict):

         # Do the tensor conversion in batch
         for key, value in self.items():
+            # Skip keys explicitly marked for no conversion
+            if skip_tensor_conversion and key in skip_tensor_conversion:
+                continue
+
+            # Skip values that are not array-like
+            if not _is_tensor_or_array_like(value):
+                continue
+
             try:
                 if not is_tensor(value):
                     tensor = as_tensor(value)
-
                     self[key] = tensor
-            except
+            except Exception as e:
                 if key == "overflowing_values":
-                    raise ValueError(
+                    raise ValueError(
+                        f"Unable to create tensor for '{key}' with overflowing values of different lengths. "
+                        f"Original error: {str(e)}"
+                    ) from e
                 raise ValueError(
-                    "Unable to
-                    "
-
+                    f"Unable to convert output '{key}' (type: {type(value).__name__}) to tensor: {str(e)}\n"
+                    f"You can try:\n"
+                    f"  1. Use padding=True to ensure all outputs have the same shape\n"
+                    f"  2. Set return_tensors=None to return Python objects instead of tensors"
+                ) from e

         return self

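
Ragged inputs still fail, but the rewritten handler chains the original exception and names the offending key with concrete suggestions. A sketch of the failure path, assuming a NumPy version that rejects ragged arrays and that nested integer lists count as array-like:

    from transformers import BatchFeature

    try:
        BatchFeature(data={"input_ids": [[1, 2, 3], [4, 5]]}, tensor_type="np")
    except ValueError as e:
        # e names 'input_ids' and suggests padding=True or return_tensors=None
        print(e)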
@@ -205,12 +243,15 @@ class BatchFeature(UserDict):

         # We cast only floating point tensors to avoid issues with tokenizers casting `LongTensor` to `FloatTensor`
         def maybe_to(v):
-            # check if v is a floating point
+            # check if v is a floating point tensor
             if isinstance(v, torch.Tensor) and torch.is_floating_point(v):
                 # cast and send to device
                 return v.to(*args, **kwargs)
             elif isinstance(v, torch.Tensor) and device is not None:
                 return v.to(device=device, non_blocking=non_blocking)
+            # recursively handle lists and tuples
+            elif isinstance(v, (list, tuple)):
+                return type(v)(maybe_to(item) for item in v)
             else:
                 return v

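
With the recursive branch, BatchFeature.to() now also casts and moves tensors stored inside lists or tuples. A sketch:

    import torch
    from transformers import BatchFeature

    # With no tensor_type, the ragged list of tensors is kept as a list; .to()
    # now recurses into it and casts each floating point tensor element-wise.
    features = BatchFeature(data={"pixel_values": [torch.zeros(3, 4, 4), torch.zeros(3, 8, 8)]})
    features = features.to(torch.float16)
    print(features["pixel_values"][0].dtype)  # torch.float16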
@@ -228,8 +269,8 @@ class FeatureExtractionMixin(PushToHubMixin):

     def __init__(self, **kwargs):
         """Set elements of `kwargs` as attributes."""
-        # Pop "processor_class"
-
+        # Pop "processor_class", it should not be saved in feature extractor config
+        kwargs.pop("processor_class", None)
         # Additional attributes without default values
         for key, value in kwargs.items():
             try:
@@ -238,10 +279,6 @@ class FeatureExtractionMixin(PushToHubMixin):
                 logger.error(f"Can't set {key} with value {value} for {self}")
                 raise err

-    def _set_processor_class(self, processor_class: str):
-        """Sets processor class as an attribute."""
-        self._processor_class = processor_class
-
     @classmethod
     def from_pretrained(
         cls: type[SpecificFeatureExtractorType],
@@ -585,12 +622,6 @@ class FeatureExtractionMixin(PushToHubMixin):
             if isinstance(value, np.ndarray):
                 dictionary[key] = value.tolist()

-        # make sure private name "_processor_class" is correctly
-        # saved as "processor_class"
-        _processor_class = dictionary.pop("_processor_class", None)
-        if _processor_class is not None:
-            dictionary["processor_class"] = _processor_class
-
         return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"

     def to_json_file(self, json_file_path: Union[str, os.PathLike]):
transformers/file_utils.py CHANGED
transformers/generation/__init__.py CHANGED
@@ -86,7 +86,11 @@ else:
         "StopStringCriteria",
     ]
     _import_structure["continuous_batching"] = [
+        "ContinuousBatchingManager",
         "ContinuousMixin",
+        "FIFOScheduler",
+        "PrefillFirstScheduler",
+        "Scheduler",
     ]
     _import_structure["utils"] = [
         "GenerationMixin",
@@ -127,7 +131,13 @@ if TYPE_CHECKING:
         EarlyExitCandidateGenerator,
         PromptLookupCandidateGenerator,
     )
-    from .continuous_batching import ContinuousMixin
+    from .continuous_batching import (
+        ContinuousBatchingManager,
+        ContinuousMixin,
+        FIFOScheduler,
+        PrefillFirstScheduler,
+        Scheduler,
+    )
     from .logits_process import (
         AlternatingCodebooksLogitsProcessor,
         ClassifierFreeGuidanceLogitsProcessor,
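
Both hunks above widen the public surface of transformers.generation: alongside ContinuousMixin, the manager and the scheduler classes are now importable directly. A quick check, assuming the rc2 wheel and that FIFOScheduler and PrefillFirstScheduler subclass Scheduler as in the continuous_batching module:

    from transformers.generation import (
        ContinuousBatchingManager,
        ContinuousMixin,
        FIFOScheduler,
        PrefillFirstScheduler,
        Scheduler,
    )

    # Scheduler is the base class; the other two are concrete scheduling policies.
    print([Scheduler in cls.__mro__ for cls in (FIFOScheduler, PrefillFirstScheduler)])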
transformers/generation/candidate_generator.py CHANGED
@@ -41,7 +41,9 @@ if TYPE_CHECKING:
 class CandidateGenerator:
     """Abstract base class for all candidate generators that can be applied during assisted generation."""

-    def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.LongTensor, torch.FloatTensor]:
+    def get_candidates(
+        self, input_ids: torch.LongTensor, is_first_iteration: bool
+    ) -> tuple[torch.LongTensor, torch.FloatTensor]:
         """
         Fetches the candidates to be tried for the current input.

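
The signature change ripples through every generator below: implementations now receive is_first_iteration so prefill-only work can run exactly once. A hypothetical subclass illustrating the updated contract (not part of the library):

    import torch
    from transformers.generation.candidate_generator import CandidateGenerator

    class NoopCandidateGenerator(CandidateGenerator):
        """Hypothetical generator that proposes no candidate tokens."""

        def get_candidates(
            self, input_ids: torch.LongTensor, is_first_iteration: bool
        ) -> tuple[torch.LongTensor, torch.FloatTensor]:
            if is_first_iteration:
                pass  # one-time setup (e.g. priming a draft cache) would go here
            return input_ids, None  # no new tokens, no candidate logits

        def update_candidate_strategy(self, input_ids, scores, num_matches):
            pass  # nothing to adapt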
@@ -117,11 +119,16 @@ class AssistedCandidateGenerator(CandidateGenerator):

         # Prepare the assistant and the starting number of candidate tokens
         self.assistant_model = assistant_model
-
-
+
+        # Prepare the generation config by updating with default values if not already set by users
+        self.assistant_generation_config = copy.deepcopy(assistant_model.generation_config)
+        global_defaults = self.assistant_generation_config._get_default_generation_params()
+        self.assistant_generation_config.update(**global_defaults, defaults_only=True)
+        self.num_assistant_tokens = self.assistant_generation_config.num_assistant_tokens
+        self.assistant_confidence_threshold = self.assistant_generation_config.assistant_confidence_threshold

         # Set eos in assistant same as in target model
-        self.
+        self.assistant_generation_config.eos_token_id = generation_config.eos_token_id

         # Prepare the kwargs for the assistant model
         assistant_kwargs = {}
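
The constructor now snapshots the assistant's generation config and backfills library defaults without clobbering anything the user set, via update(..., defaults_only=True). The intended merge semantics, sketched over plain dicts (the helper below is illustrative, not library code):

    def fill_defaults(user_config: dict, explicitly_set: set, defaults: dict) -> dict:
        """Return user_config with defaults added only for keys the user never set."""
        merged = dict(user_config)
        for key, value in defaults.items():
            if key not in explicitly_set:
                merged[key] = value
        return merged

    cfg = {"num_assistant_tokens": 5}  # set by the user
    defaults = {"num_assistant_tokens": 20, "assistant_confidence_threshold": 0.4}
    print(fill_defaults(cfg, {"num_assistant_tokens"}, defaults))
    # {'num_assistant_tokens': 5, 'assistant_confidence_threshold': 0.4}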
@@ -138,10 +145,10 @@ class AssistedCandidateGenerator(CandidateGenerator):
         # If the assistant is an encoder-decoder model, assume the encoder is different on the assistant.
         if assistant_model.config.is_encoder_decoder:
             inputs_tensor, model_input_name, assistant_kwargs = assistant_model._prepare_model_inputs(
-                inputs_tensor,
+                inputs_tensor, self.assistant_generation_config.bos_token_id, assistant_kwargs
             )
             assistant_kwargs = assistant_model._prepare_encoder_decoder_kwargs_for_generation(
-                inputs_tensor, assistant_kwargs, model_input_name,
+                inputs_tensor, assistant_kwargs, model_input_name, self.assistant_generation_config
             )
         elif "encoder_outputs" in model_kwargs:
             assistant_kwargs["encoder_outputs"] = model_kwargs["encoder_outputs"]
@@ -189,13 +196,15 @@ class AssistedCandidateGenerator(CandidateGenerator):

         if (
             is_sklearn_available()
-            and self.
+            and self.assistant_generation_config.assistant_confidence_threshold
             and type(self) is AssistedCandidateGenerator
         ):
             self.probs = []
             self.matches = []

-    def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.LongTensor, torch.FloatTensor]:
+    def get_candidates(
+        self, input_ids: torch.LongTensor, is_first_iteration: bool
+    ) -> tuple[torch.LongTensor, torch.FloatTensor]:
         """
         Fetches the candidates to be tried for the current input.

@@ -216,7 +225,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
         # Update past key values and masks
         self._update_past_and_masks(input_ids)
         # Generate candidates
-        generation_args = self._prepare_generation_args(input_ids, min_new_tokens, max_new_tokens)
+        generation_args = self._prepare_generation_args(input_ids, min_new_tokens, max_new_tokens, is_first_iteration)
         candidate_ids, candidate_logits = self._generate_candidates(generation_args)
         return candidate_ids, candidate_logits

@@ -236,7 +245,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
         # Adjust the max number of assistant tokens to use in the next iteration. This is a simple heuristic,
         # probably can be improved -- we want to balance the benefits of getting assistant tokens correct with the
         # cost of forecasting incorrect assistant tokens.
-        if self.
+        if self.assistant_generation_config.num_assistant_tokens_schedule in {
             "heuristic",
             "heuristic_transient",
         }:
@@ -250,7 +259,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
         # This adaptation is not compatible with UAG, as it relies on the number of matched tokens based on the draft vocabulary, which is unavailable in UAG.
         if (
             is_sklearn_available()
-            and self.
+            and self.assistant_generation_config.assistant_confidence_threshold
             and type(self) is AssistedCandidateGenerator
         ):
             # update self.matches
@@ -276,7 +285,7 @@ class AssistedCandidateGenerator(CandidateGenerator):
         optimal_threshold_index = np.argmin(costs)
         best_threshold = thresholds[optimal_threshold_index]

-        self.
+        self.assistant_generation_config.assistant_confidence_threshold = best_threshold

     def _calculate_new_tokens(self, input_ids: torch.LongTensor) -> tuple[int, int]:
         """Calculate the minimum and maximum number of new tokens to generate."""
@@ -304,23 +313,50 @@ class AssistedCandidateGenerator(CandidateGenerator):

         return has_past_key_values

-    def _prepare_generation_args(self, input_ids: torch.LongTensor, min_new_tokens: int, max_new_tokens: int) -> dict:
+    def _prepare_generation_args(
+        self, input_ids: torch.LongTensor, min_new_tokens: int, max_new_tokens: int, is_first_iteration: bool
+    ) -> dict:
         """Prepare arguments for the generation call."""
-        return {
-            self.input_ids_key: input_ids,
-            "min_new_tokens": min_new_tokens,
-            "max_new_tokens": max_new_tokens,
-            "generation_config": self.generation_config,
-            "logits_processor": self.logits_processor,
-        }
+        # Generate candidates. Run prefill-specific logic in first generation and prepare model kwargs.
+        # Some models prepare inputs differently depending on first vs subsequent iterations.(e.g. VLMs)
+        # Assisted generation however calls internally `self.generate()` many times and technically will
+        # lead to many `is_first_iteration's`. This way we can call prefill only once per assistant model
+        if is_first_iteration:
+            generation_args = self.assistant_model._get_initial_cache_position(
+                input_ids.shape[1], input_ids.device, self.assistant_kwargs.copy()
+            )
+            generation_args = self.assistant_model.prepare_inputs_for_generation(
+                input_ids, is_first_iteration=True, **generation_args
+            )
+            # NOTE: `prepare_inputs_for_generation` creates inputs that can't be used when continuing generation with past-cache
+            # therefore we manually re-assign full input ids and other args. It is a known issue, due to legacy reasons we
+            # have to pass whole input ids to `generate()` including past tokens which are in encoded in cache
+            generation_args[self.input_ids_key] = input_ids
+            for model_input_name in ["position_ids", "token_type_ids", "decoder_position_ids", "cache_position"]:
+                generation_args.pop(model_input_name, None)
+        else:
+            generation_args = {self.input_ids_key: input_ids}
+        generation_args.update(
+            {
+                "min_new_tokens": min_new_tokens,
+                "max_new_tokens": max_new_tokens,
+                "generation_config": self.generation_config,
+                "logits_processor": self.logits_processor,
+            }
+        )
+
+        generation_args.update(
+            {k: self.assistant_kwargs[k] for k in self.assistant_kwargs if k not in generation_args}
+        )
+        return generation_args

     def _generate_candidates(self, generation_args: dict) -> tuple[torch.LongTensor, torch.FloatTensor | None]:
         """Generate candidate sequences using the assistant model."""
-        assistant_output = self.assistant_model.generate(**generation_args
+        assistant_output = self.assistant_model.generate(**generation_args)
         self.assistant_kwargs["past_key_values"] = assistant_output.past_key_values
         if (
             is_sklearn_available()
-            and self.
+            and self.assistant_generation_config.assistant_confidence_threshold
             and type(self) is AssistedCandidateGenerator
         ):
             scores_tensor = torch.cat(assistant_output.scores, dim=0)
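
All of this machinery is driven by ordinary assisted generation; is_first_iteration is threaded in by generate() itself. An end-to-end sketch with illustrative checkpoints (any target/draft pair sharing a tokenizer works):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")
    target = AutoModelForCausalLM.from_pretrained("gpt2-medium")
    draft = AutoModelForCausalLM.from_pretrained("gpt2")  # smaller assistant

    inputs = tok("The quick brown fox", return_tensors="pt")
    out = target.generate(**inputs, assistant_model=draft, max_new_tokens=20)
    print(tok.decode(out[0], skip_special_tokens=True))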
@@ -383,8 +419,8 @@ class AssistedCandidateGeneratorDifferentTokenizers(AssistedCandidateGenerator):
         self.assistant_tokenizer = assistant_tokenizer
         self.prev_target_ids_len: int | None = None
         self.prev_assistant_ids = None
-        self.target_lookbehind =
-        self.assistant_lookbehind =
+        self.target_lookbehind = self.assistant_generation_config.target_lookbehind
+        self.assistant_lookbehind = self.assistant_generation_config.assistant_lookbehind

     @staticmethod
     def _get_longest_diag_dict(input_matrix, nonzero_idx):
@@ -494,7 +530,9 @@ class AssistedCandidateGeneratorDifferentTokenizers(AssistedCandidateGenerator):
         dest_ids = destination_tokenizer(text, add_special_tokens=True, return_tensors="pt")["input_ids"]
         return dest_ids.to(input_ids.device)

-    def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.LongTensor, torch.FloatTensor]:
+    def get_candidates(
+        self, input_ids: torch.LongTensor, is_first_iteration: bool
+    ) -> tuple[torch.LongTensor, torch.FloatTensor]:
         """
         Fetches the candidates to be tried for the current input.

@@ -520,10 +558,12 @@ class AssistedCandidateGeneratorDifferentTokenizers(AssistedCandidateGenerator):
         min_new_tokens = max(min(max_new_tokens, self.main_model_min_length - assistant_input_ids.shape[-1]), 0)

         self._update_past_and_masks(assistant_input_ids, remove_from_pkv)
-        generation_args = self._prepare_generation_args(assistant_input_ids, min_new_tokens, max_new_tokens)
+        generation_args = self._prepare_generation_args(
+            assistant_input_ids, min_new_tokens, max_new_tokens, is_first_iteration
+        )
         self.assistant_kwargs.pop("attention_mask", None)

-        assistant_output = self.assistant_model.generate(**generation_args
+        assistant_output = self.assistant_model.generate(**generation_args)
         new_target_ids = self._process_assistant_outputs(input_ids, assistant_output.sequences)

         # Update state
@@ -919,7 +959,9 @@ class UniversalSpeculativeDecodingGenerator(AssistedCandidateGeneratorDifferentTokenizers):
         self._target_seq_len_with_candidates: int = 0
         self._prev_assistant_ids: torch.LongTensor | None = None

-    def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.LongTensor, torch.FloatTensor]:
+    def get_candidates(
+        self, input_ids: torch.LongTensor, is_first_iteration: bool
+    ) -> tuple[torch.LongTensor, torch.FloatTensor]:
         """
         Simplified version of get_candidates that uses the translator cache for token conversion.
         """
@@ -931,7 +973,9 @@ class UniversalSpeculativeDecodingGenerator(AssistedCandidateGeneratorDifferentTokenizers):
             return input_ids, None

         self._update_past_and_masks(assistant_input_ids, num_added_tokens=num_added_tokens)
-        generation_args = self._prepare_generation_args(assistant_input_ids, min_new_tokens, max_new_tokens)
+        generation_args = self._prepare_generation_args(
+            assistant_input_ids, min_new_tokens, max_new_tokens, is_first_iteration
+        )

         # Ensure scores are returned
         generation_args["generation_config"].output_scores = True
@@ -1045,7 +1089,9 @@ class PromptLookupCandidateGenerator(CandidateGenerator):
         if self.max_matching_ngram_size <= 0 or self.num_output_tokens <= 0:
             raise ValueError("Invalid max_matching_ngram_size or num_output_tokens")

-    def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.LongTensor, torch.FloatTensor]:
+    def get_candidates(
+        self, input_ids: torch.LongTensor, is_first_iteration: bool
+    ) -> tuple[torch.LongTensor, torch.FloatTensor]:
         """
         Fetches the candidates to be tried for the current input.

@@ -1202,7 +1248,9 @@ class EarlyExitCandidateGenerator(AssistedCandidateGenerator):
         self.assistant_early_exit = self.generation_config.assistant_early_exit
         self.generation_config.assistant_early_exit = None

-    def get_candidates(self, input_ids: torch.LongTensor) -> tuple[torch.LongTensor, torch.FloatTensor]:
+    def get_candidates(
+        self, input_ids: torch.LongTensor, is_first_iteration: bool
+    ) -> tuple[torch.LongTensor, torch.FloatTensor]:
         # Temporarily sets the number of hidden layers to the early exit value
         base_model = getattr(self.assistant_model, self.assistant_model.base_model_prefix)
         original_num_hidden_layers = base_model.config.num_hidden_layers