transformers 5.0.0rc0-py3-none-any.whl → 5.0.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/integrations/moe.py
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from functools import wraps
|
|
16
|
+
|
|
17
|
+
from ..utils.generic import GeneralInterface
|
|
18
|
+
from ..utils.import_utils import is_torch_available
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
if is_torch_available():
|
|
22
|
+
import torch
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Examples of experts class with its eager mm implementation
|
|
26
|
+
# class Experts(nn.Module):
|
|
27
|
+
# """Collection of expert weights stored as 3D tensors."""
|
|
28
|
+
|
|
29
|
+
# def __init__(self, config):
|
|
30
|
+
# super().__init__()
|
|
31
|
+
# self.num_experts = config.n_routed_experts
|
|
32
|
+
# self.hidden_dim = config.hidden_size
|
|
33
|
+
# self.intermediate_dim = config.moe_intermediate_size
|
|
34
|
+
# self.gate_up_proj = nn.Parameter(torch.empty(self.num_experts, 2 * self.intermediate_dim, self.hidden_dim))
|
|
35
|
+
# self.down_proj = nn.Parameter(torch.empty(self.num_experts, self.hidden_dim, self.intermediate_dim))
|
|
36
|
+
# self.act_fn = ACT2FN[config.hidden_act]
|
|
37
|
+
|
|
38
|
+
# def forward(
|
|
39
|
+
# self,
|
|
40
|
+
# hidden_states: torch.Tensor,
|
|
41
|
+
# top_k_index: torch.Tensor,
|
|
42
|
+
# top_k_weights: torch.Tensor,
|
|
43
|
+
# ) -> torch.Tensor:
|
|
44
|
+
# final_hidden_states = torch.zeros_like(hidden_states)
|
|
45
|
+
# with torch.no_grad():
|
|
46
|
+
# expert_mask = torch.nn.functional.one_hot(top_k_index, num_classes=self.num_experts)
|
|
47
|
+
# expert_mask = expert_mask.permute(2, 1, 0)
|
|
48
|
+
# expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
|
|
49
|
+
|
|
50
|
+
# for expert_idx in expert_hit:
|
|
51
|
+
# expert_idx = expert_idx[0]
|
|
52
|
+
# if expert_idx == self.num_experts:
|
|
53
|
+
# continue
|
|
54
|
+
# top_k_pos, token_idx = torch.where(expert_mask[expert_idx])
|
|
55
|
+
# current_state = hidden_states[token_idx]
|
|
56
|
+
# gate, up = nn.functional.linear(current_state, self.gate_up_proj[expert_idx]).chunk(2, dim=-1)
|
|
57
|
+
# current_hidden_states = self.act_fn(gate) * up
|
|
58
|
+
# current_hidden_states = nn.functional.linear(current_hidden_states, self.down_proj[expert_idx])
|
|
59
|
+
# current_hidden_states = current_hidden_states * top_k_weights[token_idx, top_k_pos, None]
|
|
60
|
+
# final_hidden_states.index_add_(0, token_idx, current_hidden_states.to(final_hidden_states.dtype))
|
|
61
|
+
|
|
62
|
+
# return final_hidden_states
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def batched_mm_experts_forward(
|
|
66
|
+
self: torch.nn.Module,
|
|
67
|
+
hidden_states: torch.Tensor,
|
|
68
|
+
top_k_index: torch.Tensor,
|
|
69
|
+
top_k_weights: torch.Tensor,
|
|
70
|
+
) -> torch.Tensor:
|
|
71
|
+
device = hidden_states.device
|
|
72
|
+
num_top_k = top_k_index.size(-1)
|
|
73
|
+
num_tokens = hidden_states.size(0)
|
|
74
|
+
num_experts = self.gate_up_proj.size(0)
|
|
75
|
+
final_hidden_states = torch.zeros_like(hidden_states)
|
|
76
|
+
|
|
77
|
+
# Flatten top_k_index to get expert_ids per selected sample
|
|
78
|
+
expert_ids = top_k_index.reshape(-1)
|
|
79
|
+
token_idx = torch.arange(num_tokens, device=device).unsqueeze(1).expand(-1, num_top_k).reshape(-1)
|
|
80
|
+
|
|
81
|
+
# Resolve routing weights per selected sample, allowing top_k_weights to be either:
|
|
82
|
+
# - (num_tokens, num_top_k) Qwen2MoE style
|
|
83
|
+
# - (num_tokens, num_experts) DeepseekV2 style
|
|
84
|
+
if top_k_weights.shape == (num_tokens, num_top_k):
|
|
85
|
+
sample_weights = top_k_weights.reshape(-1) # (S,)
|
|
86
|
+
elif top_k_weights.shape == (num_tokens, num_experts):
|
|
87
|
+
sample_weights = top_k_weights[token_idx, expert_ids] # (S,)
|
|
88
|
+
else:
|
|
89
|
+
raise ValueError(
|
|
90
|
+
f"top_k_weights has an invalid/unsupported shape. It should be either (num_tokens, num_top_k)({num_tokens}, {num_top_k}) "
|
|
91
|
+
f"or (num_tokens, num_experts)({num_tokens}, {num_experts}), but got {top_k_weights.shape}."
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# Get current hidden states for selected samples
|
|
95
|
+
current_hidden_states = hidden_states[token_idx] # (S, hidden_dim)
|
|
96
|
+
|
|
97
|
+
# Select projection matrices for selected experts
|
|
98
|
+
selected_gate_up = self.gate_up_proj[expert_ids] # (S, hidden_dim, 2 * intermediate_dim)
|
|
99
|
+
selected_down = self.down_proj[expert_ids] # (S, hidden_dim, intermediate_dim)
|
|
100
|
+
|
|
101
|
+
# --- Up projection per expert (batched) ---
|
|
102
|
+
gate_up_out = torch.bmm(selected_gate_up, current_hidden_states.unsqueeze(-1)).squeeze(-1)
|
|
103
|
+
if hasattr(self, "gate_up_proj_bias") and self.gate_up_proj_bias is not None:
|
|
104
|
+
gate_up_out = gate_up_out + self.gate_up_proj_bias[expert_ids]
|
|
105
|
+
|
|
106
|
+
# Split into gate and up components
|
|
107
|
+
gate, up = gate_up_out.chunk(2, dim=-1) # both have shape (S, intermediate_dim)
|
|
108
|
+
|
|
109
|
+
# Apply activation
|
|
110
|
+
hidden_after_activation = self.act_fn(gate) * up # (S, intermediate_dim)
|
|
111
|
+
|
|
112
|
+
# --- Down projection per expert (batched) ---
|
|
113
|
+
out_per_sample = torch.bmm(selected_down, hidden_after_activation.unsqueeze(-1)).squeeze(-1)
|
|
114
|
+
if hasattr(self, "down_proj_bias") and self.down_proj_bias is not None:
|
|
115
|
+
out_per_sample = out_per_sample + self.down_proj_bias[expert_ids]
|
|
116
|
+
|
|
117
|
+
# Apply routing weights
|
|
118
|
+
out_per_sample = out_per_sample * sample_weights.unsqueeze(-1) # (S, hidden_dim)
|
|
119
|
+
|
|
120
|
+
# Accumulate results back to the final_hidden_states using original token indices
|
|
121
|
+
final_hidden_states.index_add_(0, token_idx, out_per_sample.to(final_hidden_states.dtype))
|
|
122
|
+
|
|
123
|
+
return final_hidden_states
|
|
124
|
+
|
|
125
|
+
|
|
+def grouped_mm_experts_forward(
+    self: torch.nn.Module,
+    hidden_states: torch.Tensor,
+    top_k_index: torch.Tensor,
+    top_k_weights: torch.Tensor,
+) -> torch.Tensor:
+    if not hasattr(torch, "_grouped_mm"):
+        raise ImportError(
+            "torch._grouped_mm is not available. Please make sure you are using a PyTorch version that includes it (2.9+)."
+        )
+
+    device = hidden_states.device
+    num_top_k = top_k_index.size(-1)
+    num_tokens = hidden_states.size(0)
+    num_experts = self.gate_up_proj.size(0)
+    final_hidden_states = torch.zeros_like(hidden_states)
+
+    # Flatten top_k_index to get expert_ids per selected sample
+    expert_ids = top_k_index.reshape(-1)
+    token_idx = torch.arange(num_tokens, device=device).unsqueeze(1).expand(-1, num_top_k).reshape(-1)
+
+    # Get permutation to group by expert
+    perm = torch.argsort(expert_ids, stable=True)
+    inv_perm = torch.argsort(perm, stable=True)
+
+    # Resolve routing weights per selected sample, allowing top_k_weights to be either:
+    # - (num_tokens, num_top_k) Qwen2MoE style
+    # - (num_tokens, num_experts) DeepseekV2 style
+    if top_k_weights.shape == (num_tokens, num_top_k):
+        sample_weights = top_k_weights.reshape(-1)  # (S,)
+    elif top_k_weights.shape == (num_tokens, num_experts):
+        sample_weights = top_k_weights[token_idx, expert_ids]  # (S,)
+    else:
+        raise ValueError(
+            f"top_k_weights has an unsupported shape. It should be either (num_tokens, num_top_k) = ({num_tokens}, {num_top_k}) "
+            f"or (num_tokens, num_experts) = ({num_tokens}, {num_experts}), but got {top_k_weights.shape}."
+        )
+
+    # Get current hidden states for selected samples
+    current_hidden_states = hidden_states[token_idx]  # (S, hidden_dim)
+
+    # Group by expert for grouped_mm
+    expert_ids_g = expert_ids[perm]
+    sample_weights_g = sample_weights[perm]
+    current_states_g = current_hidden_states[perm]
+
+    # Compute offsets for grouped_mm,
+    # using histc instead of bincount to avoid cuda graph issues
+    # (grouped_mm_experts_forward still fails with cuda graphs, but because of _grouped_mm internals)
+    num_tokens_per_expert = torch.histc(expert_ids_g.float(), bins=num_experts, min=0, max=num_experts - 1)
+    offsets = torch.cumsum(num_tokens_per_expert, dim=0, dtype=torch.int32)
+
+    # --- Up projection per expert (grouped_mm) ---
+    gate_up_out = torch._grouped_mm(current_states_g, self.gate_up_proj.transpose(-2, -1), offs=offsets)
+    if hasattr(self, "gate_up_proj_bias") and self.gate_up_proj_bias is not None:
+        # we should be able to pass the bias to the grouped_mm call, but it's still not fully supported
+        gate_up_out = gate_up_out + self.gate_up_proj_bias[expert_ids_g]
+
+    # Split into gate and up components
+    gate, up = gate_up_out.chunk(2, dim=-1)  # both have shape (S, intermediate_dim)
+
+    # Apply activation
+    hidden_after_activation = self.act_fn(gate) * up  # (S, intermediate_dim)
+
+    # --- Down projection per expert (grouped_mm) ---
+    out_per_sample_g = torch._grouped_mm(hidden_after_activation, self.down_proj.transpose(-2, -1), offs=offsets)
+    if hasattr(self, "down_proj_bias") and self.down_proj_bias is not None:
+        # we should be able to pass the bias to the grouped_mm call, but it's still not fully supported
+        out_per_sample_g = out_per_sample_g + self.down_proj_bias[expert_ids_g]
+
+    # Apply routing weights
+    out_per_sample_g = out_per_sample_g * sample_weights_g.unsqueeze(-1)
+
+    # Restore original order
+    out_per_sample = out_per_sample_g[inv_perm]
+
+    # Accumulate results back to the final_hidden_states using original token indices
+    final_hidden_states.index_add_(0, token_idx, out_per_sample.to(final_hidden_states.dtype))
+
+    return final_hidden_states
+
+
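`torch._grouped_mm` consumes expert-sorted rows plus int32 group boundaries; a small standalone illustration of what the `histc`/`cumsum` pair above produces (toy values):

```python
import torch

num_experts = 4
expert_ids_g = torch.tensor([0, 0, 1, 3, 3, 3])  # already sorted by expert, as after perm

counts = torch.histc(expert_ids_g.float(), bins=num_experts, min=0, max=num_experts - 1)
offsets = torch.cumsum(counts, dim=0, dtype=torch.int32)

print(counts)   # tensor([2., 1., 0., 3.])
print(offsets)  # tensor([2, 3, 3, 6], dtype=torch.int32); group i spans rows [offsets[i-1], offsets[i])

# histc matches bincount here; it is preferred above only for cuda-graph friendliness
assert torch.equal(counts.long(), torch.bincount(expert_ids_g, minlength=num_experts))
```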
+class ExpertsInterface(GeneralInterface):
+    """Interface for registering custom experts implementations."""
+
+    _global_mapping = {
+        "batched_mm": batched_mm_experts_forward,
+        "grouped_mm": grouped_mm_experts_forward,
+    }
+
+
+ALL_EXPERTS_FUNCTIONS = ExpertsInterface()
+
+
+def use_experts_implementation(experts_class: type[torch.nn.Module]) -> type[torch.nn.Module]:
+    original_init = experts_class.__init__
+    original_forward = experts_class.forward
+
+    @wraps(original_init)
+    def __init__(self, config, *args, **kwargs):
+        original_init(self, config, *args, **kwargs)
+        self.config = config
+
+    @wraps(original_forward)
+    def forward(self, *args, **kwargs):
+        experts_forward = original_forward
+
+        if self.config._experts_implementation != "eager":
+            experts_forward = ALL_EXPERTS_FUNCTIONS[self.config._experts_implementation]
+
+        return experts_forward(self, *args, **kwargs)
+
+    experts_class.__init__ = __init__
+    experts_class.forward = forward
+    return experts_class
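With the decorator in place, the implementation is chosen per forward call from `config._experts_implementation`. A rough sketch of plugging in a custom kernel, assuming `ExpertsInterface` keeps the dict-style registration of the other `GeneralInterface` subclasses (the import path and `my_experts_forward` are illustrative):

```python
from transformers.integrations.moe import ALL_EXPERTS_FUNCTIONS

def my_experts_forward(self, hidden_states, top_k_index, top_k_weights):
    # Hypothetical drop-in: must follow the same
    # (self, hidden_states, top_k_index, top_k_weights) -> final_hidden_states
    # contract as batched_mm_experts_forward / grouped_mm_experts_forward.
    ...

ALL_EXPERTS_FUNCTIONS["my_kernel"] = my_experts_forward  # dict-style registration (assumption)

# Decorated experts modules would then dispatch to it via:
# model.config._experts_implementation = "my_kernel"
```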
@@ -12,24 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ..utils import is_accelerate_available, is_torch_available, logging
+from ..utils import is_torch_available, is_torch_xpu_available, logging
 
 
 if is_torch_available():
     import torch
     from torch import nn
+    from contextlib import contextmanager
     from typing import Optional
 
     from ..core_model_loading import ConversionOps
-
-
-    if is_accelerate_available():
-        from accelerate import init_empty_weights
-
-    import re
-    from contextlib import contextmanager
-
-    from ..quantizers.quantizers_utils import get_module_from_name
+    from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
 
 
 logger = logging.get_logger(__name__)
@@ -436,15 +429,6 @@ def mlp_forward(self, hidden_states):
     return routed_out, router_logits
 
 
-def should_convert_module(current_key_name, patterns):
-    current_key_name_str = ".".join(current_key_name)
-    if not any(
-        re.match(f"{key}\\.", current_key_name_str) or re.match(f"{key}", current_key_name_str) for key in patterns
-    ):
-        return True
-    return False
-
-
 def dequantize(module, param_name, param_value, target_device, dq_param_name, **kwargs):
     from ..integrations.tensor_parallel import shard_and_distribute_module
 
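For reference, a rough stand-in for what the relocated helper decides (mirroring the removed logic; the real version now lives in `transformers.quantizers.quantizers_utils` and its exact signature may differ):

```python
import re

def _should_convert(module_name: str, skip_patterns: list[str] | None) -> bool:
    # A module is converted unless some pattern matches its dotted name as a prefix.
    skip_patterns = skip_patterns or []
    return not any(
        re.match(f"{p}\\.", module_name) or re.match(f"{p}", module_name) for p in skip_patterns
    )

assert _should_convert("model.layers.0.mlp", ["lm_head"]) is True
assert _should_convert("lm_head", ["lm_head"]) is False
```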
@@ -604,70 +588,40 @@ def swizzle_mxfp4_convertops(blocks, scales, module, proj, target_device, triton
     )
 
 
-def _replace_with_mxfp4_linear(
-    model,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    quantization_config=None,
-    has_been_replaced=False,
-    config=None,
-):
-
-
+def replace_with_mxfp4_linear(model, quantization_config=None, modules_to_not_convert: list[str] | None = None):
+    """
+    Public method that replaces the expert layers of the given model with mxfp4 quantized layers.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model to convert, can be any `torch.nn.Module` instance.
+        quantization_config (`Mxfp4Config`, defaults to `None`):
+            The quantization config object that contains the quantization parameters.
+        modules_to_not_convert (`list`, *optional*, defaults to `None`):
+            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
+            converted.
+    """
+    if quantization_config.dequantize:
+        return model
+
+    from .hub_kernels import get_kernel
 
-
-
-
-
+    global triton_kernels_hub
+    triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
+
+    has_been_replaced = False
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
             continue
         if module.__class__.__name__ == "GptOssExperts" and not quantization_config.dequantize:
-            with
-            model.
+            with torch.device("meta"):
+                model.set_submodule(module_name, Mxfp4GptOssExperts(model.config))
             has_been_replaced = True
         if module.__class__.__name__ == "GptOssMLP" and not quantization_config.dequantize:
             from types import MethodType
 
             module.forward = MethodType(mlp_forward, module)
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = _replace_with_mxfp4_linear(
-                module,
-                modules_to_not_convert,
-                current_key_name,
-                quantization_config,
-                has_been_replaced=has_been_replaced,
-                config=config,
-            )
-        current_key_name.pop(-1)
-    return model, has_been_replaced
-
 
-def replace_with_mxfp4_linear(
-    model,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    quantization_config=None,
-    config=None,
-):
-    if quantization_config.dequantize:
-        return model
-    else:
-        from kernels import get_kernel
-
-        global triton_kernels_hub
-        triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
-
-        modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
-
-        if quantization_config.modules_to_not_convert is not None:
-            modules_to_not_convert.extend(quantization_config.modules_to_not_convert)
-        modules_to_not_convert = list(set(modules_to_not_convert))
-        model, has_been_replaced = _replace_with_mxfp4_linear(
-            model,
-            modules_to_not_convert,
-            current_key_name,
-            quantization_config,
-            config=config,
-        )
     if not has_been_replaced:
         logger.warning(
             "You are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model."
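The rewrite replaces the old recursive walk with a single `named_modules()` pass, and builds each replacement under `torch.device("meta")` so nothing is materialized before the quantized weights stream in. A minimal sketch of that pattern on a toy module (not the transformers API):

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8))

with torch.device("meta"):
    # No storage is allocated here; the parameters carry only shape and dtype
    # until real (quantized) weights are loaded into them later.
    replacement = nn.Linear(8, 8)

model.set_submodule("0", replacement)  # swap the child in place by its dotted name
print(model[0].weight.is_meta)  # True
```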
@@ -17,6 +17,7 @@ import json
 import os
 from typing import Any, Literal
 
+from ..conversion_mapping import get_model_conversion_mapping
 from ..core_model_loading import WeightRenaming, rename_source_key
 from ..utils import (
     CONFIG_NAME,
@@ -46,26 +47,6 @@ MIN_PEFT_VERSION = "0.18.0"
 logger = logging.get_logger(__name__)
 
 
-# DO NOT MODIFY, KEPT FOR BC ONLY
-VLMS = [
-    "aria",
-    "ayavision",
-    "emu3",
-    "fuyu",
-    "gotocr2",
-    "gemma3",
-    "internvl",
-    "llava",  # all llava prefixed models fall under this check
-    "mistral3",
-    "mllama",
-    "paligemma",
-    "qwen2vl",
-    "qwen2_5_vl",
-    "videollava",
-    "vipllava",
-]
-
-
 class PeftAdapterMixin:
     """
     A class containing all functions for loading and using adapters weights that are supported in PEFT library. For
@@ -103,6 +84,7 @@ class PeftAdapterMixin:
         low_cpu_mem_usage: bool = False,
         is_trainable: bool = False,
         hotswap: bool | Literal["auto"] = "auto",
+        local_files_only: bool = False,
         adapter_kwargs: dict[str, Any] | None = None,
     ) -> None:
         """
@@ -211,11 +193,10 @@
         if any(conf.peft_type != PeftType.LORA for conf in self.peft_config.values()):
             raise ValueError("Hotswapping is currently only supported for LoRA, please set `hotswap=False`.")
 
+        key_mapping = adapter_kwargs.pop("key_mapping", None) if adapter_kwargs is not None else None
+        weight_conversions = get_model_conversion_mapping(self, key_mapping=key_mapping)
         # peft only supports low_cpu_mem_usage starting from v0.13.0
         peft_load_kwargs = {}
-        key_mapping = adapter_kwargs.pop("key_mapping", None) if adapter_kwargs is not None else None
-        if key_mapping is None and any(allowed_name in self.__class__.__name__.lower() for allowed_name in VLMS):
-            key_mapping = self._checkpoint_conversion_mapping
         peft_load_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage
 
         adapter_name = adapter_name if adapter_name is not None else "default"
@@ -263,6 +244,7 @@
            adapter_config_file = find_adapter_config_file(
                peft_model_id,
                token=token,
+                local_files_only=local_files_only,
                **adapter_kwargs,
            )
 
@@ -275,13 +257,11 @@
            peft_config = PeftConfig.from_pretrained(
                peft_model_id,
                token=token,
+                local_files_only=local_files_only,
                **adapter_kwargs,
            )
            peft_config.inference_mode = not is_trainable
 
-            if peft_config.peft_type != PeftType.LORA:
-                raise ValueError("Hotswapping is currently only supported for LoRA, please set `hotswap=False`.")
-
         if not hotswap:
             # TODO: WE NEED TO APPLY OUR DYNAMIC WEIGHT CONVERSION AT SOME POINT HERE!
             # Create and add fresh new adapters into the model, unless the weights are hotswapped
@@ -291,21 +271,24 @@
         self._hf_peft_config_loaded = True
 
         if peft_model_id is not None:
+            if "local_files_only" not in adapter_kwargs:
+                adapter_kwargs["local_files_only"] = local_files_only
             adapter_state_dict = load_peft_weights(peft_model_id, token=token, device=device, **adapter_kwargs)
 
         # We need to pre-process the state dict to remove unneeded prefixes - for backward compatibility
         renamings = []
-        if
-        renamings = [entry for entry in
+        if weight_conversions:
+            renamings = [entry for entry in weight_conversions if isinstance(entry, WeightRenaming)]
         processed_adapter_state_dict = {}
         prefix = "base_model.model."
+        state_dict = self.state_dict()
         for key, value in adapter_state_dict.items():
             if key.startswith(prefix):
                 new_key = key[len(prefix) :]
             else:
                 new_key = key
 
-            new_key = rename_source_key(new_key, renamings, [])[0]
+            new_key = rename_source_key(new_key, renamings, [], self.base_model_prefix, state_dict)[0]
 
             # For hotswapping, we need the adapter name to be present in the state dict keys
             if hotswap:
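A toy walk-through of the key preprocessing above: strip the PEFT `base_model.model.` prefix, then let `rename_source_key` apply any `WeightRenaming` entries (fake keys, no real tensors):

```python
prefix = "base_model.model."
adapter_state_dict = {
    "base_model.model.model.layers.0.q_proj.lora_A.weight": "A",
    "model.layers.0.q_proj.lora_B.weight": "B",
}

processed = {}
for key, value in adapter_state_dict.items():
    new_key = key[len(prefix):] if key.startswith(prefix) else key
    # rename_source_key(new_key, renamings, ...) would further remap new_key here
    processed[new_key] = value

print(sorted(processed))
# ['model.layers.0.q_proj.lora_A.weight', 'model.layers.0.q_proj.lora_B.weight']
```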
@@ -12,21 +12,57 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ..utils import is_optimum_quanto_available, is_torch_available, logging
+from ..core_model_loading import ConversionOps
+from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
+from ..utils import is_torch_available, logging
 
 
 if is_torch_available():
     import torch
+    import torch.nn as nn
 
 logger = logging.get_logger(__name__)
 
 
+class QuantoQuantize(ConversionOps):
+    def __init__(self, hf_quantizer):
+        self.hf_quantizer = hf_quantizer
+
+    def convert(
+        self,
+        input_dict: dict[str, list[torch.Tensor]],
+        model: torch.nn.Module | None = None,
+        full_layer_name: str | None = None,
+        missing_keys: list[str] | None = None,
+        **kwargs,
+    ) -> dict[str, torch.Tensor]:
+        _, value = tuple(input_dict.items())[0]
+        value = value[0]
+
+        from ..modeling_utils import _load_parameter_into_model
+
+        _load_parameter_into_model(model, full_layer_name, value)
+        module, _ = get_module_from_name(model, full_layer_name)
+        # Need to set those to a specific value, otherwise they will remain on the meta device ...
+        module.input_scale = torch.ones(module.input_scale.shape)
+        module.output_scale = torch.ones(module.output_scale.shape)
+        # quantize
+        module.freeze()
+        module.weight.requires_grad = False
+        module._is_hf_initialized = True
+
+        # need to discard some missing keys, since we already updated the module in freeze
+        module_name = full_layer_name.rsplit(".", 1)[0]
+        missing_keys.discard(f"{module_name}.weight")
+        missing_keys.discard(f"{module_name}.input_scale")
+        missing_keys.discard(f"{module_name}.output_scale")
+        return {}
+
+
 def replace_with_quanto_layers(
     model,
     quantization_config=None,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    has_been_replaced=False,
+    modules_to_not_convert: list[str] | None = None,
 ):
     """
     Public method that recursively replaces the Linear layers of the given model with Quanto quantized layers.
@@ -35,64 +71,49 @@ def replace_with_quanto_layers(
     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
-        quantization_config (`
+        quantization_config (`QuantoConfig`, defaults to `None`):
             The quantization config object that contains the quantization parameters.
         modules_to_not_convert (`list`, *optional*, defaults to `None`):
             A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
             converted.
-        current_key_name (`list`, *optional*, defaults to `None`):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*, defaults to `None`):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
-    from accelerate import init_empty_weights
-
-    if is_optimum_quanto_available():
-        from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
+    from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
 
     w_mapping = {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}
    a_mapping = {None: None, "float8": qfloat8, "int8": qint8}
 
- [... 35 deleted lines whose content was not captured in this diff view ...]
-        modules_to_not_convert=modules_to_not_convert,
-        current_key_name=current_key_name,
-        has_been_replaced=has_been_replaced,
-    )
-    # Remove the last key for recursion
-    current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with torch.device("meta"):
+            new_module = None
+            if isinstance(module, nn.Linear):
+                new_module = QLinear(
+                    in_features=module.in_features,
+                    out_features=module.out_features,
+                    bias=module.bias is not None,
+                    dtype=module.weight.dtype,
+                    weights=w_mapping[quantization_config.weights],
+                    activations=a_mapping[quantization_config.activations],
+                )
+            elif isinstance(module, torch.nn.LayerNorm) and quantization_config.activations is not None:
+                new_module = QLayerNorm(
+                    module.normalized_shape,
+                    module.eps,
+                    module.elementwise_affine,
+                    module.bias is not None,
+                    activations=a_mapping[quantization_config.activations],
+                )
+        if new_module is not None:
+            has_been_replaced = True
+            model.set_submodule(module_name, new_module)
+
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using quanto but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
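For context, a rough sketch of driving this entry point directly (normally it runs inside the quantizer during `from_pretrained`; requires `optimum-quanto`, and the replaced layers stay on the meta device until real weights are loaded):

```python
import torch.nn as nn
from transformers import QuantoConfig
from transformers.integrations.quanto import replace_with_quanto_layers

model = nn.Sequential(nn.Linear(16, 16), nn.LayerNorm(16), nn.Linear(16, 4))
config = QuantoConfig(weights="int8", activations=None)  # weights: "float8"/"int8"/"int4"/"int2"

# Skip the last linear, analogous to keeping `lm_head` unquantized in a real model
model = replace_with_quanto_layers(model, quantization_config=config, modules_to_not_convert=["2"])
print(type(model[0]).__name__)  # QLinear
```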