transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/quantizers/quantizer_torchao.py

@@ -13,14 +13,12 @@
 # limitations under the License.
 import importlib
 import re
-import types
-from collections import defaultdict
 from typing import TYPE_CHECKING
 
 from packaging import version
 
 from .base import HfQuantizer
-from .quantizers_utils import get_module_from_name
+from .quantizers_utils import get_module_from_name, should_convert_module
 
 
 if TYPE_CHECKING:
@@ -37,17 +35,12 @@ if is_torch_available():
 
 if is_torch_available():
     import torch
-    import torch.nn as nn
 
 if is_torchao_available():
-
-
-    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.14.0"):
+    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.15.0"):
         from torchao.prototype.safetensors.safetensors_support import (
             flatten_tensor_state_dict,
-            unflatten_tensor_state_dict,
         )
-        from torchao.prototype.safetensors.safetensors_utils import is_metadata_torchao
 
 
 logger = logging.get_logger(__name__)
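The hunk above raises the torchao floor for safetensors support from 0.14.0 to 0.15.0 and drops the now-unused unflatten and metadata imports. As a minimal standalone sketch of the same gating pattern (only the 0.15.0 threshold comes from this diff; the helper name is illustrative):

import importlib.metadata

from packaging import version


def torchao_has_safetensors_support() -> bool:
    # rc2 raises the minimum torchao version for safetensors
    # flattening from 0.14.0 to 0.15.0 (see the hunk above).
    try:
        installed = version.parse(importlib.metadata.version("torchao"))
    except importlib.metadata.PackageNotFoundError:
        return False
    return installed >= version.parse("0.15.0")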
@@ -88,11 +81,6 @@ def _linear_extra_repr(self):
 
 
 if is_torchao_available():
-    SUPPORTED_SAFE_SERIALIZATION_CONFIGS = [
-        torchao.quantization.Float8WeightOnlyConfig,
-        torchao.quantization.Float8DynamicActivationFloat8WeightConfig,
-    ]
-
    TORCHAO_VERSION = version.parse(importlib.metadata.version("torchao"))
 
 
@@ -101,26 +89,24 @@ class TorchAoHfQuantizer(HfQuantizer):
     Quantizer for torchao: https://github.com/pytorch/ao/
     """
 
-    requires_parameters_quantization = True
     requires_calibration = False
-    required_packages = ["torchao"]
 
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)
 
-
-
-
-
-
-
-
-
-
+        self.quantized_param_size = None
+        quant_type = self.quantization_config.quant_type
+        if isinstance(quant_type, str):
+            map_to_param_size = {
+                "int4_weight_only": 0.5,
+                "int8_weight_only": 1,
+                "int8_dynamic_activation_int8_weight": 1,
+            }
+            if quant_type in map_to_param_size:
+                self.quantized_param_size = map_to_param_size[quant_type]
         else:
-
-
-            self.full_ao_keys = self.weight_ao_keys + ["_data"]
+            size_digit = fuzzy_match_size(quant_type.__class__.__name__)
+            self.quantized_param_size = 0.5 if size_digit == "4" else 1
 
     def validate_environment(self, *args, **kwargs):
         if not is_torchao_available():
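The rewritten __init__ replaces the old per-key bookkeeping with a single quantized_param_size: an estimated element size in bytes (0.5 for int4 variants, 1 for the int8 ones). A back-of-envelope use of that mapping, as a hedged sketch (the helper below is illustrative, not part of transformers):

BYTES_PER_PARAM = {
    "int4_weight_only": 0.5,  # two 4-bit weights packed per byte
    "int8_weight_only": 1,
    "int8_dynamic_activation_int8_weight": 1,
}


def estimated_weight_bytes(num_params: int, quant_type: str) -> float:
    # Mirrors the element-size estimate stored in __init__ above;
    # ignores quantization overhead such as scales and zero points.
    return num_params * BYTES_PER_PARAM[quant_type]


# e.g. 8B parameters under int4 weight-only: ~3.7 GiB of packed weights
print(estimated_weight_bytes(8_000_000_000, "int4_weight_only") / 2**30)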
@@ -148,84 +134,39 @@ class TorchAoHfQuantizer(HfQuantizer):
|
|
|
148
134
|
|
|
149
135
|
def update_dtype(self, dtype):
|
|
150
136
|
if self.quantization_config.quant_type == "int4_weight_only":
|
|
151
|
-
if dtype
|
|
137
|
+
if dtype != torch.bfloat16:
|
|
152
138
|
logger.warning_once(
|
|
153
|
-
f"Setting dtype to {dtype} for int4_weight_only quantization, but only bfloat16 is supported right now.
|
|
154
|
-
)
|
|
155
|
-
if dtype is None:
|
|
156
|
-
logger.warning_once(
|
|
157
|
-
"Setting dtype to torch.bfloat16 for int4_weight_only quantization since only bfloat16 is supported right now. Please set dtype=torch.bfloat16 to remove this warning."
|
|
139
|
+
f"Setting dtype to {dtype} for int4_weight_only quantization, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16."
|
|
158
140
|
)
|
|
159
141
|
dtype = torch.bfloat16
|
|
160
|
-
if self.quantization_config.quant_type == "int8_dynamic_activation_int8_weight":
|
|
161
|
-
if dtype is None:
|
|
162
|
-
logger.info(
|
|
163
|
-
"Setting dtype to torch.float32 for int8_dynamic_activation_int8_weight quantization as no dtype was specified in from_pretrained"
|
|
164
|
-
)
|
|
165
|
-
# we need to set the dtype, otherwise we have dtype mismatch when performing the quantized linear op
|
|
166
|
-
dtype = torch.float32
|
|
167
142
|
return dtype
 
-    def get_state_dict_and_metadata(self, model
+    def get_state_dict_and_metadata(self, model):
         """
-
-        the safetensors format.
+        We flatten the state dict of tensor subclasses so that it is compatible with the safetensors format.
         """
-        if
-
-            return flatten_tensor_state_dict(model.state_dict())
-        else:
-            raise RuntimeError(
-                f"In order to use safetensors with torchao, please use torchao version >= 0.14.0. Current version: {TORCHAO_VERSION}"
-            )
-        else:
-            return None, {}
-
-    def adjust_target_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        from accelerate.utils import CustomDtype
-
-        # Import AOBaseConfig directly since we know we have the right version
-        if self.quantization_config._get_ao_version() > version.Version("0.9.0"):
-            from torchao.core.config import AOBaseConfig
-
-            quant_type = self.quantization_config.quant_type
-            if isinstance(quant_type, AOBaseConfig):
-                # Extract size digit using fuzzy match on the class name
-                config_name = quant_type.__class__.__name__
-                size_digit = fuzzy_match_size(config_name)
-
-                # Map the extracted digit to appropriate dtype
-                if size_digit == "4":
-                    return CustomDtype.INT4
-                else:
-                    # Default to int8
-                    return torch.int8
-
-        # Original mapping for non-AOBaseConfig types
-        map_to_target_dtype = {
-            "int4_weight_only": CustomDtype.INT4,
-            "int8_weight_only": torch.int8,
-            "int8_dynamic_activation_int8_weight": torch.int8,
-            "autoquant": None,
-        }
-        return map_to_target_dtype[self.quantization_config.quant_type]
+        if TORCHAO_VERSION >= version.parse("0.15.0"):
+            return flatten_tensor_state_dict(model.state_dict())
         else:
-            raise
-            "
-            " the appropriate device map, you should upgrade your `accelerate` library with "
-            "`pip install --upgrade accelerate`"
+            raise RuntimeError(
+                f"In order to use safetensors with torchao, please use torchao version >= 0.15.0. Current version: {TORCHAO_VERSION}"
             )
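
get_state_dict_and_metadata now gates safetensors support on torchao >= 0.15.0 instead of 0.14.0 and drops the unsafe-serialization fallback. The idea behind flatten_tensor_state_dict is that safetensors can only store plain tensors, so a quantized tensor subclass must be split into its raw components plus metadata. A rough sketch of the shape of that transformation (hypothetical helper, not the torchao implementation):

    import torch

    def flatten_quantized_weight(prefix: str, qdata: torch.Tensor, scale: torch.Tensor) -> dict[str, torch.Tensor]:
        # One subclass tensor becomes several plain tensors whose names mirror
        # the "_weight_qdata" / "_weight_scale" source patterns used by the
        # WeightConverter later in this diff.
        return {f"{prefix}._weight_qdata": qdata, f"{prefix}._weight_scale": scale}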
 
+    def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
+        "Return the element size (in bytes) for `param_name`."
+        if self.param_needs_quantization(model, param_name) and self.quantized_param_size is not None:
+            return self.quantized_param_size
+
+        return super().param_element_size(model, param_name, param)
+
 
     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
         # need more space for the quantization parameters (e.g. scale). Tested with int4 wo and group size = 128
         max_memory = {key: val * 0.9 for key, val in max_memory.items()}
         return max_memory
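
param_element_size and adjust_max_memory both feed device-map planning: the first reports how many bytes a parameter will really occupy once quantized, the second keeps 10% headroom for quantization parameters such as scales. A small back-of-the-envelope illustration (0.5 bytes per element is an assumed int4 packed size, not a value from this diff):

    import torch

    weight = torch.empty(4096, 4096, dtype=torch.bfloat16)
    bf16_bytes = weight.numel() * weight.element_size()  # 33_554_432 (32 MiB)
    int4_bytes = weight.numel() * 0.5                    # 8_388_608.0 (8 MiB, hypothetical)
    print(bf16_bytes, int4_bytes)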
 
-    def _process_model_before_weight_loading(
-        self, model: "PreTrainedModel", keep_in_fp32_modules: list[str] | None = None, **kwargs
-    ):
+    def _process_model_before_weight_loading(self, model: "PreTrainedModel", checkpoint_files=None, **kwargs):
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert,
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
         )
         if self.quantization_config.include_input_output_embeddings:
             input_emb = model.get_input_embeddings()
@@ -235,22 +176,17 @@ class TorchAoHfQuantizer(HfQuantizer):
             self.modules_to_not_convert = [
                 x for x in self.modules_to_not_convert if x not in input_emb_names + output_emb_names
             ]
-
-
-
-        return [k for k in unexpected_keys if not any(k.endswith(x) for x in self.full_ao_keys)]
+        if checkpoint_files is not None:
+            # Torchao needs access to all metadata later
+            self.set_metadata(checkpoint_files)
 
     def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
-        if self.pre_quantized:
-            return False
         if self.quantization_config.quant_type == "autoquant":
             return False
 
         # check if the param_name is not in self.modules_to_not_convert
-        if
+        if not should_convert_module(param_name, self.modules_to_not_convert):
             return False
-        elif any(param_name.endswith(f":{x}") for x in self.full_ao_keys):
-            return True
 
         # we only quantize the weight of nn.Linear and nn.Embedding
         module, tensor_name = get_module_from_name(model, param_name)
@@ -276,164 +212,6 @@ class TorchAoHfQuantizer(HfQuantizer):
 
         return isinstance(module, tuple(_QUANTIZABLE)) and tensor_name == "weight"
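
param_needs_quantization now delegates the skip-list check to the shared should_convert_module helper (added at the bottom of this diff) and only quantizes the "weight" of quantizable module types. A toy check mirroring the final predicate, assuming _QUANTIZABLE covers nn.Linear and nn.Embedding as the comment suggests:

    import torch.nn as nn

    model = nn.Sequential(nn.Linear(8, 8), nn.LayerNorm(8))
    print(isinstance(model[0], (nn.Linear, nn.Embedding)))  # True  -> weight is quantized
    print(isinstance(model[1], (nn.Linear, nn.Embedding)))  # False -> skipped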
 
-    def create_quantized_param(
-        self,
-        model: "PreTrainedModel",
-        param_value: "torch.Tensor",
-        param_name: str,
-        target_device: "torch.device",
-        **kwargs,
-    ):
-        """
-        Each nn.Linear layer that needs to be quantized is processed here.
-        First, we set the value the weight tensor, then we move it to the target device. Finally, we quantize the module.
-        """
-        from torchao.quantization import quantize_
-
-        full_name = param_name
-        # Those are the pre quantized weights
-        if ":" in param_name:
-            param_name = param_name.rsplit(":", 1)[0]
-        module, tensor_name = get_module_from_name(model, param_name)
-
-        if self.pre_quantized:
-            # If it's a bias, no need to do anything special (except removing the ":_data" part of the key, but was
-            # already done) - if it's unsafe-serialized (i.e. not safetensors), not need for anything either
-            is_unsafe_serialization = ":" not in full_name
-            if tensor_name == "bias" or is_unsafe_serialization:
-                module._parameters[tensor_name] = torch.nn.Parameter(
-                    param_value.to(target_device), requires_grad=param_value.requires_grad
-                )
-                return
-            # Sanity check for the new serialization format
-            elif not (TORCHAO_VERSION >= version.parse("0.14.0") and is_metadata_torchao(self.metadata)):
-                raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.14.0` installed")
-
-            # Save the states for later quantization when they are all gathered
-            if not hasattr(self, "ao_params"):
-                self.ao_params = defaultdict(dict)
-            self.ao_params[param_name].update({full_name: param_value})
-
-            # We are ready for quantization in this case (we retrieved all the needed keys)
-            if len(self.ao_params[param_name]) == len(self.weight_ao_keys):
-                new_param = unflatten_tensor_state_dict(self.ao_params[param_name], self.metadata)[param_name]
-                # Set it
-                module._parameters[tensor_name] = torch.nn.Parameter(
-                    new_param.to(target_device), requires_grad=new_param.requires_grad
-                )
-
-                # Free memory
-                del self.ao_params[param_name]
-
-                # Add repr to the module
-                if isinstance(module, nn.Linear):
-                    module.extra_repr = types.MethodType(_linear_extra_repr, module)
-        else:
-            module._parameters[tensor_name] = torch.nn.Parameter(
-                param_value, requires_grad=param_value.requires_grad
-            ).to(target_device)
-            # if we are quantizing tied parameters, to avoid tying the quantized weights
-            # the correct order to do it is
-            # 1. load the weight to model
-            # 2. run tie_weights to populate the weights
-            # 3. quantize
-            input_embed = model.get_input_embeddings()
-            if self.quantization_config.untie_embedding_weights and id(module) == id(input_embed):
-                model.tie_weights()
-                setattr(model.config.get_text_config(decoder=True), "tie_word_embeddings", False)
-
-            # handle FqnToConfig, introduced in torchao 0.15.0+
-            if self.quantization_config._get_ao_version() >= version.Version("0.15.0"):
-                from torchao.quantization import FqnToConfig
-
-                config = self.quantization_config.get_apply_tensor_subclass()
-                if isinstance(config, FqnToConfig):
-                    module_fqn, top_level_param_name = param_name.rsplit(".", 1)
-                    c = None
-                    if param_name in config.fqn_to_config:
-                        assert not module_fqn.startswith("re:"), (
-                            "param fqn should not start with`re:`, which is used for specifying regex"
-                        )
-                        c = config.module_fqn_to_config[param_name]
-                    elif module_fqn in config.fqn_to_config:
-                        assert not module_fqn.startswith("re:"), (
-                            "module fqn should not start with`re:`, which is used for specifying regex"
-                        )
-                        c = config.module_fqn_to_config[module_fqn]
-                    # regex match module and param
-                    else:
-                        for maybe_module_fqn_pattern in config.fqn_to_config:
-                            # if key doesn't start with re, it is an exact fqn key, so we don't regex match
-                            if not maybe_module_fqn_pattern.startswith("re:"):
-                                continue
-                            # see if param matches first
-                            elif re.fullmatch(maybe_module_fqn_pattern[3:], param_name):
-                                c = config.module_fqn_to_config[maybe_module_fqn_pattern]
-                                break
-                            elif re.fullmatch(maybe_module_fqn_pattern[3:], module_fqn):
-                                # we'll apply the config for first fully matched pattern
-                                c = config.module_fqn_to_config[maybe_module_fqn_pattern]
-                                break
-                        else:
-                            c = config.module_fqn_to_config.get("_default", None)
-
-                    if c is not None:
-                        if top_level_param_name == "weight":
-                            # we can apply the module config directly
-                            quantize_(module, c, (lambda x, fqn: True))
-                        else:
-                            # need to apply to custom param name
-                            custom_param_fqn_config = FqnToConfig({top_level_param_name: c})
-                            quantize_(module, custom_param_fqn_config, filter_fn=None)
-                    return
-
-            # handle ModuleFqnToConfig, introduced in torchao 0.12.0+
-            # TODO deprecate this when we deprecate ModuleFqnToConfig
-            elif self.quantization_config._get_ao_version() >= version.Version("0.12.0"):
-                from torchao.quantization import ModuleFqnToConfig
-
-                config = self.quantization_config.get_apply_tensor_subclass()
-                if isinstance(config, ModuleFqnToConfig):
-                    module_fqn, _ = param_name.rsplit(".", 1)
-                    c = None
-                    if module_fqn in config.module_fqn_to_config:
-                        assert not module_fqn.startswith("re:"), (
-                            "module fqn should not start with`re:`, which is used for specifying regex"
-                        )
-                        c = config.module_fqn_to_config[module_fqn]
-                    else:
-                        for maybe_module_fqn_pattern in config.module_fqn_to_config:
-                            if not maybe_module_fqn_pattern.startswith("re:"):
-                                continue
-                            elif re.fullmatch(maybe_module_fqn_pattern[3:], module_fqn):
-                                # we'll apply the config for first fully matched pattern
-                                c = config.module_fqn_to_config[maybe_module_fqn_pattern]
-                                break
-                        else:
-                            c = config.module_fqn_to_config.get("_default", None)
-                    if c is not None:
-                        # filter_fn: not filtering out any modules
-                        quantize_(module, c, filter_fn=lambda x, fqn: True)
-                    return
-
-            quantize_(module, self.quantization_config.get_apply_tensor_subclass())
-
-    def preprocess_model(self, model: "PreTrainedModel", config, dtype=None, checkpoint_files=None, **kwargs):
-        """
-        Setting model attributes and/or converting model before weights loading. At this point
-        the model should be initialized on the meta device so you can freely manipulate the skeleton
-        of the model in order to replace modules in-place. Make sure to override the abstract method `_process_model_before_weight_loading`.
-
-        Args:
-            model (`~transformers.PreTrainedModel`):
-                The model to quantize
-            kwargs (`dict`, *optional*):
-                The keyword arguments that are passed along `_process_model_before_weight_loading`.
-        """
-        super().preprocess_model(model, config, dtype, checkpoint_files, **kwargs)
-        # Torchao needs access to all metadata later
-        self.set_metadata(checkpoint_files)
-
     def _process_model_after_weight_loading(self, model, **kwargs):
         """No process required for torchao quantized model"""
         if self.quantization_config.quant_type == "autoquant":
@@ -450,71 +228,15 @@ class TorchAoHfQuantizer(HfQuantizer):
             return model
         return
 
-    def is_serializable(self
-
-
-            self.quantization_config.quant_type
-        ) in SUPPORTED_SAFE_SERIALIZATION_CONFIGS and TORCHAO_VERSION >= version.parse("0.14.0")
-        if not _is_torchao_serializable:
-            logger.warning(
-                f"torchao quantized model only supports safe serialization for {SUPPORTED_SAFE_SERIALIZATION_CONFIGS}, \
-                and torchao version >= 0.14.0, please set `safe_serialization` to False for \
-                {type(self.quantization_config.quant_type)} and {TORCHAO_VERSION}."
-            )
-            return _is_torchao_serializable
-
-        _is_torchao_serializable = version.parse(importlib.metadata.version("huggingface_hub")) >= version.parse(
-            "0.25.0"
-        )
-        if not _is_torchao_serializable:
-            logger.warning("torchao quantized model is only serializable after huggingface_hub >= 0.25.0 ")
-        if self.offload and self.quantization_config.modules_to_not_convert is None:
+    def is_serializable(self) -> bool:
+        _is_torchao_serializable = TORCHAO_VERSION >= version.parse("0.15.0")
+        if not TORCHAO_VERSION >= version.parse("0.15.0"):
             logger.warning(
-                "
-                "
+                "torchao quantized model only supports serialization for torchao version >= 0.15.0, please upgrade "
+                "your version to save the quantized model"
             )
-            return False
         return _is_torchao_serializable
 
-    def get_accelerator_warm_up_factor(self):
-        """
-        This factor is used in caching_allocator_warmup to determine how many bytes to pre-allocate for accelerator warmup.
-        - A factor of 2 means we pre-allocate the full memory footprint of the model.
-        - A factor of 4 means we pre-allocate half of that, and so on
-
-        However, when using TorchAO, calculating memory usage with param.numel() * param.element_size() doesn't give the correct size for quantized weights (like int4 or int8)
-        That's because TorchAO internally represents quantized tensors using subtensors and metadata, and the reported element_size() still corresponds to the dtype
-        not the actual bit-width of the quantized data.
-
-        To correct for this:
-        - Use a division factor of 8 for int4 weights
-        - Use a division factor of 4 for int8 weights
-        """
-        if self.quantization_config._get_ao_version() > version.Version("0.9.0"):
-            from torchao.core.config import AOBaseConfig
-
-            quant_type = self.quantization_config.quant_type
-            # For autoquant case, it will be treated in the string implementation below in map_to_target_dtype
-            if isinstance(quant_type, AOBaseConfig):
-                # Extract size digit using fuzzy match on the class name
-                config_name = quant_type.__class__.__name__
-                size_digit = fuzzy_match_size(config_name)
-
-                if size_digit == "4":
-                    return 8
-                else:
-                    return 4
-
-        # Original mapping for non-AOBaseConfig types
-        map_to_target_dtype = {
-            "int4_weight_only": 8,
-            "int8_weight_only": 4,
-            "int8_dynamic_activation_int8_weight": 4,
-            "autoquant": 4,
-        }
-
-        return map_to_target_dtype[self.quantization_config.quant_type]
-
     @property
     def is_trainable(self) -> bool:
         supported_quant_types_for_training = [
@@ -548,15 +270,18 @@ class TorchAoHfQuantizer(HfQuantizer):
         if self.pre_quantized:
             return [
                 WeightConverter(
-
-
-
-
-
-
+                    # TODO: incr flexibility by generalizing the source patterns to match the format of "_weight_"
+                    # note that the matching logic is greedy, so for ex, if _weight_scale is before _weight_scale_and_zero in this list, it will match _weight_scale always (this is incorrect)
+                    # thus, the order of source_patterns is intentional
+                    source_patterns=[
+                        "_weight_qdata",
+                        "_weight_scale_and_zero",
+                        "_weight_scale",
+                        "_weight_zero_point",
+                        "_weight_act_pre_scale",
+                    ],
                     target_patterns="weight",
                     operations=[TorchAoDeserialize(self)],
                 ),
-                # used for unsafe serialization
             ]
         return []
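
The comment block above is the heart of this hunk: source_patterns are tried greedily, so longer keys must precede their prefixes ("_weight_scale_and_zero" before "_weight_scale"). A toy illustration of the failure mode, assuming substring matching (this is not the transformers matcher itself):

    key = "model.layers.0.self_attn.q_proj._weight_scale_and_zero"

    def first_match(key: str, patterns: list[str]) -> str | None:
        for p in patterns:  # greedy: the first pattern found in the key wins
            if p in key:
                return p
        return None

    print(first_match(key, ["_weight_scale_and_zero", "_weight_scale"]))  # correct match
    print(first_match(key, ["_weight_scale", "_weight_scale_and_zero"]))  # wrong: "_weight_scale"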
@@ -35,11 +35,9 @@ class VptqHfQuantizer(HfQuantizer):
     """
 
     requires_calibration = True
-    required_packages = ["vptq"]
 
     def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
         super().__init__(quantization_config, **kwargs)
-        self.quantization_config = quantization_config
 
     def validate_environment(self, *args, **kwargs):
         if not is_accelerate_available():
@@ -48,49 +46,28 @@ class VptqHfQuantizer(HfQuantizer):
         if not is_vptq_available():
             raise ImportError("Using `vptq` quantization requires VPTQ>=0.0.4: `pip install -U vptq`")
 
-
-
-        if torch.cuda.is_available():
-            dtype = torch.float16
-            logger.info(
-                "CUDA available. Assuming VPTQ inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
-            )
-        else:
-            import vptq
-
-            device_availability = getattr(vptq, "device_availability", lambda device: False)
-            if device_availability("cpu") is True:
-                raise RuntimeError("No GPU found. Please wait for the next release of VPTQ to use CPU inference")
-            dtype = torch.float32
-            logger.info("No GPU found. Assuming VPTQ inference on CPU and loading the model in `torch.float32`.")
-        return dtype
+        if not torch.cuda.is_available():
+            raise RuntimeError("GPU is required to run VTPQ quantized model.")
 
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
-        keep_in_fp32_modules: list[str] | None = None,
         **kwargs,
     ):
-        """
-        we don't have param like modules_to_not_convert to indicate which layers should not be quantized
-        because `quantization_config` include the layers that should be quantized
-        """
         from ..integrations import replace_with_vptq_linear
 
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert,
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
         )
-
         replace_with_vptq_linear(
             model,
             quantization_config=self.quantization_config,
             modules_to_not_convert=self.modules_to_not_convert,
         )
-        model.config.quantization_config = self.quantization_config
 
     @property
     def is_trainable(self) -> bool:
         return False
 
-    def is_serializable(self
+    def is_serializable(self):
         return True
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
 from typing import Any
 
 
@@ -19,3 +20,22 @@ def get_module_from_name(module, tensor_name: str) -> tuple[Any, str]:
     module_name, tensor_name = tensor_name.rsplit(".", 1)
     module = module.get_submodule(module_name)
     return module, tensor_name
+
+
+def should_convert_module(full_name, patterns: list[str] | None = None):
+    if patterns is None:
+        return True
+
+    # We should avoid converting in the following situations:
+    # 1. The pattern appears as a prefix followed by a dot in `full_name`
+    #    (e.g., "model.decoder.layer.11." matches "model.decoder.layer.11.attn.weight").
+    # 2. The pattern matches `full_name` exactly or via regex
+    #    (e.g., "lm_head" matches "lm_head"; "model.decoder.layer.*" matches "model.decoder.layer.11.attn.weight").
+    # 3. `full_name` ends with the pattern
+    #    (e.g., "fc1" matches "model.decoder.layers.23.fc1").
+
+    should_not_convert = any(
+        re.match(f"{key}\\.", full_name) or re.match(f"{key}", full_name) or full_name.endswith(key)
+        for key in patterns
+    )
+    return not should_not_convert
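
Quick sanity checks for the matching rules documented above, assuming the helper is importable from transformers.quantizers.quantizers_utils:

    from transformers.quantizers.quantizers_utils import should_convert_module

    assert should_convert_module("model.decoder.layer.11.attn.weight", None)                            # no patterns
    assert not should_convert_module("model.decoder.layer.11.attn.weight", ["model.decoder.layer.11"])  # prefix + dot
    assert not should_convert_module("lm_head", ["lm_head"])                                            # exact match
    assert not should_convert_module("model.decoder.layers.23.fc1", ["fc1"])                            # suffix match
    assert should_convert_module("model.encoder.layer.0.fc2", ["lm_head"])                              # unrelated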