transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/quantizers/base.py
CHANGED
|
@@ -12,17 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
from abc import ABC, abstractmethod
|
|
15
|
-
from copy import deepcopy
|
|
16
15
|
from typing import TYPE_CHECKING, Any
|
|
17
16
|
|
|
18
|
-
from ..utils import
|
|
17
|
+
from ..utils import is_torch_available, logging
|
|
19
18
|
from ..utils.quantization_config import QuantizationConfigMixin, QuantizationMethod
|
|
20
19
|
from .quantizers_utils import get_module_from_name
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
if is_accelerate_available():
|
|
24
|
-
from accelerate.utils import find_tied_parameters
|
|
25
|
-
|
|
26
22
|
if TYPE_CHECKING:
|
|
27
23
|
from ..modeling_utils import PreTrainedModel
|
|
28
24
|
|
|
@@ -35,60 +31,31 @@ else:
|
|
|
35
31
|
logger = logging.get_logger(__file__)
|
|
36
32
|
|
|
37
33
|
|
|
38
|
-
def
|
|
39
|
-
# not very nice in a recursive function but it avoids a circular import
|
|
40
|
-
from ..modeling_utils import PreTrainedModel
|
|
41
|
-
|
|
42
|
-
for child in module.children():
|
|
43
|
-
if isinstance(child, PreTrainedModel):
|
|
44
|
-
child.config._pre_quantization_dtype = original_dtype
|
|
45
|
-
_assign_original_dtype(child, original_dtype)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def get_keys_to_not_convert(model):
|
|
34
|
+
def get_keys_to_not_convert(model) -> list:
|
|
49
35
|
r"""
|
|
50
|
-
|
|
51
|
-
we may want to keep the lm_head in full precision for numerical stability reasons.
|
|
52
|
-
to keep the tied weights of the model. The function will return a list of the keys of the modules to not convert in
|
|
53
|
-
int8.
|
|
54
|
-
|
|
55
|
-
Parameters:
|
|
56
|
-
model (`torch.nn.Module`):
|
|
57
|
-
Input model
|
|
36
|
+
Function to automatically detect keys to not convert for usage like quantization. For example for CausalLM modules
|
|
37
|
+
we may want to keep the lm_head in full precision for numerical stability reasons.
|
|
58
38
|
"""
|
|
59
|
-
#
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
intersection = set(list_last_module) - set(tied_keys)
|
|
80
|
-
list_untouched = list(set(tied_keys)) + list(intersection)
|
|
81
|
-
|
|
82
|
-
# remove ".weight" from the keys
|
|
83
|
-
names_to_remove = [".weight", ".bias"]
|
|
84
|
-
filtered_module_names = []
|
|
85
|
-
for name in list_untouched:
|
|
86
|
-
for name_to_remove in names_to_remove:
|
|
87
|
-
if name_to_remove in name:
|
|
88
|
-
name = name.replace(name_to_remove, "")
|
|
89
|
-
filtered_module_names.append(name)
|
|
90
|
-
|
|
91
|
-
return filtered_module_names
|
|
39
|
+
# remove tied weights
|
|
40
|
+
tied_keys = set()
|
|
41
|
+
if len(model.all_tied_weights_keys) > 0:
|
|
42
|
+
tied_keys = set(model.all_tied_weights_keys.values()) | set(model.all_tied_weights_keys.keys())
|
|
43
|
+
|
|
44
|
+
# remove last module
|
|
45
|
+
last_module_key = {list(model.named_parameters())[-1][0]}
|
|
46
|
+
|
|
47
|
+
# remove output emb
|
|
48
|
+
output_emb_module = model.get_output_embeddings()
|
|
49
|
+
output_emb_keys = {
|
|
50
|
+
name
|
|
51
|
+
for name, module in model.named_modules()
|
|
52
|
+
if output_emb_module is not None and id(module) == id(output_emb_module)
|
|
53
|
+
}
|
|
54
|
+
modules_to_not_convert = tied_keys | last_module_key | output_emb_keys
|
|
55
|
+
|
|
56
|
+
modules_to_not_convert = list({k.removesuffix(".weight") for k in modules_to_not_convert})
|
|
57
|
+
|
|
58
|
+
return list(modules_to_not_convert)
|
|
92
59
|
|
|
93
60
|
|
|
94
61
|
class HfQuantizer(ABC):
|
|
@@ -100,26 +67,14 @@ class HfQuantizer(ABC):
|
|
|
100
67
|
Attributes
|
|
101
68
|
quantization_config (`transformers.utils.quantization_config.QuantizationConfigMixin`):
|
|
102
69
|
The quantization config that defines the quantization parameters of your model that you want to quantize.
|
|
103
|
-
modules_to_not_convert (`list[str]`, *optional*):
|
|
104
|
-
The list of module names to not convert when quantizing the model.
|
|
105
|
-
required_packages (`list[str]`, *optional*):
|
|
106
|
-
The list of required pip packages to install prior to using the quantizer
|
|
107
70
|
requires_calibration (`bool`):
|
|
108
71
|
Whether the quantization method requires to calibrate the model before using it.
|
|
109
|
-
requires_parameters_quantization (`bool`):
|
|
110
|
-
Whether the quantization method requires to create a new Parameter. For example, for bitsandbytes, it is
|
|
111
|
-
required to create a new xxxParameter in order to properly quantize the model.
|
|
112
72
|
"""
|
|
113
73
|
|
|
114
74
|
requires_calibration = False
|
|
115
|
-
required_packages = None
|
|
116
|
-
requires_parameters_quantization = False
|
|
117
75
|
|
|
118
76
|
def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
|
|
119
77
|
self.quantization_config = quantization_config
|
|
120
|
-
|
|
121
|
-
# -- Handle extra kwargs below --
|
|
122
|
-
self.modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
|
|
123
78
|
self.pre_quantized = kwargs.pop("pre_quantized", True)
|
|
124
79
|
|
|
125
80
|
if not self.pre_quantized and self.requires_calibration:
|
|
@@ -153,82 +108,19 @@ class HfQuantizer(ABC):
|
|
|
153
108
|
"""
|
|
154
109
|
return device_map
|
|
155
110
|
|
|
156
|
-
def adjust_target_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
|
|
157
|
-
"""
|
|
158
|
-
Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained`
|
|
159
|
-
to compute the device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype`
|
|
160
|
-
to `torch.int8` and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`.
|
|
161
|
-
|
|
162
|
-
Args:
|
|
163
|
-
dtype (`torch.dtype`, *optional*):
|
|
164
|
-
The dtype that is used to compute the device_map.
|
|
165
|
-
"""
|
|
166
|
-
return dtype
|
|
167
|
-
|
|
168
111
|
def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
|
|
169
|
-
"Return the element size (in bytes) for `param_name`."
|
|
170
|
-
|
|
171
|
-
if self.param_needs_quantization(model, param_name):
|
|
172
|
-
from accelerate.utils import CustomDtype
|
|
173
|
-
|
|
174
|
-
mapping = {
|
|
175
|
-
torch.int8: 1,
|
|
176
|
-
CustomDtype.INT4: 0.5,
|
|
177
|
-
CustomDtype.FP8: 1,
|
|
178
|
-
CustomDtype.INT2: 0.25,
|
|
179
|
-
}
|
|
180
|
-
# The value passed is actually not used when the method is overridden
|
|
181
|
-
if (custom_dtype := self.adjust_target_dtype(torch.float16)) in mapping:
|
|
182
|
-
return mapping[custom_dtype]
|
|
183
112
|
return param.element_size()
|
|
184
113
|
|
|
185
|
-
def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]:
|
|
186
|
-
"""
|
|
187
|
-
Override this method if you want to adjust the `missing_keys`.
|
|
188
|
-
|
|
189
|
-
Args:
|
|
190
|
-
missing_keys (`list[str]`, *optional*):
|
|
191
|
-
The list of missing keys in the checkpoint compared to the state dict of the model
|
|
192
|
-
"""
|
|
193
|
-
return missing_keys
|
|
194
|
-
|
|
195
|
-
def update_expected_keys(self, model, expected_keys: list[str], loaded_keys: list[str]) -> list[str]:
|
|
196
|
-
"""
|
|
197
|
-
Override this method if you want to adjust the `update_expected_keys`.
|
|
198
|
-
|
|
199
|
-
Args:
|
|
200
|
-
expected_keys (`list[str]`, *optional*):
|
|
201
|
-
The list of the expected keys in the initialized model.
|
|
202
|
-
loaded_keys (`list[str]`, *optional*):
|
|
203
|
-
The list of the loaded keys in the checkpoint.
|
|
204
|
-
"""
|
|
205
|
-
return expected_keys
|
|
206
|
-
|
|
207
|
-
def update_unexpected_keys(self, model, unexpected_keys: list[str]) -> list[str]:
|
|
208
|
-
return unexpected_keys
|
|
209
|
-
|
|
210
114
|
def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
|
|
211
115
|
"""adjust max_memory argument for infer_auto_device_map() if extra memory is needed for quantization"""
|
|
212
116
|
return max_memory
|
|
213
117
|
|
|
214
118
|
def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
|
|
215
119
|
"""
|
|
216
|
-
Check whether a given param needs
|
|
120
|
+
Check whether a given param needs to be quantized.
|
|
217
121
|
"""
|
|
218
122
|
return False
|
|
219
123
|
|
|
220
|
-
def create_quantized_param(self, *args, **kwargs):
|
|
221
|
-
"""
|
|
222
|
-
Take needed components from state_dict (those from which `param_needs_quantization` is True) and create
|
|
223
|
-
quantized param.
|
|
224
|
-
It usually also load the new param directly in the `model`.
|
|
225
|
-
Note: only applicable if requires_parameters_quantization == True.
|
|
226
|
-
"""
|
|
227
|
-
if not self.requires_parameters_quantization:
|
|
228
|
-
raise AttributeError(
|
|
229
|
-
f"`.create_quantized_param()` method is not supported by quantizer class {self.__class__.__name__}."
|
|
230
|
-
)
|
|
231
|
-
|
|
232
124
|
def validate_environment(self, *args, **kwargs):
|
|
233
125
|
"""
|
|
234
126
|
This method is used to potentially check for potential conflicts with arguments that are
|
|
@@ -248,7 +140,7 @@ class HfQuantizer(ABC):
|
|
|
248
140
|
def _process_model_before_weight_loading(self, model, **kwargs):
|
|
249
141
|
return model
|
|
250
142
|
|
|
251
|
-
def preprocess_model(self, model: "PreTrainedModel",
|
|
143
|
+
def preprocess_model(self, model: "PreTrainedModel", dtype=None, **kwargs):
|
|
252
144
|
"""
|
|
253
145
|
Setting model attributes and/or converting model before weights loading. At this point
|
|
254
146
|
the model should be initialized on the meta device so you can freely manipulate the skeleton
|
|
@@ -266,14 +158,6 @@ class HfQuantizer(ABC):
|
|
|
266
158
|
self._convert_model_for_quantization(model)
|
|
267
159
|
self._process_model_before_weight_loading(model, **kwargs)
|
|
268
160
|
|
|
269
|
-
# We store the original dtype for quantized models as we cannot easily retrieve it
|
|
270
|
-
# once the weights have been quantized
|
|
271
|
-
# Note that once you have loaded a quantized model, you can't change its dtype so this will
|
|
272
|
-
# remain a single source of truth
|
|
273
|
-
original_dtype = dtype if dtype is not None else torch.get_default_dtype()
|
|
274
|
-
config._pre_quantization_dtype = original_dtype
|
|
275
|
-
_assign_original_dtype(model, original_dtype)
|
|
276
|
-
|
|
277
161
|
def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
|
278
162
|
return model
|
|
279
163
|
|
|
@@ -288,6 +172,11 @@ class HfQuantizer(ABC):
|
|
|
288
172
|
kwargs (`dict`, *optional*):
|
|
289
173
|
The keyword arguments that are passed along `_process_model_after_weight_loading`.
|
|
290
174
|
"""
|
|
175
|
+
model.config.quantization_config = self.quantization_config
|
|
176
|
+
|
|
177
|
+
if self.pre_quantized and getattr(self.quantization_config, "dequantize", False):
|
|
178
|
+
self.remove_quantization_config(model)
|
|
179
|
+
|
|
291
180
|
return self._process_model_after_weight_loading(model, **kwargs)
|
|
292
181
|
|
|
293
182
|
def remove_quantization_config(self, model):
|
|
@@ -298,40 +187,25 @@ class HfQuantizer(ABC):
|
|
|
298
187
|
del model.hf_quantizer
|
|
299
188
|
if hasattr(model.config, "quantization_config"):
|
|
300
189
|
del model.config.quantization_config
|
|
301
|
-
if hasattr(model.config, "_pre_quantization_dtype"):
|
|
302
|
-
del model.config._pre_quantization_dtype
|
|
303
190
|
if hasattr(model, "quantization_method"):
|
|
304
191
|
del model.quantization_method
|
|
305
192
|
model.is_quantized = False
|
|
306
193
|
|
|
307
|
-
def dequantize(self, model):
|
|
194
|
+
def dequantize(self, model, dtype=None):
|
|
308
195
|
"""
|
|
309
196
|
Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance.
|
|
310
197
|
Note not all quantization schemes support this.
|
|
311
198
|
"""
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
del model.quantization_method
|
|
319
|
-
model.is_quantized = False
|
|
199
|
+
if dtype is None:
|
|
200
|
+
# using the same dtype we used to load the model. If we don't do that, we might have issues with modules we didn't quantize.
|
|
201
|
+
# or we need to upcast everything to the same dtype
|
|
202
|
+
dtype = model.config.dtype
|
|
203
|
+
model = self._dequantize(model, dtype=dtype)
|
|
204
|
+
self.remove_quantization_config(model)
|
|
320
205
|
|
|
321
206
|
return model
|
|
322
207
|
|
|
323
|
-
def
|
|
324
|
-
"""
|
|
325
|
-
The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up accelerator.
|
|
326
|
-
A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
|
|
327
|
-
we allocate half the memory of the weights residing in the empty model, etc...
|
|
328
|
-
"""
|
|
329
|
-
# By default we return 4, i.e. half the model size (this corresponds to the case where the model is not
|
|
330
|
-
# really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual
|
|
331
|
-
# weight loading)
|
|
332
|
-
return 4
|
|
333
|
-
|
|
334
|
-
def _dequantize(self, model):
|
|
208
|
+
def _dequantize(self, model, dtype=None):
|
|
335
209
|
raise NotImplementedError(
|
|
336
210
|
f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
|
|
337
211
|
)
|
|
@@ -360,6 +234,8 @@ class HfQuantizer(ABC):
|
|
|
360
234
|
if keep_in_fp32_modules is not None:
|
|
361
235
|
modules_to_not_convert.extend(keep_in_fp32_modules)
|
|
362
236
|
|
|
237
|
+
modules_to_not_convert = list(set(modules_to_not_convert))
|
|
238
|
+
|
|
363
239
|
return modules_to_not_convert
|
|
364
240
|
|
|
365
241
|
@property
|
|
@@ -372,31 +248,25 @@ class HfQuantizer(ABC):
|
|
|
372
248
|
"""Flag indicating whether the quantized model can be compiled"""
|
|
373
249
|
return False
|
|
374
250
|
|
|
375
|
-
def get_state_dict_and_metadata(self, model
|
|
251
|
+
def get_state_dict_and_metadata(self, model):
|
|
376
252
|
"""Get state dict and metadata. Useful when we need to modify a bit the state dict due to quantization"""
|
|
377
253
|
return None, {}
|
|
378
254
|
|
|
379
|
-
def update_state_dict_with_metadata(self, state_dict, metadata):
|
|
380
|
-
"""Update state dict with metadata. Default behaviour returns state_dict"""
|
|
381
|
-
return state_dict
|
|
382
|
-
|
|
383
255
|
@abstractmethod
|
|
384
|
-
def is_serializable(self
|
|
256
|
+
def is_serializable(self): ...
|
|
385
257
|
|
|
386
258
|
@property
|
|
387
259
|
@abstractmethod
|
|
388
260
|
def is_trainable(self): ...
|
|
389
261
|
|
|
390
262
|
def _convert_model_for_quantization(self, model):
|
|
391
|
-
from accelerate import init_empty_weights
|
|
392
|
-
|
|
393
263
|
for name, module in model.named_modules():
|
|
394
264
|
module_class_name = module.__class__.__name__
|
|
395
265
|
if module_class_name in MODULES_TO_PATCH_FOR_QUANTIZATION and (
|
|
396
266
|
self.quantization_config.quant_method
|
|
397
267
|
in MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["quantization_methods"]
|
|
398
268
|
):
|
|
399
|
-
with
|
|
269
|
+
with torch.device("meta"):
|
|
400
270
|
parent_module, name = get_module_from_name(model, name)
|
|
401
271
|
parent_module._modules[name] = MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["module_name"](
|
|
402
272
|
model.config.get_text_config()
|
|
@@ -23,13 +23,10 @@ if TYPE_CHECKING:
|
|
|
23
23
|
from ..modeling_utils import PreTrainedModel
|
|
24
24
|
|
|
25
25
|
from ..integrations import replace_with_aqlm_linear
|
|
26
|
-
from ..utils import is_accelerate_available, is_aqlm_available,
|
|
26
|
+
from ..utils import is_accelerate_available, is_aqlm_available, logging
|
|
27
27
|
from ..utils.quantization_config import QuantizationConfigMixin
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
if is_torch_available():
|
|
31
|
-
import torch
|
|
32
|
-
|
|
33
30
|
logger = logging.get_logger(__name__)
|
|
34
31
|
|
|
35
32
|
|
|
@@ -39,12 +36,9 @@ class AqlmHfQuantizer(HfQuantizer):
|
|
|
39
36
|
"""
|
|
40
37
|
|
|
41
38
|
requires_calibration = True
|
|
42
|
-
required_packages = ["aqlm"]
|
|
43
|
-
optimum_quantizer = None
|
|
44
39
|
|
|
45
40
|
def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
|
|
46
41
|
super().__init__(quantization_config, **kwargs)
|
|
47
|
-
self.quantization_config = quantization_config
|
|
48
42
|
|
|
49
43
|
def validate_environment(self, *args, **kwargs):
|
|
50
44
|
if not is_accelerate_available():
|
|
@@ -53,20 +47,6 @@ class AqlmHfQuantizer(HfQuantizer):
|
|
|
53
47
|
if not is_aqlm_available():
|
|
54
48
|
raise ImportError("Using `aqlm` quantization requires AQLM: `pip install aqlm[gpu,cpu]`")
|
|
55
49
|
|
|
56
|
-
def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
|
|
57
|
-
if dtype is None:
|
|
58
|
-
if torch.cuda.is_available():
|
|
59
|
-
dtype = torch.float16
|
|
60
|
-
logger.info(
|
|
61
|
-
"CUDA available. Assuming AQLM inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
|
|
62
|
-
)
|
|
63
|
-
else:
|
|
64
|
-
dtype = torch.float32
|
|
65
|
-
logger.info(
|
|
66
|
-
"CUDA is unavailable. Assuming AQLM inference on CPU and loading the model in `torch.float32`. To overwrite it, set `dtype` manually."
|
|
67
|
-
)
|
|
68
|
-
return dtype
|
|
69
|
-
|
|
70
50
|
def _process_model_before_weight_loading(
|
|
71
51
|
self,
|
|
72
52
|
model: "PreTrainedModel",
|
|
@@ -77,7 +57,6 @@ class AqlmHfQuantizer(HfQuantizer):
|
|
|
77
57
|
quantization_config=self.quantization_config,
|
|
78
58
|
linear_weights_not_to_quantize=self.quantization_config.linear_weights_not_to_quantize,
|
|
79
59
|
)
|
|
80
|
-
model.config.quantization_config = self.quantization_config
|
|
81
60
|
|
|
82
61
|
@property
|
|
83
62
|
def is_trainable(self) -> bool:
|
|
@@ -90,5 +69,5 @@ class AqlmHfQuantizer(HfQuantizer):
|
|
|
90
69
|
)
|
|
91
70
|
return False
|
|
92
71
|
|
|
93
|
-
def is_serializable(self
|
|
72
|
+
def is_serializable(self):
|
|
94
73
|
return True
|
|
@@ -19,13 +19,10 @@ from .base import HfQuantizer
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
20
|
from ..modeling_utils import PreTrainedModel
|
|
21
21
|
|
|
22
|
-
from ..utils import is_auto_round_available,
|
|
22
|
+
from ..utils import is_auto_round_available, logging
|
|
23
23
|
from ..utils.quantization_config import QuantizationConfigMixin
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
if is_torch_available():
|
|
27
|
-
import torch
|
|
28
|
-
|
|
29
26
|
logger = logging.get_logger(__name__)
|
|
30
27
|
|
|
31
28
|
|
|
@@ -36,7 +33,6 @@ class AutoRoundQuantizer(HfQuantizer):
|
|
|
36
33
|
|
|
37
34
|
# AutoRound requires data calibration - we support only inference
|
|
38
35
|
requires_calibration = True
|
|
39
|
-
required_packages = ["auto_round"]
|
|
40
36
|
|
|
41
37
|
def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
|
|
42
38
|
super().__init__(quantization_config, **kwargs)
|
|
@@ -48,12 +44,6 @@ class AutoRoundQuantizer(HfQuantizer):
|
|
|
48
44
|
"Loading an AutoRound quantized model requires auto-round library (`pip install 'auto-round>=0.5'`)"
|
|
49
45
|
)
|
|
50
46
|
|
|
51
|
-
def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
|
|
52
|
-
if dtype is None:
|
|
53
|
-
dtype = torch.bfloat16
|
|
54
|
-
logger.info("Loading the model in `torch.bfloat16`. To overwrite it, set `dtype` manually.")
|
|
55
|
-
return dtype
|
|
56
|
-
|
|
57
47
|
def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
|
58
48
|
if model.__class__.main_input_name != "input_ids":
|
|
59
49
|
logger.warning("AutoRound offers only limited support for models that are not strictly text-based.")
|
|
@@ -76,6 +66,6 @@ class AutoRoundQuantizer(HfQuantizer):
|
|
|
76
66
|
def is_trainable(self) -> bool:
|
|
77
67
|
return False
|
|
78
68
|
|
|
79
|
-
def is_serializable(self
|
|
69
|
+
def is_serializable(self):
|
|
80
70
|
## for gptq/awq models, the quantization config will be changed
|
|
81
71
|
return True
|
|
@@ -22,8 +22,8 @@ from .base import HfQuantizer
|
|
|
22
22
|
if TYPE_CHECKING:
|
|
23
23
|
from ..modeling_utils import PreTrainedModel
|
|
24
24
|
|
|
25
|
-
from ..utils import is_accelerate_available,
|
|
26
|
-
from ..utils.quantization_config import
|
|
25
|
+
from ..utils import is_accelerate_available, is_gptqmodel_available, is_torch_available, logging
|
|
26
|
+
from ..utils.quantization_config import AwqBackend
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
if is_torch_available():
|
|
@@ -40,65 +40,20 @@ class AwqQuantizer(HfQuantizer):
|
|
|
40
40
|
# AWQ requires data calibration - we support only inference
|
|
41
41
|
requires_calibration = True
|
|
42
42
|
|
|
43
|
-
required_packages = ["awq", "accelerate"]
|
|
44
|
-
|
|
45
43
|
def __init__(self, quantization_config, **kwargs):
|
|
46
44
|
super().__init__(quantization_config, **kwargs)
|
|
47
45
|
|
|
48
|
-
def validate_environment(self,
|
|
49
|
-
if not
|
|
50
|
-
raise ImportError(
|
|
46
|
+
def validate_environment(self, **kwargs):
|
|
47
|
+
if not is_gptqmodel_available():
|
|
48
|
+
raise ImportError(
|
|
49
|
+
"Loading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`"
|
|
50
|
+
)
|
|
51
51
|
|
|
52
52
|
if not is_accelerate_available():
|
|
53
53
|
raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")
|
|
54
54
|
|
|
55
|
-
if (
|
|
56
|
-
self.quantization_config.version == AWQLinearVersion.GEMM
|
|
57
|
-
and not torch.cuda.is_available()
|
|
58
|
-
and not torch.xpu.is_available()
|
|
59
|
-
):
|
|
60
|
-
logger.warning_once("No CUDA or XPU found, consider switching to the IPEX version for CPU-only execution.")
|
|
61
|
-
self.quantization_config.version = AWQLinearVersion.IPEX
|
|
62
|
-
|
|
63
|
-
if self.quantization_config.version == AWQLinearVersion.IPEX:
|
|
64
|
-
if version.parse(importlib.metadata.version("autoawq")) < version.parse("0.2.6"):
|
|
65
|
-
raise RuntimeError(
|
|
66
|
-
"To use IPEX backend, you need autoawq>0.2.6. Please install the latest version or from source."
|
|
67
|
-
)
|
|
68
|
-
if device_map is None:
|
|
69
|
-
logger.warning_once(
|
|
70
|
-
"You have loaded an AWQ model without setting device_map, please set 'cpu' or 'xpu' or 'auto'"
|
|
71
|
-
)
|
|
72
|
-
elif isinstance(device_map, dict) and "disk" in device_map.values():
|
|
73
|
-
raise ValueError(
|
|
74
|
-
"You are attempting to load an IPEX version AWQ model with a device_map that contains disk device."
|
|
75
|
-
" This is not supported. Please make sure only cpu and xpu in the device_map."
|
|
76
|
-
)
|
|
77
|
-
else:
|
|
78
|
-
if not torch.cuda.is_available() and not torch.xpu.is_available():
|
|
79
|
-
raise RuntimeError(
|
|
80
|
-
"GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
if device_map is None:
|
|
84
|
-
logger.warning_once(
|
|
85
|
-
"You have loaded an AWQ model on CPU and have a CUDA/XPU device available, make sure to set "
|
|
86
|
-
"your model on a GPU device in order to run your model."
|
|
87
|
-
)
|
|
88
|
-
elif device_map is not None:
|
|
89
|
-
if isinstance(device_map, dict) and any(
|
|
90
|
-
forbidden in device_map.values() for forbidden in ("cpu", torch.device("cpu"), "disk")
|
|
91
|
-
):
|
|
92
|
-
raise ValueError(
|
|
93
|
-
"You are attempting to load an AWQ model with a device_map that contains a CPU or disk device."
|
|
94
|
-
" This is not supported. Please remove the CPU or disk device from the device_map."
|
|
95
|
-
)
|
|
96
|
-
|
|
97
55
|
def update_dtype(self, dtype):
|
|
98
|
-
if dtype
|
|
99
|
-
dtype = torch.float16
|
|
100
|
-
logger.info("Loading the model in `torch.float16`. To overwrite it, set `dtype` manually.")
|
|
101
|
-
elif dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
|
|
56
|
+
if dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
|
|
102
57
|
logger.warning(
|
|
103
58
|
"`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`."
|
|
104
59
|
)
|
|
@@ -107,51 +62,29 @@ class AwqQuantizer(HfQuantizer):
|
|
|
107
62
|
logger.warning("We suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.")
|
|
108
63
|
return dtype
|
|
109
64
|
|
|
110
|
-
def _process_model_before_weight_loading(
|
|
111
|
-
self, model: "PreTrainedModel", keep_in_fp32_modules: list[str] | None = None, **kwargs
|
|
112
|
-
):
|
|
65
|
+
def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
|
113
66
|
from ..integrations import replace_quantization_scales, replace_with_awq_linear
|
|
114
67
|
|
|
115
68
|
self.modules_to_not_convert = self.get_modules_to_not_convert(
|
|
116
|
-
model, self.quantization_config.modules_to_not_convert,
|
|
69
|
+
model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules, add_default_skips=True
|
|
117
70
|
)
|
|
118
71
|
|
|
119
|
-
model
|
|
120
|
-
model,
|
|
72
|
+
model = replace_with_awq_linear(
|
|
73
|
+
model,
|
|
74
|
+
quantization_config=self.quantization_config,
|
|
75
|
+
modules_to_not_convert=self.modules_to_not_convert,
|
|
76
|
+
device_map=kwargs.get("device_map"),
|
|
121
77
|
)
|
|
122
78
|
|
|
123
79
|
model = replace_quantization_scales(model, model.config.model_type)
|
|
124
80
|
|
|
125
|
-
if not has_been_replaced:
|
|
126
|
-
logger.warning(
|
|
127
|
-
"You are loading an AWQ model but no linear modules were found in your model."
|
|
128
|
-
" Please double check your model architecture, or submit an issue on github if you think this is a bug."
|
|
129
|
-
)
|
|
130
|
-
|
|
131
81
|
def _process_model_after_weight_loading(self, model, **kwargs):
|
|
132
|
-
|
|
133
|
-
from ..integrations import fuse_awq_modules
|
|
82
|
+
from gptqmodel.utils.model import hf_gptqmodel_post_init
|
|
134
83
|
|
|
135
|
-
|
|
136
|
-
model._awq_is_fused = True # TODO: consider storing this flag in model.config instead
|
|
137
|
-
|
|
138
|
-
if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
|
|
139
|
-
from ..integrations import post_init_awq_exllama_modules
|
|
140
|
-
|
|
141
|
-
model = post_init_awq_exllama_modules(model, self.quantization_config.exllama_config)
|
|
142
|
-
|
|
143
|
-
if self.quantization_config.version == AWQLinearVersion.IPEX:
|
|
144
|
-
from ..integrations import post_init_awq_ipex_modules
|
|
145
|
-
|
|
146
|
-
model = post_init_awq_ipex_modules(model)
|
|
147
|
-
|
|
148
|
-
def is_serializable(self, safe_serialization=None):
|
|
149
|
-
# AWQ through auto-awq has been always serializable, except if the model is fused.
|
|
150
|
-
if self.quantization_config.do_fuse:
|
|
151
|
-
logger.warning("You cannot save an AWQ model that uses fused modules!")
|
|
152
|
-
return False
|
|
84
|
+
hf_gptqmodel_post_init(model, use_act_order=self.quantization_config.desc_act)
|
|
153
85
|
|
|
154
|
-
|
|
86
|
+
def is_serializable(self):
|
|
87
|
+
if self.quantization_config.backend in [AwqBackend.EXLLAMA_V1, AwqBackend.EXLLAMA_V2]:
|
|
155
88
|
logger.warning("You cannot save an AWQ model that uses Exllama backend!")
|
|
156
89
|
return False
|
|
157
90
|
|
|
@@ -159,6 +92,4 @@ class AwqQuantizer(HfQuantizer):
|
|
|
159
92
|
|
|
160
93
|
@property
|
|
161
94
|
def is_trainable(self):
|
|
162
|
-
|
|
163
|
-
MIN_AWQ_VERSION_FOR_PEFT = "0.2.0"
|
|
164
|
-
return version.parse(importlib.metadata.version("autoawq")) >= version.parse(MIN_AWQ_VERSION_FOR_PEFT)
|
|
95
|
+
return version.parse(importlib.metadata.version("gptqmodel")) >= version.parse("5.0.0")
|
|
@@ -37,14 +37,10 @@ class BitNetHfQuantizer(HfQuantizer):
|
|
|
37
37
|
Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
|
|
38
38
|
"""
|
|
39
39
|
|
|
40
|
-
requires_parameters_quantization = False
|
|
41
40
|
requires_calibration = True
|
|
42
41
|
|
|
43
|
-
required_packages = ["accelerate"]
|
|
44
|
-
|
|
45
42
|
def __init__(self, quantization_config, **kwargs):
|
|
46
43
|
super().__init__(quantization_config, **kwargs)
|
|
47
|
-
self.quantization_config = quantization_config
|
|
48
44
|
|
|
49
45
|
def validate_environment(self, *args, **kwargs):
|
|
50
46
|
if not is_accelerate_available():
|
|
@@ -62,8 +58,8 @@ class BitNetHfQuantizer(HfQuantizer):
|
|
|
62
58
|
"You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set "
|
|
63
59
|
"your model on a GPU device in order to run your model."
|
|
64
60
|
)
|
|
65
|
-
elif device_map
|
|
66
|
-
if
|
|
61
|
+
elif isinstance(device_map, dict):
|
|
62
|
+
if len(device_map) > 1 and "cpu" in device_map.values() or "disk" in device_map.values():
|
|
67
63
|
raise ValueError(
|
|
68
64
|
"You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
|
|
69
65
|
"This is not supported. Please remove the CPU or disk device from the device_map."
|
|
@@ -72,31 +68,25 @@ class BitNetHfQuantizer(HfQuantizer):
|
|
|
72
68
|
def _process_model_before_weight_loading(
|
|
73
69
|
self,
|
|
74
70
|
model: "PreTrainedModel",
|
|
75
|
-
keep_in_fp32_modules: list[str] | None = None,
|
|
76
71
|
**kwargs,
|
|
77
72
|
):
|
|
78
73
|
from ..integrations import replace_with_bitnet_linear
|
|
79
74
|
|
|
80
75
|
self.modules_to_not_convert = self.get_modules_to_not_convert(
|
|
81
|
-
model, self.quantization_config.modules_to_not_convert,
|
|
76
|
+
model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
|
|
82
77
|
)
|
|
83
78
|
|
|
84
79
|
model = replace_with_bitnet_linear(
|
|
85
80
|
model,
|
|
86
81
|
modules_to_not_convert=self.modules_to_not_convert,
|
|
87
82
|
quantization_config=self.quantization_config,
|
|
88
|
-
pre_quantized=self.pre_quantized,
|
|
89
83
|
)
|
|
90
84
|
|
|
91
85
|
def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
|
|
92
86
|
max_memory = {key: val * 0.90 for key, val in max_memory.items()}
|
|
93
87
|
return max_memory
|
|
94
88
|
|
|
95
|
-
def
|
|
96
|
-
target_dtype = torch.int8
|
|
97
|
-
return target_dtype
|
|
98
|
-
|
|
99
|
-
def is_serializable(self, safe_serialization=None):
|
|
89
|
+
def is_serializable(self):
|
|
100
90
|
return True
|
|
101
91
|
|
|
102
92
|
@property
|