transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -43,21 +43,17 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
|
|
43
43
|
FP4 quantization using fbgemm kernels
|
|
44
44
|
"""
|
|
45
45
|
|
|
46
|
-
requires_parameters_quantization = True
|
|
47
46
|
requires_calibration = False
|
|
48
47
|
|
|
49
|
-
required_packages = ["accelerate"]
|
|
50
|
-
|
|
51
48
|
def __init__(self, quantization_config, **kwargs):
|
|
52
49
|
super().__init__(quantization_config, **kwargs)
|
|
53
|
-
self.quantization_config = quantization_config
|
|
54
50
|
self.triton_kernels_hub = None
|
|
55
51
|
|
|
56
52
|
def _lazy_import_kernels(self):
|
|
57
53
|
"""Lazy import and initialize kernels only when needed"""
|
|
58
54
|
if self.triton_kernels_hub is None:
|
|
59
55
|
try:
|
|
60
|
-
from
|
|
56
|
+
from ..integrations.hub_kernels import get_kernel
|
|
61
57
|
|
|
62
58
|
self.triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
|
|
63
59
|
except ImportError:
|
|
@@ -74,7 +70,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
|
|
74
70
|
if self.quantization_config.dequantize:
|
|
75
71
|
return
|
|
76
72
|
|
|
77
|
-
if not
|
|
73
|
+
if not torch.cuda.is_available() and not torch.xpu.is_available():
|
|
78
74
|
if self.pre_quantized:
|
|
79
75
|
logger.warning_once(
|
|
80
76
|
"Using MXFP4 quantized models requires a GPU, we will default to dequantizing the model to bf16"
|
|
@@ -131,185 +127,39 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
|
|
131
127
|
"You have loaded an FP4 model on CPU and have a CUDA/XPU device available, make sure to set "
|
|
132
128
|
"your model on a GPU/XPU device in order to run your model. To remove this warning, pass device_map = 'cuda' or device_map = 'xpu'. "
|
|
133
129
|
)
|
|
134
|
-
elif device_map
|
|
135
|
-
if (
|
|
136
|
-
not self.pre_quantized
|
|
137
|
-
and isinstance(device_map, dict)
|
|
138
|
-
and ("cpu" in device_map.values() or "disk" in device_map.values())
|
|
139
|
-
):
|
|
130
|
+
elif isinstance(device_map, dict):
|
|
131
|
+
if not self.pre_quantized and ("cpu" in device_map.values() or "disk" in device_map.values()):
|
|
140
132
|
raise ValueError(
|
|
141
133
|
"You are attempting to load an FP4 model with a device_map that contains a CPU or disk device."
|
|
142
134
|
"This is not supported when the model is quantized on the fly. "
|
|
143
135
|
"Please use a quantized checkpoint or remove the CPU or disk device from the device_map."
|
|
144
136
|
)
|
|
145
137
|
|
|
146
|
-
def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
|
|
147
|
-
if dtype is None:
|
|
148
|
-
dtype = torch.bfloat16
|
|
149
|
-
logger.info(
|
|
150
|
-
"Overriding dtype=%s with `dtype=torch.bfloat16` due to "
|
|
151
|
-
"requirements of `fbgemm-gpu` to enable model loading in fp4. "
|
|
152
|
-
"Pass your own dtype to specify the dtype of the remaining non-linear layers or pass"
|
|
153
|
-
" dtype=torch.bfloat16 to remove this warning.",
|
|
154
|
-
dtype,
|
|
155
|
-
)
|
|
156
|
-
return dtype
|
|
157
|
-
|
|
158
138
|
def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
|
|
159
139
|
from ..integrations import Mxfp4GptOssExperts
|
|
160
|
-
from ..models.gpt_oss.modeling_gpt_oss import GptOssExperts
|
|
161
140
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
# if we are dequantizing, the model doesn't have scales, and blocks only params like gate_up_proj and down_proj so we need to handle this case differently
|
|
165
|
-
if self.quantization_config.dequantize and ("blocks" in param_name or "scales" in param_name):
|
|
166
|
-
module, tensor_name = get_module_from_name(model, param_name[: -len("_blocks")])
|
|
167
|
-
else:
|
|
168
|
-
module, tensor_name = get_module_from_name(model, param_name)
|
|
169
|
-
if isinstance(module, Mxfp4GptOssExperts) or (
|
|
170
|
-
isinstance(module, GptOssExperts) and self.quantization_config.dequantize
|
|
171
|
-
):
|
|
141
|
+
module, tensor_name = get_module_from_name(model, param_name)
|
|
142
|
+
if isinstance(module, Mxfp4GptOssExperts):
|
|
172
143
|
if tensor_name in ["down_proj_bias", "gate_up_proj_bias"]:
|
|
173
144
|
return False
|
|
174
145
|
return True
|
|
175
146
|
return False
|
|
176
147
|
|
|
177
|
-
def create_quantized_param(
|
|
178
|
-
self,
|
|
179
|
-
model: "PreTrainedModel",
|
|
180
|
-
param_value: "torch.Tensor",
|
|
181
|
-
param_name: str,
|
|
182
|
-
target_device: "torch.device",
|
|
183
|
-
**kwargs,
|
|
184
|
-
):
|
|
185
|
-
from ..integrations import (
|
|
186
|
-
Mxfp4GptOssExperts,
|
|
187
|
-
dequantize,
|
|
188
|
-
load_and_swizzle_mxfp4,
|
|
189
|
-
quantize_to_mxfp4,
|
|
190
|
-
swizzle_mxfp4,
|
|
191
|
-
)
|
|
192
|
-
from ..models.gpt_oss.modeling_gpt_oss import GptOssExperts
|
|
193
|
-
|
|
194
|
-
if not self.pre_quantized:
|
|
195
|
-
triton_kernels_hub = self._lazy_import_kernels()
|
|
196
|
-
module, _ = get_module_from_name(model, param_name)
|
|
197
|
-
with torch.device(target_device):
|
|
198
|
-
if isinstance(module, Mxfp4GptOssExperts):
|
|
199
|
-
triton_weight_tensor, weight_scale = quantize_to_mxfp4(param_value, triton_kernels_hub)
|
|
200
|
-
PrecisionConfig, FlexCtx, InFlexData = (
|
|
201
|
-
triton_kernels_hub.matmul_ogs.PrecisionConfig,
|
|
202
|
-
triton_kernels_hub.matmul_ogs.FlexCtx,
|
|
203
|
-
triton_kernels_hub.matmul_ogs.InFlexData,
|
|
204
|
-
)
|
|
205
|
-
triton_weight_tensor, weight_scale = swizzle_mxfp4(
|
|
206
|
-
triton_weight_tensor, weight_scale, triton_kernels_hub
|
|
207
|
-
)
|
|
208
|
-
|
|
209
|
-
proj = "gate_up_proj" if "gate_up_proj" in param_name else "down_proj"
|
|
210
|
-
setattr(module, proj, triton_weight_tensor)
|
|
211
|
-
setattr(
|
|
212
|
-
module,
|
|
213
|
-
f"{proj}_precision_config",
|
|
214
|
-
PrecisionConfig(weight_scale=weight_scale, flex_ctx=FlexCtx(rhs_data=InFlexData())),
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
delattr(module, f"{proj}_blocks")
|
|
218
|
-
delattr(module, f"{proj}_scales")
|
|
219
|
-
|
|
220
|
-
# The params going here are either gate_up_proj_blocks, or down_proj_blocks, or gate_up_proj_scales, or down_proj_scales
|
|
221
|
-
else:
|
|
222
|
-
# This is when loading a quantized model (blocks and scales exist)
|
|
223
|
-
empty_param = kwargs.get("empty_param")
|
|
224
|
-
casting_dtype = kwargs.get("casting_dtype")
|
|
225
|
-
to_contiguous = kwargs.get("to_contiguous")
|
|
226
|
-
rank = kwargs.get("rank")
|
|
227
|
-
device_mesh = kwargs.get("device_mesh")
|
|
228
|
-
if ("blocks" in param_name or "scales" in param_name) and self.quantization_config.dequantize:
|
|
229
|
-
# blocks and scales have the same length that's why this works for both
|
|
230
|
-
module, _ = get_module_from_name(model, param_name[: -len("_blocks")])
|
|
231
|
-
else:
|
|
232
|
-
module, _ = get_module_from_name(model, param_name)
|
|
233
|
-
|
|
234
|
-
shard_kwargs = {
|
|
235
|
-
"empty_param": empty_param,
|
|
236
|
-
"casting_dtype": casting_dtype,
|
|
237
|
-
"to_contiguous": to_contiguous,
|
|
238
|
-
"rank": rank,
|
|
239
|
-
"device_mesh": device_mesh,
|
|
240
|
-
"model": model,
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
if isinstance(module, Mxfp4GptOssExperts) or (
|
|
244
|
-
isinstance(module, GptOssExperts) and self.quantization_config.dequantize
|
|
245
|
-
):
|
|
246
|
-
if self.quantization_config.dequantize:
|
|
247
|
-
# dq_param_name is the name of the parameter without the blocks or scales suffix, it's used in this case since we don't switch linears
|
|
248
|
-
# so we only have the original param name
|
|
249
|
-
dq_param_name = param_name[: -len("_blocks")]
|
|
250
|
-
dequantize(module, param_name, param_value, target_device, dq_param_name, **shard_kwargs)
|
|
251
|
-
else:
|
|
252
|
-
load_and_swizzle_mxfp4(
|
|
253
|
-
module,
|
|
254
|
-
param_name,
|
|
255
|
-
param_value,
|
|
256
|
-
target_device,
|
|
257
|
-
self._lazy_import_kernels(),
|
|
258
|
-
**shard_kwargs,
|
|
259
|
-
)
|
|
260
|
-
|
|
261
148
|
def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
|
262
|
-
# we are not really dequantizing, we are just removing everything related to quantization here
|
|
263
|
-
if self.quantization_config.dequantize:
|
|
264
|
-
self.remove_quantization_config(model)
|
|
265
149
|
# clean cache due to triton ops
|
|
266
150
|
if torch.cuda.is_available():
|
|
267
151
|
torch.cuda.empty_cache()
|
|
268
152
|
elif torch.xpu.is_available():
|
|
269
153
|
torch.xpu.empty_cache()
|
|
270
154
|
|
|
271
|
-
def update_expected_keys(self, model: "PreTrainedModel", expected_keys: list[str], checkpoint_keys: list[str]):
|
|
272
|
-
# Replace expected_keys for experts' gate_up_proj and down_proj with their _blocks and _scales variants
|
|
273
|
-
new_expected_keys = []
|
|
274
|
-
for key in expected_keys:
|
|
275
|
-
if key.endswith(".mlp.experts.gate_up_proj"):
|
|
276
|
-
base = key[: -len("gate_up_proj")]
|
|
277
|
-
new_expected_keys.append(base + "gate_up_proj_blocks")
|
|
278
|
-
new_expected_keys.append(base + "gate_up_proj_scales")
|
|
279
|
-
elif key.endswith(".mlp.experts.down_proj"):
|
|
280
|
-
base = key[: -len("down_proj")]
|
|
281
|
-
new_expected_keys.append(base + "down_proj_blocks")
|
|
282
|
-
new_expected_keys.append(base + "down_proj_scales")
|
|
283
|
-
elif not self.pre_quantized:
|
|
284
|
-
# in this case, we are quantizing the model so we need to update the keys as we changed the layers
|
|
285
|
-
if key.endswith(".mlp.experts.down_proj_blocks"):
|
|
286
|
-
base = key[: -len("down_proj_blocks")]
|
|
287
|
-
new_expected_keys.append(base + "down_proj")
|
|
288
|
-
elif key.endswith(".mlp.experts.gate_up_proj_blocks"):
|
|
289
|
-
base = key[: -len("gate_up_proj_blocks")]
|
|
290
|
-
new_expected_keys.append(base + "gate_up_proj")
|
|
291
|
-
elif key.endswith("scales"):
|
|
292
|
-
# we remove it the scales as the checkpoint don't contain them
|
|
293
|
-
continue
|
|
294
|
-
else:
|
|
295
|
-
new_expected_keys.append(key)
|
|
296
|
-
else:
|
|
297
|
-
new_expected_keys.append(key)
|
|
298
|
-
return new_expected_keys
|
|
299
|
-
|
|
300
155
|
def _process_model_before_weight_loading(
|
|
301
156
|
self,
|
|
302
157
|
model: "PreTrainedModel",
|
|
303
|
-
|
|
158
|
+
use_kernels: bool = False,
|
|
304
159
|
**kwargs,
|
|
305
160
|
):
|
|
306
161
|
from ..integrations import replace_with_mxfp4_linear
|
|
307
162
|
|
|
308
|
-
self.modules_to_not_convert = self.get_modules_to_not_convert(
|
|
309
|
-
model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
|
|
310
|
-
)
|
|
311
|
-
|
|
312
|
-
use_kernels = kwargs.get("use_kernels", False)
|
|
313
163
|
# if we are using kernels, we can't use the quantized model, since the forward pass is different and needs special handling
|
|
314
164
|
if use_kernels:
|
|
315
165
|
logger.warning_once(
|
|
@@ -318,30 +168,13 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
|
|
318
168
|
)
|
|
319
169
|
self.quantization_config.dequantize = True
|
|
320
170
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
model,
|
|
324
|
-
modules_to_not_convert=self.modules_to_not_convert,
|
|
325
|
-
quantization_config=self.quantization_config,
|
|
326
|
-
config=config,
|
|
171
|
+
self.modules_to_not_convert = self.get_modules_to_not_convert(
|
|
172
|
+
model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
|
|
327
173
|
)
|
|
328
174
|
|
|
329
|
-
model
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
from ..integrations import Mxfp4GptOssExperts
|
|
333
|
-
|
|
334
|
-
not_missing_keys = []
|
|
335
|
-
for name, module in model.named_modules():
|
|
336
|
-
if isinstance(module, Mxfp4GptOssExperts):
|
|
337
|
-
for missing in missing_keys:
|
|
338
|
-
if (
|
|
339
|
-
(name in missing or name in f"{prefix}.{missing}")
|
|
340
|
-
and not missing.endswith(".weight")
|
|
341
|
-
and not missing.endswith(".bias")
|
|
342
|
-
):
|
|
343
|
-
not_missing_keys.append(missing)
|
|
344
|
-
return [k for k in missing_keys if k not in not_missing_keys]
|
|
175
|
+
model = replace_with_mxfp4_linear(
|
|
176
|
+
model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
|
|
177
|
+
)
|
|
345
178
|
|
|
346
179
|
def update_tp_plan(self, config):
|
|
347
180
|
if "GptOssConfig" in config.__class__.__name__:
|
|
@@ -369,20 +202,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
|
|
369
202
|
)
|
|
370
203
|
return config
|
|
371
204
|
|
|
372
|
-
def
|
|
373
|
-
if self.quantization_config.dequantize:
|
|
374
|
-
if "_blocks" in param_name:
|
|
375
|
-
return param_name.replace("_blocks", "")
|
|
376
|
-
elif "_scales" in param_name:
|
|
377
|
-
return param_name.replace("_scales", "")
|
|
378
|
-
elif not self.pre_quantized:
|
|
379
|
-
if param_name.endswith("gate_up_proj"):
|
|
380
|
-
return param_name.replace("gate_up_proj", "gate_up_proj_blocks")
|
|
381
|
-
if param_name.endswith("down_proj"):
|
|
382
|
-
return param_name.replace("down_proj", "down_proj_blocks")
|
|
383
|
-
return param_name
|
|
384
|
-
|
|
385
|
-
def get_state_dict_and_metadata(self, model, safe_serialization: bool = False):
|
|
205
|
+
def get_state_dict_and_metadata(self, model):
|
|
386
206
|
from ..integrations import Mxfp4GptOssExperts
|
|
387
207
|
|
|
388
208
|
state_dict = model.state_dict()
|
|
@@ -421,7 +241,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
|
|
|
421
241
|
metadata = {}
|
|
422
242
|
return state_dict, metadata
|
|
423
243
|
|
|
424
|
-
def is_serializable(self
|
|
244
|
+
def is_serializable(self):
|
|
425
245
|
return True
|
|
426
246
|
|
|
427
247
|
@property
|
|
@@ -40,23 +40,17 @@ class QuantoHfQuantizer(HfQuantizer):
|
|
|
40
40
|
Quantizer for the quanto library
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
-
required_packages = ["quanto", "accelerate"]
|
|
44
|
-
requires_parameters_quantization = True
|
|
45
43
|
requires_calibration = False
|
|
46
44
|
|
|
47
45
|
def __init__(self, quantization_config: QuantoConfig, **kwargs):
|
|
48
46
|
super().__init__(quantization_config, **kwargs)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
raise ValueError(
|
|
57
|
-
"We don't support quantizing the activations with transformers library."
|
|
58
|
-
"Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training."
|
|
59
|
-
)
|
|
47
|
+
map_to_param_size = {
|
|
48
|
+
"int8": 1,
|
|
49
|
+
"float8": 1,
|
|
50
|
+
"int4": 0.5,
|
|
51
|
+
"int2": 0.25,
|
|
52
|
+
}
|
|
53
|
+
self.quantized_param_size = map_to_param_size.get(self.quantization_config.weights, None)
|
|
60
54
|
|
|
61
55
|
def validate_environment(self, *args, **kwargs):
|
|
62
56
|
if not is_optimum_quanto_available():
|
|
@@ -67,42 +61,22 @@ class QuantoHfQuantizer(HfQuantizer):
|
|
|
67
61
|
raise ImportError(
|
|
68
62
|
"Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)"
|
|
69
63
|
)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
64
|
+
device_map = kwargs.get("device_map")
|
|
65
|
+
if isinstance(device_map, dict):
|
|
66
|
+
if len(device_map) > 1 and "cpu" in device_map.values() or "disk" in device_map.values():
|
|
67
|
+
raise ValueError(
|
|
68
|
+
"You are attempting to load an model with a device_map that contains a CPU or disk device."
|
|
69
|
+
"This is not supported with quanto when the model is quantized on the fly. "
|
|
70
|
+
"Please remove the CPU or disk device from the device_map."
|
|
71
|
+
)
|
|
72
|
+
if self.quantization_config.activations is not None:
|
|
73
|
+
raise ValueError(
|
|
74
|
+
"We don't support quantizing the activations with transformers library."
|
|
75
|
+
"Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training."
|
|
78
76
|
)
|
|
79
|
-
return device_map
|
|
80
|
-
|
|
81
|
-
def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
|
|
82
|
-
if dtype is None:
|
|
83
|
-
logger.info("You did not specify `dtype` in `from_pretrained`. Setting it to `torch.float32`.")
|
|
84
|
-
dtype = torch.float32
|
|
85
|
-
return dtype
|
|
86
|
-
|
|
87
|
-
def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]:
|
|
88
|
-
if is_optimum_quanto_available():
|
|
89
|
-
from optimum.quanto import QModuleMixin
|
|
90
|
-
|
|
91
|
-
not_missing_keys = []
|
|
92
|
-
for name, module in model.named_modules():
|
|
93
|
-
if isinstance(module, QModuleMixin):
|
|
94
|
-
for missing in missing_keys:
|
|
95
|
-
if (
|
|
96
|
-
(name in missing or name in f"{prefix}.{missing}")
|
|
97
|
-
and not missing.endswith(".weight")
|
|
98
|
-
and not missing.endswith(".bias")
|
|
99
|
-
):
|
|
100
|
-
not_missing_keys.append(missing)
|
|
101
|
-
return [k for k in missing_keys if k not in not_missing_keys]
|
|
102
77
|
|
|
103
78
|
def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
|
|
104
|
-
|
|
105
|
-
from optimum.quanto import QModuleMixin
|
|
79
|
+
from optimum.quanto import QModuleMixin
|
|
106
80
|
|
|
107
81
|
module, tensor_name = get_module_from_name(model, param_name)
|
|
108
82
|
# We only quantize the weights and the bias is not quantized.
|
|
@@ -116,50 +90,32 @@ class QuantoHfQuantizer(HfQuantizer):
|
|
|
116
90
|
max_memory = {key: val * 0.90 for key, val in max_memory.items()}
|
|
117
91
|
return max_memory
|
|
118
92
|
|
|
119
|
-
def
|
|
120
|
-
|
|
121
|
-
model:
|
|
122
|
-
|
|
123
|
-
param_name: str,
|
|
124
|
-
target_device: "torch.device",
|
|
125
|
-
**kwargs,
|
|
126
|
-
):
|
|
127
|
-
from ..modeling_utils import _load_parameter_into_model
|
|
128
|
-
|
|
129
|
-
_load_parameter_into_model(model, param_name, param_value.to(target_device))
|
|
130
|
-
module, _ = get_module_from_name(model, param_name)
|
|
131
|
-
module.freeze()
|
|
132
|
-
module.weight.requires_grad = False
|
|
133
|
-
|
|
134
|
-
def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
|
|
135
|
-
from accelerate.utils import CustomDtype
|
|
136
|
-
|
|
137
|
-
mapping = {
|
|
138
|
-
"int8": torch.int8,
|
|
139
|
-
"float8": CustomDtype.FP8,
|
|
140
|
-
"int4": CustomDtype.INT4,
|
|
141
|
-
"int2": CustomDtype.INT2,
|
|
142
|
-
}
|
|
143
|
-
target_dtype = mapping[self.quantization_config.weights]
|
|
144
|
-
return target_dtype
|
|
93
|
+
def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
|
|
94
|
+
"Return the element size (in bytes) for `param_name`."
|
|
95
|
+
if self.param_needs_quantization(model, param_name) and self.quantized_param_size is not None:
|
|
96
|
+
return self.quantized_param_size
|
|
145
97
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
):
|
|
98
|
+
return super().param_element_size(model, param_name, param)
|
|
99
|
+
|
|
100
|
+
def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
|
|
149
101
|
from ..integrations import replace_with_quanto_layers
|
|
150
102
|
|
|
151
103
|
self.modules_to_not_convert = self.get_modules_to_not_convert(
|
|
152
|
-
model, self.quantization_config.modules_to_not_convert,
|
|
104
|
+
model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
|
|
153
105
|
)
|
|
154
106
|
|
|
155
|
-
model
|
|
107
|
+
model = replace_with_quanto_layers(
|
|
156
108
|
model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
|
|
157
109
|
)
|
|
158
|
-
model.config.quantization_config = self.quantization_config
|
|
159
110
|
|
|
160
111
|
@property
|
|
161
112
|
def is_trainable(self) -> bool:
|
|
162
113
|
return True
|
|
163
114
|
|
|
164
|
-
def is_serializable(self
|
|
115
|
+
def is_serializable(self):
|
|
165
116
|
return False
|
|
117
|
+
|
|
118
|
+
def get_quantize_ops(self):
|
|
119
|
+
from ..integrations.quanto import QuantoQuantize
|
|
120
|
+
|
|
121
|
+
return QuantoQuantize(self)
|
|
@@ -45,12 +45,6 @@ class QuarkHfQuantizer(HfQuantizer):
|
|
|
45
45
|
"""
|
|
46
46
|
|
|
47
47
|
requires_calibration = True # On-the-fly quantization with quark is not supported for now.
|
|
48
|
-
required_packages = ["quark"]
|
|
49
|
-
|
|
50
|
-
# Checkpoints are expected to be already quantized when loading a quark model. However, as some keys from
|
|
51
|
-
# the checkpoint might mismatch the model parameters keys, we use the `create_quantized_param` method
|
|
52
|
-
# to load the checkpoints, remapping the keys.
|
|
53
|
-
requires_parameters_quantization = True
|
|
54
48
|
|
|
55
49
|
def __init__(self, quantization_config, **kwargs):
|
|
56
50
|
super().__init__(quantization_config, **kwargs)
|
|
@@ -78,19 +72,44 @@ class QuarkHfQuantizer(HfQuantizer):
|
|
|
78
72
|
def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
|
|
79
73
|
return True
|
|
80
74
|
|
|
81
|
-
def
|
|
82
|
-
from ..modeling_utils import _load_parameter_into_model
|
|
83
|
-
|
|
84
|
-
postfix = param_name.split(".")[-1]
|
|
85
|
-
|
|
86
|
-
if postfix in CHECKPOINT_KEYS:
|
|
87
|
-
param_name = param_name.replace(postfix, CHECKPOINT_KEYS[postfix])
|
|
88
|
-
|
|
89
|
-
_load_parameter_into_model(model, param_name, param.to(param_device))
|
|
90
|
-
|
|
91
|
-
def is_serializable(self, safe_serialization=None):
|
|
75
|
+
def is_serializable(self):
|
|
92
76
|
return False
|
|
93
77
|
|
|
94
78
|
@property
|
|
95
79
|
def is_trainable(self):
|
|
96
80
|
return False
|
|
81
|
+
|
|
82
|
+
def get_weight_conversions(self):
|
|
83
|
+
from ..core_model_loading import WeightConverter
|
|
84
|
+
from ..integrations.quark import QuarkDeserialize
|
|
85
|
+
# In Quark, quantization is managed through a QParamsLinear module, which holds
|
|
86
|
+
# separate quantizers for the weights, inputs, and biases (e.g. weight_quantizer
|
|
87
|
+
# input_quantizer, bias_quantizer, etc.).
|
|
88
|
+
#
|
|
89
|
+
# When you call `module.state_dict()`, Quark automatically renames the quantizer
|
|
90
|
+
# parameters — for example, `input_quantizer.scale` becomes `input_scale` — and
|
|
91
|
+
# saves them directly at the parent module level.
|
|
92
|
+
#
|
|
93
|
+
# This means we cannot simply rename keys like `weight_scale` back to
|
|
94
|
+
# `weight_quantizer.scale` when loading the state_dict.
|
|
95
|
+
# Otherwise, the `missing_keys` list would still expect keys such as
|
|
96
|
+
# `weight_scale`, `bias_scale`, etc.
|
|
97
|
+
#
|
|
98
|
+
# To fix this, we keep the expected state_dict keys (like `weight_scale`,
|
|
99
|
+
# `bias_scale`, etc.) unchanged, and during the conversion step, we explicitly
|
|
100
|
+
# assign their values into the corresponding quantizer attributes
|
|
101
|
+
# (`weight_quantizer.scale`, `input_quantizer.scale`, and so on).
|
|
102
|
+
|
|
103
|
+
# You can notice here that in target_patterns we use the same key as the source_patterns,
|
|
104
|
+
# this is because we just want to collect the tensors, and we will rename them later in the convert function.
|
|
105
|
+
# We cannot rename directly or else the missing_keys list will not be able to find the tensors.
|
|
106
|
+
converters = []
|
|
107
|
+
for key in CHECKPOINT_KEYS.keys():
|
|
108
|
+
converters.append(
|
|
109
|
+
WeightConverter(
|
|
110
|
+
source_patterns=[key],
|
|
111
|
+
target_patterns=key,
|
|
112
|
+
operations=[QuarkDeserialize(self)],
|
|
113
|
+
)
|
|
114
|
+
)
|
|
115
|
+
return converters
|
|
@@ -39,7 +39,6 @@ class SpQRHfQuantizer(HfQuantizer):
|
|
|
39
39
|
|
|
40
40
|
def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
|
|
41
41
|
super().__init__(quantization_config, **kwargs)
|
|
42
|
-
self.quantization_config = quantization_config
|
|
43
42
|
|
|
44
43
|
def validate_environment(self, *args, **kwargs):
|
|
45
44
|
if not torch.cuda.is_available():
|
|
@@ -52,36 +51,29 @@ class SpQRHfQuantizer(HfQuantizer):
|
|
|
52
51
|
raise ImportError("Using `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`")
|
|
53
52
|
|
|
54
53
|
def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
|
|
55
|
-
if dtype
|
|
56
|
-
dtype = torch.float16
|
|
57
|
-
logger.info("Assuming SpQR inference on GPU and loading the model in `torch.float16`.")
|
|
58
|
-
elif dtype != torch.float16:
|
|
54
|
+
if dtype != torch.float16:
|
|
59
55
|
raise ValueError(
|
|
60
|
-
"You cannot use any type other than torch.float16 for SpQR. Please
|
|
61
|
-
"torch.float16 explicitly."
|
|
56
|
+
"You cannot use any type other than torch.float16 for SpQR. Please set it totorch.float16 explicitly."
|
|
62
57
|
)
|
|
63
58
|
return dtype
|
|
64
59
|
|
|
65
60
|
def _process_model_before_weight_loading(
|
|
66
61
|
self,
|
|
67
62
|
model: "PreTrainedModel",
|
|
68
|
-
keep_in_fp32_modules: list[str] | None = None,
|
|
69
63
|
**kwargs,
|
|
70
64
|
):
|
|
71
65
|
self.modules_to_not_convert = self.get_modules_to_not_convert(
|
|
72
|
-
model, self.quantization_config.modules_to_not_convert,
|
|
66
|
+
model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
|
|
73
67
|
)
|
|
74
|
-
|
|
75
68
|
replace_with_spqr_linear(
|
|
76
69
|
model,
|
|
77
70
|
quantization_config=self.quantization_config,
|
|
78
71
|
modules_to_not_convert=self.modules_to_not_convert,
|
|
79
72
|
)
|
|
80
|
-
model.config.quantization_config = self.quantization_config
|
|
81
73
|
|
|
82
74
|
@property
|
|
83
75
|
def is_trainable(self):
|
|
84
76
|
return False
|
|
85
77
|
|
|
86
|
-
def is_serializable(self
|
|
78
|
+
def is_serializable(self):
|
|
87
79
|
return True
|