transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
from ..
|
|
1
|
+
from ..quantizers.quantizers_utils import should_convert_module
|
|
2
|
+
from ..utils import is_torch_available, logging
|
|
2
3
|
|
|
3
4
|
|
|
4
|
-
if is_accelerate_available():
|
|
5
|
-
from accelerate import init_empty_weights
|
|
6
|
-
|
|
7
5
|
if is_torch_available():
|
|
8
6
|
import torch
|
|
9
7
|
import torch.nn as nn
|
|
@@ -91,7 +89,7 @@ def unpack_weights(packed: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
|
|
|
91
89
|
|
|
92
90
|
Explanation of the example:
|
|
93
91
|
---------------------------
|
|
94
|
-
Let's take the first value for example 0b10100001, we
|
|
92
|
+
Let's take the first value for example 0b10100001, we will only focus on the first column,
|
|
95
93
|
because every element is unpacked across the first dimension
|
|
96
94
|
- First 2 bits: `01` → 0 at [0][0]
|
|
97
95
|
- Second 2 bits: `00` → -1 at [0][2]
|
|
@@ -172,7 +170,7 @@ class BitLinear(nn.Module):
|
|
|
172
170
|
Activation function : Performs symmetric, per-token quantization on the input activations.
|
|
173
171
|
Parameters:
|
|
174
172
|
-----------
|
|
175
|
-
|
|
173
|
+
input : torch.Tensor
|
|
176
174
|
Input activations to be quantized.
|
|
177
175
|
num_bits : int, optional (default=8)
|
|
178
176
|
Number of bits to use for quantization, determining the quantization range.
|
|
@@ -314,109 +312,53 @@ class AutoBitLinear(nn.Linear):
|
|
|
314
312
|
return output
|
|
315
313
|
|
|
316
314
|
|
|
317
|
-
def
|
|
318
|
-
model,
|
|
319
|
-
modules_to_not_convert=None,
|
|
320
|
-
current_key_name=None,
|
|
321
|
-
quantization_config=None,
|
|
322
|
-
has_been_replaced=False,
|
|
323
|
-
pre_quantized=False,
|
|
324
|
-
):
|
|
325
|
-
"""
|
|
326
|
-
Private method that wraps the recursion for module replacement.
|
|
327
|
-
|
|
328
|
-
Returns the converted model and a boolean that indicates if the conversion has been successful or not.
|
|
315
|
+
def replace_with_bitnet_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
|
|
329
316
|
"""
|
|
317
|
+
Public method that replaces the linear layers of the given model with bitnet quantized layers.
|
|
330
318
|
|
|
331
|
-
|
|
332
|
-
current_key_name = []
|
|
333
|
-
|
|
334
|
-
for name, module in model.named_children():
|
|
335
|
-
if current_key_name is None:
|
|
336
|
-
current_key_name = []
|
|
337
|
-
current_key_name.append(name)
|
|
338
|
-
|
|
339
|
-
# Check if the current key is not in the `modules_to_not_convert`
|
|
340
|
-
if not any(key in ".".join(current_key_name) for key in modules_to_not_convert):
|
|
341
|
-
with init_empty_weights():
|
|
342
|
-
if isinstance(module, nn.Linear) and name not in modules_to_not_convert:
|
|
343
|
-
in_features = module.in_features
|
|
344
|
-
out_features = module.out_features
|
|
345
|
-
if quantization_config and quantization_config.linear_class == "autobitlinear":
|
|
346
|
-
model._modules[name] = AutoBitLinear(
|
|
347
|
-
in_features=in_features,
|
|
348
|
-
out_features=out_features,
|
|
349
|
-
bias=module.bias is not None,
|
|
350
|
-
device=module.weight.device,
|
|
351
|
-
dtype=module.weight.dtype,
|
|
352
|
-
online_quant=(quantization_config.quantization_mode == "online"),
|
|
353
|
-
use_rms_norm=quantization_config.use_rms_norm,
|
|
354
|
-
rms_norm_eps=quantization_config.rms_norm_eps,
|
|
355
|
-
)
|
|
356
|
-
if quantization_config.quantization_mode == "offline":
|
|
357
|
-
model._modules[name].requires_grad_(False)
|
|
358
|
-
else:
|
|
359
|
-
model._modules[name] = BitLinear(
|
|
360
|
-
in_features=in_features,
|
|
361
|
-
out_features=out_features,
|
|
362
|
-
bias=module.bias is not None,
|
|
363
|
-
device=module.weight.device,
|
|
364
|
-
dtype=module.weight.dtype,
|
|
365
|
-
use_rms_norm=quantization_config.use_rms_norm if quantization_config else False,
|
|
366
|
-
rms_norm_eps=quantization_config.rms_norm_eps if quantization_config else 1e-6,
|
|
367
|
-
)
|
|
368
|
-
model._modules[name].requires_grad_(False)
|
|
369
|
-
has_been_replaced = True
|
|
370
|
-
|
|
371
|
-
if len(list(module.children())) > 0:
|
|
372
|
-
_, has_been_replaced = _replace_with_bitnet_linear(
|
|
373
|
-
module,
|
|
374
|
-
modules_to_not_convert=modules_to_not_convert,
|
|
375
|
-
current_key_name=current_key_name,
|
|
376
|
-
quantization_config=quantization_config,
|
|
377
|
-
has_been_replaced=has_been_replaced,
|
|
378
|
-
)
|
|
379
|
-
# Remove the last key for recursion
|
|
380
|
-
current_key_name.pop(-1)
|
|
381
|
-
return model, has_been_replaced
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
def replace_with_bitnet_linear(
|
|
385
|
-
model,
|
|
386
|
-
modules_to_not_convert=None,
|
|
387
|
-
current_key_name=None,
|
|
388
|
-
quantization_config=None,
|
|
389
|
-
pre_quantized=False,
|
|
390
|
-
):
|
|
391
|
-
"""
|
|
392
|
-
A helper function to replace all `torch.nn.Linear` modules by `BitLinear158` modules`.
|
|
393
|
-
|
|
394
|
-
The function will be run recursively and replace all `torch.nn.Linear` modules except for the `lm_head` that should
|
|
395
|
-
be kept as a `torch.nn.Linear` module. The replacement is done under `init_empty_weights` context manager so no
|
|
396
|
-
CPU/GPU memory is required to run this function. Each weight will be quantized along the channel.
|
|
397
|
-
|
|
398
|
-
Parameters:
|
|
319
|
+
Args:
|
|
399
320
|
model (`torch.nn.Module`):
|
|
400
|
-
|
|
401
|
-
modules_to_not_convert (`list[
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
it) is not in the list of modules to not convert (for instances modules that are offloaded to `cpu` or
|
|
407
|
-
`disk`).
|
|
321
|
+
The model to convert, can be any `torch.nn.Module` instance.
|
|
322
|
+
modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
|
|
323
|
+
A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
|
|
324
|
+
converted.
|
|
325
|
+
quantization_config (`BitNetConfig`):
|
|
326
|
+
The quantization config object that contains the quantization parameters.
|
|
408
327
|
"""
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
328
|
+
|
|
329
|
+
has_been_replaced = False
|
|
330
|
+
# we need this to correctly materialize the weights during quantization
|
|
331
|
+
for module_name, module in model.named_modules():
|
|
332
|
+
if not should_convert_module(module_name, modules_to_not_convert):
|
|
333
|
+
continue
|
|
334
|
+
with torch.device("meta"):
|
|
335
|
+
if isinstance(module, nn.Linear):
|
|
336
|
+
if quantization_config and quantization_config.linear_class == "autobitlinear":
|
|
337
|
+
new_module = AutoBitLinear(
|
|
338
|
+
in_features=module.in_features,
|
|
339
|
+
out_features=module.out_features,
|
|
340
|
+
bias=module.bias is not None,
|
|
341
|
+
device=module.weight.device,
|
|
342
|
+
dtype=module.weight.dtype,
|
|
343
|
+
online_quant=(quantization_config.quantization_mode == "online"),
|
|
344
|
+
use_rms_norm=quantization_config.use_rms_norm,
|
|
345
|
+
rms_norm_eps=quantization_config.rms_norm_eps,
|
|
346
|
+
)
|
|
347
|
+
if quantization_config.quantization_mode == "offline":
|
|
348
|
+
new_module.requires_grad_(False)
|
|
349
|
+
else:
|
|
350
|
+
new_module = BitLinear(
|
|
351
|
+
in_features=module.in_features,
|
|
352
|
+
out_features=module.out_features,
|
|
353
|
+
bias=module.bias is not None,
|
|
354
|
+
device=module.weight.device,
|
|
355
|
+
dtype=module.weight.dtype,
|
|
356
|
+
use_rms_norm=quantization_config.use_rms_norm if quantization_config else False,
|
|
357
|
+
rms_norm_eps=quantization_config.rms_norm_eps if quantization_config else 1e-6,
|
|
358
|
+
)
|
|
359
|
+
new_module.requires_grad_(False)
|
|
360
|
+
model.set_submodule(module_name, new_module)
|
|
361
|
+
has_been_replaced = True
|
|
420
362
|
|
|
421
363
|
if not has_been_replaced:
|
|
422
364
|
logger.warning(
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import inspect
|
|
2
|
-
from inspect import signature
|
|
3
2
|
|
|
4
3
|
from ..core_model_loading import ConversionOps
|
|
5
|
-
from ..quantizers.quantizers_utils import get_module_from_name
|
|
4
|
+
from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
|
|
6
5
|
from ..utils import (
|
|
7
6
|
get_available_devices,
|
|
8
7
|
is_accelerate_available,
|
|
@@ -23,7 +22,6 @@ if is_torch_available():
|
|
|
23
22
|
|
|
24
23
|
if is_accelerate_available():
|
|
25
24
|
import accelerate
|
|
26
|
-
from accelerate import init_empty_weights
|
|
27
25
|
from accelerate.hooks import add_hook_to_module, remove_hook_from_module
|
|
28
26
|
|
|
29
27
|
logger = logging.get_logger(__name__)
|
|
@@ -44,7 +42,7 @@ class Bnb4bitQuantize(ConversionOps):
|
|
|
44
42
|
we need to store some parameters to create the quantized weight. For example, bnb requires 6 values that are stored in the checkpoint to recover the quantized weight. So we store them in a dict that is stored in hf_quantizer for now as we can't save it in the op since we create an op per tensor.
|
|
45
43
|
"""
|
|
46
44
|
value = list(input_dict.values())[0]
|
|
47
|
-
value = value[0]
|
|
45
|
+
value = value[0]
|
|
48
46
|
|
|
49
47
|
# update param name to get the weights instead of the quantized stats
|
|
50
48
|
module, _ = get_module_from_name(model, full_layer_name)
|
|
@@ -156,134 +154,77 @@ class Bnb8bitDeserialize(ConversionOps):
|
|
|
156
154
|
return {key_weight: new_value}
|
|
157
155
|
|
|
158
156
|
|
|
159
|
-
def
|
|
160
|
-
model,
|
|
161
|
-
modules_to_not_convert=None,
|
|
162
|
-
current_key_name=None,
|
|
157
|
+
def replace_with_bnb_linear(
|
|
158
|
+
model: torch.nn.Module,
|
|
159
|
+
modules_to_not_convert: list[str] | None = None,
|
|
163
160
|
quantization_config=None,
|
|
164
|
-
has_been_replaced=False,
|
|
165
161
|
pre_quantized=False,
|
|
166
162
|
):
|
|
167
163
|
"""
|
|
168
|
-
|
|
164
|
+
A helper function to replace all `torch.nn.Linear` modules by bnb modules from the `bitsandbytes` library.
|
|
169
165
|
|
|
170
|
-
|
|
166
|
+
Args:
|
|
167
|
+
model (`torch.nn.Module`):
|
|
168
|
+
The model to convert, can be any `torch.nn.Module` instance.
|
|
169
|
+
modules_to_not_convert (`list[str]`, defaults to `None`):
|
|
170
|
+
A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
|
|
171
|
+
converted.
|
|
172
|
+
quantization_config (`BitsAndBytesConfig`):
|
|
173
|
+
The quantization config object that contains the quantization parameters.
|
|
174
|
+
pre_quantized (`bool`, defaults to `False`):
|
|
175
|
+
Whether the model is pre-quantized or not
|
|
171
176
|
"""
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
in_features
|
|
188
|
-
out_features
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
177
|
+
has_been_replaced = False
|
|
178
|
+
# we need this to correctly materialize the weights during quantization
|
|
179
|
+
for module_name, module in model.named_modules():
|
|
180
|
+
if not should_convert_module(module_name, modules_to_not_convert):
|
|
181
|
+
continue
|
|
182
|
+
new_module = None
|
|
183
|
+
with torch.device("meta"):
|
|
184
|
+
if isinstance(module, (nn.Linear, Conv1D)):
|
|
185
|
+
if isinstance(module, Conv1D):
|
|
186
|
+
in_features, out_features = module.weight.shape
|
|
187
|
+
else:
|
|
188
|
+
in_features = module.in_features
|
|
189
|
+
out_features = module.out_features
|
|
190
|
+
if quantization_config.quantization_method() == "llm_int8":
|
|
191
|
+
new_module = bnb.nn.Linear8bitLt(
|
|
192
|
+
in_features,
|
|
193
|
+
out_features,
|
|
194
|
+
module.bias is not None,
|
|
195
|
+
has_fp16_weights=quantization_config.llm_int8_has_fp16_weight,
|
|
196
|
+
threshold=quantization_config.llm_int8_threshold,
|
|
197
|
+
)
|
|
198
|
+
if pre_quantized:
|
|
199
|
+
# this is kind of an edge case when supporting both loading and quantization ...
|
|
200
|
+
# we need to set the right dtype as we cast the checkpoint with the dtype of the meta model
|
|
201
|
+
new_module.weight.data = new_module.weight.data.to(dtype=torch.int8)
|
|
202
|
+
else:
|
|
203
|
+
new_module = bnb.nn.Linear4bit(
|
|
204
|
+
in_features,
|
|
205
|
+
out_features,
|
|
206
|
+
module.bias is not None,
|
|
207
|
+
quantization_config.bnb_4bit_compute_dtype,
|
|
208
|
+
compress_statistics=quantization_config.bnb_4bit_use_double_quant,
|
|
209
|
+
quant_type=quantization_config.bnb_4bit_quant_type,
|
|
210
|
+
quant_storage=quantization_config.bnb_4bit_quant_storage,
|
|
211
|
+
)
|
|
212
|
+
if pre_quantized:
|
|
213
|
+
# same here
|
|
214
|
+
new_module.weight.data = new_module.weight.data.to(
|
|
215
|
+
dtype=quantization_config.bnb_4bit_quant_storage
|
|
197
216
|
)
|
|
198
|
-
|
|
199
|
-
new_module.weight.data = new_module.weight.data.to(dtype=torch.int8)
|
|
200
|
-
model._modules[name] = new_module
|
|
201
|
-
has_been_replaced = True
|
|
202
|
-
else:
|
|
203
|
-
if (
|
|
204
|
-
quantization_config.llm_int8_skip_modules is not None
|
|
205
|
-
and name in quantization_config.llm_int8_skip_modules
|
|
206
|
-
):
|
|
207
|
-
pass
|
|
208
|
-
else:
|
|
209
|
-
extra_kwargs = (
|
|
210
|
-
{"quant_storage": quantization_config.bnb_4bit_quant_storage}
|
|
211
|
-
if "quant_storage" in list(signature(bnb.nn.Linear4bit).parameters)
|
|
212
|
-
else {}
|
|
213
|
-
)
|
|
214
|
-
new_module = bnb.nn.Linear4bit(
|
|
215
|
-
in_features,
|
|
216
|
-
out_features,
|
|
217
|
-
module.bias is not None,
|
|
218
|
-
quantization_config.bnb_4bit_compute_dtype,
|
|
219
|
-
compress_statistics=quantization_config.bnb_4bit_use_double_quant,
|
|
220
|
-
quant_type=quantization_config.bnb_4bit_quant_type,
|
|
221
|
-
**extra_kwargs,
|
|
222
|
-
)
|
|
223
|
-
if pre_quantized:
|
|
224
|
-
# this is kind of an edge case when supporting both loading and quantization ...
|
|
225
|
-
# we need to set the right dtype as we cast the checkpoint with the dtype of the meta model
|
|
226
|
-
new_module.weight.data = new_module.weight.data.to(dtype=torch.uint8)
|
|
227
|
-
model._modules[name] = new_module
|
|
228
|
-
has_been_replaced = True
|
|
217
|
+
if new_module is not None:
|
|
229
218
|
# Store the module class in case we need to transpose the weight later
|
|
230
|
-
|
|
219
|
+
new_module.source_cls = type(module)
|
|
231
220
|
# Force requires grad to False to avoid unexpected errors
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
module,
|
|
236
|
-
modules_to_not_convert,
|
|
237
|
-
current_key_name,
|
|
238
|
-
quantization_config,
|
|
239
|
-
has_been_replaced=has_been_replaced,
|
|
240
|
-
pre_quantized=pre_quantized,
|
|
241
|
-
)
|
|
242
|
-
# Remove the last key for recursion
|
|
243
|
-
current_key_name.pop(-1)
|
|
244
|
-
return model, has_been_replaced
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
def replace_with_bnb_linear(
|
|
248
|
-
model, modules_to_not_convert=None, current_key_name=None, quantization_config=None, pre_quantized=False
|
|
249
|
-
):
|
|
250
|
-
"""
|
|
251
|
-
A helper function to replace all `torch.nn.Linear` modules by `bnb.nn.Linear8bit` modules from the `bitsandbytes`
|
|
252
|
-
library. This will enable running your models using mixed int8 precision as described by the paper `LLM.int8():
|
|
253
|
-
8-bit Matrix Multiplication for Transformers at Scale`. Make sure `bitsandbytes` compiled with the correct CUDA
|
|
254
|
-
version of your hardware is installed before running this function. `pip install -i https://test.pypi.org/simple/
|
|
255
|
-
bitsandbytes`
|
|
256
|
-
|
|
257
|
-
The function will be run recursively and replace all `torch.nn.Linear` modules except for the `lm_head` that should
|
|
258
|
-
be kept as a `torch.nn.Linear` module. The replacement is done under `init_empty_weights` context manager so no
|
|
259
|
-
CPU/GPU memory is required to run this function. Int8 mixed-precision matrix decomposition works by separating a
|
|
260
|
-
matrix multiplication into two streams: (1) and systematic feature outlier stream matrix multiplied in fp16
|
|
261
|
-
(0.01%), (2) a regular stream of int8 matrix multiplication (99.9%). With this method, int8 inference with no
|
|
262
|
-
predictive degradation is possible for very large models (>=176B parameters).
|
|
263
|
-
|
|
264
|
-
Parameters:
|
|
265
|
-
model (`torch.nn.Module`):
|
|
266
|
-
Input model or `torch.nn.Module` as the function is run recursively.
|
|
267
|
-
modules_to_not_convert (`list[`str`]`, *optional*, defaults to `["lm_head"]`):
|
|
268
|
-
Names of the modules to not convert in `Linear8bitLt`. In practice we keep the `lm_head` in full precision
|
|
269
|
-
for numerical stability reasons.
|
|
270
|
-
current_key_name (`list[`str`]`, *optional*):
|
|
271
|
-
An array to track the current key of the recursion. This is used to check whether the current key (part of
|
|
272
|
-
it) is not in the list of modules to not convert (for instances modules that are offloaded to `cpu` or
|
|
273
|
-
`disk`).
|
|
274
|
-
quantization_config ('transformers.utils.quantization_config.BitsAndBytesConfig'):
|
|
275
|
-
To configure and manage settings related to quantization, a technique used to compress neural network models
|
|
276
|
-
by reducing the precision of the weights and activations, thus making models more efficient in terms of both
|
|
277
|
-
storage and computation.
|
|
278
|
-
"""
|
|
279
|
-
modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
|
|
280
|
-
model, has_been_replaced = _replace_with_bnb_linear(
|
|
281
|
-
model, modules_to_not_convert, current_key_name, quantization_config, pre_quantized=pre_quantized
|
|
282
|
-
)
|
|
221
|
+
new_module.requires_grad_(False)
|
|
222
|
+
model.set_submodule(module_name, new_module)
|
|
223
|
+
has_been_replaced = True
|
|
283
224
|
|
|
284
225
|
if not has_been_replaced:
|
|
285
226
|
logger.warning(
|
|
286
|
-
"You are loading your model
|
|
227
|
+
"You are loading your model using eetq but no linear modules were found in your model."
|
|
287
228
|
" Please double check your model architecture, or submit an issue on github if you think this is"
|
|
288
229
|
" a bug."
|
|
289
230
|
)
|
|
@@ -291,7 +232,7 @@ def replace_with_bnb_linear(
|
|
|
291
232
|
|
|
292
233
|
|
|
293
234
|
# Copied from PEFT: https://github.com/huggingface/peft/blob/47b3712898539569c02ec5b3ed4a6c36811331a1/src/peft/utils/integrations.py#L41
|
|
294
|
-
def dequantize_bnb_weight(weight: "torch.nn.Parameter",
|
|
235
|
+
def dequantize_bnb_weight(weight: "torch.nn.Parameter", state=None):
|
|
295
236
|
"""
|
|
296
237
|
Helper function to dequantize 4bit or 8bit bnb weights.
|
|
297
238
|
|
|
@@ -306,10 +247,7 @@ def dequantize_bnb_weight(weight: "torch.nn.Parameter", dtype: "torch.dtype", st
|
|
|
306
247
|
|
|
307
248
|
if cls_name == "Params4bit":
|
|
308
249
|
output_tensor = bnb.functional.dequantize_4bit(weight.data, weight.quant_state)
|
|
309
|
-
|
|
310
|
-
f"The model is going to be dequantized in {output_tensor.dtype} - if you want to upcast it to another dtype, make sure to pass the desired dtype when quantizing the model through `bnb_4bit_quant_type` argument of `BitsAndBytesConfig`"
|
|
311
|
-
)
|
|
312
|
-
return output_tensor.to(dtype)
|
|
250
|
+
return output_tensor
|
|
313
251
|
|
|
314
252
|
if state.SCB is None:
|
|
315
253
|
state.SCB = weight.SCB
|
|
@@ -321,7 +259,7 @@ def dequantize_bnb_weight(weight: "torch.nn.Parameter", dtype: "torch.dtype", st
|
|
|
321
259
|
# Multiply by (scale/127) to dequantize.
|
|
322
260
|
dequantized = weight.data * state.SCB.view(-1, 1) * 7.874015718698502e-3
|
|
323
261
|
|
|
324
|
-
return dequantized
|
|
262
|
+
return dequantized
|
|
325
263
|
|
|
326
264
|
|
|
327
265
|
def _create_accelerate_new_hook(old_hook):
|
|
@@ -341,95 +279,48 @@ def _create_accelerate_new_hook(old_hook):
|
|
|
341
279
|
return new_hook
|
|
342
280
|
|
|
343
281
|
|
|
344
|
-
def
|
|
345
|
-
model,
|
|
346
|
-
dtype,
|
|
347
|
-
modules_to_not_convert=None,
|
|
348
|
-
current_key_name=None,
|
|
349
|
-
quantization_config=None,
|
|
350
|
-
has_been_replaced=False,
|
|
351
|
-
):
|
|
282
|
+
def dequantize_and_replace(model, quantization_config=None, dtype=None):
|
|
352
283
|
"""
|
|
353
284
|
Converts a quantized model into its dequantized original version. The newly converted model will have
|
|
354
285
|
some performance drop compared to the original model before quantization - use it only for specific usecases
|
|
355
286
|
such as QLoRA adapters merging.
|
|
356
287
|
|
|
357
|
-
Returns the converted model
|
|
288
|
+
Returns the converted model.
|
|
358
289
|
"""
|
|
359
290
|
quant_method = quantization_config.quantization_method()
|
|
360
291
|
|
|
361
292
|
target_cls = bnb.nn.Linear8bitLt if quant_method == "llm_int8" else bnb.nn.Linear4bit
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
current_key_name = []
|
|
366
|
-
current_key_name.append(name)
|
|
367
|
-
|
|
368
|
-
if isinstance(module, target_cls) and name not in modules_to_not_convert:
|
|
369
|
-
# Check if the current key is not in the `modules_to_not_convert`
|
|
370
|
-
current_key_name_str = ".".join(current_key_name)
|
|
371
|
-
|
|
372
|
-
if not any(
|
|
373
|
-
(key + "." in current_key_name_str) or (key == current_key_name_str) for key in modules_to_not_convert
|
|
374
|
-
):
|
|
293
|
+
for module_name, module in model.named_modules():
|
|
294
|
+
if isinstance(module, target_cls):
|
|
295
|
+
with torch.device("meta"):
|
|
375
296
|
bias = getattr(module, "bias", None)
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
new_module.to(device)
|
|
400
|
-
model._modules[name] = new_module
|
|
401
|
-
has_been_replaced = True
|
|
402
|
-
if len(list(module.children())) > 0:
|
|
403
|
-
_, has_been_replaced = _dequantize_and_replace(
|
|
404
|
-
module,
|
|
405
|
-
dtype,
|
|
406
|
-
modules_to_not_convert,
|
|
407
|
-
current_key_name,
|
|
408
|
-
quantization_config,
|
|
409
|
-
has_been_replaced=has_been_replaced,
|
|
410
|
-
)
|
|
411
|
-
# Remove the last key for recursion
|
|
412
|
-
current_key_name.pop(-1)
|
|
413
|
-
return model, has_been_replaced
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
def dequantize_and_replace(
|
|
417
|
-
model,
|
|
418
|
-
modules_to_not_convert=None,
|
|
419
|
-
quantization_config=None,
|
|
420
|
-
):
|
|
421
|
-
model, has_been_replaced = _dequantize_and_replace(
|
|
422
|
-
model,
|
|
423
|
-
model.dtype,
|
|
424
|
-
modules_to_not_convert=modules_to_not_convert,
|
|
425
|
-
quantization_config=quantization_config,
|
|
426
|
-
)
|
|
297
|
+
new_module = torch.nn.Linear(module.in_features, module.out_features, bias=bias is not None)
|
|
298
|
+
state = module.state if quant_method == "llm_int8" else None
|
|
299
|
+
new_module.weight = torch.nn.Parameter(dequantize_bnb_weight(module.weight, state))
|
|
300
|
+
weight = dequantize_bnb_weight(module.weight, state)
|
|
301
|
+
if dtype is None:
|
|
302
|
+
logger.warning_once(
|
|
303
|
+
f"The modules are dequantized in {weight.dtype}. If you want to change the dtype, please specify `dtype` in `dequantize`. "
|
|
304
|
+
)
|
|
305
|
+
else:
|
|
306
|
+
logger.warning_once(f"The modules are dequantized in {weight.dtype} and casted to {dtype}.")
|
|
307
|
+
weight = weight.to(dtype)
|
|
308
|
+
new_module.weight = torch.nn.Parameter(weight)
|
|
309
|
+
if bias is not None:
|
|
310
|
+
new_module.bias = bias
|
|
311
|
+
if hasattr(module, "_hf_hook"):
|
|
312
|
+
old_hook = module._hf_hook
|
|
313
|
+
new_hook = _create_accelerate_new_hook(old_hook)
|
|
314
|
+
remove_hook_from_module(module)
|
|
315
|
+
add_hook_to_module(new_module, new_hook)
|
|
316
|
+
new_module.to(module.weight.device)
|
|
317
|
+
model.set_submodule(module_name, new_module)
|
|
318
|
+
has_been_replaced = True
|
|
427
319
|
|
|
428
320
|
if not has_been_replaced:
|
|
429
321
|
logger.warning(
|
|
430
322
|
"For some reason the model has not been properly dequantized. You might see unexpected behavior."
|
|
431
323
|
)
|
|
432
|
-
|
|
433
324
|
return model
|
|
434
325
|
|
|
435
326
|
|
|
@@ -437,8 +328,6 @@ def validate_bnb_backend_availability(raise_exception=False):
|
|
|
437
328
|
"""
|
|
438
329
|
Validates if the available devices are supported by bitsandbytes, optionally raising an exception if not.
|
|
439
330
|
"""
|
|
440
|
-
import bitsandbytes as bnb
|
|
441
|
-
|
|
442
331
|
bnb_supported_devices = getattr(bnb, "supported_torch_devices", set())
|
|
443
332
|
available_devices = set(get_available_devices())
|
|
444
333
|
|
|
@@ -304,6 +304,15 @@ def _load_state_dict_into_zero3_model(model_to_load, state_dict):
|
|
|
304
304
|
state_dict._metadata = metadata
|
|
305
305
|
|
|
306
306
|
error_msgs = []
|
|
307
|
+
meta_model_state_dict = model_to_load.state_dict()
|
|
308
|
+
missing_keys = set(meta_model_state_dict.keys())
|
|
309
|
+
|
|
310
|
+
prefix_model = getattr(model_to_load, "base_model_prefix", None)
|
|
311
|
+
# take care of the case where in the checkpoint we don't have the prefix
|
|
312
|
+
state_dict = {
|
|
313
|
+
(f"{prefix_model}.{k}" if meta_model_state_dict.get(f"{prefix_model}.{k}") is not None else k): v
|
|
314
|
+
for k, v in state_dict.items()
|
|
315
|
+
}
|
|
307
316
|
|
|
308
317
|
# PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants
|
|
309
318
|
# so we need to apply the function recursively.
|
|
@@ -320,7 +329,14 @@ def _load_state_dict_into_zero3_model(model_to_load, state_dict):
|
|
|
320
329
|
# In sharded models, each shard has only part of the full state_dict, so only gather
|
|
321
330
|
# parameters that are in the current state_dict.
|
|
322
331
|
named_parameters = dict(module.named_parameters(prefix=prefix[:-1], recurse=False))
|
|
323
|
-
params_to_gather = [
|
|
332
|
+
params_to_gather = []
|
|
333
|
+
for k in named_parameters:
|
|
334
|
+
if k in state_dict:
|
|
335
|
+
param = named_parameters[k]
|
|
336
|
+
# crutial to not init the weight again
|
|
337
|
+
param._is_hf_initialized = True
|
|
338
|
+
params_to_gather.append(param)
|
|
339
|
+
missing_keys.discard(k)
|
|
324
340
|
|
|
325
341
|
if len(params_to_gather) > 0:
|
|
326
342
|
# because zero3 puts placeholders in model params, this context
|
|
@@ -336,7 +352,7 @@ def _load_state_dict_into_zero3_model(model_to_load, state_dict):
|
|
|
336
352
|
|
|
337
353
|
load(model_to_load, state_dict, assign_to_params_buffers=False)
|
|
338
354
|
|
|
339
|
-
return error_msgs
|
|
355
|
+
return error_msgs, missing_keys
|
|
340
356
|
|
|
341
357
|
|
|
342
358
|
def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps, model_parameters):
|