transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff compares two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/initialization.py
CHANGED
@@ -206,3 +206,40 @@ def guard_torch_init_functions():
         for module, functions in originals.items():
             for func_name, func in functions.items():
                 setattr(module, func_name, func)
+
+
+@contextmanager
+def no_init_weights():
+    """
+    Disable weight initialization both at the torch-level, and at the transformers-level (`init_weights`).
+    This is used to speed-up initializing an empty model with deepspeed, as we do not initialize the model on meta device
+    with deepspeed, but we still don't need to run expensive weight initializations as we are loading params afterwards.
+    """
+    from .modeling_utils import PreTrainedModel
+
+    def empty_func(*args, **kwargs):
+        pass
+
+    originals = defaultdict(dict)
+    try:
+        # Replace all torch funcs by empty ones
+        for module_name in TORCH_MODULES_TO_PATCH:
+            if module_name in sys.modules:
+                module = sys.modules[module_name]
+                for func_name in TORCH_INIT_FUNCTIONS.keys():
+                    if hasattr(module, func_name):
+                        originals[module][func_name] = getattr(module, func_name)
+                        setattr(module, func_name, empty_func)
+
+        # Also patch our own `init_weights`
+        original_init_weights = PreTrainedModel.init_weights
+        PreTrainedModel.init_weights = empty_func
+
+        yield
+    finally:
+        # Set back the original torch functions on all modules
+        for module, functions in originals.items():
+            for func_name, func in functions.items():
+                setattr(module, func_name, func)
+        # Set back `init_weights`
+        PreTrainedModel.init_weights = original_init_weights
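Note: for context, a minimal usage sketch of the new `no_init_weights` helper added above (assuming it is importable from `transformers.initialization` as the module path suggests; the model class is only illustrative, and per its docstring the helper's intended use is internal deepspeed initialization):

```python
from transformers import BertConfig, BertModel
from transformers.initialization import no_init_weights  # module path per the diff above

config = BertConfig()
with no_init_weights():
    # torch init functions and PreTrainedModel.init_weights are temporarily no-ops,
    # so parameters are allocated but not randomly initialized.
    model = BertModel(config)
# Useful when the weights are about to be overwritten anyway, e.g. by a checkpoint load.
```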
transformers/integrations/__init__.py
CHANGED

@@ -19,7 +19,6 @@ from ..utils import OptionalDependencyNotAvailable, _LazyModule, is_torch_availa
 _import_structure = {
     "aqlm": ["replace_with_aqlm_linear"],
     "awq": [
-        "fuse_awq_modules",
         "post_init_awq_exllama_modules",
         "post_init_awq_ipex_modules",
         "replace_quantization_scales",
@@ -54,6 +53,7 @@ _import_structure = {
     "finegrained_fp8": ["FP8Linear", "replace_with_fp8_linear"],
     "fsdp": ["is_fsdp_enabled", "is_fsdp_managed_module"],
     "ggml": [
+        "GGUF_CONFIG_DEFAULTS_MAPPING",
         "GGUF_CONFIG_MAPPING",
         "GGUF_TOKENIZER_MAPPING",
         "_gguf_parse_value",
@@ -69,10 +69,12 @@ _import_structure = {
     "hqq": ["prepare_for_hqq_linear"],
     "hub_kernels": [
         "LayerRepository",
+        "lazy_load_kernel",
         "register_kernel_mapping",
         "replace_kernel_forward_from_hub",
         "use_kernel_forward_from_hub",
         "use_kernel_func_from_hub",
+        "use_kernelized_func",
     ],
     "integration_utils": [
         "INTEGRATION_TO_CALLBACK",
@@ -115,6 +117,11 @@ _import_structure = {
         "run_hp_search_ray",
         "run_hp_search_wandb",
     ],
+    "moe": [
+        "batched_mm_experts_forward",
+        "grouped_mm_experts_forward",
+        "use_experts_implementation",
+    ],
     "mxfp4": [
         "Mxfp4GptOssExperts",
         "convert_moe_packed_tensors",
@@ -165,7 +172,6 @@ else:
 if TYPE_CHECKING:
     from .aqlm import replace_with_aqlm_linear
     from .awq import (
-        fuse_awq_modules,
         post_init_awq_exllama_modules,
         post_init_awq_ipex_modules,
         replace_quantization_scales,
@@ -200,6 +206,7 @@ if TYPE_CHECKING:
     from .finegrained_fp8 import FP8Linear, replace_with_fp8_linear
     from .fsdp import is_fsdp_enabled, is_fsdp_managed_module
     from .ggml import (
+        GGUF_CONFIG_DEFAULTS_MAPPING,
         GGUF_CONFIG_MAPPING,
         GGUF_TOKENIZER_MAPPING,
         _gguf_parse_value,
@@ -210,10 +217,12 @@ if TYPE_CHECKING:
     from .hqq import prepare_for_hqq_linear
     from .hub_kernels import (
         LayerRepository,
+        lazy_load_kernel,
         register_kernel_mapping,
         replace_kernel_forward_from_hub,
         use_kernel_forward_from_hub,
         use_kernel_func_from_hub,
+        use_kernelized_func,
     )
     from .integration_utils import (
         INTEGRATION_TO_CALLBACK,
@@ -256,6 +265,11 @@ if TYPE_CHECKING:
         run_hp_search_ray,
         run_hp_search_wandb,
     )
+    from .moe import (
+        batched_mm_experts_forward,
+        grouped_mm_experts_forward,
+        use_experts_implementation,
+    )
     from .mxfp4 import (
         Mxfp4GptOssExperts,
         dequantize,
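Note: the new lazy "moe" entry above exposes the experts-forward helpers at the `transformers.integrations` package level. Only the import surface is visible in this diff; the helpers' call signatures are not, so the sketch below illustrates just the imports the mapping enables:

```python
# Import surface per the _import_structure mapping above; call signatures are not
# shown in this diff and are therefore not illustrated here.
from transformers.integrations import (
    batched_mm_experts_forward,
    grouped_mm_experts_forward,
    use_experts_implementation,
)
```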
@@ -21,7 +21,6 @@ import inspect
|
|
|
21
21
|
import os
|
|
22
22
|
import re
|
|
23
23
|
from collections import OrderedDict, defaultdict
|
|
24
|
-
from contextlib import contextmanager
|
|
25
24
|
from typing import TYPE_CHECKING
|
|
26
25
|
|
|
27
26
|
from safetensors import safe_open
|
|
@@ -55,114 +54,6 @@ if TYPE_CHECKING:
 logger = logging.get_logger(__name__)


-@contextmanager
-def init_empty_weights(include_buffers: bool = False):
-    """
-    A context manager under which models are initialized with all parameters on the meta device, therefore creating an
-    empty model. Useful when just initializing the model would blow the available RAM.
-
-    Args:
-        include_buffers (`bool`, *optional*):
-            Whether or not to also put all buffers on the meta device while initializing.
-
-    Example:
-
-    ```python
-    import torch.nn as nn
-    from accelerate import init_empty_weights
-
-    # Initialize a model with 100 billions parameters in no time and without using any RAM.
-    with init_empty_weights():
-        tst = nn.Sequential(*[nn.Linear(10000, 10000) for _ in range(1000)])
-    ```
-
-    <Tip warning={true}>
-
-    Any model created under this context manager has no weights. As such you can't do something like
-    `model.to(some_device)` with it. To load weights inside your empty model, see [`load_checkpoint_and_dispatch`].
-    Make sure to overwrite the default device_map param for [`load_checkpoint_and_dispatch`], otherwise dispatch is not
-    called.
-
-    </Tip>
-    """
-    with init_on_device(torch.device("meta"), include_buffers=include_buffers) as f:
-        yield f
-
-
-@contextmanager
-def init_on_device(device: "torch.device", include_buffers: bool = False):
-    """
-    A context manager under which models are initialized with all parameters on the specified device.
-
-    Args:
-        device (`torch.device`):
-            Device to initialize all parameters on.
-        include_buffers (`bool`, *optional*):
-            Whether or not to also put all buffers on the meta device while initializing.
-
-    Example:
-
-    ```python
-    import torch.nn as nn
-    from accelerate import init_on_device
-
-    with init_on_device(device=torch.device("cuda")):
-        tst = nn.Linear(100, 100)  # on `cuda` device
-    ```
-    """
-    if include_buffers:
-        with device:
-            yield
-        return
-
-    old_register_parameter = nn.Module.register_parameter
-    if include_buffers:
-        old_register_buffer = nn.Module.register_buffer
-
-    def register_empty_parameter(module, name, param):
-        old_register_parameter(module, name, param)
-        if param is not None:
-            param_cls = type(module._parameters[name])
-            kwargs = module._parameters[name].__dict__
-            kwargs["requires_grad"] = param.requires_grad
-            module._parameters[name] = param_cls(module._parameters[name].to(device), **kwargs)
-
-    def register_empty_buffer(module, name, buffer, persistent=True):
-        old_register_buffer(module, name, buffer, persistent=persistent)
-        if buffer is not None:
-            module._buffers[name] = module._buffers[name].to(device)
-
-    # Patch tensor creation
-    if include_buffers:
-        tensor_constructors_to_patch = {
-            torch_function_name: getattr(torch, torch_function_name)
-            for torch_function_name in ["empty", "zeros", "ones", "full"]
-        }
-    else:
-        tensor_constructors_to_patch = {}
-
-    def patch_tensor_constructor(fn):
-        def wrapper(*args, **kwargs):
-            kwargs["device"] = device
-            return fn(*args, **kwargs)
-
-        return wrapper
-
-    try:
-        nn.Module.register_parameter = register_empty_parameter
-        if include_buffers:
-            nn.Module.register_buffer = register_empty_buffer
-        for torch_function_name in tensor_constructors_to_patch:
-            setattr(torch, torch_function_name, patch_tensor_constructor(getattr(torch, torch_function_name)))
-        yield
-    finally:
-        nn.Module.register_parameter = old_register_parameter
-        if include_buffers:
-            nn.Module.register_buffer = old_register_buffer
-        for torch_function_name, old_torch_function in tensor_constructors_to_patch.items():
-            setattr(torch, torch_function_name, old_torch_function)
-
-
 def check_and_set_device_map(device_map: "torch.device | int | str | dict | None") -> dict | str | None:
     from ..modeling_utils import get_torch_context_manager_or_global_device

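The two context managers removed above duplicated helpers that `accelerate` already provides, and newer loading code in this release builds modules directly under PyTorch's native meta-device context instead (the rewritten AQLM integration later in this diff does exactly that). A minimal, hedged sketch of the replacement pattern, assuming PyTorch 2.x:

```python
import torch
import torch.nn as nn

# Modules built under a meta-device context allocate no real storage, which is
# roughly what init_empty_weights() previously provided via monkey-patching.
with torch.device("meta"):
    empty = nn.Sequential(*[nn.Linear(1024, 1024) for _ in range(4)])

print(empty[0].weight.device)  # meta
print(empty[0].weight.shape)   # torch.Size([1024, 1024]), shape known without allocating memory
# Real values are filled in later, e.g. with load_state_dict(state_dict, assign=True).
```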
@@ -182,6 +73,10 @@ def check_and_set_device_map(device_map: "torch.device | int | str | dict | None
         device_map = {"": device_map}
     elif isinstance(device_map, str) and device_map not in ["auto", "balanced", "balanced_low_0", "sequential"]:
         try:
+            if device_map == "cuda":
+                # setting to the local rank
+                local_rank = int(os.environ.get("LOCAL_RANK", 0))
+                device_map = f"cuda:{local_rank}"
             device_map = {"": torch.device(device_map)}
         except RuntimeError:
             raise ValueError(
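The added branch resolves a bare `device_map="cuda"` to the current process's local GPU before it is wrapped into a `torch.device`. A hedged usage sketch (the checkpoint name is only an example, and `LOCAL_RANK` is assumed to be set by the launcher, e.g. `torchrun`):

```python
from transformers import AutoModelForCausalLM

# Hedged sketch: under `torchrun --nproc-per-node 4 script.py` each process has its
# own LOCAL_RANK, so device_map="cuda" now resolves to cuda:0 .. cuda:3 per rank
# instead of every rank loading onto cuda:0.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B",  # example checkpoint
    device_map="cuda",    # rewritten to f"cuda:{LOCAL_RANK}" by the branch above
)
print(model.device)
```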
@@ -392,6 +287,15 @@ def _get_device_map(
         )
     else:
         inferred_max_memory = get_max_memory(max_memory)
+
+        # If the user does not provide `max_memory`, accelerate sets the WHOLE cpu available memory as available.
+        # This is unwanted, as we don't want to set extremely tight bound and pressure for cpu if we are memory-constrained,
+        # especially if the model uses WeightConverter (because there will be some uncontrollable cpu memory spikes during
+        # the conversions before we resave the weights). In those cases, it's better to offload to disk a bit more
+        # if we were in-between, as otherwise we blow-up cpu memory
+        if max_memory is None and "cpu" in inferred_max_memory:
+            inferred_max_memory["cpu"] *= 0.90
+
     if hf_quantizer is not None:
         inferred_max_memory = hf_quantizer.adjust_max_memory(inferred_max_memory)

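A small worked illustration of the new 0.90 factor (the byte counts are made up):

```python
# If accelerate infers 128 GiB of free host RAM and the caller passed no
# max_memory, the CPU budget used for device-map planning becomes 115.2 GiB,
# so borderline weights are planned for disk offload instead of filling RAM.
inferred_max_memory = {0: 24 * 1024**3, "cpu": 128 * 1024**3}
max_memory = None

if max_memory is None and "cpu" in inferred_max_memory:
    inferred_max_memory["cpu"] *= 0.90

print(inferred_max_memory["cpu"] / 1024**3)  # 115.2
```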
@@ -449,10 +353,13 @@ def accelerate_dispatch(model, hf_quantizer, device_map, offload_folder, offload
     dispatch_model(model, **device_map_kwargs)


-def expand_device_map(device_map, param_names):
+def expand_device_map(device_map: dict | None, param_names: list[str]):
     """
     Expand a device map to return the correspondence parameter name to device.
     """
+    if device_map is None:
+        return dict.fromkeys(param_names, "cpu")
+
     # Here, we first sort by number of submodules, then length of the full string, to make sure to match correctly
     device_map_regex = re.compile(
         "|".join(rf"({k})" for k in sorted(device_map.keys(), key=lambda x: (x.count("."), len(x)), reverse=True))
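A hedged usage sketch of `expand_device_map` (the import path and the exact prefix-matching behaviour are inferred from the regex built above): each parameter name is mapped to the device of its most specific module prefix, and the new `None` branch treats a missing device map as all-CPU.

```python
from transformers.integrations.accelerate import expand_device_map  # module path assumed

device_map = {"model.layers.0": 0, "model.layers.1": "disk", "": "cpu"}
params = [
    "model.layers.0.self_attn.q_proj.weight",
    "model.layers.1.mlp.up_proj.weight",
    "lm_head.weight",
]

# Longest / most specific prefix wins, so the expected result is:
# {'model.layers.0.self_attn.q_proj.weight': 0,
#  'model.layers.1.mlp.up_proj.weight': 'disk',
#  'lm_head.weight': 'cpu'}
print(expand_device_map(device_map, params))

# New behavior: no device_map at all now means every parameter is treated as CPU.
print(expand_device_map(None, params))
```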
@@ -465,11 +372,20 @@ def expand_device_map(device_map, param_names):
     return new_device_map


+def get_device(device_map: dict | None, param_name: str, valid_torch_device: bool = False) -> torch.device | str | int:
+    """Return the device on which `param_name` should be according to the `device_map`. If `valid_torch_device` is `True`,
+    then if the device is `"disk"`, `"cpu"` will be returned instead."""
+    device = expand_device_map(device_map, [param_name])[param_name]
+    if valid_torch_device and device == "disk":
+        return "cpu"
+    return device
+
+
 def accelerate_disk_offload(
+    model: "PreTrainedModel",
     disk_offload_folder: str | None,
     checkpoint_files: list[str] | None,
     device_map: dict,
-    expected_keys: list[str],
     sharded_metadata: dict | None,
     dtype: torch.dtype | None,
     weight_mapping=None,
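Continuing that example, a hedged sketch of the new `get_device` helper, which reuses `expand_device_map` for a single parameter and can coerce `"disk"` to `"cpu"` when a usable torch device is required:

```python
from transformers.integrations.accelerate import get_device  # module path assumed

device_map = {"model.layers.1": "disk", "": "cpu"}

print(get_device(device_map, "model.layers.1.mlp.up_proj.weight"))                           # disk
print(get_device(device_map, "model.layers.1.mlp.up_proj.weight", valid_torch_device=True))  # cpu
print(get_device(None, "lm_head.weight"))                                                    # cpu
```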
@@ -493,7 +409,8 @@ def accelerate_disk_offload(
     # In this case, the offload index is simply the existing safetensors (except if using custom weight loading
     # Operation, e.g. the MoE models, where we need to resave the weights that were changed at loading time)
     if is_offloaded_safetensors:
-
+        meta_state_dict = model.state_dict()
+        param_device_map = expand_device_map(device_map, meta_state_dict.keys())
         str_dtype = str(dtype).replace("torch.", "") if dtype is not None else "float32"
         if sharded_metadata is None:
             weight_map = dict.fromkeys(safe_open(checkpoint_files[0], framework="pt").keys(), checkpoint_files[0])
@@ -502,7 +419,9 @@ def accelerate_disk_offload(
             weight_map = {k: os.path.join(folder, v) for k, v in sharded_metadata["weight_map"].items()}

         # Update the weight names according to the `weight_mapping`
-        weight_renaming_map = {
+        weight_renaming_map = {
+            rename_source_key(k, renamings, [], model.base_model_prefix, meta_state_dict)[0]: k for k in weight_map
+        }

         # Prepare the index using existing safetensors files
         disk_offload_index = {
@@ -542,6 +461,32 @@ def offload_weight(weight: torch.Tensor, weight_name: str, offload_folder: str |
     return offload_index


+def load_offloaded_parameter(model: "PreTrainedModel", param_name: str) -> torch.Tensor:
+    """Load `param_name` from disk, if it was offloaded due to the device_map, and thus lives as a meta parameter
+    inside `model`.
+    This is needed when resaving a model, when some parameters were offloaded (we need to load them from disk, to
+    then resave them to disk in the correct shard...)."""
+    # Start from the most inner module, and try to find the hook that was used for offloading the param
+    module_parts = param_name.split(".")
+    modules_to_check = [".".join(module_parts[:-idx]) for idx in range(1, len(module_parts))] + [""]
+    for parent_name in modules_to_check:
+        parent = model.get_submodule(parent_name)
+        if hasattr(parent, "_hf_hook"):
+            weights_map = parent._hf_hook.weights_map
+            truncated_param_name = param_name.replace(f"{parent_name}." if parent_name != "" else parent_name, "")
+            break
+    # If we did not break the loop, something is wrong
+    else:
+        raise ValueError(
+            f"{param_name} is on the meta device because it was offloaded, but we could not find "
+            "the corresponding hook for it"
+        )
+
+    # This call loads it from disk
+    tensor = weights_map[truncated_param_name]
+    return tensor
+
+
 def _init_infer_auto_device_map(
     model: nn.Module,
     max_memory: dict[int | str, int | str] | None = None,
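A hedged usage sketch of `load_offloaded_parameter`: when part of a model is offloaded to disk, the affected parameters stay on the meta device and accelerate's `_hf_hook.weights_map` knows where each one lives in the offload folder. The checkpoint, memory budget and parameter name below are placeholders:

```python
from transformers import AutoModelForCausalLM
from transformers.integrations.accelerate import load_offloaded_parameter  # module path assumed

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B",           # example checkpoint
    device_map="auto",
    max_memory={"cpu": "200MiB"},  # artificially tight budget to force disk offload
    offload_folder="offload",
)

name = "model.layers.23.mlp.down_proj.weight"   # hypothetical parameter that ended up on disk
tensor = load_offloaded_parameter(model, name)  # materialized from the offload folder
print(tensor.shape, tensor.device)
```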
@@ -13,88 +13,58 @@
 # limitations under the License.
 "AQLM (Additive Quantization of Language Model) integration file"

-from ..
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_torch_available, logging


 if is_torch_available():
+    import torch
     import torch.nn as nn

+logger = logging.get_logger(__name__)

-
-
-    quantization_config=None,
-    linear_weights_not_to_quantize=None,
-    current_key_name=None,
-    has_been_replaced=False,
-):
+
+def replace_with_aqlm_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
     Public method that recursively replaces the Linear layers of the given model with AQLM quantized layers.
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.

     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
-
-            The quantization config object that contains the quantization parameters.
-        linear_weights_not_to_quantize (`list[str]`, *optional*):
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
             A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
             converted.
-
-
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
+        quantization_config (`AqlmConfig`):
+            The quantization config object that contains the quantization parameters.
     """
-    if not is_aqlm_available():
-        raise ValueError("AQLM is not available. Please install it with `pip install aqlm[cpu,gpu]`")
-
-    if not is_accelerate_available():
-        raise ValueError(
-            f"AQLM requires Accelerate to be installed: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
-        )
-
-    if linear_weights_not_to_quantize is None:
-        linear_weights_not_to_quantize = []
-
-    from accelerate import init_empty_weights
     from aqlm import QuantizedLinear

-
-
-
-
+    has_been_replaced = False
+    # we need this to correctly materialize the weights during quantization
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with torch.device("meta"):
+            if isinstance(module, nn.Linear):
+                new_module = QuantizedLinear(
+                    module.in_features,
+                    module.out_features,
+                    bias=module.bias is not None,
+                    in_group_size=quantization_config.in_group_size,
+                    out_group_size=quantization_config.out_group_size,
+                    num_codebooks=quantization_config.num_codebooks,
+                    nbits_per_codebook=quantization_config.nbits_per_codebook,
+                )
+                new_module.source_cls = type(module)
+                new_module.requires_grad_(False)
+                model.set_submodule(module_name, new_module)
+                has_been_replaced = True

-
-
-
-
-
-
-
-                model._modules[name] = QuantizedLinear(
-                    in_features,
-                    out_features,
-                    bias=module.bias is not None,
-                    in_group_size=quantization_config.in_group_size,
-                    out_group_size=quantization_config.out_group_size,
-                    num_codebooks=quantization_config.num_codebooks,
-                    nbits_per_codebook=quantization_config.nbits_per_codebook,
-                )
-                has_been_replaced = True
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )

-
-                model._modules[name].source_cls = type(module)
-                # Force requires grad to False to avoid unexpected errors
-                model._modules[name].requires_grad_(False)
-            if len(list(module.children())) > 0:
-                _, has_been_replaced = replace_with_aqlm_linear(
-                    module,
-                    quantization_config=quantization_config,
-                    linear_weights_not_to_quantize=linear_weights_not_to_quantize,
-                    current_key_name=current_key_name,
-                    has_been_replaced=has_been_replaced,
-                )
-                # Remove the last key for recursion
-                current_key_name.pop(-1)
-    return model, has_been_replaced
+    return model
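A hedged sketch of the rewritten entry point: a single flat pass over `named_modules()` with each replacement built on the meta device, replacing the old recursive walk that returned `(model, has_been_replaced)`. It assumes the `aqlm` package is installed; the toy model and the `modules_to_not_convert` entry are made up:

```python
import torch.nn as nn
from transformers import AqlmConfig
from transformers.integrations import replace_with_aqlm_linear

model = nn.Sequential(nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 512))
config = AqlmConfig(in_group_size=8, out_group_size=1, num_codebooks=1, nbits_per_codebook=16)

# Note the new signature: modules_to_not_convert instead of
# linear_weights_not_to_quantize, and a single return value.
model = replace_with_aqlm_linear(
    model,
    modules_to_not_convert=["0"],   # hypothetical: skip the first Linear (module name "0")
    quantization_config=config,
)
print(model)  # remaining Linear layers are now aqlm QuantizedLinear modules on the meta device
```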