transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl
This diff covers two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/utils/quantization_config.py

@@ -28,16 +28,13 @@ from typing import Any, Optional, Union
 from packaging import version
 
 from ..utils import (
-    is_auto_awq_available,
     is_compressed_tensors_available,
-    is_gptqmodel_available,
     is_hqq_available,
     is_quark_available,
     is_torch_available,
     is_torchao_available,
     logging,
 )
-from .import_utils import is_auto_gptq_available
 
 
 if is_torch_available():
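The two dropped imports above are availability probes for the retired auto-gptq/auto-awq integrations. For context, the `is_*_available` helpers in `transformers.utils.import_utils` follow the pattern sketched below (a simplification; the real helpers add caching and version checks):

```python
# Minimal sketch of the availability-probe pattern behind helpers like
# is_torch_available(). Illustrative only, not the transformers implementation.
import importlib.util

def _is_package_available(pkg_name: str) -> bool:
    # find_spec locates the package without importing it, so the probe stays
    # cheap and cannot fail on the package's own import-time side effects.
    return importlib.util.find_spec(pkg_name) is not None

if _is_package_available("torch"):
    import torch  # imported only when actually installed
```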
@@ -68,30 +65,27 @@ class QuantizationMethod(str, Enum):
     MXFP4 = "mxfp4"
 
 
-class …
+class AwqFormat(str, Enum):
     GEMM = "gemm"
     GEMV = "gemv"
-    …
-class AwqBackendPackingMethod(str, Enum):
-    AUTOAWQ = "autoawq"
-    LLMAWQ = "llm-awq"
+    GEMV_FAST = "gemv_fast"
+    LLM_AWQ = "llm-awq"
+
+
+class AwqBackend(str, Enum):
+    LEGACY_AWQ = "autoawq"
+    AUTO = "auto"
+    AUTO_TRAINABLE = "auto_trainable"
+    MACHETE = "machete"
+    MARLIN = "marlin"
+    EXLLAMA_V2 = "exllama_v2"
+    EXLLAMA_V1 = "exllama_v1"
+    GEMM = "gemm"
+    GEMM_TRITON = "gemm_triton"
+    GEMV = "gemv"
+    GEMV_FAST = "gemv_fast"
+    TORCH_AWQ = "torch_awq"
+    TORCH_FUSED_AWQ = "torch_fused_awq"
 
 
 @dataclass
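Both new enums mix in `str`, so members compare equal to the plain strings stored in serialized configs and can be rebuilt from them. A minimal sketch of that round-trip, using an abbreviated copy of `AwqBackend` (the full member list is in the hunk above):

```python
from enum import Enum

class AwqBackend(str, Enum):
    # abbreviated copy of the enum added in rc2, for illustration only
    AUTO = "auto"
    MARLIN = "marlin"
    GEMV_FAST = "gemv_fast"

# str mixin: members compare equal to their serialized string form...
assert AwqBackend.AUTO == "auto"
# ...and the constructor maps a raw config value back to a member.
assert AwqBackend("marlin") is AwqBackend.MARLIN
```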
@@ -620,7 +614,7 @@ class ExllamaVersion(int, Enum):
 class GPTQConfig(QuantizationConfigMixin):
     """
     This is a wrapper class about all possible attributes and features that you can play with a model that has been
-    loaded using `optimum` api for …
+    loaded using `optimum` api for GPTQ quantization relying on the gptqmodel backend.
 
     Args:
         bits (`int`):
@@ -641,22 +635,23 @@ class GPTQConfig(QuantizationConfigMixin):
         desc_act (`bool`, *optional*, defaults to `False`):
             Whether to quantize columns in order of decreasing activation size. Setting it to False can significantly
             speed up inference but the perplexity may become slightly worse. Also known as act-order.
+        act_group_aware (`bool`, *optional*, defaults to `True`):
+            Use GAR (group aware activation order) during quantization. Has measurable positive impact on quantization
+            quality. Only applicable when `desc_act = False`. Will forced to be `False` when `desc_act = True`.
         sym (`bool`, *optional*, defaults to `True`):
             Whether to use symmetric quantization.
         true_sequential (`bool`, *optional*, defaults to `True`):
             Whether to perform sequential quantization even within a single Transformer block. Instead of quantizing
             the entire block at once, we perform layer-wise quantization. As a result, each layer undergoes
             quantization using inputs that have passed through the previously quantized layers.
-        …
-            GPTQ weight format. `gptq`(v1) is supported by …
+        format (`str`, *optional*, defaults to `"gptq"`):
+            GPTQ weight format. `gptq` (v1) is supported by gptqmodel. `gptq_v2` is gptqmodel only.
         meta (`dict[str, any]`, *optional*):
             Properties, such as tooling:version, that do not directly contributes to quantization or quant inference are stored in meta.
             i.e. `meta.quantizer`: ["optimum:_version_", "gptqmodel:_version_"]
         backend (`str`, *optional*):
-            Controls which …
-            …
-        use_cuda_fp16 (`bool`, *optional*, defaults to `False`):
-            Whether or not to use optimized cuda kernel for fp16 model. Need to have model in fp16. Auto-gptq only.
+            Controls which kernel to use. Valid values for gptqmodel are `auto`, `auto_trainable` and more. Ref gptqmodel backends:
+            https://github.com/ModelCloud/GPTQModel/blob/main/gptqmodel/utils/backend.py
         model_seqlen (`int`, *optional*):
             The maximum sequence length that the model can take.
         block_name_to_quantize (`str`, *optional*):
@@ -667,14 +662,9 @@
             The batch size used when processing the dataset
         pad_token_id (`int`, *optional*):
             The pad token id. Needed to prepare the dataset when `batch_size` > 1.
-        use_exllama (`bool`, *optional*):
-            Whether to use exllama backend. Defaults to `True` if unset. Only works with `bits` = 4.
         max_input_length (`int`, *optional*):
             The maximum input length. This is needed to initialize a buffer that depends on the maximum expected input
             length. It is specific to the exllama backend with act-order.
-        exllama_config (`dict[str, Any]`, *optional*):
-            The exllama config. You can specify the version of the exllama kernel through the `version` key. Defaults
-            to `{"version": 1}` if unset.
         cache_block_outputs (`bool`, *optional*, defaults to `True`):
             Whether to cache block outputs to reuse as inputs for the succeeding block.
         modules_in_block_to_quantize (`list[list[str]]`, *optional*):
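With `use_exllama` and `exllama_config` gone from the documented arguments, kernel choice is now expressed through `backend` alone. The removed `post_init` logic in the final hunk below shows how rc0 already mapped one onto the other; a hedged migration sketch based on that mapping (`migrate_gptq_kwargs` is a hypothetical helper, not part of the package):

```python
from typing import Any

def migrate_gptq_kwargs(old: dict[str, Any]) -> dict[str, Any]:
    # Hypothetical helper: rewrites rc0-era GPTQConfig kwargs for rc2.
    new = dict(old)
    use_exllama = new.pop("use_exllama", None)
    new.pop("exllama_config", None)  # exllama version selection was dropped
    new.pop("use_cuda_fp16", None)   # auto-gptq only; removed in rc2
    if "backend" not in new:
        # Same rule the removed rc0 post_init applied: an explicit
        # use_exllama=False meant the trainable backend.
        new["backend"] = "auto_trainable" if use_exllama is not None and not use_exllama else "auto"
    return new

assert migrate_gptq_kwargs({"bits": 4, "use_exllama": False})["backend"] == "auto_trainable"
```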
@@ -694,20 +684,18 @@ class GPTQConfig(QuantizationConfigMixin):
         group_size: int = 128,
         damp_percent: float = 0.1,
         desc_act: bool = False,
+        act_group_aware: bool = True,
         sym: bool = True,
         true_sequential: bool = True,
-        checkpoint_format: str = "gptq",
-        meta: dict[str, Any] | None = None,
-        backend: str | None = None,
-        use_cuda_fp16: bool = False,
-        model_seqlen: int | None = None,
-        block_name_to_quantize: str | None = None,
-        module_name_preceding_first_block: list[str] | None = None,
+        format: str = "gptq",
+        meta: Optional[dict[str, Any]] = None,
+        backend: Optional[str] = None,
+        model_seqlen: Optional[int] = None,
+        block_name_to_quantize: Optional[str] = None,
+        module_name_preceding_first_block: Optional[list[str]] = None,
         batch_size: int = 1,
-        pad_token_id: int | None = None,
-        use_exllama: bool | None = None,
-        max_input_length: int | None = None,
-        exllama_config: dict[str, Any] | None = None,
+        pad_token_id: Optional[int] = None,
+        max_input_length: Optional[int] = None,
         cache_block_outputs: bool = True,
         modules_in_block_to_quantize: list[list[str]] | None = None,
         **kwargs,
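A minimal usage sketch (not part of the diff itself) of the reworked constructor; the parameters come from the signature above and the legacy-field fallback from the `__init__` body in the next hunk:

    from transformers import GPTQConfig

    # New-style config: `format` replaces the removed `checkpoint_format`,
    # and the exllama / use_cuda_fp16 knobs are gone entirely.
    config = GPTQConfig(
        bits=4,
        group_size=128,
        desc_act=False,
        act_group_aware=True,  # new flag in this release
        format="gptq",
        backend="auto",  # gptqmodel kernel selection, e.g. "auto" or "auto_trainable"
    )

    # Configs that still pass the legacy field keep working:
    legacy = GPTQConfig(bits=4, checkpoint_format="gptq_v2")
    assert legacy.format == "gptq_v2"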
@@ -719,33 +707,28 @@ class GPTQConfig(QuantizationConfigMixin):
         self.group_size = group_size
         self.damp_percent = damp_percent
         self.desc_act = desc_act
+        self.act_group_aware = act_group_aware
         self.sym = sym
         self.true_sequential = true_sequential
-        self.checkpoint_format = checkpoint_format.lower()
+        self.format = format.lower()
+        # Compatible with legacy field: checkpoint_format
+        if kwargs.get("checkpoint_format") is not None:
+            self.format = kwargs.pop("checkpoint_format").lower()
         self.meta = meta
         self.backend = backend.lower() if isinstance(backend, str) else backend
-        self.use_cuda_fp16 = use_cuda_fp16
         self.model_seqlen = model_seqlen
         self.block_name_to_quantize = block_name_to_quantize
         self.module_name_preceding_first_block = module_name_preceding_first_block
         self.batch_size = batch_size
         self.pad_token_id = pad_token_id
-        self.use_exllama = use_exllama
         self.max_input_length = max_input_length
-        self.exllama_config = exllama_config
         self.cache_block_outputs = cache_block_outputs
         self.modules_in_block_to_quantize = modules_in_block_to_quantize
         self.post_init()

     def get_loading_attributes(self):
         attributes_dict = copy.deepcopy(self.__dict__)
-        loading_attributes = [
-            "use_exllama",
-            "exllama_config",
-            "use_cuda_fp16",
-            "max_input_length",
-            "backend",
-        ]
+        loading_attributes = ["max_input_length", "backend"]
         loading_attributes_dict = {i: j for i, j in attributes_dict.items() if i in loading_attributes}
         return loading_attributes_dict

@@ -772,46 +755,14 @@ class GPTQConfig(QuantizationConfigMixin):
                 ['wikitext2','c4','c4-new'], but we found {self.dataset}"""
             )

-        # make sure backend is back/forward compatible with both gptqmodel (full) and auto-gptq (partial)
-        if is_gptqmodel_available():
-            # convert auto-gptq control into gptqmodel backend
-            if self.backend is None:
-                self.backend = "auto_trainable" if self.use_exllama is not None and not self.use_exllama else "auto"
-        else:
-            # convert gptqmodel backend `auto_trainable` into auto-gptq control
-            if self.backend == "auto_trainable":
-                self.use_exllama = False
+        # act_group_order is only applicable when `desc_act = False`
+        if self.desc_act and self.act_group_aware:
+            self.act_group_aware = False
+            logger.warning("`act_group_aware` has been auto-disabled as it is not compatible with `desc_act = True`.")

-        # auto-gptq control
-        if self.use_exllama is None:
-            # New default behaviour
-            self.use_exllama = True
-
-        if self.exllama_config is None:
-            self.exllama_config = {"version": ExllamaVersion.ONE}
-        else:
-            if "version" not in self.exllama_config:
-                raise ValueError("`exllama_config` needs to have a `version` key.")
-            elif self.exllama_config["version"] not in [ExllamaVersion.ONE, ExllamaVersion.TWO]:
-                exllama_version = self.exllama_config["version"]
-                raise ValueError(
-                    f"Only supported versions are in [ExllamaVersion.ONE, ExllamaVersion.TWO] - not recognized version {exllama_version}"
-                )
-
-        if self.bits == 4 and self.use_exllama:
-            if self.exllama_config["version"] == ExllamaVersion.ONE:
-                logger.info(
-                    "You have activated exllama backend. Note that you can get better inference "
-                    "speed using exllamav2 kernel by setting `exllama_config`."
-                )
-            elif self.exllama_config["version"] == ExllamaVersion.TWO:
-                if is_auto_gptq_available():
-                    optimum_version = version.parse(importlib.metadata.version("optimum"))
-                    autogptq_version = version.parse(importlib.metadata.version("auto_gptq"))
-                    if optimum_version <= version.parse("1.13.2") or autogptq_version <= version.parse("0.4.2"):
-                        raise ValueError(
-                            f"You need optimum > 1.13.2 and auto-gptq > 0.4.2 . Make sure to have that version installed - detected version : optimum {optimum_version} and autogptq {autogptq_version}"
-                        )
+        # make sure backend default stays consistent with gptqmodel expectations
+        if self.backend is None:
+            self.backend = "auto"
         if self.modules_in_block_to_quantize is not None:
             optimum_version = version.parse(importlib.metadata.version("optimum"))
             if optimum_version < version.parse("1.15.0"):
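The practical effect of the new `post_init` checks, sketched under the constructor from the previous hunks:

    cfg = GPTQConfig(bits=4, desc_act=True, act_group_aware=True)
    # post_init() auto-disables act_group_aware with a warning, since the
    # hunk above only allows it when desc_act is False
    assert cfg.act_group_aware is False
    # an unset backend is normalized to the gptqmodel default
    assert cfg.backend == "auto"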
@@ -821,17 +772,15 @@ class GPTQConfig(QuantizationConfigMixin):

     def to_dict(self) -> dict[str, Any]:
         config_dict = super().to_dict()
-
+        # Compatible with legacy field: checkpoint_format
+        config_dict["checkpoint_format"] = self.format
         return config_dict

     def to_dict_optimum(self):
         """
         Get compatible dict for optimum gptq config
         """
-        quant_dict = self.to_dict()
-        # make it compatible with optimum config
-        quant_dict["disable_exllama"] = not self.use_exllama
-        return quant_dict
+        return self.to_dict()

     @classmethod
     def from_dict_optimum(cls, config_dict):
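Serialization now mirrors `format` back into the legacy key so older consumers of the config JSON still find it, and `to_dict_optimum` collapses to a plain alias; a quick sketch:

    cfg = GPTQConfig(bits=4, format="gptq_v2")
    d = cfg.to_dict()
    assert d["checkpoint_format"] == "gptq_v2"  # legacy key re-emitted on the way out
    assert cfg.to_dict_optimum() == d           # no more disable_exllama special-casing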
@@ -839,17 +788,12 @@ class GPTQConfig(QuantizationConfigMixin):
         Get compatible class with optimum gptq config dict
         """

-        if "disable_exllama" in config_dict:
-            config_dict["use_exllama"] = not config_dict["disable_exllama"]
-            # switch to None to not trigger the warning
-            config_dict.pop("disable_exllama")
-
         config = cls(**config_dict)
         return config


 @dataclass
-class AwqConfig(QuantizationConfigMixin):
+class AwqConfig(GPTQConfig):
     """
     This is a wrapper class about all possible attributes and features that you can play with a model that has been
     loaded using `auto-awq` library awq quantization relying on auto_awq backend.
@@ -861,26 +805,12 @@ class AwqConfig(QuantizationConfigMixin):
             The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization.
         zero_point (`bool`, *optional*, defaults to `True`):
             Whether to use zero point quantization.
-        version (`AWQLinearVersion`, *optional*, defaults to `AWQLinearVersion.GEMM`):
-            The version of the quantization algorithm to use. GEMM is better for big batch_size (e.g. >= 8) otherwise,
-            GEMV is better (e.g. < 8 ). GEMM models are compatible with Exllama kernels.
-        backend (`AwqBackendPackingMethod`, *optional*, defaults to `AwqBackendPackingMethod.AUTOAWQ`):
-            The quantization backend. Some models might be quantized using `llm-awq` backend. This is useful for users
-            that quantize their own models using `llm-awq` library.
-        do_fuse (`bool`, *optional*, defaults to `False`):
-            Whether to fuse attention and mlp layers together for faster inference
-        fuse_max_seq_len (`int`, *optional*):
-            The Maximum sequence length to generate when using fusing.
-        modules_to_fuse (`dict`, *optional*, default to `None`):
-            Overwrite the natively supported fusing scheme with the one specified by the users.
+        backend (`AwqBackend`, *optional*, defaults to `AwqBackend.AUTO`):
+            The quantization backend.
         modules_to_not_convert (`list`, *optional*, default to `None`):
             The list of modules to not quantize, useful for quantizing models that explicitly require to have
             some modules left in their original precision (e.g. Whisper encoder, Llava encoder, Mixtral gate layers).
             Note you cannot quantize directly with transformers, please refer to `AutoAWQ` documentation for quantizing HF models.
-        exllama_config (`dict[str, Any]`, *optional*):
-            You can specify the version of the exllama kernel through the `version` key, the maximum sequence
-            length through the `max_input_len` key, and the maximum batch size through the `max_batch_size` key.
-            Defaults to `{"version": 2, "max_input_len": 2048, "max_batch_size": 8}` if unset.
     """

     def __init__(
@@ -888,141 +818,44 @@ class AwqConfig(QuantizationConfigMixin):
         bits: int = 4,
         group_size: int = 128,
         zero_point: bool = True,
-        version: AWQLinearVersion = AWQLinearVersion.GEMM,
-        backend: AwqBackendPackingMethod = AwqBackendPackingMethod.AUTOAWQ,
-        do_fuse: bool | None = None,
-        fuse_max_seq_len: int | None = None,
-        modules_to_fuse: dict | None = None,
+        backend: AwqBackend = AwqBackend.AUTO,
         modules_to_not_convert: list | None = None,
-        exllama_config: dict[str, int] | None = None,
         **kwargs,
     ):
-        self.quant_method = QuantizationMethod.AWQ
-
-        self.bits = bits
-        self.group_size = group_size
+        format = kwargs.pop("format", AwqFormat.GEMM)
+        # Compatible with legacy field: version
+        if kwargs.get("version") is not None:
+            format = kwargs.pop("version").lower()
+        # Compatible with legacy backend
+        if backend == AwqBackend.LEGACY_AWQ:
+            backend = AwqBackend.AUTO
         self.zero_point = zero_point
-        self.version = version
-        self.backend = backend
-        self.fuse_max_seq_len = fuse_max_seq_len
         self.modules_to_not_convert = modules_to_not_convert
-        self.exllama_config = exllama_config

-        self.modules_to_fuse = modules_to_fuse
-        if do_fuse is None:
-            self.do_fuse = modules_to_fuse is not None and len(modules_to_fuse) > 0
-        else:
-            self.do_fuse = do_fuse
-        self.fuse_max_seq_len = fuse_max_seq_len
-
-        self.post_init()
+        super().__init__(bits=bits, group_size=group_size, backend=backend, format=format, **kwargs)
+        self.quant_method = QuantizationMethod.AWQ

     def post_init(self):
         r"""
         Safety checker that arguments are correct
         """
-        if self.backend not in [AwqBackendPackingMethod.AUTOAWQ, AwqBackendPackingMethod.LLMAWQ]:
-            raise ValueError(
-                f"Only supported quantization backends in {AwqBackendPackingMethod.AUTOAWQ} and {AwqBackendPackingMethod.LLMAWQ} - not recognized backend {self.backend}"
-            )

-        self.version = AWQLinearVersion.from_str(self.version)
-        if self.version not in [
-            AWQLinearVersion.GEMM,
-            AWQLinearVersion.GEMV,
-            AWQLinearVersion.EXLLAMA,
-            AWQLinearVersion.IPEX,
-        ]:
-            raise ValueError(
-                f"Only supported versions are in [AWQLinearVersion.GEMM, AWQLinearVersion.GEMV, AWQLinearVersion.EXLLAMA, AWQLinearVersion.IPEX] - not recognized version {self.version}"
-            )
+        if self.backend == "llm-awq":
+            self.format = AwqFormat.LLM_AWQ
+            self.backend = AwqBackend.AUTO

-        if self.backend == AwqBackendPackingMethod.LLMAWQ:
-            # Only cuda and xpu devices can run this function
-            if not (torch.cuda.is_available() or torch.xpu.is_available()):
-                raise ValueError("LLM-AWQ backend is only supported on CUDA and XPU")
-            if torch.cuda.is_available():
-                compute_capability = torch.cuda.get_device_capability()
-                major, minor = compute_capability
-                if major < 8:
-                    raise ValueError("LLM-AWQ backend is only supported on CUDA GPUs with compute capability >= 8.0")
-
-        if self.do_fuse and self.fuse_max_seq_len is None:
-            raise ValueError(
-                "You cannot enable fused modules without specifying a `fuse_max_seq_len`, make sure to pass a valid `fuse_max_seq_len` for your usecase"
-            )
-
-        if self.do_fuse:
-            awq_version_supports_fusing = False
-            MIN_AWQ_VERSION = "0.1.7"
-            if is_auto_awq_available():
-                awq_version_supports_fusing = version.parse(importlib.metadata.version("autoawq")) >= version.parse(
-                    MIN_AWQ_VERSION
-                )
-
-            if not awq_version_supports_fusing:
-                raise ValueError(
-                    f"You current version of `autoawq` does not support module fusing, please upgrade `autoawq` package to at least {MIN_AWQ_VERSION}."
-                )
-
-        if self.modules_to_not_convert is not None:
-            awq_version_supports_non_conversion = False
-            MIN_AWQ_VERSION = "0.1.8"
-            if is_auto_awq_available():
-                awq_version_supports_non_conversion = version.parse(
-                    importlib.metadata.version("autoawq")
-                ) >= version.parse(MIN_AWQ_VERSION)
+        if self.format not in AwqFormat.__members__.values():
+            raise ValueError(f"Invalid format '{self.format}'. Must be one of: {[b.value for b in AwqFormat]}")

-            if not awq_version_supports_non_conversion:
-                raise ValueError(
-                    f"You current version of `autoawq` does not support module quantization skipping, please upgrade `autoawq` package to at least {MIN_AWQ_VERSION}."
-                )
-
-        if self.do_fuse and self.modules_to_fuse is not None:
-            required_keys = [
-                "hidden_size",
-                "num_attention_heads",
-                "num_key_value_heads",
-                "mlp",
-                "attention",
-                "layernorm",
-                "use_alibi",
-            ]
-            if not all(key in self.modules_to_fuse for key in required_keys):
-                raise ValueError(
-                    f"Required fields are missing in the fusing mapping, required fields are {required_keys}"
-                )
-
-        if self.version == AWQLinearVersion.EXLLAMA:
-            awq_version_supports_exllama = False
-            MIN_AWQ_VERSION = "0.2.0"
-            if is_auto_awq_available():
-                awq_version_supports_exllama = version.parse(importlib.metadata.version("autoawq")) >= version.parse(
-                    MIN_AWQ_VERSION
-                )
+        if self.backend not in AwqBackend.__members__.values():
+            raise ValueError(f"Invalid backend '{self.backend}'. Must be one of: {[b.value for b in AwqBackend]}")

-            if not awq_version_supports_exllama:
-                raise ValueError(
-                    f"You current version of `autoawq` does not support exllama backend, "
-                    f"please upgrade `autoawq` package to at least {MIN_AWQ_VERSION}."
-                )
-
-        if self.exllama_config is None:
-            self.exllama_config = {"version": ExllamaVersion.TWO, "max_input_len": 2048, "max_batch_size": 8}
-        else:
-            if "version" not in self.exllama_config:
-                raise ValueError("`exllama_config` needs to have a `version` key.")
-            elif self.exllama_config["version"] not in [ExllamaVersion.ONE, ExllamaVersion.TWO]:
-                exllama_version = self.exllama_config["version"]
-                raise ValueError(
-                    f"Only supported versions are in [ExllamaVersion.ONE, ExllamaVersion.TWO] - not recognized version {exllama_version}"
-                )
-
-    def get_loading_attributes(self):
-        attributes_dict = copy.deepcopy(self.__dict__)
-        loading_attributes = ["version", "do_fuse", "modules_to_fuse", "fuse_max_seq_len", "exllama_config"]
-        loading_attributes_dict = {i: j for i, j in attributes_dict.items() if i in loading_attributes}
-        return loading_attributes_dict
+    def to_dict(self) -> dict[str, Any]:
+        config_dict = super().to_dict()
+        config_dict.pop("checkpoint_format")
+        # Compatible with legacy field: version
+        config_dict["version"] = self.format
+        return config_dict


 @dataclass
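With AwqConfig now inheriting from GPTQConfig, the fusing and exllama surface is gone and only the quantization fields remain; a hedged sketch of the resulting API (the `AwqBackend` and `AwqFormat` enums are referenced by this hunk but defined elsewhere in the file):

    from transformers import AwqConfig

    cfg = AwqConfig(bits=4, group_size=128, zero_point=True)  # backend defaults to AwqBackend.AUTO

    # The legacy `version` kwarg is folded into the new `format` field...
    legacy = AwqConfig(bits=4, version="GEMM")
    assert legacy.format == "gemm"

    # ...and to_dict() re-emits it under the old name while dropping the GPTQ-only key.
    d = legacy.to_dict()
    assert "checkpoint_format" not in d
    assert d["version"] == "gemm"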
@@ -22,7 +22,7 @@ from functools import partial
 from typing import Any, Optional, Union

 import numpy as np
-from huggingface_hub import create_repo
+from huggingface_hub import create_repo, is_offline_mode
 from huggingface_hub.dataclasses import validate_typed_dict

 from .dynamic_module_utils import custom_object_save
@@ -44,7 +44,6 @@ from .utils import (
     TensorType,
     add_start_docstrings,
     copy_func,
-    is_offline_mode,
     is_torch_available,
     is_torchcodec_available,
     is_torchvision_v2_available,
@@ -176,7 +175,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
     def __init__(self, **kwargs: Unpack[VideosKwargs]) -> None:
         super().__init__()

-        self._processor_class = kwargs.pop("processor_class", None)
+        kwargs.pop("processor_class", None)

         # Additional attributes without default values
         for key, value in kwargs.items():
@@ -443,7 +442,6 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             processed_videos_grouped[shape] = stacked_videos

         processed_videos = reorder_videos(processed_videos_grouped, grouped_videos_index)
-        processed_videos = torch.stack(processed_videos, dim=0) if return_tensors else processed_videos

         return BatchFeature(data={"pixel_values_videos": processed_videos}, tensor_type=return_tensors)

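The explicit `torch.stack` drops out here; a hedged reading is that `BatchFeature`'s own tensor conversion already batches the list of per-video tensors when `tensor_type` is set, so stacking twice was redundant. A sketch of the call that remains (shapes are illustrative):

    import torch
    from transformers import BatchFeature

    videos = [torch.zeros(8, 3, 224, 224), torch.zeros(8, 3, 224, 224)]
    # tensor_type="pt" asks BatchFeature to tensorize the list; with the manual
    # stack removed, this conversion is the single batching step.
    batch = BatchFeature(data={"pixel_values_videos": videos}, tensor_type="pt")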
@@ -717,6 +715,7 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             logger.info(
                 f"loading configuration file {video_processor_file} from cache at {resolved_video_processor_file}"
             )
+
         return video_processor_dict, kwargs

     @classmethod
@@ -772,11 +771,21 @@ class BaseVideoProcessor(BaseImageProcessorFast):
         `dict[str, Any]`: Dictionary of all the attributes that make up this video processor instance.
         """
         output = deepcopy(self.__dict__)
-        output.pop("model_valid_processing_keys", None)
-        output.pop("_valid_kwargs_names", None)
-        output["video_processor_type"] = self.__class__.__name__
+        filtered_dict = {}
+        for key, value in output.items():
+            if value is None:
+                class_default = getattr(type(self), key, "NOT_FOUND")
+                # Keep None if user explicitly set it (class default is non-None)
+                if class_default != "NOT_FOUND" and class_default is not None:
+                    filtered_dict[key] = value
+            else:
+                filtered_dict[key] = value

-        return output
+        filtered_dict.pop("model_valid_processing_keys", None)
+        filtered_dict.pop("_valid_kwargs_names", None)
+        filtered_dict["video_processor_type"] = self.__class__.__name__
+
+        return filtered_dict

     def to_json_string(self) -> str:
         """
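The new filtering rule, distilled into a standalone sketch (illustrative only, not the library API): a `None` attribute survives serialization only when the class-level default is non-None, i.e. when the `None` must have been set explicitly:

    def filter_nones(obj) -> dict:
        # Mirror of the loop in the hunk above: drop None values that match a
        # missing-or-None class default, keep explicit None overrides.
        out = {}
        for key, value in vars(obj).items():
            if value is None:
                class_default = getattr(type(obj), key, "NOT_FOUND")
                if class_default == "NOT_FOUND" or class_default is None:
                    continue
            out[key] = value
        return out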
@@ -791,12 +800,6 @@ class BaseVideoProcessor(BaseImageProcessorFast):
             if isinstance(value, np.ndarray):
                 dictionary[key] = value.tolist()

-        # make sure private name "_processor_class" is correctly
-        # saved as "processor_class"
-        _processor_class = dictionary.pop("_processor_class", None)
-        if _processor_class is not None:
-            dictionary["processor_class"] = _processor_class
-
         return json.dumps(dictionary, indent=2, sort_keys=True) + "\n"

     def to_json_file(self, json_file_path: Union[str, os.PathLike]):