transformers 5.0.0rc1__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +20 -1
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +68 -5
- transformers/core_model_loading.py +201 -35
- transformers/dependency_versions_table.py +1 -1
- transformers/feature_extraction_utils.py +54 -22
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +162 -122
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +101 -64
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +2 -12
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +12 -0
- transformers/integrations/accelerate.py +44 -111
- transformers/integrations/aqlm.py +3 -5
- transformers/integrations/awq.py +2 -5
- transformers/integrations/bitnet.py +5 -8
- transformers/integrations/bitsandbytes.py +16 -15
- transformers/integrations/deepspeed.py +18 -3
- transformers/integrations/eetq.py +3 -5
- transformers/integrations/fbgemm_fp8.py +1 -1
- transformers/integrations/finegrained_fp8.py +6 -16
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/higgs.py +2 -5
- transformers/integrations/hub_kernels.py +23 -5
- transformers/integrations/integration_utils.py +35 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +4 -10
- transformers/integrations/peft.py +5 -0
- transformers/integrations/quanto.py +5 -2
- transformers/integrations/spqr.py +3 -5
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/vptq.py +3 -5
- transformers/modeling_gguf_pytorch_utils.py +66 -19
- transformers/modeling_rope_utils.py +78 -81
- transformers/modeling_utils.py +583 -503
- transformers/models/__init__.py +19 -0
- transformers/models/afmoe/modeling_afmoe.py +7 -16
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/align/modeling_align.py +12 -6
- transformers/models/altclip/modeling_altclip.py +7 -3
- transformers/models/apertus/modeling_apertus.py +4 -2
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +1 -1
- transformers/models/aria/modeling_aria.py +8 -4
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +27 -0
- transformers/models/auto/feature_extraction_auto.py +7 -3
- transformers/models/auto/image_processing_auto.py +4 -2
- transformers/models/auto/modeling_auto.py +31 -0
- transformers/models/auto/processing_auto.py +4 -0
- transformers/models/auto/tokenization_auto.py +132 -153
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +18 -19
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +9 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +3 -0
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
- transformers/models/bit/modeling_bit.py +5 -1
- transformers/models/bitnet/modeling_bitnet.py +1 -1
- transformers/models/blenderbot/modeling_blenderbot.py +7 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +8 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -0
- transformers/models/bloom/modeling_bloom.py +13 -44
- transformers/models/blt/modeling_blt.py +162 -2
- transformers/models/blt/modular_blt.py +168 -3
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +6 -0
- transformers/models/bros/modeling_bros.py +8 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/canine/modeling_canine.py +6 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +9 -4
- transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +25 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clipseg/modeling_clipseg.py +4 -0
- transformers/models/clvp/modeling_clvp.py +14 -3
- transformers/models/code_llama/tokenization_code_llama.py +1 -1
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/cohere/modeling_cohere.py +1 -1
- transformers/models/cohere2/modeling_cohere2.py +1 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
- transformers/models/convbert/modeling_convbert.py +3 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +3 -1
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +14 -2
- transformers/models/cvt/modeling_cvt.py +5 -1
- transformers/models/cwm/modeling_cwm.py +1 -1
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +46 -39
- transformers/models/d_fine/modular_d_fine.py +15 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +1 -1
- transformers/models/dac/modeling_dac.py +4 -4
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +1 -1
- transformers/models/deberta/modeling_deberta.py +2 -0
- transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
- transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
- transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
- transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +8 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +12 -1
- transformers/models/dia/modular_dia.py +11 -0
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +3 -3
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
- transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/doge/modeling_doge.py +1 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +16 -12
- transformers/models/dots1/modeling_dots1.py +14 -5
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +5 -2
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +5 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +8 -2
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt_fast.py +46 -14
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +6 -1
- transformers/models/evolla/modeling_evolla.py +9 -1
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +1 -1
- transformers/models/falcon/modeling_falcon.py +3 -3
- transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
- transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
- transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +14 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +4 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
- transformers/models/florence2/modeling_florence2.py +20 -3
- transformers/models/florence2/modular_florence2.py +13 -0
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +16 -0
- transformers/models/gemma/modeling_gemma.py +10 -12
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma2/modeling_gemma2.py +1 -1
- transformers/models/gemma2/modular_gemma2.py +1 -1
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +28 -7
- transformers/models/gemma3/modular_gemma3.py +26 -6
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +47 -9
- transformers/models/gemma3n/modular_gemma3n.py +51 -9
- transformers/models/git/modeling_git.py +181 -126
- transformers/models/glm/modeling_glm.py +1 -1
- transformers/models/glm4/modeling_glm4.py +1 -1
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +15 -5
- transformers/models/glm4v/modular_glm4v.py +11 -3
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
- transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +8 -5
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
- transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
- transformers/models/gptj/modeling_gptj.py +15 -6
- transformers/models/granite/modeling_granite.py +1 -1
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +2 -3
- transformers/models/granitemoe/modular_granitemoe.py +1 -2
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
- transformers/models/groupvit/modeling_groupvit.py +6 -1
- transformers/models/helium/modeling_helium.py +1 -1
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
- transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
- transformers/models/hubert/modeling_hubert.py +4 -0
- transformers/models/hubert/modular_hubert.py +4 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +16 -0
- transformers/models/idefics/modeling_idefics.py +10 -0
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +9 -2
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +11 -8
- transformers/models/internvl/modular_internvl.py +5 -9
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +24 -19
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +15 -7
- transformers/models/janus/modular_janus.py +16 -7
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +14 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/configuration_lasr.py +4 -0
- transformers/models/lasr/modeling_lasr.py +3 -2
- transformers/models/lasr/modular_lasr.py +8 -1
- transformers/models/lasr/processing_lasr.py +0 -2
- transformers/models/layoutlm/modeling_layoutlm.py +5 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +18 -0
- transformers/models/lfm2/modeling_lfm2.py +1 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lilt/modeling_lilt.py +19 -15
- transformers/models/llama/modeling_llama.py +1 -1
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +8 -4
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
- transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
- transformers/models/longt5/modeling_longt5.py +0 -4
- transformers/models/m2m_100/modeling_m2m_100.py +10 -0
- transformers/models/mamba/modeling_mamba.py +2 -1
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +3 -0
- transformers/models/markuplm/modeling_markuplm.py +5 -8
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +9 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +9 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mimi/modeling_mimi.py +25 -4
- transformers/models/minimax/modeling_minimax.py +16 -3
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +1 -1
- transformers/models/mistral/modeling_mistral.py +1 -1
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +12 -4
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +13 -2
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +4 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
- transformers/models/modernbert/modeling_modernbert.py +12 -1
- transformers/models/modernbert/modular_modernbert.py +12 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
- transformers/models/moonshine/modeling_moonshine.py +1 -1
- transformers/models/moshi/modeling_moshi.py +21 -51
- transformers/models/mpnet/modeling_mpnet.py +2 -0
- transformers/models/mra/modeling_mra.py +4 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +0 -10
- transformers/models/musicgen/modeling_musicgen.py +5 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +1 -1
- transformers/models/nemotron/modeling_nemotron.py +3 -3
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +11 -16
- transformers/models/nystromformer/modeling_nystromformer.py +7 -0
- transformers/models/olmo/modeling_olmo.py +1 -1
- transformers/models/olmo2/modeling_olmo2.py +1 -1
- transformers/models/olmo3/modeling_olmo3.py +1 -1
- transformers/models/olmoe/modeling_olmoe.py +12 -4
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +7 -38
- transformers/models/openai/modeling_openai.py +12 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +7 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +7 -3
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/modeling_parakeet.py +5 -0
- transformers/models/parakeet/modular_parakeet.py +5 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
- transformers/models/patchtst/modeling_patchtst.py +5 -4
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/models/pe_audio/processing_pe_audio.py +24 -0
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +3 -0
- transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +5 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +1 -1
- transformers/models/phi/modeling_phi.py +1 -1
- transformers/models/phi3/modeling_phi3.py +1 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +12 -4
- transformers/models/phimoe/modular_phimoe.py +1 -1
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +1 -1
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +7 -0
- transformers/models/plbart/modular_plbart.py +6 -0
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +11 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prophetnet/modeling_prophetnet.py +2 -1
- transformers/models/qwen2/modeling_qwen2.py +1 -1
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
- transformers/models/qwen3/modeling_qwen3.py +1 -1
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
- transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +7 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
- transformers/models/reformer/modeling_reformer.py +9 -1
- transformers/models/regnet/modeling_regnet.py +4 -0
- transformers/models/rembert/modeling_rembert.py +7 -1
- transformers/models/resnet/modeling_resnet.py +8 -3
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +4 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +1 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +5 -1
- transformers/models/sam2/modular_sam2.py +5 -1
- transformers/models/sam2_video/modeling_sam2_video.py +51 -43
- transformers/models/sam2_video/modular_sam2_video.py +31 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +23 -0
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
- transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +3 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
- transformers/models/seed_oss/modeling_seed_oss.py +1 -1
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +2 -2
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +63 -41
- transformers/models/smollm3/modeling_smollm3.py +1 -1
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
- transformers/models/speecht5/modeling_speecht5.py +28 -0
- transformers/models/splinter/modeling_splinter.py +9 -3
- transformers/models/squeezebert/modeling_squeezebert.py +2 -0
- transformers/models/stablelm/modeling_stablelm.py +1 -1
- transformers/models/starcoder2/modeling_starcoder2.py +1 -1
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/swiftformer/modeling_swiftformer.py +4 -0
- transformers/models/swin/modeling_swin.py +16 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +49 -33
- transformers/models/swinv2/modeling_swinv2.py +41 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +1 -7
- transformers/models/t5gemma/modeling_t5gemma.py +1 -1
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +1 -1
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/timesfm/modeling_timesfm.py +12 -0
- transformers/models/timesfm/modular_timesfm.py +12 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
- transformers/models/trocr/modeling_trocr.py +1 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +4 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +3 -7
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +0 -6
- transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +7 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/visual_bert/modeling_visual_bert.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +4 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +5 -3
- transformers/models/x_clip/modeling_x_clip.py +2 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +10 -0
- transformers/models/xlm/modeling_xlm.py +13 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +4 -1
- transformers/models/zamba/modeling_zamba.py +2 -1
- transformers/models/zamba2/modeling_zamba2.py +3 -2
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +7 -0
- transformers/pipelines/__init__.py +9 -6
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +1 -1
- transformers/pipelines/document_question_answering.py +1 -1
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +127 -56
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +9 -64
- transformers/quantizers/quantizer_aqlm.py +1 -18
- transformers/quantizers/quantizer_auto_round.py +1 -10
- transformers/quantizers/quantizer_awq.py +3 -8
- transformers/quantizers/quantizer_bitnet.py +1 -6
- transformers/quantizers/quantizer_bnb_4bit.py +9 -49
- transformers/quantizers/quantizer_bnb_8bit.py +9 -19
- transformers/quantizers/quantizer_compressed_tensors.py +1 -4
- transformers/quantizers/quantizer_eetq.py +2 -12
- transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
- transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
- transformers/quantizers/quantizer_fp_quant.py +4 -4
- transformers/quantizers/quantizer_gptq.py +1 -4
- transformers/quantizers/quantizer_higgs.py +2 -6
- transformers/quantizers/quantizer_mxfp4.py +2 -28
- transformers/quantizers/quantizer_quanto.py +14 -14
- transformers/quantizers/quantizer_spqr.py +3 -8
- transformers/quantizers/quantizer_torchao.py +28 -124
- transformers/quantizers/quantizer_vptq.py +1 -10
- transformers/testing_utils.py +28 -12
- transformers/tokenization_mistral_common.py +3 -2
- transformers/tokenization_utils_base.py +3 -2
- transformers/tokenization_utils_tokenizers.py +25 -2
- transformers/trainer.py +24 -2
- transformers/trainer_callback.py +8 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/training_args.py +8 -10
- transformers/utils/__init__.py +4 -0
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +34 -25
- transformers/utils/generic.py +20 -0
- transformers/utils/import_utils.py +51 -9
- transformers/utils/kernel_config.py +71 -18
- transformers/utils/quantization_config.py +8 -8
- transformers/video_processing_utils.py +16 -12
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/pipelines/automatic_speech_recognition.py
CHANGED
@@ -198,11 +198,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             self.type = "seq2seq_whisper"
         elif model.__class__.__name__ in MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.values():
             self.type = "seq2seq"
-        elif (
-            feature_extractor._processor_class
-            and feature_extractor._processor_class.endswith("WithLM")
-            and decoder is not None
-        ):
+        elif decoder is not None:
             self.decoder = decoder
             self.type = "ctc_with_lm"
         else:
@@ -350,6 +346,20 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):

         return preprocess_params, forward_params, postprocess_params

+    @property
+    def _align_to(self):
+        """Sample stride per output."""
+        # XXX: Carefully, this variable will not exist in `seq2seq` setting.
+        # Currently chunking is not possible at this level for `seq2seq` so
+        # it's ok.
+        align_to = getattr(self.model.config, "inputs_to_logits_ratio", 1)
+        if self.model.config.model_type == "lasr_ctc":
+            # TODO: find a standard for that but not easy because input length -> mel length depends on the feature extractor
+            # specific way of doing it
+            # means the model take mel features as input, we align according to the hop length
+            align_to *= self.feature_extractor.hop_length
+        return align_to
+
     def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
         if isinstance(inputs, str):
             if inputs.startswith("http://") or inputs.startswith("https://"):
@@ -444,10 +454,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             if isinstance(stride_length_s, (int, float)):
                 stride_length_s = [stride_length_s, stride_length_s]

-
-            # Currently chunking is not possible at this level for `seq2seq` so
-            # it's ok.
-            align_to = getattr(self.model.config, "inputs_to_logits_ratio", 1)
+            align_to = self._align_to
             chunk_len = int(round(chunk_length_s * self.feature_extractor.sampling_rate / align_to) * align_to)
             stride_left = int(round(stride_length_s[0] * self.feature_extractor.sampling_rate / align_to) * align_to)
             stride_right = int(round(stride_length_s[1] * self.feature_extractor.sampling_rate / align_to) * align_to)
@@ -567,7 +574,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             # Send stride to `postprocess`.
             # it needs to be handled there where
             # the pieces are to be concatenated.
-            ratio = 1 / self.
+            ratio = 1 / self._align_to
             if isinstance(stride, tuple):
                 out["stride"] = rescale_stride([stride], ratio)[0]
             else:
@@ -650,11 +657,12 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):

         if return_timestamps and self.type not in {"seq2seq", "seq2seq_whisper"}:
             chunks = []
+            align_to = self._align_to
             for item in offsets:
-                start = item["start_offset"] *
+                start = item["start_offset"] * align_to
                 start /= self.feature_extractor.sampling_rate

-                stop = item["end_offset"] *
+                stop = item["end_offset"] * align_to
                 stop /= self.feature_extractor.sampling_rate

                 chunks.append({"text": item[return_timestamps], "timestamp": (start, stop)})
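
Note on the hunks above (not part of the diff): the new `_align_to` property is the number of input audio samples represented by one model output step — `inputs_to_logits_ratio`, additionally multiplied by the feature extractor's `hop_length` for mel-input models such as `lasr_ctc` — and the same ratio now drives chunk sizing, stride rescaling, and timestamp conversion. A minimal, self-contained sketch of the timestamp conversion; the function name and the numbers are invented for illustration:

def offsets_to_seconds(offsets, align_to, sampling_rate):
    # Convert model-output offsets (counted in output frames) into (start, stop) seconds,
    # mirroring the postprocess logic shown in the diff above.
    chunks = []
    for item in offsets:
        start = item["start_offset"] * align_to / sampling_rate
        stop = item["end_offset"] * align_to / sampling_rate
        chunks.append({"text": item["text"], "timestamp": (start, stop)})
    return chunks

# Hypothetical model emitting one output frame per 320 input samples at 16 kHz:
print(offsets_to_seconds([{"text": "hello", "start_offset": 10, "end_offset": 35}], 320, 16_000))
# -> [{'text': 'hello', 'timestamp': (0.2, 0.7)}]
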
transformers/pipelines/base.py
CHANGED
@@ -884,7 +884,7 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
         # NOTE: _prepare_generation_config creates a deep copy of the generation config before updating it,
         # and returns all kwargs that were not used to update the generation config
         prepared_generation_config, kwargs = self.model._prepare_generation_config(
-            generation_config=default_pipeline_generation_config,
+            generation_config=default_pipeline_generation_config, **kwargs
         )
         self.generation_config = prepared_generation_config
         # if the `max_new_tokens` is set to the pipeline default, but `max_length` is set to a non-default
transformers/pipelines/document_question_answering.py
CHANGED
@@ -201,7 +201,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
             postprocess_params["top_k"] = top_k
         if max_answer_len is not None:
             if max_answer_len < 1:
-                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
+                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
             postprocess_params["max_answer_len"] = max_answer_len
         if handle_impossible_answer is not None:
             postprocess_params["handle_impossible_answer"] = handle_impossible_answer

transformers/pipelines/question_answering.py
CHANGED
@@ -328,7 +328,7 @@ class QuestionAnsweringPipeline(ChunkPipeline):
             postprocess_params["top_k"] = top_k
         if max_answer_len is not None:
             if max_answer_len < 1:
-                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
+                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
             postprocess_params["max_answer_len"] = max_answer_len
         if handle_impossible_answer is not None:
             postprocess_params["handle_impossible_answer"] = handle_impossible_answer
transformers/pipelines/text_to_audio.py
CHANGED
@@ -117,8 +117,8 @@ class TextToAudioPipeline(Pipeline):
             else vocoder
         )

-        if self.model.config.model_type in ["musicgen"]:
-            # MusicGen expect to use
+        if self.model.config.model_type in ["musicgen", "speecht5"]:
+            # MusicGen and SpeechT5 expect to use their tokenizer instead
             self.processor = None

         self.sampling_rate = sampling_rate
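
Note (not part of the diff): with this change the pipeline also drops its processor for SpeechT5, like MusicGen, and tokenizes text directly. A hedged usage sketch of the text-to-audio pipeline under that setup; the checkpoint name and the zero speaker embedding are placeholders for illustration, not values taken from this release:

import torch
from transformers import pipeline

pipe = pipeline("text-to-audio", model="microsoft/speecht5_tts")
# SpeechT5 expects a (1, 512) speaker embedding; a zero vector is used here only as a placeholder.
speaker_embeddings = torch.zeros(1, 512)
out = pipe("Hello, world!", forward_params={"speaker_embeddings": speaker_embeddings})
print(out["sampling_rate"], out["audio"].shape)
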
transformers/processing_utils.py
CHANGED
|
@@ -129,6 +129,26 @@ MODALITY_TO_BASE_CLASS_MAPPING = {
|
|
|
129
129
|
"video_processor": "BaseVideoProcessor",
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
+
|
|
133
|
+
def _get_modality_for_attribute(attribute_name: str) -> str:
|
|
134
|
+
"""
|
|
135
|
+
Get the canonical modality type for a given attribute name.
|
|
136
|
+
|
|
137
|
+
For example:
|
|
138
|
+
- "image_processor" -> "image_processor"
|
|
139
|
+
- "encoder_image_processor" -> "image_processor"
|
|
140
|
+
- "text_tokenizer" -> "tokenizer"
|
|
141
|
+
- "my_feature_extractor" -> "feature_extractor"
|
|
142
|
+
"""
|
|
143
|
+
for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys():
|
|
144
|
+
if modality in attribute_name:
|
|
145
|
+
return modality
|
|
146
|
+
raise ValueError(
|
|
147
|
+
f"Cannot determine modality for attribute '{attribute_name}'. "
|
|
148
|
+
f"Attribute name must contain one of: {list(MODALITY_TO_AUTOPROCESSOR_MAPPING.keys())}"
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
|
|
132
152
|
if sys.version_info >= (3, 11):
|
|
133
153
|
Unpack = typing.Unpack
|
|
134
154
|
else:
|
|
@@ -663,8 +683,10 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
663
683
|
mismatch between expected and actual class, an error is raise. Otherwise, the proper retrieved class
|
|
664
684
|
is returned.
|
|
665
685
|
"""
|
|
666
|
-
|
|
667
|
-
|
|
686
|
+
# If the exact attribute name is not in the mapping, use its canonical modality
|
|
687
|
+
# (e.g., "encoder_tokenizer" -> "tokenizer")
|
|
688
|
+
if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING:
|
|
689
|
+
argument_name = _get_modality_for_attribute(argument_name)
|
|
668
690
|
class_name = MODALITY_TO_BASE_CLASS_MAPPING.get(argument_name)
|
|
669
691
|
if isinstance(class_name, tuple):
|
|
670
692
|
proper_class = tuple(self.get_possibly_dynamic_module(n) for n in class_name if n is not None)
|
|
@@ -695,24 +717,17 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
695
717
|
# extra attributes to be kept
|
|
696
718
|
attrs_to_save += ["auto_map"]
|
|
697
719
|
|
|
720
|
+
# Remove tokenizers from output - they have their own vocab files and are saved separately.
|
|
721
|
+
# All other sub-processors (image_processor, feature_extractor, etc.) are kept in processor_config.json.
|
|
698
722
|
for attribute in self.__class__.get_attributes():
|
|
699
|
-
if
|
|
700
|
-
|
|
723
|
+
if attribute in output:
|
|
724
|
+
modality = _get_modality_for_attribute(attribute)
|
|
725
|
+
if modality == "tokenizer":
|
|
726
|
+
del output[attribute]
|
|
701
727
|
|
|
702
728
|
if "chat_template" in output:
|
|
703
729
|
del output["chat_template"]
|
|
704
730
|
|
|
705
|
-
def save_public_processor_class(dictionary):
|
|
706
|
-
# make sure private name "_processor_class" is correctly
|
|
707
|
-
# saved as "processor_class"
|
|
708
|
-
_processor_class = dictionary.pop("_processor_class", None)
|
|
709
|
-
if _processor_class is not None:
|
|
710
|
-
dictionary["processor_class"] = _processor_class
|
|
711
|
-
for value in dictionary.values():
|
|
712
|
-
if isinstance(value, dict):
|
|
713
|
-
save_public_processor_class(value)
|
|
714
|
-
return dictionary
|
|
715
|
-
|
|
716
731
|
def cast_array_to_list(dictionary):
|
|
717
732
|
"""
|
|
718
733
|
Numpy arrays are not serialiazable but can be in pre-processing dicts.
|
|
@@ -743,7 +758,6 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
743
758
|
)
|
|
744
759
|
}
|
|
745
760
|
output = cast_array_to_list(output)
|
|
746
|
-
output = save_public_processor_class(output)
|
|
747
761
|
output["processor_class"] = self.__class__.__name__
|
|
748
762
|
|
|
749
763
|
return output
|
|
@@ -816,16 +830,17 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
816
830
|
|
|
817
831
|
for attribute_name in self.get_attributes():
|
|
818
832
|
attribute = getattr(self, attribute_name)
|
|
819
|
-
if hasattr(attribute, "_set_processor_class"):
|
|
820
|
-
attribute._set_processor_class(self.__class__.__name__)
|
|
821
833
|
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
834
|
+
modality = _get_modality_for_attribute(attribute_name)
|
|
835
|
+
is_primary = attribute_name == modality
|
|
836
|
+
if modality == "tokenizer":
|
|
837
|
+
attribute._set_processor_class(self.__class__.__name__)
|
|
838
|
+
# Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
|
|
839
|
+
if is_primary:
|
|
840
|
+
attribute.save_pretrained(save_directory)
|
|
841
|
+
else:
|
|
842
|
+
# if a model has multiple tokenizers, save the additional tokenizers in their own folders.
|
|
843
|
+
attribute.save_pretrained(os.path.join(save_directory, attribute_name))
|
|
829
844
|
elif attribute._auto_class is not None:
|
|
830
845
|
custom_object_save(attribute, save_directory, config=attribute)
|
|
831
846
|
|
|
@@ -1393,9 +1408,10 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
1393
1408
|
if token is not None:
|
|
1394
1409
|
kwargs["token"] = token
|
|
1395
1410
|
|
|
1396
|
-
|
|
1397
|
-
processor_dict,
|
|
1398
|
-
|
|
1411
|
+
# Get processor_dict first so we can use it to instantiate non-tokenizer sub-processors
|
|
1412
|
+
processor_dict, instantiation_kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
|
|
1413
|
+
args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
|
|
1414
|
+
return cls.from_args_and_dict(args, processor_dict, **instantiation_kwargs)
|
|
1399
1415
|
|
|
1400
1416
|
@classmethod
|
|
1401
1417
|
def get_attributes(cls):
|
|
@@ -1405,7 +1421,7 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
1405
1421
|
# don't treat audio_tokenizer as an attribute
|
|
1406
1422
|
if sub_processor_type == "audio_tokenizer":
|
|
1407
1423
|
continue
|
|
1408
|
-
if
|
|
1424
|
+
if any(modality in sub_processor_type for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
|
|
1409
1425
|
attributes.append(sub_processor_type)
|
|
1410
1426
|
|
|
1411
1427
|
# Legacy processors may not override `__init__` and instead expose modality
|
|
@@ -1419,7 +1435,7 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
1419
1435
|
inferred_attribute = attribute_name[: -len("_class")]
|
|
1420
1436
|
if inferred_attribute == "audio_tokenizer":
|
|
1421
1437
|
continue
|
|
1422
|
-
if
|
|
1438
|
+
if any(modality in inferred_attribute for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
|
|
1423
1439
|
attributes.append(inferred_attribute)
|
|
1424
1440
|
|
|
1425
1441
|
return attributes
|
|
@@ -1447,49 +1463,104 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
1447
1463
|
cls._auto_class = auto_class
|
|
1448
1464
|
|
|
1449
1465
|
@classmethod
|
|
1450
|
-
def
|
|
1466
|
+
def _load_tokenizer_from_pretrained(
|
|
1467
|
+
+        cls, sub_processor_type, pretrained_model_name_or_path, subfolder="", **kwargs
+    ):
+        auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+        is_primary = sub_processor_type == "tokenizer"
+
+        if is_primary:
+            # Primary tokenizer: load from root
+            tokenizer = auto_processor_class.from_pretrained(
+                pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+            )
+        else:
+            # Additional tokenizer: load from subfolder (e.g., "decoder_tokenizer")
+            tokenizer_subfolder = os.path.join(subfolder, sub_processor_type) if subfolder else sub_processor_type
+            tokenizer = auto_processor_class.from_pretrained(
+                pretrained_model_name_or_path, subfolder=tokenizer_subfolder, **kwargs
+            )
+        return tokenizer
+
+    @classmethod
+    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor_dict=None, **kwargs):
         """
         Identify and instantiate the subcomponents of Processor classes, such as image processors, tokenizers,
         and feature extractors. This method inspects the processor's `__init__` signature to identify parameters
         that correspond to known modality types (image_processor, tokenizer, feature_extractor, etc.) or contain
-
-
-
+        modality names in their attribute name.
+
+        For tokenizers: Uses the appropriate Auto class (AutoTokenizer) to load via `.from_pretrained()`.
+        Additional tokenizers (e.g., "decoder_tokenizer") are loaded from subfolders.
+
+        For other sub-processors (image_processor, feature_extractor, etc.): Primary ones are loaded via
+        Auto class. Additional ones are instantiated from the config stored in processor_config.json
+        (passed as processor_dict).
+
+        Args:
+            pretrained_model_name_or_path: Path or model id to load from.
+            processor_dict: Optional dict containing processor config (from processor_config.json).
+                Required when loading additional non-tokenizer sub-processors.
         """
         args = []
+        processor_dict = processor_dict if processor_dict is not None else {}
+        # Remove subfolder from kwargs to avoid duplicate keyword arguments
+        subfolder = kwargs.pop("subfolder", "")
+
         # get args from processor init signature
         sub_processors = cls.get_attributes()
         for sub_processor_type in sub_processors:
-
-
-
-                tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
-                if "token_type_ids" in tokenizer.model_input_names:
-                    tokenizer.model_input_names.remove("token_type_ids")
-                args.append(tokenizer)
-            elif "PixtralProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
-                from tokenizers import pre_tokenizers
+            modality = _get_modality_for_attribute(sub_processor_type)
+            is_primary = sub_processor_type == modality

-
+            if (
+                "tokenizer" in sub_processor_type
+            ):  # This is only necessary for the checkpoing in test_procesing_mistral3.py which has no config.json and
+                # the tokenizer_config.json references LlamaTokenizerFast. TODO: update the config on the hub.
+                if "PixtralProcessor" in cls.__name__:
+                    from .tokenization_utils_tokenizers import TokenizersBackend

-
-
-
-
+                    tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
+                else:
+                    tokenizer = cls._load_tokenizer_from_pretrained(
+                        sub_processor_type, pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+                    )
                 args.append(tokenizer)
-            elif
+            elif is_primary:
+                # Primary non-tokenizer sub-processor: load via Auto class
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
-                sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
-                args.append(sub_processor)
-            elif "tokenizer" in sub_processor_type:
-                # Special case: tokenizer-like parameters not in the mapping (e.g., "protein_tokenizer")
-                # Load using AutoTokenizer with subfolder
-                auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
                 sub_processor = auto_processor_class.from_pretrained(
-                    pretrained_model_name_or_path, subfolder=
+                    pretrained_model_name_or_path, subfolder=subfolder, **kwargs
                 )
                 args.append(sub_processor)

+            elif sub_processor_type in processor_dict:
+                # Additional non-tokenizer sub-processor: instantiate from config in processor_dict
+                sub_processor_config = processor_dict[sub_processor_type]
+                if isinstance(sub_processor_config, dict):
+                    # Determine the class to instantiate
+                    # Image processors have 'image_processor_type', feature extractors have 'feature_extractor_type'
+                    type_key = f"{modality}_type"
+                    class_name = sub_processor_config.get(type_key)
+                    if class_name is None:
+                        raise ValueError(
+                            f"Cannot instantiate {sub_processor_type}: missing '{type_key}' in config. "
+                            f"Config keys: {list(sub_processor_config.keys())}"
+                        )
+                    processor_class = cls.get_possibly_dynamic_module(class_name)
+                    sub_processor = processor_class(**sub_processor_config)
+                    args.append(sub_processor)
+                else:
+                    raise ValueError(
+                        f"Expected dict for {sub_processor_type} in processor_config.json, "
+                        f"got {type(sub_processor_config)}"
+                    )
+            else:
+                raise ValueError(
+                    f"Cannot find config for {sub_processor_type} in processor_config.json. "
+                    f"Available keys: {list(processor_dict.keys())}"
+                )
+
         return args

     @staticmethod
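Taken together, the new `_load_tokenizer_from_pretrained` helper and the rewritten `_get_arguments_from_pretrained` mean a processor can carry several tokenizers: the primary one is resolved from the repository root, any additional tokenizer attribute (for example `decoder_tokenizer`) is resolved from a subfolder of the same name, and additional non-tokenizer sub-processors are rebuilt from `processor_config.json`. The sketch below illustrates the resulting user-facing behaviour; the repo id `my-org/my-model` and its layout are placeholders for illustration, not values taken from this diff.

    from transformers import AutoProcessor, AutoTokenizer

    # Assumed (hypothetical) repo layout:
    #   my-org/my-model/
    #     tokenizer.json           <- primary tokenizer, resolved from the repo root
    #     decoder_tokenizer/       <- additional tokenizer, resolved from its subfolder
    #       tokenizer.json
    #     processor_config.json    <- configs for additional non-tokenizer sub-processors

    # What the helper does for the primary tokenizer attribute...
    primary = AutoTokenizer.from_pretrained("my-org/my-model")
    # ...and for an additional tokenizer attribute named "decoder_tokenizer".
    decoder = AutoTokenizer.from_pretrained("my-org/my-model", subfolder="decoder_tokenizer")

    # A processor class whose __init__ declares both attributes gets them wired up automatically:
    processor = AutoProcessor.from_pretrained("my-org/my-model")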
transformers/quantizers/auto.py CHANGED

@@ -302,7 +302,7 @@ def register_quantizer(name: str):
     return register_quantizer_fn


-def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_only, user_agent):
+def get_hf_quantizer(config, quantization_config, device_map, weights_only, user_agent):
     pre_quantized = hasattr(config, "quantization_config")
     if pre_quantized and not AutoHfQuantizer.supports_quant_method(config.quantization_config):
         pre_quantized = False
@@ -324,11 +324,9 @@ def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_onl

     if hf_quantizer is not None:
         hf_quantizer.validate_environment(
-            dtype=dtype,
             device_map=device_map,
             weights_only=weights_only,
         )
-        dtype = hf_quantizer.update_dtype(dtype)
         device_map = hf_quantizer.update_device_map(device_map)
         config = hf_quantizer.update_tp_plan(config)
         config = hf_quantizer.update_ep_plan(config)
@@ -337,4 +335,4 @@ def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_onl
     if not getattr(hf_quantizer.quantization_config, "dequantize", False):
         quant_method = hf_quantizer.quantization_config.quant_method
         user_agent["quant"] = getattr(quant_method, "value", quant_method)
-    return hf_quantizer, config,
+    return hf_quantizer, config, device_map
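The dtype plumbing has moved out of `get_hf_quantizer`: it no longer receives a `dtype`, no longer passes `dtype=dtype` to `validate_environment`, no longer calls `update_dtype`, and now returns a three-element tuple. A minimal sketch of an internal call site under the new signature; the argument variables are placeholders standing in for whatever the caller has in scope, not additional API introduced by this diff.

    # Hypothetical call site illustrating the new argument list and return value.
    hf_quantizer, config, device_map = get_hf_quantizer(
        config,               # model config, possibly carrying config.quantization_config
        quantization_config,  # user-supplied quantization config, or None
        device_map,           # may be rewritten by hf_quantizer.update_device_map
        weights_only,         # forwarded to hf_quantizer.validate_environment
        user_agent,           # dict; gains a "quant" entry unless dequantize is requested
    )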
transformers/quantizers/base.py CHANGED

@@ -31,16 +31,6 @@ else:
 logger = logging.get_logger(__file__)


-def _assign_original_dtype(module, original_dtype):
-    # not very nice in a recursive function but it avoids a circular import
-    from ..modeling_utils import PreTrainedModel
-
-    for child in module.children():
-        if isinstance(child, PreTrainedModel):
-            child.config._pre_quantization_dtype = original_dtype
-        _assign_original_dtype(child, original_dtype)
-
-
 def get_keys_to_not_convert(model) -> list:
     r"""
     Function to automatically detect keys to not convert for usage like quantization. For example for CausalLM modules
@@ -118,33 +108,7 @@ class HfQuantizer(ABC):
         """
         return device_map

-    def adjust_target_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        """
-        Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained`
-        to compute the device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype`
-        to `torch.int8` and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`.
-
-        Args:
-            dtype (`torch.dtype`, *optional*):
-                The dtype that is used to compute the device_map.
-        """
-        return dtype
-
     def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
-        "Return the element size (in bytes) for `param_name`."
-
-        if self.param_needs_quantization(model, param_name):
-            from accelerate.utils import CustomDtype
-
-            mapping = {
-                torch.int8: 1,
-                CustomDtype.INT4: 0.5,
-                CustomDtype.FP8: 1,
-                CustomDtype.INT2: 0.25,
-            }
-            # The value passed is actually not used when the method is overridden
-            if (custom_dtype := self.adjust_target_dtype(torch.float16)) in mapping:
-                return mapping[custom_dtype]
         return param.element_size()

     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
@@ -176,7 +140,7 @@ class HfQuantizer(ABC):
     def _process_model_before_weight_loading(self, model, **kwargs):
         return model

-    def preprocess_model(self, model: "PreTrainedModel",
+    def preprocess_model(self, model: "PreTrainedModel", dtype=None, **kwargs):
         """
         Setting model attributes and/or converting model before weights loading. At this point
         the model should be initialized on the meta device so you can freely manipulate the skeleton
@@ -194,14 +158,6 @@ class HfQuantizer(ABC):
         self._convert_model_for_quantization(model)
         self._process_model_before_weight_loading(model, **kwargs)

-        # We store the original dtype for quantized models as we cannot easily retrieve it
-        # once the weights have been quantized
-        # Note that once you have loaded a quantized model, you can't change its dtype so this will
-        # remain a single source of truth
-        original_dtype = dtype if dtype is not None else torch.get_default_dtype()
-        config._pre_quantization_dtype = original_dtype
-        _assign_original_dtype(model, original_dtype)
-
     def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
         return model

@@ -231,34 +187,25 @@ class HfQuantizer(ABC):
         del model.hf_quantizer
         if hasattr(model.config, "quantization_config"):
             del model.config.quantization_config
-        if hasattr(model.config, "_pre_quantization_dtype"):
-            del model.config._pre_quantization_dtype
         if hasattr(model, "quantization_method"):
             del model.quantization_method
         model.is_quantized = False

-    def dequantize(self, model):
+    def dequantize(self, model, dtype=None):
         """
         Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance.
         Note not all quantization schemes support this.
         """
-
+        if dtype is None:
+            # using the same dtype we used to load the model. If we don't do that, we might have issues with modules we didn't quantize.
+            # or we need to upcast everything to the same dtype
+            dtype = model.config.dtype
+        model = self._dequantize(model, dtype=dtype)
         self.remove_quantization_config(model)

         return model

-    def
-        """
-        The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up accelerator.
-        A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
-        we allocate half the memory of the weights residing in the empty model, etc...
-        """
-        # By default we return 4, i.e. half the model size (this corresponds to the case where the model is not
-        # really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual
-        # weight loading)
-        return 4
-
-    def _dequantize(self, model):
+    def _dequantize(self, model, dtype=None):
         raise NotImplementedError(
             f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
         )
@@ -313,15 +260,13 @@
     def is_trainable(self): ...

     def _convert_model_for_quantization(self, model):
-        from accelerate import init_empty_weights
-
         for name, module in model.named_modules():
             module_class_name = module.__class__.__name__
             if module_class_name in MODULES_TO_PATCH_FOR_QUANTIZATION and (
                 self.quantization_config.quant_method
                 in MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["quantization_methods"]
             ):
-                with init_empty_weights():
+                with torch.device("meta"):
                     parent_module, name = get_module_from_name(model, name)
                     parent_module._modules[name] = MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["module_name"](
                         model.config.get_text_config()
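On the `HfQuantizer` base class the net effect is that dtype bookkeeping (`adjust_target_dtype`, `_pre_quantization_dtype`, the warm-up factor helper) disappears, while `dequantize` gains an optional `dtype` that falls back to the dtype the model was loaded with. A minimal usage sketch, assuming `quantizer` is an `HfQuantizer` subclass whose backend actually implements `_dequantize` (not all schemes do):

    import torch

    # Default: reuse the dtype recorded on the model config (model.config.dtype).
    model = quantizer.dequantize(model)

    # Or request an explicit dtype for the dequantized weights.
    model = quantizer.dequantize(model, dtype=torch.bfloat16)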
transformers/quantizers/quantizer_aqlm.py CHANGED

@@ -23,13 +23,10 @@ if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel

 from ..integrations import replace_with_aqlm_linear
-from ..utils import is_accelerate_available, is_aqlm_available, is_torch_available, logging
+from ..utils import is_accelerate_available, is_aqlm_available, logging
 from ..utils.quantization_config import QuantizationConfigMixin


-if is_torch_available():
-    import torch
-
 logger = logging.get_logger(__name__)


@@ -50,20 +47,6 @@ class AqlmHfQuantizer(HfQuantizer):
         if not is_aqlm_available():
             raise ImportError("Using `aqlm` quantization requires AQLM: `pip install aqlm[gpu,cpu]`")

-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            if torch.cuda.is_available():
-                dtype = torch.float16
-                logger.info(
-                    "CUDA available. Assuming AQLM inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
-                )
-            else:
-                dtype = torch.float32
-                logger.info(
-                    "CUDA is unavailable. Assuming AQLM inference on CPU and loading the model in `torch.float32`. To overwrite it, set `dtype` manually."
-                )
-        return dtype
-
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
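Since AQLM's automatic dtype default (float16 when CUDA is available, float32 otherwise) is removed along with `update_dtype`, loading code that relied on that implicit choice may want to pass `dtype` explicitly. A sketch only; the checkpoint id is a placeholder for any AQLM-quantized model, not a value taken from this diff.

    import torch
    from transformers import AutoModelForCausalLM

    # "some-org/some-aqlm-model" is a stand-in for an AQLM-quantized checkpoint.
    model = AutoModelForCausalLM.from_pretrained(
        "some-org/some-aqlm-model",
        dtype=torch.float16,  # previously picked automatically when dtype was None
        device_map="auto",
    )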
transformers/quantizers/quantizer_auto_round.py CHANGED

@@ -19,13 +19,10 @@ from .base import HfQuantizer
 if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel

-from ..utils import is_auto_round_available, is_torch_available, logging
+from ..utils import is_auto_round_available, logging
 from ..utils.quantization_config import QuantizationConfigMixin


-if is_torch_available():
-    import torch
-
 logger = logging.get_logger(__name__)


@@ -47,12 +44,6 @@ class AutoRoundQuantizer(HfQuantizer):
             "Loading an AutoRound quantized model requires auto-round library (`pip install 'auto-round>=0.5'`)"
         )

-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            dtype = torch.bfloat16
-            logger.info("Loading the model in `torch.bfloat16`. To overwrite it, set `dtype` manually.")
-        return dtype
-
     def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
         if model.__class__.main_input_name != "input_ids":
             logger.warning("AutoRound offers only limited support for models that are not strictly text-based.")
transformers/quantizers/quantizer_awq.py CHANGED

@@ -53,10 +53,7 @@ class AwqQuantizer(HfQuantizer):
             raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")

     def update_dtype(self, dtype):
-        if dtype is None:
-            dtype = torch.float16
-            logger.info("Loading the model in `torch.float16`. To overwrite it, set `dtype` manually.")
-        elif dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
+        if dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
             logger.warning(
                 "`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`."
             )
@@ -65,13 +62,11 @@ class AwqQuantizer(HfQuantizer):
             logger.warning("We suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.")
         return dtype

-    def _process_model_before_weight_loading(
-        self, model: "PreTrainedModel", keep_in_fp32_modules: list[str] | None = None, **kwargs
-    ):
+    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
         from ..integrations import replace_quantization_scales, replace_with_awq_linear

         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert,
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules, add_default_skips=True
         )

         model = replace_with_awq_linear(