transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0

transformers/pipelines/automatic_speech_recognition.py
CHANGED

@@ -198,11 +198,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             self.type = "seq2seq_whisper"
         elif model.__class__.__name__ in MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.values():
             self.type = "seq2seq"
-        elif (
-            feature_extractor._processor_class
-            and feature_extractor._processor_class.endswith("WithLM")
-            and decoder is not None
-        ):
+        elif decoder is not None:
             self.decoder = decoder
             self.type = "ctc_with_lm"
         else:
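With this change the `ctc_with_lm` path no longer requires the feature extractor's `_processor_class` to end in "WithLM"; passing any non-None `decoder` is enough. A minimal sketch of wiring in a beam-search decoder, assuming pyctcdecode is installed (the checkpoint name is illustrative, not taken from this diff):

    # Sketch: selecting the `ctc_with_lm` path by passing a decoder directly.
    # The checkpoint name is illustrative only.
    from transformers import AutoProcessor, pipeline

    processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")

    asr = pipeline(
        "automatic-speech-recognition",
        model="patrickvonplaten/wav2vec2-base-100h-with-lm",
        decoder=processor.decoder,  # any non-None decoder now triggers `ctc_with_lm`
    )
    print(asr.type)  # expected: "ctc_with_lm"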
@@ -350,6 +346,20 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
 
         return preprocess_params, forward_params, postprocess_params
 
+    @property
+    def _align_to(self):
+        """Sample stride per output."""
+        # XXX: Carefully, this variable will not exist in `seq2seq` setting.
+        # Currently chunking is not possible at this level for `seq2seq` so
+        # it's ok.
+        align_to = getattr(self.model.config, "inputs_to_logits_ratio", 1)
+        if self.model.config.model_type == "lasr_ctc":
+            # TODO: find a standard for that but not easy because input length -> mel length depends on the feature extractor
+            # specific way of doing it
+            # means the model take mel features as input, we align according to the hop length
+            align_to *= self.feature_extractor.hop_length
+        return align_to
+
     def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
         if isinstance(inputs, str):
             if inputs.startswith("http://") or inputs.startswith("https://"):
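The new property centralizes the sample-stride computation that the three hunks below all rely on. A rough sketch of the two regimes it covers; the numeric values are assumptions for illustration, not taken from this diff:

    # Sketch of what `_align_to` returns in the two regimes (values illustrative).

    # Waveform-input CTC model: one logit per `inputs_to_logits_ratio` samples.
    inputs_to_logits_ratio = 320       # a typical wav2vec2-style figure
    align_to = inputs_to_logits_ratio  # audio samples per output frame

    # Mel-input CTC model (`lasr_ctc`): logits stride over mel frames, and each
    # mel frame itself covers `hop_length` waveform samples.
    hop_length = 160                   # feature-extractor hop, illustrative
    align_to_lasr = inputs_to_logits_ratio * hop_length

    sampling_rate = 16_000
    print(align_to / sampling_rate)    # seconds of audio per output frame: 0.02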
@@ -444,10 +454,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
         if isinstance(stride_length_s, (int, float)):
             stride_length_s = [stride_length_s, stride_length_s]
 
-        # XXX: Carefully, this variable will not exist in `seq2seq` setting.
-        # Currently chunking is not possible at this level for `seq2seq` so
-        # it's ok.
-        align_to = getattr(self.model.config, "inputs_to_logits_ratio", 1)
+        align_to = self._align_to
         chunk_len = int(round(chunk_length_s * self.feature_extractor.sampling_rate / align_to) * align_to)
         stride_left = int(round(stride_length_s[0] * self.feature_extractor.sampling_rate / align_to) * align_to)
         stride_right = int(round(stride_length_s[1] * self.feature_extractor.sampling_rate / align_to) * align_to)
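The chunk and stride lengths are snapped to whole multiples of `align_to`, so chunk boundaries always fall on output-frame boundaries. A small worked example of the rounding, with assumed values:

    # Worked example of the rounding above (all values assumed for illustration).
    sampling_rate = 16_000
    align_to = 512          # hypothetical samples-per-frame
    chunk_length_s = 30

    chunk_len = int(round(chunk_length_s * sampling_rate / align_to) * align_to)
    # 480_000 / 512 = 937.5 -> round -> 938 -> 938 * 512 = 480_256
    assert chunk_len == 480_256
    assert chunk_len % align_to == 0  # always a whole number of frames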
@@ -567,7 +574,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             # Send stride to `postprocess`.
             # it needs to be handled there where
             # the pieces are to be concatenated.
-            ratio = 1 / self.model.config.inputs_to_logits_ratio
+            ratio = 1 / self._align_to
             if isinstance(stride, tuple):
                 out["stride"] = rescale_stride([stride], ratio)[0]
             else:
@@ -650,11 +657,12 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
 
         if return_timestamps and self.type not in {"seq2seq", "seq2seq_whisper"}:
             chunks = []
+            align_to = self._align_to
             for item in offsets:
-                start = item["start_offset"] * self.model.config.inputs_to_logits_ratio
+                start = item["start_offset"] * align_to
                 start /= self.feature_extractor.sampling_rate
 
-                stop = item["end_offset"] * self.model.config.inputs_to_logits_ratio
+                stop = item["end_offset"] * align_to
                 stop /= self.feature_extractor.sampling_rate
 
                 chunks.append({"text": item[return_timestamps], "timestamp": (start, stop)})
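Converting a CTC token offset to seconds now goes through the same `align_to` value, which makes the timestamps correct for mel-input models too. A tiny worked example with assumed numbers:

    # Timestamp conversion sketch (numbers assumed for illustration).
    align_to = 320          # samples per output frame
    sampling_rate = 16_000
    start_offset = 500      # frame index from the CTC decode

    start = start_offset * align_to / sampling_rate
    assert start == 10.0    # the token starts 10 seconds into the audio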
transformers/pipelines/base.py
CHANGED

@@ -884,7 +884,7 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
         # NOTE: _prepare_generation_config creates a deep copy of the generation config before updating it,
         # and returns all kwargs that were not used to update the generation config
         prepared_generation_config, kwargs = self.model._prepare_generation_config(
-            generation_config=default_pipeline_generation_config,
+            generation_config=default_pipeline_generation_config, **kwargs
        )
         self.generation_config = prepared_generation_config
         # if the `max_new_tokens` is set to the pipeline default, but `max_length` is set to a non-default
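Leftover pipeline kwargs are now forwarded into `_prepare_generation_config`, so generation settings passed at pipeline construction should land in `self.generation_config`. A hedged usage sketch (model name illustrative):

    # Sketch: generation kwargs given at construction time should now be folded
    # into the pipeline's generation config.
    from transformers import pipeline

    generator = pipeline("text-generation", model="gpt2", max_new_tokens=32)
    print(generator.generation_config.max_new_tokens)  # expected: 32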
@@ -950,20 +950,13 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
         pipe_information["output_modalities"] = self.model.output_modalities
         return f"{self.__class__.__name__}: {pipe_information}"
 
-    def save_pretrained(
-        self,
-        save_directory: str | os.PathLike,
-        safe_serialization: bool = True,
-        **kwargs: Any,
-    ):
+    def save_pretrained(self, save_directory: str | os.PathLike, **kwargs: Any):
         """
         Save the pipeline's model and tokenizer.
 
         Args:
             save_directory (`str` or `os.PathLike`):
                 A path to the directory where to saved. It will be created if it doesn't exist.
-            safe_serialization (`str`):
-                Whether to save the model using `safetensors` or PyTorch serialization.
             kwargs (`dict[str, Any]`, *optional*):
                 Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
@@ -992,7 +985,6 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
         # Save the pipeline custom code
         custom_object_save(self, save_directory)
 
-        kwargs["safe_serialization"] = safe_serialization
         self.model.save_pretrained(save_directory, **kwargs)
 
         if self.tokenizer is not None:
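`safe_serialization` is no longer a named parameter of `Pipeline.save_pretrained`; it now reaches `model.save_pretrained` through `**kwargs` like any other serialization option, so existing call sites keep working. A sketch (model name and path illustrative):

    # Sketch: `safe_serialization` now travels via **kwargs.
    from transformers import pipeline

    pipe = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
    pipe.save_pretrained("./my_pipeline", safe_serialization=True)  # forwarded to model.save_pretrained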
transformers/pipelines/document_question_answering.py
CHANGED

@@ -146,7 +146,9 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"):
+        if self.tokenizer is not None and not (
+            self.tokenizer.__class__.__name__.endswith("Fast") or self.tokenizer.backend == "tokenizers"
+        ):
             raise ValueError(
                 "`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
                 f"(`{self.tokenizer.__class__.__name__}`) is provided."
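The check now also accepts tokenizers whose `backend` is "tokenizers", not only classes whose name ends in "Fast". The new acceptance rule, pulled out as a standalone predicate for clarity (a sketch, not code from the diff):

    # Sketch of the new acceptance rule as a standalone predicate.
    def is_fast_enough(tokenizer) -> bool:
        # Accept legacy `*Fast` classes and any tokenizer backed by the
        # `tokenizers` library (the unified backend attribute in v5).
        return (
            tokenizer.__class__.__name__.endswith("Fast")
            or getattr(tokenizer, "backend", None) == "tokenizers"
        )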
@@ -199,7 +201,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
             postprocess_params["top_k"] = top_k
         if max_answer_len is not None:
             if max_answer_len < 1:
-                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
+                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
             postprocess_params["max_answer_len"] = max_answer_len
         if handle_impossible_answer is not None:
             postprocess_params["handle_impossible_answer"] = handle_impossible_answer
transformers/pipelines/question_answering.py
CHANGED

@@ -328,7 +328,7 @@ class QuestionAnsweringPipeline(ChunkPipeline):
             postprocess_params["top_k"] = top_k
         if max_answer_len is not None:
             if max_answer_len < 1:
-                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
+                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
             postprocess_params["max_answer_len"] = max_answer_len
         if handle_impossible_answer is not None:
             postprocess_params["handle_impossible_answer"] = handle_impossible_answer

transformers/pipelines/text_generation.py
CHANGED

@@ -486,7 +486,7 @@ class TextGenerationPipeline(Pipeline):
             ]
         else:
             # When we're not starting from a prefill, the output is a new assistant message
-            if self.tokenizer.response_schema:
+            if getattr(self.tokenizer, "response_schema", False):
                 assistant_message = self.tokenizer.parse_response(all_text)
             else:
                 # If there's no schema, then we have to assume it's all content

transformers/pipelines/text_to_audio.py
CHANGED

@@ -117,8 +117,8 @@ class TextToAudioPipeline(Pipeline):
             else vocoder
         )

-        if self.model.config.model_type in ["musicgen"]:
-            # MusicGen expect to use their tokenizer instead
+        if self.model.config.model_type in ["musicgen", "speecht5"]:
+            # MusicGen and SpeechT5 expect to use their tokenizer instead
             self.processor = None

         self.sampling_rate = sampling_rate

transformers/processing_utils.py
CHANGED

@@ -28,7 +28,7 @@ from typing import Annotated, Any, Literal, Optional, TypedDict, TypeVar, Union

 import numpy as np
 import typing_extensions
-from huggingface_hub import create_repo
+from huggingface_hub import create_repo, is_offline_mode
 from huggingface_hub.dataclasses import validate_typed_dict
 from huggingface_hub.errors import EntryNotFoundError

@@ -54,7 +54,6 @@ from .utils import (
     cached_file,
     copy_func,
     direct_transformers_import,
-    is_offline_mode,
     is_torch_available,
     list_repo_templates,
     logging,

@@ -130,6 +129,26 @@ MODALITY_TO_BASE_CLASS_MAPPING = {
     "video_processor": "BaseVideoProcessor",
 }

+
+def _get_modality_for_attribute(attribute_name: str) -> str:
+    """
+    Get the canonical modality type for a given attribute name.
+
+    For example:
+    - "image_processor" -> "image_processor"
+    - "encoder_image_processor" -> "image_processor"
+    - "text_tokenizer" -> "tokenizer"
+    - "my_feature_extractor" -> "feature_extractor"
+    """
+    for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys():
+        if modality in attribute_name:
+            return modality
+    raise ValueError(
+        f"Cannot determine modality for attribute '{attribute_name}'. "
+        f"Attribute name must contain one of: {list(MODALITY_TO_AUTOPROCESSOR_MAPPING.keys())}"
+    )
+
+
 if sys.version_info >= (3, 11):
     Unpack = typing.Unpack
 else:

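Note: expected behavior of the new helper, per its own docstring (the last call assumes no modality key matches):

    _get_modality_for_attribute("encoder_image_processor")  # -> "image_processor"
    _get_modality_for_attribute("decoder_tokenizer")        # -> "tokenizer"
    _get_modality_for_attribute("unrelated_name")           # raises ValueError
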
@@ -664,8 +683,10 @@
         mismatch between expected and actual class, an error is raise. Otherwise, the proper retrieved class
         is returned.
         """
-
-
+        # If the exact attribute name is not in the mapping, use its canonical modality
+        # (e.g., "encoder_tokenizer" -> "tokenizer")
+        if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING:
+            argument_name = _get_modality_for_attribute(argument_name)
         class_name = MODALITY_TO_BASE_CLASS_MAPPING.get(argument_name)
         if isinstance(class_name, tuple):
             proper_class = tuple(self.get_possibly_dynamic_module(n) for n in class_name if n is not None)

@@ -696,28 +717,17 @@
         # extra attributes to be kept
         attrs_to_save += ["auto_map"]

-
-
-
-
-
-
-
-
+        # Remove tokenizers from output - they have their own vocab files and are saved separately.
+        # All other sub-processors (image_processor, feature_extractor, etc.) are kept in processor_config.json.
+        for attribute in self.__class__.get_attributes():
+            if attribute in output:
+                modality = _get_modality_for_attribute(attribute)
+                if modality == "tokenizer":
+                    del output[attribute]
+
         if "chat_template" in output:
             del output["chat_template"]

-        def save_public_processor_class(dictionary):
-            # make sure private name "_processor_class" is correctly
-            # saved as "processor_class"
-            _processor_class = dictionary.pop("_processor_class", None)
-            if _processor_class is not None:
-                dictionary["processor_class"] = _processor_class
-            for value in dictionary.values():
-                if isinstance(value, dict):
-                    save_public_processor_class(value)
-            return dictionary
-
         def cast_array_to_list(dictionary):
             """
             Numpy arrays are not serialiazable but can be in pre-processing dicts.

@@ -748,7 +758,6 @@
             )
         }
         output = cast_array_to_list(output)
-        output = save_public_processor_class(output)
         output["processor_class"] = self.__class__.__name__

         return output

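Note: combined with the `to_dict` change above, tokenizer-like attributes no longer appear in the serialized dict while other sub-processors stay inline. A sketch of the expectation, assuming `processor` has both a tokenizer and an image processor:

    output = processor.to_dict()
    assert "tokenizer" not in output      # saved separately alongside its vocab files
    assert "image_processor" in output    # stays inline in processor_config.json
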
@@ -821,16 +830,17 @@

         for attribute_name in self.get_attributes():
             attribute = getattr(self, attribute_name)
-            if hasattr(attribute, "_set_processor_class"):
-                attribute._set_processor_class(self.__class__.__name__)

-
-
-
-
-
-
-
+            modality = _get_modality_for_attribute(attribute_name)
+            is_primary = attribute_name == modality
+            if modality == "tokenizer":
+                attribute._set_processor_class(self.__class__.__name__)
+                # Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
+                if is_primary:
+                    attribute.save_pretrained(save_directory)
+                else:
+                    # if a model has multiple tokenizers, save the additional tokenizers in their own folders.
+                    attribute.save_pretrained(os.path.join(save_directory, attribute_name))
             elif attribute._auto_class is not None:
                 custom_object_save(attribute, save_directory, config=attribute)

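Note: for a processor with a primary and one additional tokenizer, the resulting `save_pretrained` layout would look roughly like this (the attribute name `decoder_tokenizer` is assumed for illustration):

    save_dir/
        processor_config.json          <- non-tokenizer sub-processor configs
        tokenizer_config.json, ...     <- primary tokenizer, saved at the root
        decoder_tokenizer/             <- additional tokenizer, in its own folder
            tokenizer_config.json, ...
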
@@ -1398,9 +1408,10 @@
         if token is not None:
             kwargs["token"] = token

-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
-        return cls.from_args_and_dict(args, processor_dict, **kwargs)
+        # Get processor_dict first so we can use it to instantiate non-tokenizer sub-processors
+        processor_dict, instantiation_kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
+        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+        return cls.from_args_and_dict(args, processor_dict, **instantiation_kwargs)

     @classmethod
     def get_attributes(cls):

@@ -1410,7 +1421,7 @@
             # don't treat audio_tokenizer as an attribute
             if sub_processor_type == "audio_tokenizer":
                 continue
-            if
+            if any(modality in sub_processor_type for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
                 attributes.append(sub_processor_type)

         # Legacy processors may not override `__init__` and instead expose modality

@@ -1424,7 +1435,7 @@
             inferred_attribute = attribute_name[: -len("_class")]
             if inferred_attribute == "audio_tokenizer":
                 continue
-            if
+            if any(modality in inferred_attribute for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
                 attributes.append(inferred_attribute)

         return attributes

@@ -1452,32 +1463,104 @@
         cls._auto_class = auto_class

     @classmethod
-    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+    def _load_tokenizer_from_pretrained(
+        cls, sub_processor_type, pretrained_model_name_or_path, subfolder="", **kwargs
+    ):
+        auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+        is_primary = sub_processor_type == "tokenizer"
+
+        if is_primary:
+            # Primary tokenizer: load from root
+            tokenizer = auto_processor_class.from_pretrained(
+                pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+            )
+        else:
+            # Additional tokenizer: load from subfolder (e.g., "decoder_tokenizer")
+            tokenizer_subfolder = os.path.join(subfolder, sub_processor_type) if subfolder else sub_processor_type
+            tokenizer = auto_processor_class.from_pretrained(
+                pretrained_model_name_or_path, subfolder=tokenizer_subfolder, **kwargs
+            )
+        return tokenizer
+
+    @classmethod
+    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor_dict=None, **kwargs):
         """
         Identify and instantiate the subcomponents of Processor classes, such as image processors, tokenizers,
         and feature extractors. This method inspects the processor's `__init__` signature to identify parameters
         that correspond to known modality types (image_processor, tokenizer, feature_extractor, etc.) or contain
-
-
-
+        modality names in their attribute name.
+
+        For tokenizers: Uses the appropriate Auto class (AutoTokenizer) to load via `.from_pretrained()`.
+        Additional tokenizers (e.g., "decoder_tokenizer") are loaded from subfolders.
+
+        For other sub-processors (image_processor, feature_extractor, etc.): Primary ones are loaded via
+        Auto class. Additional ones are instantiated from the config stored in processor_config.json
+        (passed as processor_dict).
+
+        Args:
+            pretrained_model_name_or_path: Path or model id to load from.
+            processor_dict: Optional dict containing processor config (from processor_config.json).
+                Required when loading additional non-tokenizer sub-processors.
         """
         args = []
+        processor_dict = processor_dict if processor_dict is not None else {}
+        # Remove subfolder from kwargs to avoid duplicate keyword arguments
+        subfolder = kwargs.pop("subfolder", "")
+
         # get args from processor init signature
         sub_processors = cls.get_attributes()
         for sub_processor_type in sub_processors:
-
+            modality = _get_modality_for_attribute(sub_processor_type)
+            is_primary = sub_processor_type == modality
+
+            if (
+                "tokenizer" in sub_processor_type
+            ):  # This is only necessary for the checkpoint in test_processing_mistral3.py which has no config.json and
+                # the tokenizer_config.json references LlamaTokenizerFast. TODO: update the config on the hub.
+                if "PixtralProcessor" in cls.__name__:
+                    from .tokenization_utils_tokenizers import TokenizersBackend
+
+                    tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
+                else:
+                    tokenizer = cls._load_tokenizer_from_pretrained(
+                        sub_processor_type, pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+                    )
+                args.append(tokenizer)
+            elif is_primary:
+                # Primary non-tokenizer sub-processor: load via Auto class
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
-            sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
-            args.append(sub_processor)
-            elif "tokenizer" in sub_processor_type:
-                # Special case: tokenizer-like parameters not in the mapping (e.g., "protein_tokenizer")
-                # Load using AutoTokenizer with subfolder
-                auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
                 sub_processor = auto_processor_class.from_pretrained(
-                    pretrained_model_name_or_path, subfolder=
+                    pretrained_model_name_or_path, subfolder=subfolder, **kwargs
                 )
                 args.append(sub_processor)
+            elif sub_processor_type in processor_dict:
+                # Additional non-tokenizer sub-processor: instantiate from config in processor_dict
+                sub_processor_config = processor_dict[sub_processor_type]
+                if isinstance(sub_processor_config, dict):
+                    # Determine the class to instantiate
+                    # Image processors have 'image_processor_type', feature extractors have 'feature_extractor_type'
+                    type_key = f"{modality}_type"
+                    class_name = sub_processor_config.get(type_key)
+                    if class_name is None:
+                        raise ValueError(
+                            f"Cannot instantiate {sub_processor_type}: missing '{type_key}' in config. "
+                            f"Config keys: {list(sub_processor_config.keys())}"
+                        )
+                    processor_class = cls.get_possibly_dynamic_module(class_name)
+                    sub_processor = processor_class(**sub_processor_config)
+                    args.append(sub_processor)
+                else:
+                    raise ValueError(
+                        f"Expected dict for {sub_processor_type} in processor_config.json, "
+                        f"got {type(sub_processor_config)}"
+                    )
+            else:
+                raise ValueError(
+                    f"Cannot find config for {sub_processor_type} in processor_config.json. "
+                    f"Available keys: {list(processor_dict.keys())}"
+                )

         return args

     @staticmethod

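Note: the loading paths for each attribute returned by `get_attributes()` now break down as follows (sketch; attribute names illustrative):

    # "tokenizer"               -> AutoTokenizer.from_pretrained(repo)
    # "decoder_tokenizer"       -> AutoTokenizer.from_pretrained(repo, subfolder="decoder_tokenizer")
    # "image_processor"         -> AutoImageProcessor.from_pretrained(repo)   (primary, via Auto class)
    # "encoder_image_processor" -> rebuilt from processor_dict["encoder_image_processor"],
    #                              keyed by its "image_processor_type" entry
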
transformers/quantizers/auto.py
CHANGED

@@ -302,7 +302,7 @@ def register_quantizer(name: str):
     return register_quantizer_fn


-def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_only, user_agent):
+def get_hf_quantizer(config, quantization_config, device_map, weights_only, user_agent):
     pre_quantized = hasattr(config, "quantization_config")
     if pre_quantized and not AutoHfQuantizer.supports_quant_method(config.quantization_config):
         pre_quantized = False

@@ -324,11 +324,9 @@ def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_onl

     if hf_quantizer is not None:
         hf_quantizer.validate_environment(
-            dtype=dtype,
             device_map=device_map,
             weights_only=weights_only,
         )
-        dtype = hf_quantizer.update_dtype(dtype)
         device_map = hf_quantizer.update_device_map(device_map)
         config = hf_quantizer.update_tp_plan(config)
         config = hf_quantizer.update_ep_plan(config)

@@ -337,4 +335,4 @@ def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_onl
     if not getattr(hf_quantizer.quantization_config, "dequantize", False):
         quant_method = hf_quantizer.quantization_config.quant_method
         user_agent["quant"] = getattr(quant_method, "value", quant_method)
-    return hf_quantizer, config, dtype, device_map
+    return hf_quantizer, config, device_map
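
Note: call sites must drop `dtype` from both the arguments and the returned tuple. A sketch of the new call shape:

    hf_quantizer, config, device_map = get_hf_quantizer(
        config, quantization_config, device_map, weights_only, user_agent
    )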