transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -19,19 +19,21 @@ fronting encoding methods) Special token mixing (host the special tokens logic)
|
|
|
19
19
|
of output with special method for the Fast tokenizers)
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
22
24
|
import copy
|
|
23
25
|
import json
|
|
24
26
|
import os
|
|
25
27
|
import re
|
|
26
28
|
import warnings
|
|
27
29
|
from collections import OrderedDict, UserDict
|
|
28
|
-
from collections.abc import Callable, Mapping, Sequence, Sized
|
|
30
|
+
from collections.abc import Callable, Collection, Mapping, Sequence, Sized
|
|
29
31
|
from dataclasses import dataclass
|
|
30
32
|
from pathlib import Path
|
|
31
33
|
from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
|
|
32
34
|
|
|
33
35
|
import numpy as np
|
|
34
|
-
from huggingface_hub import create_repo, list_repo_files
|
|
36
|
+
from huggingface_hub import create_repo, is_offline_mode, list_repo_files
|
|
35
37
|
from packaging import version
|
|
36
38
|
|
|
37
39
|
from . import __version__
|
|
@@ -49,7 +51,6 @@ from .utils import (
|
|
|
49
51
|
extract_commit_hash,
|
|
50
52
|
is_mlx_available,
|
|
51
53
|
is_numpy_array,
|
|
52
|
-
is_offline_mode,
|
|
53
54
|
is_protobuf_available,
|
|
54
55
|
is_tokenizers_available,
|
|
55
56
|
is_torch_available,
|
|
@@ -60,6 +61,7 @@ from .utils import (
|
|
|
60
61
|
requires_backends,
|
|
61
62
|
to_py_obj,
|
|
62
63
|
)
|
|
64
|
+
from .utils.chat_parsing_utils import recursive_parse
|
|
63
65
|
from .utils.chat_template_utils import render_jinja_template
|
|
64
66
|
from .utils.import_utils import PROTOBUF_IMPORT_ERROR
|
|
65
67
|
|
|
@@ -756,7 +758,7 @@ class BatchEncoding(UserDict):
|
|
|
756
758
|
|
|
757
759
|
return self
|
|
758
760
|
|
|
759
|
-
def to(self, device: Union[str,
|
|
761
|
+
def to(self, device: Union[str, torch.device], *, non_blocking: bool = False) -> BatchEncoding:
|
|
760
762
|
"""
|
|
761
763
|
Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only).
|
|
762
764
|
|
|
@@ -970,7 +972,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
970
972
|
|
|
971
973
|
# first name has to correspond to main model input name
|
|
972
974
|
# to make sure `tokenizer.pad(...)` works correctly
|
|
973
|
-
model_input_names: list[str] = ["input_ids", "
|
|
975
|
+
model_input_names: list[str] = ["input_ids", "attention_mask"]
|
|
974
976
|
padding_side: str = "right"
|
|
975
977
|
truncation_side: str = "right"
|
|
976
978
|
slow_tokenizer_class = None
|
|
@@ -1629,11 +1631,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1629
1631
|
f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is not "
|
|
1630
1632
|
"supported for this tokenizer. Use a model identifier or the path to a directory instead."
|
|
1631
1633
|
)
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
file_id = [k for k in cls.vocab_files_names.keys() if k != "tokenizer_file"][0]
|
|
1636
|
-
|
|
1634
|
+
file_id = "vocab_file"
|
|
1635
|
+
if pretrained_model_name_or_path.endswith("tokenizer.json"):
|
|
1636
|
+
file_id = "tokenizer_file"
|
|
1637
1637
|
vocab_files[file_id] = pretrained_model_name_or_path
|
|
1638
1638
|
single_file_id = file_id
|
|
1639
1639
|
else:
|
|
@@ -1651,10 +1651,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1651
1651
|
}
|
|
1652
1652
|
|
|
1653
1653
|
vocab_files = {**cls.vocab_files_names, **additional_files_names}
|
|
1654
|
+
|
|
1655
|
+
# Check for versioned tokenizer files
|
|
1654
1656
|
if "tokenizer_file" in vocab_files:
|
|
1655
|
-
# Try to get the tokenizer config to see if there are versioned tokenizer files.
|
|
1656
1657
|
fast_tokenizer_file = FULL_TOKENIZER_FILE
|
|
1657
|
-
|
|
1658
1658
|
try:
|
|
1659
1659
|
resolved_config_file = cached_file(
|
|
1660
1660
|
pretrained_model_name_or_path,
|
|
@@ -1670,43 +1670,33 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1670
1670
|
_raise_exceptions_for_missing_entries=False,
|
|
1671
1671
|
_commit_hash=commit_hash,
|
|
1672
1672
|
)
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1673
|
+
if resolved_config_file is not None:
|
|
1674
|
+
with open(resolved_config_file, encoding="utf-8") as reader:
|
|
1675
|
+
tokenizer_config = json.load(reader)
|
|
1676
|
+
if "fast_tokenizer_files" in tokenizer_config:
|
|
1677
|
+
fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
|
|
1678
|
+
commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
|
|
1676
1679
|
except Exception:
|
|
1677
|
-
|
|
1678
|
-
raise OSError(
|
|
1679
|
-
f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
|
|
1680
|
-
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
|
|
1681
|
-
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
|
|
1682
|
-
f"containing all relevant files for a {cls.__name__} tokenizer."
|
|
1683
|
-
)
|
|
1684
|
-
|
|
1685
|
-
commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
|
|
1686
|
-
if resolved_config_file is not None:
|
|
1687
|
-
with open(resolved_config_file, encoding="utf-8") as reader:
|
|
1688
|
-
tokenizer_config = json.load(reader)
|
|
1689
|
-
if "fast_tokenizer_files" in tokenizer_config:
|
|
1690
|
-
fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
|
|
1680
|
+
pass
|
|
1691
1681
|
vocab_files["tokenizer_file"] = fast_tokenizer_file
|
|
1692
1682
|
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1683
|
+
# This block looks for any extra chat template files
|
|
1684
|
+
if is_local:
|
|
1685
|
+
template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
|
|
1686
|
+
if template_dir.is_dir():
|
|
1687
|
+
for template_file in template_dir.glob("*.jinja"):
|
|
1688
|
+
template_name = template_file.name.removesuffix(".jinja")
|
|
1689
|
+
vocab_files[f"chat_template_{template_name}"] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
|
|
1690
|
+
else:
|
|
1691
|
+
for template in list_repo_templates(
|
|
1692
|
+
pretrained_model_name_or_path,
|
|
1693
|
+
local_files_only=local_files_only,
|
|
1694
|
+
revision=revision,
|
|
1695
|
+
cache_dir=cache_dir,
|
|
1696
|
+
token=token,
|
|
1697
|
+
):
|
|
1698
|
+
template = template.removesuffix(".jinja")
|
|
1699
|
+
vocab_files[f"chat_template_{template}"] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
|
|
1710
1700
|
|
|
1711
1701
|
remote_files = []
|
|
1712
1702
|
if not is_local and not local_files_only:
|
|
@@ -1764,11 +1754,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1764
1754
|
if file_id not in resolved_vocab_files:
|
|
1765
1755
|
continue
|
|
1766
1756
|
|
|
1767
|
-
if is_local:
|
|
1768
|
-
logger.info(f"loading file {file_path}")
|
|
1769
|
-
else:
|
|
1770
|
-
logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
|
|
1771
|
-
|
|
1772
1757
|
return cls._from_pretrained(
|
|
1773
1758
|
resolved_vocab_files,
|
|
1774
1759
|
pretrained_model_name_or_path,
|
|
@@ -1798,29 +1783,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1798
1783
|
trust_remote_code=False,
|
|
1799
1784
|
**kwargs,
|
|
1800
1785
|
):
|
|
1801
|
-
# We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
|
|
1802
|
-
# file or if `from_slow` is set to True.
|
|
1803
|
-
from_slow = kwargs.get("from_slow", False)
|
|
1804
|
-
gguf_file = kwargs.get("gguf_file")
|
|
1805
|
-
has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
|
|
1806
|
-
|
|
1807
|
-
# If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be
|
|
1808
|
-
# loaded directly from the GGUF file.
|
|
1809
|
-
if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None and not gguf_file:
|
|
1810
|
-
slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
|
|
1811
|
-
copy.deepcopy(resolved_vocab_files),
|
|
1812
|
-
pretrained_model_name_or_path,
|
|
1813
|
-
copy.deepcopy(init_configuration),
|
|
1814
|
-
*init_inputs,
|
|
1815
|
-
token=token,
|
|
1816
|
-
cache_dir=cache_dir,
|
|
1817
|
-
local_files_only=local_files_only,
|
|
1818
|
-
_commit_hash=_commit_hash,
|
|
1819
|
-
**(copy.deepcopy(kwargs)),
|
|
1820
|
-
)
|
|
1821
|
-
else:
|
|
1822
|
-
slow_tokenizer = None
|
|
1823
|
-
|
|
1824
1786
|
# Prepare tokenizer initialization kwargs
|
|
1825
1787
|
# Did we saved some inputs and kwargs to reload ?
|
|
1826
1788
|
tokenizer_config_file = resolved_vocab_files.pop("tokenizer_config_file", None)
|
|
@@ -1829,14 +1791,16 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1829
1791
|
init_kwargs = json.load(tokenizer_config_handle)
|
|
1830
1792
|
# used in the past to check if the tokenizer class matches the class in the repo
|
|
1831
1793
|
init_kwargs.pop("tokenizer_class", None)
|
|
1832
|
-
if not has_tokenizer_file:
|
|
1833
|
-
init_kwargs.get("tokenizer_file", None)
|
|
1834
1794
|
saved_init_inputs = init_kwargs.pop("init_inputs", ())
|
|
1835
1795
|
if not init_inputs:
|
|
1836
1796
|
init_inputs = saved_init_inputs
|
|
1837
1797
|
else:
|
|
1838
1798
|
init_kwargs = init_configuration
|
|
1839
1799
|
|
|
1800
|
+
if resolved_vocab_files.get("tokenizer_file", None) is not None:
|
|
1801
|
+
init_kwargs.pop("add_bos_token", None)
|
|
1802
|
+
init_kwargs.pop("add_eos_token", None)
|
|
1803
|
+
|
|
1840
1804
|
# If independent chat template file(s) exist, they take priority over template entries in the tokenizer config
|
|
1841
1805
|
chat_templates = {}
|
|
1842
1806
|
chat_template_file = resolved_vocab_files.pop("chat_template_file", None)
|
|
@@ -1917,8 +1881,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1917
1881
|
init_kwargs[args_name] = file_path
|
|
1918
1882
|
tokenizer_file = resolved_vocab_files.get("tokenizer_file", None)
|
|
1919
1883
|
|
|
1920
|
-
if slow_tokenizer is not None:
|
|
1921
|
-
init_kwargs["__slow_tokenizer"] = slow_tokenizer
|
|
1922
1884
|
init_kwargs["name_or_path"] = pretrained_model_name_or_path
|
|
1923
1885
|
init_kwargs["is_local"] = _is_local
|
|
1924
1886
|
|
|
@@ -2037,28 +1999,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2037
1999
|
if key in init_kwargs and added_tokens_map != {} and init_kwargs[key] is not None:
|
|
2038
2000
|
init_kwargs[key] = added_tokens_map.get(str(init_kwargs[key]), init_kwargs[key])
|
|
2039
2001
|
|
|
2040
|
-
#
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
# If tokenizer_file is in the class's vocab_files_names and exists, prioritize it (TokenizersBackend)
|
|
2047
|
-
if "tokenizer_file" in tokenizer_needs_files and resolved_vocab_files.get("tokenizer_file"):
|
|
2048
|
-
files_loaded.append(os.path.basename(resolved_vocab_files["tokenizer_file"]))
|
|
2049
|
-
else:
|
|
2050
|
-
# Otherwise, add the actual vocab files that were used by this tokenizer class
|
|
2051
|
-
for file_key, file_path in resolved_vocab_files.items():
|
|
2052
|
-
if (
|
|
2053
|
-
file_path
|
|
2054
|
-
and file_key not in ["tokenizer_config_file", "special_tokens_map_file", "added_tokens_file"]
|
|
2055
|
-
and file_key in tokenizer_needs_files
|
|
2056
|
-
):
|
|
2057
|
-
# Extract just the filename from the path
|
|
2058
|
-
files_loaded.append(os.path.basename(file_path))
|
|
2059
|
-
init_kwargs["files_loaded"] = files_loaded
|
|
2002
|
+
# From pretrained with the legacy fixes
|
|
2003
|
+
# for `tokenizers` based tokenizer, we actually want to have vocab and merges pre-extracted from whatever inputs
|
|
2004
|
+
# for `none` (PythonBackend) based tokenizer, we also want the vocab file / merge files not extracted.
|
|
2005
|
+
# for `sentencepiece` based tokenizer, we pass the sentencepiece model file directly.
|
|
2006
|
+
init_kwargs = cls.convert_to_native_format(**init_kwargs)
|
|
2060
2007
|
|
|
2061
|
-
# Instantiate the tokenizer.
|
|
2062
2008
|
try:
|
|
2063
2009
|
tokenizer = cls(*init_inputs, **init_kwargs)
|
|
2064
2010
|
except import_protobuf_decode_error():
|
|
@@ -2079,118 +2025,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2079
2025
|
"Unable to load vocabulary from file. "
|
|
2080
2026
|
"Please check that the provided vocabulary is accessible and not corrupted."
|
|
2081
2027
|
)
|
|
2082
|
-
|
|
2083
|
-
# If tokenizer_file exists and tokenizer has a TokenizersBackend, replace the blank tokenizer with tokenizer.json
|
|
2084
|
-
if tokenizer_file is not None and hasattr(tokenizer, "_tokenizer"):
|
|
2085
|
-
from tokenizers import Tokenizer as TokenizerFast
|
|
2086
|
-
|
|
2087
|
-
tokenizer._tokenizer = TokenizerFast.from_file(tokenizer_file)
|
|
2088
|
-
# Re-run post-initialization if the tokenizer has it
|
|
2089
|
-
if hasattr(tokenizer, "_post_init"):
|
|
2090
|
-
tokenizer._post_init()
|
|
2091
|
-
# If only SPM exists, try to get vocab and merges and init to load a tokenizers-backend
|
|
2092
|
-
else:
|
|
2093
|
-
spm_filename = find_sentencepiece_model_file(
|
|
2094
|
-
pretrained_model_name_or_path,
|
|
2095
|
-
revision=kwargs.get("revision"),
|
|
2096
|
-
token=kwargs.get("token"),
|
|
2097
|
-
cache_dir=kwargs.get("cache_dir"),
|
|
2098
|
-
local_files_only=kwargs.get("local_files_only", False),
|
|
2099
|
-
subfolder=kwargs.get("subfolder", ""),
|
|
2100
|
-
)
|
|
2101
|
-
if spm_filename is not None:
|
|
2102
|
-
try:
|
|
2103
|
-
resolved_spm = cached_file(
|
|
2104
|
-
pretrained_model_name_or_path,
|
|
2105
|
-
spm_filename,
|
|
2106
|
-
cache_dir=kwargs.get("cache_dir"),
|
|
2107
|
-
force_download=kwargs.get("force_download", False),
|
|
2108
|
-
proxies=kwargs.get("proxies"),
|
|
2109
|
-
token=kwargs.get("token"),
|
|
2110
|
-
revision=kwargs.get("revision"),
|
|
2111
|
-
local_files_only=kwargs.get("local_files_only", False),
|
|
2112
|
-
subfolder=kwargs.get("subfolder", ""),
|
|
2113
|
-
)
|
|
2114
|
-
except Exception:
|
|
2115
|
-
resolved_spm = None
|
|
2116
|
-
if resolved_spm is not None:
|
|
2117
|
-
try:
|
|
2118
|
-
# Mirror AutoTokenizer fallback: extract vocab/merges from SentencePiece
|
|
2119
|
-
import inspect as _inspect
|
|
2120
|
-
|
|
2121
|
-
from .tokenization_utils_sentencepiece import SentencePieceExtractor
|
|
2122
|
-
|
|
2123
|
-
class_sig = _inspect.signature(getattr(cls, "__init__", cls))
|
|
2124
|
-
vocab_ids, vocab_scores, merges = SentencePieceExtractor(resolved_spm).extract()
|
|
2125
|
-
files_loaded = [spm_filename]
|
|
2126
|
-
init_kwargs["backend"] = "tokenizers"
|
|
2127
|
-
init_kwargs["files_loaded"] = files_loaded
|
|
2128
|
-
# If tokenizer needs merges too (BPE), pass both; unigram models only need vocab
|
|
2129
|
-
if "merges" in class_sig.parameters:
|
|
2130
|
-
return cls.from_pretrained(
|
|
2131
|
-
pretrained_model_name_or_path,
|
|
2132
|
-
*init_inputs,
|
|
2133
|
-
vocab=vocab_scores,
|
|
2134
|
-
merges=merges,
|
|
2135
|
-
**init_kwargs,
|
|
2136
|
-
)
|
|
2137
|
-
elif "vocab" in class_sig.parameters:
|
|
2138
|
-
return cls.from_pretrained(
|
|
2139
|
-
pretrained_model_name_or_path,
|
|
2140
|
-
*init_inputs,
|
|
2141
|
-
vocab=vocab_scores,
|
|
2142
|
-
**init_kwargs,
|
|
2143
|
-
)
|
|
2144
|
-
except Exception as e:
|
|
2145
|
-
logger.warning(
|
|
2146
|
-
f"Could not extract vocab/merges from the SentencePiece model to initialize a Tokenizers backend: {e}. We are falling back so we are falling back to the standard loading method."
|
|
2147
|
-
)
|
|
2148
|
-
pass
|
|
2149
|
-
# Fallback to vocab.json + merges.txt (BPE) or just vocab.json (WordLevel/WordPiece)
|
|
2150
|
-
vocab, merges, files_loaded = load_vocab_and_merges(
|
|
2151
|
-
pretrained_model_name_or_path,
|
|
2152
|
-
cache_dir=kwargs.get("cache_dir"),
|
|
2153
|
-
force_download=kwargs.get("force_download", False),
|
|
2154
|
-
proxies=kwargs.get("proxies"),
|
|
2155
|
-
token=kwargs.get("token"),
|
|
2156
|
-
revision=kwargs.get("revision"),
|
|
2157
|
-
local_files_only=kwargs.get("local_files_only", False),
|
|
2158
|
-
subfolder=kwargs.get("subfolder", ""),
|
|
2159
|
-
)
|
|
2160
|
-
|
|
2161
|
-
if vocab is not None:
|
|
2162
|
-
try:
|
|
2163
|
-
import inspect as _inspect
|
|
2164
|
-
|
|
2165
|
-
class_sig = _inspect.signature(getattr(cls, "__init__", cls))
|
|
2166
|
-
init_kwargs["backend"] = "tokenizers"
|
|
2167
|
-
init_kwargs["files_loaded"] = files_loaded
|
|
2168
|
-
|
|
2169
|
-
if merges is not None and "merges" in class_sig.parameters:
|
|
2170
|
-
return cls.from_pretrained(
|
|
2171
|
-
pretrained_model_name_or_path,
|
|
2172
|
-
*init_inputs,
|
|
2173
|
-
vocab=vocab,
|
|
2174
|
-
merges=merges,
|
|
2175
|
-
**init_kwargs,
|
|
2176
|
-
)
|
|
2177
|
-
elif "vocab" in class_sig.parameters:
|
|
2178
|
-
return cls.from_pretrained(
|
|
2179
|
-
pretrained_model_name_or_path,
|
|
2180
|
-
*init_inputs,
|
|
2181
|
-
vocab=vocab,
|
|
2182
|
-
**init_kwargs,
|
|
2183
|
-
)
|
|
2184
|
-
except Exception:
|
|
2185
|
-
pass
|
|
2186
|
-
if added_tokens_decoder != {} and max(list(added_tokens_decoder.keys())[-1], 0) > tokenizer.vocab_size:
|
|
2187
|
-
logger.info(
|
|
2188
|
-
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are"
|
|
2189
|
-
" fine-tuned or trained."
|
|
2190
|
-
)
|
|
2191
|
-
|
|
2192
2028
|
return tokenizer
|
|
2193
2029
|
|
|
2030
|
+
@classmethod
|
|
2031
|
+
def convert_to_native_format(cls, **kwargs):
|
|
2032
|
+
return kwargs
|
|
2033
|
+
|
|
2194
2034
|
@classmethod
|
|
2195
2035
|
def convert_added_tokens(cls, obj: Union[AddedToken, Any], save=False, add_type_field=True):
|
|
2196
2036
|
if isinstance(obj, dict) and "__type" in obj and obj["__type"] == "AddedToken":
|
|
@@ -2271,9 +2111,13 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2271
2111
|
)
|
|
2272
2112
|
|
|
2273
2113
|
tokenizer_config = copy.deepcopy(self.init_kwargs)
|
|
2114
|
+
tokenizer_config.pop("add_bos_token", None)
|
|
2115
|
+
tokenizer_config.pop("add_eos_token", None)
|
|
2274
2116
|
|
|
2275
2117
|
# Let's save the init kwargs
|
|
2276
2118
|
target_keys = set(self.init_kwargs.keys())
|
|
2119
|
+
target_keys.discard("add_bos_token")
|
|
2120
|
+
target_keys.discard("add_eos_token")
|
|
2277
2121
|
# Let's save the special tokens map (only the strings)
|
|
2278
2122
|
target_keys.update(["model_max_length"])
|
|
2279
2123
|
|
|
@@ -2308,9 +2152,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2308
2152
|
# Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained
|
|
2309
2153
|
tokenizer_class = self.__class__.__name__
|
|
2310
2154
|
|
|
2311
|
-
# tokenizers backend don't need to save added_tokens_decoder
|
|
2155
|
+
# tokenizers backend don't need to save added_tokens_decoder and additional_special_tokens
|
|
2312
2156
|
if any(base.__name__ == "TokenizersBackend" for base in self.__class__.__mro__):
|
|
2313
2157
|
tokenizer_config.pop("added_tokens_decoder", None)
|
|
2158
|
+
tokenizer_config.pop("additional_special_tokens", None)
|
|
2314
2159
|
|
|
2315
2160
|
# Remove the Fast at the end if we can save the slow tokenizer
|
|
2316
2161
|
if tokenizer_class.endswith("Fast") and getattr(self, "can_save_slow_tokenizer", False):
|
|
@@ -3045,7 +2890,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3045
2890
|
|
|
3046
2891
|
def decode(
|
|
3047
2892
|
self,
|
|
3048
|
-
token_ids: Union[int, list[int], list[list[int]], np.ndarray,
|
|
2893
|
+
token_ids: Union[int, list[int], list[list[int]], np.ndarray, torch.Tensor],
|
|
3049
2894
|
skip_special_tokens: bool = False,
|
|
3050
2895
|
**kwargs,
|
|
3051
2896
|
) -> Union[str, list[str]]:
|
|
@@ -3093,7 +2938,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3093
2938
|
|
|
3094
2939
|
def batch_decode(
|
|
3095
2940
|
self,
|
|
3096
|
-
sequences: Union[list[int], list[list[int]], np.ndarray,
|
|
2941
|
+
sequences: Union[list[int], list[list[int]], np.ndarray, torch.Tensor],
|
|
3097
2942
|
skip_special_tokens: bool = False,
|
|
3098
2943
|
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
3099
2944
|
**kwargs,
|
|
@@ -3192,7 +3037,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3192
3037
|
truncation: bool = False,
|
|
3193
3038
|
max_length: Optional[int] = None,
|
|
3194
3039
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
|
3195
|
-
return_dict: bool =
|
|
3040
|
+
return_dict: bool = True,
|
|
3196
3041
|
return_assistant_tokens_mask: bool = False,
|
|
3197
3042
|
tokenizer_kwargs: Optional[dict[str, Any]] = None,
|
|
3198
3043
|
**kwargs,
|
|
@@ -3265,14 +3110,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3265
3110
|
set, will return a dict of tokenizer outputs instead.
|
|
3266
3111
|
"""
|
|
3267
3112
|
|
|
3268
|
-
if
|
|
3269
|
-
|
|
3270
|
-
"`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
|
|
3271
|
-
"of tokenizer outputs to return."
|
|
3272
|
-
)
|
|
3113
|
+
if not tokenize:
|
|
3114
|
+
return_dict = False # dicts are only returned by the tokenizer anyway
|
|
3273
3115
|
|
|
3274
|
-
if return_assistant_tokens_mask and not return_dict:
|
|
3275
|
-
raise ValueError("`return_assistant_tokens_mask=True`
|
|
3116
|
+
if return_assistant_tokens_mask and not (return_dict and tokenize):
|
|
3117
|
+
raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`")
|
|
3276
3118
|
|
|
3277
3119
|
if tokenizer_kwargs is None:
|
|
3278
3120
|
tokenizer_kwargs = {}
|
|
@@ -3387,13 +3229,17 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3387
3229
|
)
|
|
3388
3230
|
|
|
3389
3231
|
if conversation_history is None or len(conversation_history) == 0:
|
|
3390
|
-
return self.apply_chat_template(
|
|
3232
|
+
return self.apply_chat_template(
|
|
3233
|
+
[message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
|
|
3234
|
+
)
|
|
3391
3235
|
|
|
3392
3236
|
conversation = conversation_history + [message]
|
|
3393
|
-
tokens = self.apply_chat_template(
|
|
3237
|
+
tokens = self.apply_chat_template(
|
|
3238
|
+
conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
|
|
3239
|
+
)
|
|
3394
3240
|
|
|
3395
3241
|
prefix_tokens = self.apply_chat_template(
|
|
3396
|
-
conversation_history, add_generation_prompt=False, tokenize=True, **kwargs
|
|
3242
|
+
conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
|
|
3397
3243
|
)
|
|
3398
3244
|
# It's possible that the prefix tokens are not a prefix of the full list of tokens.
|
|
3399
3245
|
# For example, if the prefix is `<s>User: Hi` and the full conversation is `<s>User: Hi</s><s>Assistant: Hello`.
|
|
@@ -3519,6 +3365,45 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3519
3365
|
tokenizer_config["chat_template"] = self.chat_template
|
|
3520
3366
|
return tokenizer_config, saved_raw_chat_template_files
|
|
3521
3367
|
|
|
3368
|
+
def parse_response(
|
|
3369
|
+
self,
|
|
3370
|
+
response: str | list[str | int | list[int]] | np.ndarray | torch.Tensor,
|
|
3371
|
+
schema: list | dict | None = None,
|
|
3372
|
+
):
|
|
3373
|
+
"""
|
|
3374
|
+
Converts an output string created by generating text from a model into a parsed message dictionary.
|
|
3375
|
+
This method is intended for use with chat models, and will read the tokenizer's `response_schema` attribute to
|
|
3376
|
+
control parsing, although this can be overridden by passing a `response_schema` argument directly.
|
|
3377
|
+
|
|
3378
|
+
This method is currently **highly experimental** and the schema specification is likely to change in future!
|
|
3379
|
+
We recommend not building production code on top of it just yet.
|
|
3380
|
+
|
|
3381
|
+
Args:
|
|
3382
|
+
response (`str`):
|
|
3383
|
+
The output string generated by the model. This can be either a decoded string or list of strings,
|
|
3384
|
+
or token IDs as a list/array.
|
|
3385
|
+
schema (`Union[list, dict]`, *optional*):
|
|
3386
|
+
A response schema that indicates the expected output format and how parsing should be performed.
|
|
3387
|
+
If not provided, the tokenizer's `response_schema` attribute will be used.
|
|
3388
|
+
"""
|
|
3389
|
+
batched = (
|
|
3390
|
+
(isinstance(response, list) and not isinstance(response[0], int))
|
|
3391
|
+
or getattr(response, "ndim", 0) > 1 # For torch/numpy tensors
|
|
3392
|
+
)
|
|
3393
|
+
|
|
3394
|
+
if schema is None:
|
|
3395
|
+
if getattr(self, "response_schema", None) is None:
|
|
3396
|
+
raise AttributeError("This tokenizer does not have a `response_schema` for parsing chat responses!")
|
|
3397
|
+
schema = self.response_schema
|
|
3398
|
+
if batched:
|
|
3399
|
+
if not (isinstance(response, list) and isinstance(response[0], str)):
|
|
3400
|
+
response = self.batch_decode(response)
|
|
3401
|
+
return [recursive_parse(single_response, schema) for single_response in response]
|
|
3402
|
+
else:
|
|
3403
|
+
if not isinstance(response, str):
|
|
3404
|
+
response = self.decode(response)
|
|
3405
|
+
return recursive_parse(response, schema)
|
|
3406
|
+
|
|
3522
3407
|
|
|
3523
3408
|
def get_fast_tokenizer_file(tokenization_files: list[str]) -> str:
|
|
3524
3409
|
"""
|
|
@@ -3728,15 +3613,22 @@ def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
|
|
|
3728
3613
|
return prepend_scheme
|
|
3729
3614
|
|
|
3730
3615
|
|
|
3731
|
-
def generate_merges(
|
|
3616
|
+
def generate_merges(
|
|
3617
|
+
vocab, vocab_scores: Optional[dict[str, float]] = None, skip_tokens: Optional[Collection[str]] = None
|
|
3618
|
+
):
|
|
3619
|
+
skip_tokens = set(skip_tokens) if skip_tokens is not None else set()
|
|
3732
3620
|
reverse = vocab_scores is not None
|
|
3733
3621
|
vocab_scores = dict(vocab_scores) if reverse else vocab
|
|
3734
3622
|
|
|
3735
3623
|
merges = []
|
|
3736
3624
|
for merge, piece_score in vocab_scores.items():
|
|
3625
|
+
if merge in skip_tokens:
|
|
3626
|
+
continue
|
|
3737
3627
|
local = []
|
|
3738
3628
|
for index in range(1, len(merge)):
|
|
3739
3629
|
piece_l, piece_r = merge[:index], merge[index:]
|
|
3630
|
+
if piece_l in skip_tokens or piece_r in skip_tokens:
|
|
3631
|
+
continue
|
|
3740
3632
|
if piece_l in vocab and piece_r in vocab:
|
|
3741
3633
|
local.append((piece_l, piece_r, piece_score))
|
|
3742
3634
|
local = sorted(local, key=lambda x: (vocab[x[0]], vocab[x[1]]))
|