transformers 5.0.0rc0-py3-none-any.whl → 5.0.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/models/auto/tokenization_auto.py

@@ -15,7 +15,6 @@
 """Auto Tokenizer class."""
 
 import importlib
-import inspect
 import json
 import os
 from collections import OrderedDict
@@ -26,8 +25,7 @@ from transformers.utils.import_utils import is_mistral_common_available
 from ...configuration_utils import PreTrainedConfig
 from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
 from ...modeling_gguf_pytorch_utils import load_gguf_checkpoint
-from ...
-from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE, find_sentencepiece_model_file, load_vocab_and_merges
+from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE
 from ...utils import (
     extract_commit_hash,
     is_g2p_en_available,
@@ -35,7 +33,7 @@ from ...utils import (
     is_tokenizers_available,
     logging,
 )
-from ...utils.hub import cached_file
+from ...utils.hub import cached_file
 from ..encoder_decoder import EncoderDecoderConfig
 from .auto_factory import _LazyAutoMapping
 from .configuration_auto import (
@@ -65,11 +63,10 @@ REGISTERED_FAST_ALIASES: dict[str, type[Any]] = {}
 
 TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
     [
-        ("aimv2", "
+        ("aimv2", "CLIPTokenizer" if is_tokenizers_available() else None),
         ("albert", "AlbertTokenizer" if is_tokenizers_available() else None),
         ("align", "BertTokenizer" if is_tokenizers_available() else None),
-        ("
-        ("aria", "LlamaTokenizerFast" if is_tokenizers_available() else None),
+        ("audioflamingo3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
         ("aya_vision", "CohereTokenizer" if is_tokenizers_available() else None),
         ("bark", "BertTokenizer" if is_tokenizers_available() else None),
         ("bart", "RobertaTokenizer" if is_tokenizers_available() else None),
@@ -82,19 +79,15 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
         ("big_bird", "BigBirdTokenizer" if is_tokenizers_available() else None),
         ("bigbird_pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
         ("biogpt", "BioGptTokenizer"),
-        ("bitnet", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("blenderbot", "BlenderbotTokenizer" if is_tokenizers_available() else None),
         ("blenderbot-small", "BlenderbotSmallTokenizer"),
         ("blip", "BertTokenizer" if is_tokenizers_available() else None),
         ("blip-2", "GPT2Tokenizer" if is_tokenizers_available() else None),
-        ("bloom", "TokenizersBackend" if is_tokenizers_available() else None),
-        ("blt", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("bridgetower", "RobertaTokenizer"),
         ("bros", "BertTokenizer" if is_tokenizers_available() else None),
         ("byt5", "ByT5Tokenizer"),
         ("camembert", "CamembertTokenizer" if is_tokenizers_available() else None),
         ("canine", "CanineTokenizer"),
-        ("chameleon", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("chinese_clip", "BertTokenizer" if is_tokenizers_available() else None),
         ("clap", "RobertaTokenizer"),
         ("clip", "CLIPTokenizer" if is_tokenizers_available() else None),
@@ -104,265 +97,219 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
         ("codegen", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("cohere", "CohereTokenizer" if is_tokenizers_available() else None),
         ("cohere2", "CohereTokenizer" if is_tokenizers_available() else None),
-        ("
-        ("colqwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
+        ("colqwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
         ("convbert", "BertTokenizer" if is_tokenizers_available() else None),
         ("cpm", "CpmTokenizer" if is_tokenizers_available() else None),
         ("cpmant", "CpmAntTokenizer"),
-        ("csm", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("ctrl", "CTRLTokenizer"),
         ("data2vec-audio", "Wav2Vec2CTCTokenizer"),
         ("data2vec-text", "RobertaTokenizer"),
         ("dbrx", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("deberta", "DebertaTokenizer" if is_tokenizers_available() else None),
         ("deberta-v2", "DebertaV2Tokenizer" if is_tokenizers_available() else None),
-        ("deepseek_v2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("deepseek_v3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("deepseek_vl", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("deepseek_vl_hybrid", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("dia", "DiaTokenizer"),
-        ("diffllama", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("distilbert", "BertTokenizer" if is_tokenizers_available() else None),
-        ("dpr", "
+        ("dpr", "DPRQuestionEncoderTokenizer" if is_tokenizers_available() else None),
         ("electra", "BertTokenizer" if is_tokenizers_available() else None),
         ("emu3", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("ernie", "BertTokenizer" if is_tokenizers_available() else None),
-        ("ernie4_5", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("ernie4_5_moe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("esm", "EsmTokenizer"),
         ("exaone4", "GPT2Tokenizer" if is_tokenizers_available() else None),
-        ("
-        ("falcon_mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
+        ("falcon_mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
         ("fastspeech2_conformer", "FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None),
         ("flaubert", "FlaubertTokenizer"),
         ("flava", "BertTokenizer" if is_tokenizers_available() else None),
         ("flex_olmo", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("florence2", "BartTokenizer" if is_tokenizers_available() else None),
-        ("fnet", "
+        ("fnet", "FNetTokenizer" if is_tokenizers_available() else None),
         ("fsmt", "FSMTTokenizer"),
         ("funnel", "FunnelTokenizer" if is_tokenizers_available() else None),
-        ("gemma", "
-        ("gemma2", "
-        ("gemma3", "
-        ("gemma3_text", "
-        ("gemma3n", "
-        ("gemma3n_text", "
+        ("gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
+        ("gemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
+        ("gemma3", "GemmaTokenizer" if is_tokenizers_available() else None),
+        ("gemma3_text", "GemmaTokenizer" if is_tokenizers_available() else None),
+        ("gemma3n", "GemmaTokenizer" if is_tokenizers_available() else None),
+        ("gemma3n_text", "GemmaTokenizer" if is_tokenizers_available() else None),
         ("git", "BertTokenizer" if is_tokenizers_available() else None),
-        ("glm", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
-        ("glm4", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
-        ("glm4_moe", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
-        ("glm4v", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
-        ("glm4v_moe", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
-        ("got_ocr2", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("gpt-sw3", "GPTSw3Tokenizer" if is_sentencepiece_available() else None),
         ("gpt2", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("gpt_bigcode", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("gpt_neo", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("gpt_neox", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
         ("gpt_neox_japanese", "GPTNeoXJapaneseTokenizer"),
-        ("gpt_oss", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("gptj", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("granite", "GPT2Tokenizer"),
         ("granitemoe", "GPT2Tokenizer"),
         ("granitemoehybrid", "GPT2Tokenizer"),
         ("granitemoeshared", "GPT2Tokenizer"),
         ("grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
-        ("groupvit", "
-        ("helium", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
+        ("groupvit", "CLIPTokenizer" if is_tokenizers_available() else None),
         ("herbert", "HerbertTokenizer" if is_tokenizers_available() else None),
         ("hubert", "Wav2Vec2CTCTokenizer"),
         ("ibert", "RobertaTokenizer"),
-        ("idefics", "
-        ("idefics2", "
-        ("idefics3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
+        ("idefics", "LlamaTokenizer" if is_tokenizers_available() else None),
+        ("idefics2", "LlamaTokenizer" if is_tokenizers_available() else None),
         ("instructblip", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("instructblipvideo", "GPT2Tokenizer" if is_tokenizers_available() else None),
-        ("internvl", "
-        ("
-        ("janus", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("jetmoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
+        ("internvl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
+        ("jais2", "GPT2Tokenizer" if is_tokenizers_available() else None),
156
|
("kosmos-2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
|
|
185
|
-
("
|
|
157
|
+
("lasr_ctc", "ParakeetTokenizer" if is_tokenizers_available() else None),
|
|
158
|
+
("lasr_encoder", "ParakeetTokenizer" if is_tokenizers_available() else None),
|
|
186
159
|
("layoutlm", "BertTokenizer" if is_tokenizers_available() else None),
|
|
187
160
|
("layoutlmv2", "LayoutLMv2Tokenizer" if is_tokenizers_available() else None),
|
|
188
161
|
("layoutlmv3", "LayoutLMv3Tokenizer" if is_tokenizers_available() else None),
|
|
189
162
|
("layoutxlm", "LayoutXLMTokenizer" if is_tokenizers_available() else None),
|
|
190
163
|
("led", "LEDTokenizer" if is_tokenizers_available() else None),
|
|
191
|
-
("lfm2_vl", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
|
|
192
164
|
("lilt", "RobertaTokenizer" if is_tokenizers_available() else None),
|
|
193
|
-
("llama", "LlamaTokenizer" if is_tokenizers_available() else None),
|
|
194
|
-
("llama4", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
195
|
-
("llama4_text", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
196
|
-
("llava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
197
|
-
("llava_next", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
198
|
-
("llava_next_video", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
199
|
-
("llava_onevision", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
200
165
|
("longformer", "RobertaTokenizer" if is_tokenizers_available() else None),
|
|
201
166
|
("longt5", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
202
167
|
("luke", "LukeTokenizer"),
|
|
203
168
|
("lxmert", "LxmertTokenizer" if is_tokenizers_available() else None),
|
|
204
169
|
("m2m_100", "M2M100Tokenizer" if is_sentencepiece_available() else None),
|
|
205
|
-
("mamba", "
|
|
206
|
-
("mamba2", "
|
|
170
|
+
("mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
171
|
+
("mamba2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
207
172
|
("marian", "MarianTokenizer" if is_sentencepiece_available() else None),
|
|
173
|
+
("markuplm", "MarkupLMTokenizer" if is_tokenizers_available() else None),
|
|
208
174
|
("mbart", "MBartTokenizer" if is_tokenizers_available() else None),
|
|
209
175
|
("mbart50", "MBart50Tokenizer" if is_tokenizers_available() else None),
|
|
210
176
|
("mega", "RobertaTokenizer"),
|
|
211
177
|
("megatron-bert", "BertTokenizer" if is_tokenizers_available() else None),
|
|
212
|
-
("metaclip_2", "
|
|
178
|
+
("metaclip_2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
|
|
213
179
|
("mgp-str", "MgpstrTokenizer"),
|
|
214
|
-
("minimax", "GPT2Tokenizer" if is_tokenizers_available() else None),
|
|
215
180
|
(
|
|
216
181
|
"ministral3",
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
else ("LlamaTokenizer" if is_sentencepiece_available() else None),
|
|
221
|
-
"LlamaTokenizerFast" if is_tokenizers_available() and not is_mistral_common_available() else None,
|
|
222
|
-
),
|
|
182
|
+
"MistralCommonBackend"
|
|
183
|
+
if is_mistral_common_available()
|
|
184
|
+
else ("TokenizersBackend" if is_tokenizers_available() else None),
|
|
223
185
|
),
|
|
224
186
|
(
|
|
225
187
|
"mistral",
|
|
226
188
|
"MistralCommonBackend"
|
|
227
189
|
if is_mistral_common_available()
|
|
228
|
-
else ("
|
|
190
|
+
else ("TokenizersBackend" if is_tokenizers_available() else None),
|
|
229
191
|
),
|
|
230
192
|
(
|
|
231
193
|
"mistral3",
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
else ("LlamaTokenizer" if is_sentencepiece_available() else None),
|
|
236
|
-
"LlamaTokenizerFast" if is_tokenizers_available() and not is_mistral_common_available() else None,
|
|
237
|
-
),
|
|
194
|
+
"MistralCommonBackend"
|
|
195
|
+
if is_mistral_common_available()
|
|
196
|
+
else ("TokenizersBackend" if is_tokenizers_available() else None),
|
|
238
197
|
),
|
|
239
198
|
(
|
|
240
199
|
"mixtral",
|
|
241
200
|
"MistralCommonBackend"
|
|
242
201
|
if is_mistral_common_available()
|
|
243
|
-
else ("
|
|
202
|
+
else ("TokenizersBackend" if is_tokenizers_available() else None),
|
|
244
203
|
),
|
|
245
|
-
("mllama", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
246
204
|
("mluke", "MLukeTokenizer" if is_sentencepiece_available() else None),
|
|
247
205
|
("mm-grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
|
|
248
206
|
("mobilebert", "MobileBertTokenizer" if is_tokenizers_available() else None),
|
|
249
|
-
("modernbert", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
|
|
250
|
-
("moonshine", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
|
|
251
|
-
("moshi", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
|
|
252
207
|
("mpnet", "MPNetTokenizer" if is_tokenizers_available() else None),
|
|
253
|
-
("mpt", "
|
|
208
|
+
("mpt", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
254
209
|
("mra", "RobertaTokenizer"),
|
|
255
210
|
("mt5", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
256
211
|
("musicgen", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
257
212
|
("musicgen_melody", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
258
213
|
("mvp", "MvpTokenizer" if is_tokenizers_available() else None),
|
|
259
214
|
("myt5", "MyT5Tokenizer"),
|
|
260
|
-
("nemotron", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
|
|
261
215
|
("nezha", "BertTokenizer" if is_tokenizers_available() else None),
|
|
262
216
|
("nllb", "NllbTokenizer" if is_tokenizers_available() else None),
|
|
263
217
|
("nllb-moe", "NllbTokenizer" if is_tokenizers_available() else None),
|
|
264
218
|
("nougat", "NougatTokenizer" if is_tokenizers_available() else None),
|
|
265
|
-
("nystromformer", "
|
|
266
|
-
("olmo", "
|
|
267
|
-
("olmo2", "
|
|
219
|
+
("nystromformer", "AlbertTokenizer" if is_tokenizers_available() else None),
|
|
220
|
+
("olmo", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
221
|
+
("olmo2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
268
222
|
("olmo3", "GPT2Tokenizer" if is_tokenizers_available() else None),
|
|
269
|
-
("olmoe", "
|
|
270
|
-
("omdet-turbo", "
|
|
271
|
-
("oneformer", "
|
|
223
|
+
("olmoe", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
224
|
+
("omdet-turbo", "CLIPTokenizer" if is_tokenizers_available() else None),
|
|
225
|
+
("oneformer", "CLIPTokenizer" if is_tokenizers_available() else None),
|
|
272
226
|
("openai-gpt", "OpenAIGPTTokenizer" if is_tokenizers_available() else None),
|
|
273
227
|
("opt", "GPT2Tokenizer" if is_tokenizers_available() else None),
|
|
274
|
-
("ovis2", "
|
|
275
|
-
("owlv2", "
|
|
276
|
-
("owlvit", "
|
|
277
|
-
("paligemma", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
228
|
+
("ovis2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
229
|
+
("owlv2", "CLIPTokenizer" if is_tokenizers_available() else None),
|
|
230
|
+
("owlvit", "CLIPTokenizer" if is_tokenizers_available() else None),
|
|
278
231
|
("pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
|
|
279
232
|
("pegasus_x", "PegasusTokenizer" if is_tokenizers_available() else None),
|
|
280
233
|
("perceiver", "PerceiverTokenizer"),
|
|
281
|
-
("persimmon", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
282
234
|
("phi", "GPT2Tokenizer" if is_tokenizers_available() else None),
|
|
283
|
-
("phi3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
284
|
-
("phimoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
285
235
|
("phobert", "PhobertTokenizer"),
|
|
286
236
|
("pix2struct", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
287
237
|
(
|
|
288
238
|
"pixtral",
|
|
289
239
|
"MistralCommonBackend"
|
|
290
240
|
if is_mistral_common_available()
|
|
291
|
-
else ("
|
|
241
|
+
else ("TokenizersBackend" if is_tokenizers_available() else None),
|
|
292
242
|
),
|
|
293
243
|
("plbart", "PLBartTokenizer" if is_tokenizers_available() else None),
|
|
294
244
|
("prophetnet", "ProphetNetTokenizer"),
|
|
295
245
|
("qdqbert", "BertTokenizer" if is_tokenizers_available() else None),
|
|
296
|
-
("qwen2", "
|
|
297
|
-
("qwen2_5_omni", "
|
|
298
|
-
("qwen2_5_vl", "
|
|
299
|
-
("qwen2_audio", "
|
|
300
|
-
("qwen2_moe", "
|
|
301
|
-
("qwen2_vl", "
|
|
302
|
-
("qwen3", "
|
|
303
|
-
("qwen3_moe", "
|
|
304
|
-
("qwen3_next", "
|
|
305
|
-
("qwen3_omni_moe", "
|
|
306
|
-
("qwen3_vl", "
|
|
307
|
-
("qwen3_vl_moe", "
|
|
246
|
+
("qwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
247
|
+
("qwen2_5_omni", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
248
|
+
("qwen2_5_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
249
|
+
("qwen2_audio", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
250
|
+
("qwen2_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
251
|
+
("qwen2_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
252
|
+
("qwen3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
253
|
+
("qwen3_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
254
|
+
("qwen3_next", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
255
|
+
("qwen3_omni_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
256
|
+
("qwen3_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
257
|
+
("qwen3_vl_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
|
|
308
258
|
("rag", "RagTokenizer"),
|
|
309
259
|
("realm", "BertTokenizer" if is_tokenizers_available() else None),
|
|
310
|
-
("recurrent_gemma", "
|
|
260
|
+
("recurrent_gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
|
|
311
261
|
("reformer", "ReformerTokenizer" if is_tokenizers_available() else None),
|
|
312
262
|
("rembert", "RemBertTokenizer" if is_tokenizers_available() else None),
|
|
313
263
|
("retribert", "BertTokenizer" if is_tokenizers_available() else None),
|
|
314
264
|
("roberta", "RobertaTokenizer"),
|
|
315
265
|
("roberta-prelayernorm", "RobertaTokenizer"),
|
|
316
266
|
("roc_bert", "RoCBertTokenizer"),
|
|
317
|
-
("roformer", "
|
|
318
|
-
("rwkv", "
|
|
267
|
+
("roformer", "RoFormerTokenizer" if is_tokenizers_available() else None),
|
|
268
|
+
("rwkv", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
319
269
|
("seamless_m4t", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
|
|
320
270
|
("seamless_m4t_v2", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
|
|
321
|
-
("shieldgemma2", "
|
|
271
|
+
("shieldgemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
|
|
322
272
|
("siglip", "SiglipTokenizer" if is_sentencepiece_available() else None),
|
|
323
|
-
("siglip2", "
|
|
324
|
-
("smollm3", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
|
|
273
|
+
("siglip2", "GemmaTokenizer" if is_tokenizers_available() else None),
|
|
325
274
|
("speech_to_text", "Speech2TextTokenizer" if is_sentencepiece_available() else None),
|
|
326
275
|
("speecht5", "SpeechT5Tokenizer" if is_sentencepiece_available() else None),
|
|
327
276
|
("splinter", "SplinterTokenizer"),
|
|
328
277
|
("squeezebert", "BertTokenizer" if is_tokenizers_available() else None),
|
|
329
|
-
("stablelm", "
|
|
278
|
+
("stablelm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
330
279
|
("starcoder2", "GPT2Tokenizer" if is_tokenizers_available() else None),
|
|
331
280
|
("switch_transformers", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
332
281
|
("t5", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
333
|
-
("t5gemma", "
|
|
282
|
+
("t5gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
|
|
334
283
|
("tapas", "TapasTokenizer"),
|
|
335
284
|
("trocr", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
|
|
336
285
|
("tvp", "BertTokenizer" if is_tokenizers_available() else None),
|
|
337
286
|
("udop", "UdopTokenizer" if is_tokenizers_available() else None),
|
|
338
287
|
("umt5", "T5Tokenizer" if is_tokenizers_available() else None),
|
|
339
|
-
("
|
|
288
|
+
("unispeech", "Wav2Vec2CTCTokenizer"),
|
|
289
|
+
("unispeech-sat", "Wav2Vec2CTCTokenizer"),
|
|
340
290
|
("vilt", "BertTokenizer" if is_tokenizers_available() else None),
|
|
341
|
-
("vipllava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
342
291
|
("visual_bert", "BertTokenizer" if is_tokenizers_available() else None),
|
|
343
292
|
("vits", "VitsTokenizer"),
|
|
344
293
|
(
|
|
345
294
|
"voxtral",
|
|
346
295
|
"MistralCommonBackend"
|
|
347
296
|
if is_mistral_common_available()
|
|
348
|
-
else ("
|
|
297
|
+
else ("TokenizersBackend" if is_tokenizers_available() else None),
|
|
349
298
|
),
|
|
350
299
|
("wav2vec2", "Wav2Vec2CTCTokenizer"),
|
|
351
300
|
("wav2vec2-bert", "Wav2Vec2CTCTokenizer"),
|
|
352
301
|
("wav2vec2-conformer", "Wav2Vec2CTCTokenizer"),
|
|
353
302
|
("wav2vec2_phoneme", "Wav2Vec2PhonemeCTCTokenizer"),
|
|
354
303
|
("whisper", "WhisperTokenizer" if is_tokenizers_available() else None),
|
|
355
|
-
("xclip", "
|
|
304
|
+
("xclip", "CLIPTokenizer" if is_tokenizers_available() else None),
|
|
356
305
|
("xglm", "XGLMTokenizer" if is_tokenizers_available() else None),
|
|
357
306
|
("xlm", "XLMTokenizer"),
|
|
358
307
|
("xlm-roberta", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
|
|
359
308
|
("xlm-roberta-xl", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
|
|
360
309
|
("xlnet", "XLNetTokenizer" if is_tokenizers_available() else None),
|
|
361
|
-
("xlstm", "
|
|
362
|
-
("xmod", "
|
|
310
|
+
("xlstm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
|
|
311
|
+
("xmod", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
|
|
363
312
|
("yoso", "AlbertTokenizer" if is_tokenizers_available() else None),
|
|
364
|
-
("zamba", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
365
|
-
("zamba2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
|
|
366
313
|
]
|
|
367
314
|
)
|
|
368
315
|
|
|
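Editorial note: in rc2 the mapping above stores a single tokenizer class name per model type (the separate "*TokenizerFast" / "PreTrainedTokenizerFast" entries of rc0 are gone), and mistral-family entries prefer "MistralCommonBackend" when mistral-common is installed. A minimal, hedged sketch of how this is typically consumed; the checkpoint id is just an example and the printed class name depends on the installed rc2 wheel:

from transformers import AutoTokenizer

# "gpt2" maps to the single "GPT2Tokenizer" entry above (tokenizers-backed),
# rather than a distinct *Fast class as in earlier releases.
tok = AutoTokenizer.from_pretrained("gpt2")
print(type(tok).__name__)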
@@ -389,13 +336,17 @@ def load_merges(merges_file):
 
 
 def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
+    # Bloom tokenizer classes were removed but should map to the fast backend for BC
+    if class_name in {"BloomTokenizer", "BloomTokenizerFast"}:
+        return TokenizersBackend
+
     if class_name in REGISTERED_FAST_ALIASES:
         return REGISTERED_FAST_ALIASES[class_name]
 
     if class_name in REGISTERED_TOKENIZER_CLASSES:
         return REGISTERED_TOKENIZER_CLASSES[class_name]
 
-    if class_name == "
+    if class_name == "TokenizersBackend":
         return TokenizersBackend
 
     # V5: TOKENIZER_MAPPING_NAMES now maps to single strings, not tuples
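As a hedged illustration of the backward-compatibility branch added above (the import path is an assumption about where tokenizer_class_from_name is defined):

from transformers.models.auto.tokenization_auto import tokenizer_class_from_name

for name in ("BloomTokenizer", "BloomTokenizerFast"):
    cls = tokenizer_class_from_name(name)
    # Per the new branch, both legacy Bloom class names should resolve to TokenizersBackend.
    print(name, "->", cls)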
@@ -404,7 +355,7 @@ def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
     module_name = model_type_to_module_name(module_name)
     if (
         module_name in ["mistral", "mistral3", "mixtral", "ministral", "ministral3", "pixtral", "voxtral"]
-        and class_name == "
+        and class_name == "MistralCommonBackend"
     ):
         module = importlib.import_module(".tokenization_mistral_common", "transformers")
     else:
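A rough illustration of the backend preference this condition encodes for the mistral family: MistralCommonBackend when mistral-common is installed, otherwise the tokenizers backend. The import path is an assumption, not confirmed by this diff:

from transformers.utils import is_mistral_common_available  # assumed location

preferred = "MistralCommonBackend" if is_mistral_common_available() else "TokenizersBackend"
print(f"mistral/mixtral/pixtral/voxtral entries resolve to: {preferred}")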
@@ -428,402 +379,6 @@ def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
     return None
 
 
-def _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
-    # Delegate to shared helper to avoid duplication
-    return find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
-
-
-def _load_tokenizers_backend(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
-    """
-    Load a tokenizer using only the tokenizers backend (no SentencePiece fallback).
-
-    This function attempts to load with the following priority:
-    1. If tokenizer.json exists, load directly
-    2. If any .model file (SPM) exists, try extracting vocab and merges
-    3. If vocab.json and merges.txt exist, load with those
-    4. If vocab.txt exists (WordPiece models), load with that
-
-    Args:
-        tokenizer_class: The tokenizer class to instantiate
-        pretrained_model_name_or_path: Path or model id
-        inputs: Additional positional arguments for tokenizer init
-        kwargs: Additional keyword arguments
-
-    Returns:
-        An instantiated tokenizer object
-
-    Raises:
-        ValueError: If tokenizer could not be loaded with tokenizers backend
-    """
-    files_loaded = []
-
-    # Try tokenizer.json first
-    try:
-        tokenizer_json_exists = has_file(
-            pretrained_model_name_or_path,
-            "tokenizer.json",
-            revision=kwargs.get("revision"),
-            token=kwargs.get("token"),
-            cache_dir=kwargs.get("cache_dir"),
-            local_files_only=kwargs.get("local_files_only", False),
-        )
-    except Exception:
-        tokenizer_json_exists = False
-
-    if tokenizer_json_exists:
-        files_loaded.append("tokenizer.json")
-        kwargs["backend"] = "tokenizers"
-        kwargs["files_loaded"] = files_loaded
-        # Some old models have uploaded a tokenizer.json but haven't updated tokenizer_config.json to point to the correct tokenizer class
-        tokenizer_class = (
-            TokenizersBackend
-            if tokenizer_class.__name__ in ("PythonBackend", "PreTrainedTokenizer")
-            else tokenizer_class
-        )
-        return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-
-    # Try tekken.json (Mistral format)
-    try:
-        if has_file(
-            pretrained_model_name_or_path,
-            "tekken.json",
-            revision=kwargs.get("revision"),
-            token=kwargs.get("token"),
-            cache_dir=kwargs.get("cache_dir"),
-            local_files_only=kwargs.get("local_files_only", False),
-        ):
-            from ...integrations.mistral import convert_tekken_tokenizer
-
-            tekken_file = cached_file(
-                pretrained_model_name_or_path,
-                "tekken.json",
-                **{
-                    k: v
-                    for k, v in kwargs.items()
-                    if k
-                    in ["cache_dir", "force_download", "proxies", "token", "revision", "local_files_only", "subfolder"]
-                },
-            )
-            if tekken_file is not None:
-                files_loaded.append("tekken.json")
-                kwargs["backend"] = "tokenizers"
-                kwargs["files_loaded"] = files_loaded
-                return convert_tekken_tokenizer(tekken_file)
-    except (ImportError, Exception):
-        pass
-
-    # Try extracting from SentencePiece model
-    spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
-    if spm_file is not None:
-        try:
-            resolved_spm = cached_file(
-                pretrained_model_name_or_path,
-                spm_file,
-                cache_dir=kwargs.get("cache_dir"),
-                force_download=kwargs.get("force_download", False),
-                proxies=kwargs.get("proxies"),
-                token=kwargs.get("token"),
-                revision=kwargs.get("revision"),
-                local_files_only=kwargs.get("local_files_only", False),
-                subfolder=kwargs.get("subfolder", ""),
-            )
-        except Exception:
-            resolved_spm = None
-
-        if resolved_spm is not None:
-            try:
-                from ...tokenization_utils_sentencepiece import SentencePieceExtractor
-
-                fast_sig = inspect.signature(getattr(tokenizer_class, "__init__", tokenizer_class))
-                if "vocab" in fast_sig.parameters:
-                    try:
-                        vocab_ids, vocab_scores, merges = SentencePieceExtractor(resolved_spm).extract()
-                        files_loaded.append(spm_file)
-                        kwargs["backend"] = "tokenizers"
-                        kwargs["files_loaded"] = files_loaded
-                        # If tokenizer needs both vocab and merges (BPE models)
-                        if "merges" in fast_sig.parameters:
-                            return tokenizer_class.from_pretrained(
-                                pretrained_model_name_or_path, *inputs, vocab=vocab_scores, merges=merges, **kwargs
-                            )
-                        # If tokenizer only needs vocab (Unigram models like NLLB, SeamlessM4T)
-                        else:
-                            return tokenizer_class.from_pretrained(
-                                pretrained_model_name_or_path, *inputs, vocab=vocab_scores, **kwargs
-                            )
-                    except Exception:
-                        pass
-            except ImportError as e:
-                if "sentencepiece" in str(e).lower() or "SentencePiece" in str(e):
-                    raise ImportError(
-                        f"This checkpoint only contains a SentencePiece model file ({spm_file}), but the `sentencepiece` library is not installed. "
-                        f"Please install sentencepiece to load this tokenizer: `pip install sentencepiece`"
-                    ) from e
-                raise
-            except Exception:
-                pass
-
-    vocab, merges, loaded = load_vocab_and_merges(pretrained_model_name_or_path, **kwargs)
-    if vocab is not None:
-        files_loaded.extend(loaded)
-        if issubclass(tokenizer_class, PreTrainedTokenizer):
-            kwargs["backend"] = "python"
-        else:
-            kwargs["backend"] = "tokenizers"
-        kwargs["files_loaded"] = files_loaded
-        if merges is not None:
-            return tokenizer_class.from_pretrained(
-                pretrained_model_name_or_path, *inputs, vocab=vocab, merges=merges, **kwargs
-            )
-        else:
-            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, vocab=vocab, **kwargs)
-
-    # Try vocab.txt (WordPiece models like SplinterTokenizer)
-    try:
-        resolved_vocab_txt = cached_file(
-            pretrained_model_name_or_path,
-            "vocab.txt",
-            cache_dir=kwargs.get("cache_dir"),
-            force_download=kwargs.get("force_download", False),
-            proxies=kwargs.get("proxies"),
-            token=kwargs.get("token"),
-            revision=kwargs.get("revision"),
-            local_files_only=kwargs.get("local_files_only", False),
-            subfolder=kwargs.get("subfolder", ""),
-        )
-    except Exception:
-        resolved_vocab_txt = None
-
-    if resolved_vocab_txt is not None:
-        try:
-            fast_sig = inspect.signature(getattr(tokenizer_class, "__init__", tokenizer_class))
-            if "vocab" in fast_sig.parameters:
-                # Load vocab.txt: each line is a token, line number is the ID
-                vocab = OrderedDict()
-                with open(resolved_vocab_txt, "r", encoding="utf-8") as reader:
-                    tokens = reader.readlines()
-                for index, token in enumerate(tokens):
-                    token = token.rstrip("\n")
-                    vocab[token] = index
-                files_loaded.append("vocab.txt")
-                kwargs["backend"] = "tokenizers"
-                kwargs["files_loaded"] = files_loaded
-                return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, vocab=vocab, **kwargs)
-        except Exception:
-            pass
-
-    # If all methods failed, raise an error
-    raise ValueError(
-        f"Could not load tokenizer from {pretrained_model_name_or_path} using tokenizers backend. "
-        "No tokenizer.json, tekken.json, vocab.json+merges.txt, vocab.txt, or compatible SentencePiece model found."
-    )
-
-
-def _try_load_tokenizer_with_fallbacks(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
-    """
-    Try to load a tokenizer with backend selection.
-
-    This function routes to the appropriate backend based on the 'backend' parameter:
-    - "tokenizers" (default): Uses HuggingFace tokenizers library backend
-    - "sentencepiece": Uses SentencePiece backend
-
-    For the tokenizers backend, attempts to load with the following priority:
-    1. If tokenizer.json exists, load directly
-    2. If any .model file (SPM) exists, try extracting vocab and merges
-    3. If vocab.json and merges.txt exist, load with those
-    4. Fallback to SentencePieceBackend if available
-
-    Args:
-        tokenizer_class: The tokenizer class to instantiate (can be None)
-        pretrained_model_name_or_path: Path or model id
-        inputs: Additional positional arguments for tokenizer init
-        kwargs: Additional keyword arguments (may include 'backend' parameter, defaults to "tokenizers")
-
-    Returns:
-        An instantiated tokenizer object
-
-    Raises:
-        ValueError: If no tokenizer could be loaded
-    """
-    # Extract the backend parameter - default to "tokenizers" to prioritize tokenizers backend
-    backend = kwargs.pop("backend", "tokenizers")
-
-    # Validate backend parameter
-    if backend not in ["sentencepiece", "tokenizers"]:
-        logger.warning(
-            f"Invalid backend '{backend}' specified. Valid options are 'tokenizers' or 'sentencepiece'. "
-            "Defaulting to 'tokenizers' backend."
-        )
-        backend = "tokenizers"
-
-    # Route to SentencePiece backend if requested
-    if backend == "sentencepiece":
-        if SentencePieceBackend is None:
-            raise ValueError(
-                "SentencePiece backend was requested but sentencepiece is not installed. "
-                "Please install it with: pip install sentencepiece"
-            )
-        logger.info("Loading tokenizer with SentencePiece backend")
-        # Track files loaded for SentencePiece backend
-        spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
-        files_loaded = [spm_file] if spm_file else []
-        kwargs["backend"] = "sentencepiece"
-        kwargs["files_loaded"] = files_loaded
-        # Resolve the SPM file path and pass it as vocab_file
-        if spm_file is not None:
-            resolved_vocab_file = cached_file(
-                pretrained_model_name_or_path,
-                spm_file,
-                cache_dir=kwargs.get("cache_dir"),
-                force_download=kwargs.get("force_download", False),
-                proxies=kwargs.get("proxies"),
-                token=kwargs.get("token"),
-                revision=kwargs.get("revision"),
-                local_files_only=kwargs.get("local_files_only", False),
-                subfolder=kwargs.get("subfolder", ""),
-            )
-            kwargs["vocab_file"] = resolved_vocab_file
-        if isinstance(tokenizer_class, type) and issubclass(tokenizer_class, SentencePieceBackend):
-            logger.info("Loading tokenizer with SentencePiece backend using tokenizer class")
-            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-        return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-
-    # Route to tokenizers backend (default)
-    if backend == "tokenizers":
-        if tokenizer_class is not None:
-            # Check if tokenizer_class inherits from PreTrainedTokenizer (but not from TokenizersBackend/SentencePieceBackend)
-            # These are edge cases with custom logic (e.g., BioGptTokenizer with Moses tokenization)
-            from ...tokenization_python import PreTrainedTokenizer
-
-            # Build list of backend classes to check against
-            backend_classes = [TokenizersBackend] if TokenizersBackend else []
-            if SentencePieceBackend:
-                backend_classes.append(SentencePieceBackend)
-
-            # Check if it's a custom PreTrainedTokenizer (not a backend class)
-            is_custom_pre_trained = (
-                isinstance(tokenizer_class, type)
-                and issubclass(tokenizer_class, PreTrainedTokenizer)
-                and not any(issubclass(tokenizer_class, bc) for bc in backend_classes)
-                and tokenizer_class.__name__ not in ("PythonBackend", "PreTrainedTokenizer")
-            )
-
-            # Check if it's a completely custom tokenizer (not PreTrainedTokenizer, not backend class)
-            # e.g., MistralCommonBackend which has its own from_pretrained logic
-            inherits_from_backend = isinstance(tokenizer_class, type) and any(
-                bc and issubclass(tokenizer_class, bc) for bc in backend_classes
-            )
-            is_completely_custom = (
-                isinstance(tokenizer_class, type)
-                and not issubclass(tokenizer_class, PythonBackend)
-                and not inherits_from_backend
-            )
-
-            if is_custom_pre_trained:
-                logger.info("Loading tokenizer with custom PreTrainedTokenizer backend (edge case)")
-                # Track the backend type for custom tokenizers
-                kwargs["backend"] = "custom"
-                kwargs["files_loaded"] = []  # Custom tokenizers may load various files
-                return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-
-            if is_completely_custom:
-                # For completely custom tokenizers (like MistralCommonBackend), try calling from_pretrained directly
-                logger.info("Loading tokenizer with custom tokenizer class (non-PreTrainedTokenizer)")
-                # Filter out AutoTokenizer-specific kwargs that custom tokenizers don't accept
-                custom_kwargs = {k: v for k, v in kwargs.items() if k not in ["backend", "files_loaded"]}
-                custom_kwargs["_from_auto"] = True  # Signal that this is called from AutoTokenizer
-                return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **custom_kwargs)
-
-        if TokenizersBackend is None:
-            raise ValueError(
-                "Tokenizers backend is the default but tokenizers library is not installed. "
-                "Please install it with: pip install tokenizers"
-            )
-        logger.info("Loading tokenizer with tokenizers backend")
-        try:
-            return _load_tokenizers_backend(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs)
-        except ValueError as e:
-            # If tokenizers backend fails, try falling back to SentencePiece backend if available
-            spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
-            if spm_file is not None and SentencePieceBackend is not None:
-                logger.info(
-                    f"Tokenizers backend failed: {e}. "
-                    f"Falling back to SentencePieceBackend since {spm_file} file was found."
-                )
-                files_loaded = [spm_file]
-                kwargs["backend"] = "sentencepiece"
-                kwargs["files_loaded"] = files_loaded
-                # Resolve the SPM file path and pass it as vocab_file
-                resolved_vocab_file = cached_file(
-                    pretrained_model_name_or_path,
-                    spm_file,
-                    cache_dir=kwargs.get("cache_dir"),
-                    force_download=kwargs.get("force_download", False),
-                    proxies=kwargs.get("proxies"),
-                    token=kwargs.get("token"),
-                    revision=kwargs.get("revision"),
-                    local_files_only=kwargs.get("local_files_only", False),
-                    subfolder=kwargs.get("subfolder", ""),
-                )
-                kwargs["vocab_file"] = resolved_vocab_file
-                if tokenizer_class is not None and issubclass(tokenizer_class, SentencePieceBackend):
-                    logger.info(
-                        "Falling back to SentencePiece backend using tokenizer class that inherits from SentencePieceBackend."
-                    )
-                    return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-                return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-            # If no fallback available, try calling tokenizer class directly as last resort
-            if hasattr(tokenizer_class, "from_pretrained"):
-                logger.info(
-                    f"Tokenizers backend failed: {e}. Trying to load tokenizer directly from tokenizer class."
-                )
-                # Filter out AutoTokenizer-specific kwargs that custom tokenizers don't accept
-                custom_kwargs = {k: v for k, v in kwargs.items() if k not in ["backend", "files_loaded"]}
-                custom_kwargs["_from_auto"] = True  # Signal that this is called from AutoTokenizer
-                return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **custom_kwargs)
-            # Re-raise if no fallback options available
-            raise
-
-    # If no tokenizer class but tokenizers backend requested, fall back to SentencePiece if available
-    spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
-    if spm_file is not None and SentencePieceBackend is not None:
-        logger.info(
-            f"Tokenizers backend was requested but no tokenizer class found. "
-            f"Falling back to SentencePieceBackend since {spm_file} file was found."
-        )
-        files_loaded = [spm_file]
-        kwargs["backend"] = "sentencepiece"
-        kwargs["files_loaded"] = files_loaded
-        # Resolve the SPM file path and pass it as vocab_file
-        resolved_vocab_file = cached_file(
-            pretrained_model_name_or_path,
-            spm_file,
-            cache_dir=kwargs.get("cache_dir"),
-            force_download=kwargs.get("force_download", False),
-            proxies=kwargs.get("proxies"),
-            token=kwargs.get("token"),
-            revision=kwargs.get("revision"),
-            local_files_only=kwargs.get("local_files_only", False),
-            subfolder=kwargs.get("subfolder", ""),
-        )
-        kwargs["vocab_file"] = resolved_vocab_file
-        if (
-            tokenizer_class is not None
-            and SentencePieceBackend is not None
-            and issubclass(tokenizer_class, SentencePieceBackend)
-        ):
-            logger.info(
-                "Falling back to SentencePiece backend using tokenizer class that inherits from SentencePieceBackend."
-            )
-            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-        return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
-
-    raise ValueError(
-        f"Could not load tokenizer from {pretrained_model_name_or_path}. "
-        "No tokenizer class could be determined and no SentencePiece model found."
-    )
-
-
 def get_tokenizer_config(
     pretrained_model_name_or_path: Union[str, os.PathLike[str]],
     cache_dir: Optional[Union[str, os.PathLike[str]]] = None,
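Editorial note: the two helpers removed above implemented the file-resolution order described in their docstrings (tokenizer.json, then tekken.json, then a SentencePiece .model, then vocab.json+merges.txt, then vocab.txt). The following is only a hypothetical, simplified sketch of that ordering for local directories; the function name and behavior are illustrative, not part of the library:

import os

def pick_tokenizer_source(model_dir: str) -> str:
    # Mirrors the priority documented in the removed _load_tokenizers_backend helper.
    for candidate in ("tokenizer.json", "tekken.json", "tokenizer.model", "vocab.json", "vocab.txt"):
        if os.path.isfile(os.path.join(model_dir, candidate)):
            return candidate
    raise FileNotFoundError(f"no supported tokenizer file found in {model_dir}")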
@@ -1054,11 +609,43 @@ class AutoTokenizer:
 
             return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
 
+        if gguf_file:
+            gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
+            config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
+            config = AutoConfig.for_model(**config_dict)
+        elif config is None:
+            try:
+                config = AutoConfig.from_pretrained(
+                    pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
+                )
+            except Exception:
+                config = PreTrainedConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
+
+        config_model_type = config.model_type
+
         # Next, let's try to use the tokenizer_config file to get the tokenizer class.
         tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
+        tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
+        # if there is a config, we can check that the tokenizer class != than model class and can thus assume we need to use `TokenizersBackend`
+        if (
+            tokenizer_config_class is not None
+            and config_model_type is not None
+            and config_model_type != ""
+            and TOKENIZER_MAPPING_NAMES.get(config_model_type, "").replace("Fast", "")
+            != tokenizer_config_class.replace("Fast", "")
+        ):
+            # new model, but we ignore it unless the model type is the same
+            try:
+                return TokenizersBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
+            except Exception:
+                return tokenizer_class_from_name(tokenizer_config_class).from_pretrained(
+                    pretrained_model_name_or_path, *inputs, **kwargs
+                )
+
         if "_commit_hash" in tokenizer_config:
             kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
-
+
+        # Check for auto_map early to handle dynamic tokenizers properly
         tokenizer_auto_map = None
         if "auto_map" in tokenizer_config:
             if isinstance(tokenizer_config["auto_map"], (tuple, list)):
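The added block resolves a config up front, either from GGUF metadata or via AutoConfig, before consulting tokenizer_config.json. A hedged usage sketch of the GGUF path (repo id and file name are placeholders, not real artifacts):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "org/some-model-GGUF",          # placeholder repo id
    gguf_file="model.Q4_K_M.gguf",  # placeholder file name
)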
@@ -1067,34 +654,15 @@ class AutoTokenizer:
             else:
                 tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)
 
-
-
-        if not isinstance(config, PreTrainedConfig):
-            if gguf_file:
-                gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
-                config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
-                config = AutoConfig.for_model(**config_dict)
-            else:
-                config = AutoConfig.from_pretrained(
-                    pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
-                )
-        config_tokenizer_class = config.tokenizer_class
-        if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
-            tokenizer_auto_map = config.auto_map["AutoTokenizer"]
-
-        if (
-            config_tokenizer_class is not None
-            and config_tokenizer_class != "PreTrainedTokenizerFast"
-            and "Fast" in config_tokenizer_class
-        ):
-            config_tokenizer_class = config_tokenizer_class[:-4]
+        if tokenizer_config_class:
+            tokenizer_config_class = tokenizer_config_class.replace("Fast", "")
 
         has_remote_code = tokenizer_auto_map is not None
         has_local_code = type(config) in TOKENIZER_MAPPING or (
-
+            tokenizer_config_class is not None
             and (
-                tokenizer_class_from_name(
-                or tokenizer_class_from_name(
+                tokenizer_class_from_name(tokenizer_config_class) is not None
+                or tokenizer_class_from_name(tokenizer_config_class + "Fast") is not None
            )
         )
         if has_remote_code:
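Several of these branches normalize legacy "Fast"-suffixed class names so that tokenizer_config.json files written for v4 still resolve under v5. A minimal string-level sketch of that normalization (the exact handling differs slightly between the branches above and below; this is not library code):

def normalize_tokenizer_class_name(name: str) -> str:
    # Keep the generic PreTrainedTokenizerFast name as-is; drop the legacy suffix otherwise.
    if name == "PreTrainedTokenizerFast":
        return name
    return name[:-4] if name.endswith("Fast") else name

print(normalize_tokenizer_class_name("LlamaTokenizerFast"))  # -> "LlamaTokenizer"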
@@ -1118,17 +686,24 @@ class AutoTokenizer:
                 return tokenizer_class.from_pretrained(
                     pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
                 )
-        elif
-
-
-
-            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
-
-
-
-
+        elif tokenizer_config_class is not None:
+            tokenizer_class_candidate = tokenizer_config_class
+            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
+            if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
+                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
+            if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
+                tokenizer_class = TokenizersBackend
+            # Fallback to TokenizersBackend if the class wasn't found
+            if tokenizer_class is None:
+                tokenizer_class = TokenizersBackend
 
-            return
+            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
+        elif getattr(config, "tokenizer_class"):
+            _class = config.tokenizer_class
+            if "PreTrainedTokenizerFast" not in _class:
+                _class = _class.replace("Fast", "")
+            tokenizer_class = tokenizer_class_from_name(_class)
+            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
 
         # Otherwise we have to be creative.
         # if model is an encoder decoder, the encoder tokenizer class is used by default
@@ -1142,19 +717,25 @@ class AutoTokenizer:
             )
             config = config.encoder
 
-        model_type = config_class_to_model_type(type(config).__name__)
+        model_type = config_class_to_model_type(type(config).__name__) or config.get("model_type", None)
         if model_type is not None:
-            tokenizer_class = TOKENIZER_MAPPING
-
+            tokenizer_class = TOKENIZER_MAPPING.get(type(config), TokenizersBackend)
             if tokenizer_class is not None:
-                return
-
-
-
-
-
-
-
+                return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
+
+        # Fallback: try tokenizer_class from tokenizer_config.json
+        tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
+        if tokenizer_config_class is not None:
+            if tokenizer_config_class != "TokenizersBackend" and "Fast" in tokenizer_config_class:
+                tokenizer_config_class = tokenizer_config_class[:-4]
+            tokenizer_class = tokenizer_class_from_name(tokenizer_config_class)
+            if tokenizer_class is None and not tokenizer_config_class.endswith("Fast"):
+                tokenizer_class = tokenizer_class_from_name(tokenizer_config_class + "Fast")
+            if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
+                tokenizer_class = TokenizersBackend
+            if tokenizer_class is None:
+                tokenizer_class = TokenizersBackend
+            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
 
         raise ValueError(
             f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"