transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
transformers/__init__.py
CHANGED

@@ -18,8 +18,11 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "5.0.0rc0"
+__version__ = "5.0.0rc1"
 
+import importlib
+import sys
+import types
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -174,6 +177,8 @@ _import_structure = {
     "quantizers": [],
     "testing_utils": [],
     "tokenization_python": ["PreTrainedTokenizer", "PythonBackend"],
+    "tokenization_utils": [],
+    "tokenization_utils_fast": [],
     "tokenization_utils_sentencepiece": ["SentencePieceBackend"],
     "tokenization_utils_base": [
         "AddedToken",
@@ -383,6 +388,8 @@ else:
         "BayesianDetectorConfig",
         "BayesianDetectorModel",
         "ClassifierFreeGuidanceLogitsProcessor",
+        "ContinuousBatchingManager",
+        "ContinuousMixin",
         "EncoderNoRepeatNGramLogitsProcessor",
         "EncoderRepetitionPenaltyLogitsProcessor",
         "EosTokenCriteria",
@@ -536,6 +543,8 @@ if TYPE_CHECKING:
     from .generation import BayesianDetectorModel as BayesianDetectorModel
     from .generation import ClassifierFreeGuidanceLogitsProcessor as ClassifierFreeGuidanceLogitsProcessor
     from .generation import CompileConfig as CompileConfig
+    from .generation import ContinuousBatchingManager as ContinuousBatchingManager
+    from .generation import ContinuousMixin as ContinuousMixin
     from .generation import EncoderNoRepeatNGramLogitsProcessor as EncoderNoRepeatNGramLogitsProcessor
     from .generation import EncoderRepetitionPenaltyLogitsProcessor as EncoderRepetitionPenaltyLogitsProcessor
     from .generation import EosTokenCriteria as EosTokenCriteria
@@ -764,8 +773,6 @@ if TYPE_CHECKING:
     from .utils.quantization_config import VptqConfig as VptqConfig
     from .video_processing_utils import BaseVideoProcessor as BaseVideoProcessor
 else:
-    import sys
-
     _import_structure = {k: set(v) for k, v in _import_structure.items()}
 
     import_structure = define_import_structure(Path(__file__).parent / "models", prefix="models")
@@ -779,6 +786,26 @@ else:
         extra_objects={"__version__": __version__},
     )
 
+    def _create_tokenization_alias(alias: str, target: str) -> None:
+        """
+        Lazily redirect legacy tokenization module paths to their replacements without importing heavy deps.
+        """
+
+        module = types.ModuleType(alias)
+        module.__doc__ = f"Alias module for backward compatibility with `{target}`."
+
+        def _get_target():
+            return importlib.import_module(target, __name__)
+
+        module.__getattr__ = lambda name: getattr(_get_target(), name)
+        module.__dir__ = lambda: dir(_get_target())
+
+        sys.modules[alias] = module
+        setattr(sys.modules[__name__], alias.rsplit(".", 1)[-1], module)
+
+    _create_tokenization_alias(f"{__name__}.tokenization_utils_fast", ".tokenization_utils_tokenizers")
+    _create_tokenization_alias(f"{__name__}.tokenization_utils", ".tokenization_utils_sentencepiece")
+
 
 if not is_torch_available():
     logger.warning_advice(
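The `_create_tokenization_alias` helper above keeps the removed `transformers.tokenization_utils` and `transformers.tokenization_utils_fast` import paths working by registering lazy proxy modules in `sys.modules`. Below is a minimal, self-contained sketch of the same pattern, using the stdlib `json` module as the target so it runs on its own; the alias name is made up for illustration and is not part of transformers.

import importlib
import sys
import types


def create_alias(alias: str, target: str) -> None:
    """Register `alias` in sys.modules as a lazy proxy that resolves `target` on first attribute access."""
    module = types.ModuleType(alias)

    def _resolve():
        return importlib.import_module(target)

    # PEP 562 hooks: attribute lookups and dir() are forwarded to the real module.
    module.__getattr__ = lambda name: getattr(_resolve(), name)
    module.__dir__ = lambda: dir(_resolve())
    sys.modules[alias] = module


create_alias("legacy_json", "json")  # "legacy_json" is a hypothetical old import path
import legacy_json  # returns the proxy; nothing heavy has been imported yet

print(legacy_json.dumps({"ok": True}))  # first attribute access triggers the real import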
transformers/cli/serve.py
CHANGED

@@ -36,7 +36,7 @@ from tokenizers.decoders import DecodeStream
 from tqdm import tqdm
 
 import transformers
-from transformers import BitsAndBytesConfig, GenerationConfig
+from transformers import AutoTokenizer, BitsAndBytesConfig, GenerationConfig, PreTrainedTokenizerBase
 from transformers.utils.import_utils import (
     is_fastapi_available,
     is_librosa_available,
@@ -823,9 +823,9 @@ class Serve:
         self.running_continuous_batching_manager.start()
 
         # TODO (Joao, Lysandre): this should also work with tool support
-        inputs = processor.apply_chat_template(
-
-        )["input_ids"][0]
+        inputs = processor.apply_chat_template(
+            req["messages"], return_tensors="pt", add_generation_prompt=True, return_dict=True
+        ).to(model.device)["input_ids"][0]
 
         def stream_chat_completion(request_id, decode_stream):
             from ..generation.continuous_batching import RequestStatus
@@ -841,8 +841,13 @@
 
                 if result.status == RequestStatus.FINISHED:
                     generated_all_tokens = n_tokens_generated >= generation_config.max_new_tokens
-
-
+
+                    # If the tokenizer has an eos_token, we can have a more robust check.
+                    if hasattr(tokenizer, "eos_token"):
+                        final_token_is_eos = result == tokenizer.eos_token
+                        generated_all_tokens = generated_all_tokens and not final_token_is_eos
+
+                    reason = "length" if generated_all_tokens else "stop"
 
                     yield self.build_chat_completion_chunk(
                         request_id,
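The hunk above (and the identical change in the non-continuous-batching path further down) makes the reported `finish_reason` EOS-aware: exhausting the `max_new_tokens` budget only counts as "length" when the last decoded token is not the tokenizer's EOS token. A standalone sketch of that decision, with illustrative names:

def resolve_finish_reason(last_token: str, n_generated: int, max_new_tokens: int, eos_token: str | None) -> str:
    # Mirrors the serve.py logic: budget exhausted -> "length", unless the model ended on EOS.
    generated_all_tokens = n_generated >= max_new_tokens
    if eos_token is not None and last_token == eos_token:
        generated_all_tokens = False
    return "length" if generated_all_tokens else "stop"


assert resolve_finish_reason("</s>", 128, 128, "</s>") == "stop"     # ended exactly on EOS
assert resolve_finish_reason("world", 128, 128, "</s>") == "length"  # budget hit mid-sentence
assert resolve_finish_reason("world", 40, 128, "</s>") == "stop"     # finished naturally early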
@@ -921,7 +926,11 @@
             return JSONResponse(json_chunk, media_type="application/json")
 
     @staticmethod
-    def get_model_modality(model: "PreTrainedModel") -> Modality:
+    def get_model_modality(model: "PreTrainedModel", processor=None) -> Modality:
+        if processor is not None:
+            if isinstance(processor, PreTrainedTokenizerBase):
+                return Modality.LLM
+
         from transformers.models.auto.modeling_auto import (
             MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
             MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES,
@@ -1011,7 +1020,7 @@
         self.last_model = model_id_and_revision
         model, processor = self.load_model_and_processor(model_id_and_revision)
 
-        modality = self.get_model_modality(model)
+        modality = self.get_model_modality(model, processor=processor)
         processor_inputs = self.get_processor_inputs_from_inbound_messages(messages, modality)
 
         # ====== TOOL PREPROCESSING LOGIC ======
@@ -1184,8 +1193,14 @@
             )
 
             generated_all_tokens = n_tokens_generated >= generation_config.max_new_tokens
-
-
+
+            # If the tokenizer has an eos_token, we can have a more robust check.
+            if hasattr(streamer.tokenizer, "eos_token"):
+                final_token_is_eos = result == streamer.tokenizer.eos_token
+                generated_all_tokens = generated_all_tokens and not final_token_is_eos
+
+            reason = "length" if generated_all_tokens else "stop"
+
             yield self.build_chat_completion_chunk(_request_id, finish_reason=reason, model=model_id_and_revision)
 
         thread.join()
@@ -1272,7 +1287,9 @@
         else:
             raise TypeError("inputs should be a list, dict, or str")
 
-        inputs = processor.apply_chat_template(
+        inputs = processor.apply_chat_template(
+            inputs, add_generation_prompt=True, return_tensors="pt", return_dict=True
+        )["input_ids"]
         inputs = inputs.to(model.device)
         request_id = req.get("previous_response_id", "req_0")
 
@@ -1576,7 +1593,9 @@
         else:
             raise ValueError("inputs should be a list, dict, or str")
 
-        inputs = processor.apply_chat_template(
+        inputs = processor.apply_chat_template(
+            inputs, add_generation_prompt=True, return_tensors="pt", return_dict=True
+        )["input_ids"]
         inputs = inputs.to(model.device)
         request_id = req.get("previous_response_id", "req_0")
 
@@ -1775,11 +1794,22 @@
         else:
             model_id, revision = model_id_and_revision, "main"
 
-
-
-
-
-
+        try:
+            data_processor = AutoProcessor.from_pretrained(
+                model_id,
+                revision=revision,
+                trust_remote_code=self.trust_remote_code,
+            )
+        except OSError:
+            try:
+                data_processor = AutoTokenizer.from_pretrained(
+                    model_id,
+                    revision=revision,
+                    trust_remote_code=self.trust_remote_code,
+                )
+            except OSError:
+                raise OSError("Failed to load processor with `AutoProcessor` and `AutoTokenizer`.")
+
         dtype = self.dtype if self.dtype in ["auto", None] else getattr(torch, self.dtype)
         quantization_config = self.get_quantization_config()
 
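The new loading block tries `AutoProcessor` first and only falls back to `AutoTokenizer` when `AutoProcessor` raises an `OSError`; together with the `get_model_modality` change above, a tokenizer-only repo is then served as a plain LLM. A condensed sketch of the same fallback order (the helper name is illustrative, not part of serve.py):

from transformers import AutoProcessor, AutoTokenizer


def load_data_processor(model_id: str, revision: str = "main", trust_remote_code: bool = False):
    """Prefer a full processor; fall back to a tokenizer for text-only repos, as serve.py now does."""
    try:
        return AutoProcessor.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)
    except OSError:
        return AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)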
transformers/conversion_mapping.py
CHANGED

@@ -186,10 +186,22 @@ _checkpoint_conversion_mapping_cache = None
 
 def get_checkpoint_conversion_mapping(model_type):
     global _checkpoint_conversion_mapping_cache
-    _checkpoint_conversion_mapping_cache
+    if _checkpoint_conversion_mapping_cache is None:
+        _checkpoint_conversion_mapping_cache = _build_checkpoint_conversion_mapping()
     return deepcopy(_checkpoint_conversion_mapping_cache.get(model_type))
 
 
+def register_checkpoint_conversion_mapping(
+    model_type: str, mapping: list[WeightConverter | WeightRenaming], overwrite: bool = False
+) -> None:
+    global _checkpoint_conversion_mapping_cache
+    if _checkpoint_conversion_mapping_cache is None:
+        _checkpoint_conversion_mapping_cache = _build_checkpoint_conversion_mapping()
+    if model_type in _checkpoint_conversion_mapping_cache and not overwrite:
+        raise ValueError(f"Model type {model_type} already exists in the checkpoint conversion mapping.")
+    _checkpoint_conversion_mapping_cache[model_type] = mapping
+
+
 # DO NOT MODIFY, KEPT FOR BC ONLY
 VLMS = [
     "aria",
@@ -213,6 +225,7 @@ VLMS = [
     "sam3",
     "sam3_tracker",
     "sam3_tracker_video",
+    "paddleocrvl",
 ]
 
 
@@ -228,7 +241,7 @@ def get_model_conversion_mapping(
     """
     weight_conversions = []
 
-    # Load models with key mapping
+    # Load models with explicit, user-provided key mapping
    if key_mapping is not None:
        weight_conversions = [WeightRenaming(source_patterns=k, target_patterns=v) for k, v in key_mapping.items()]
    elif any(
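The new `register_checkpoint_conversion_mapping` lets external code add renaming rules for a model type before weights are loaded, and refuses to clobber an existing mapping unless `overwrite=True`. A hypothetical registration could look like the following; the model type and patterns are invented, and the `WeightRenaming` import path is assumed from how conversion_mapping.py uses it, not confirmed by this diff:

from transformers.conversion_mapping import register_checkpoint_conversion_mapping
from transformers.core_model_loading import WeightRenaming  # assumed location of WeightRenaming

register_checkpoint_conversion_mapping(
    "my_custom_model",  # hypothetical model type, not a real transformers architecture
    [WeightRenaming(source_patterns="backbone.", target_patterns="model.")],
    overwrite=False,  # raises ValueError if a mapping for this model type already exists
)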
transformers/convert_slow_tokenizer.py
CHANGED

@@ -19,6 +19,7 @@ allow to make our dependency on SentencePiece optional.
 """
 
 import warnings
+from collections.abc import Collection
 from functools import lru_cache
 from typing import Optional
 
@@ -33,6 +34,64 @@ from .utils.import_utils import PROTOBUF_IMPORT_ERROR
 
 logger = logging.get_logger(__name__)
 
+MBART_LANGUAGES = [
+    "ar_AR",
+    "cs_CZ",
+    "de_DE",
+    "en_XX",
+    "es_XX",
+    "et_EE",
+    "fi_FI",
+    "fr_XX",
+    "gu_IN",
+    "hi_IN",
+    "it_IT",
+    "ja_XX",
+    "kk_KZ",
+    "ko_KR",
+    "lt_LT",
+    "lv_LV",
+    "my_MM",
+    "ne_NP",
+    "nl_XX",
+    "ro_RO",
+    "ru_RU",
+    "si_LK",
+    "tr_TR",
+    "vi_VN",
+    "zh_CN",
+]
+
+MBART50_LANGUAGES = MBART_LANGUAGES + [
+    "af_ZA",
+    "az_AZ",
+    "bn_IN",
+    "fa_IR",
+    "he_IL",
+    "hr_HR",
+    "id_ID",
+    "ka_GE",
+    "km_KH",
+    "mk_MK",
+    "ml_IN",
+    "mn_MN",
+    "mr_IN",
+    "pl_PL",
+    "ps_AF",
+    "pt_XX",
+    "sv_SE",
+    "sw_KE",
+    "ta_IN",
+    "te_IN",
+    "th_TH",
+    "tl_XX",
+    "uk_UA",
+    "ur_PK",
+    "xh_ZA",
+    "gl_ES",
+    "sl_SI",
+]
+
 
 def import_protobuf(error_message=""):
     if is_sentencepiece_available():
@@ -61,15 +120,20 @@ def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
     return prepend_scheme
 
 
-def generate_merges(vocab, vocab_scores):
+def generate_merges(vocab, vocab_scores, skip_tokens: Optional[Collection[str]] = None):
+    skip_tokens = set(skip_tokens) if skip_tokens is not None else set()
     reverse = vocab_scores is not None
     vocab_scores = dict(vocab_scores) if reverse else vocab
 
     merges = []
     for merge, piece_score in vocab_scores.items():
+        if merge in skip_tokens:
+            continue
         local = []
         for index in range(1, len(merge)):
             piece_l, piece_r = merge[:index], merge[index:]
+            if piece_l in skip_tokens or piece_r in skip_tokens:
+                continue
             if piece_l in vocab and piece_r in vocab:
                 local.append((piece_l, piece_r, piece_score))
         local = sorted(local, key=lambda x: (vocab[x[0]], vocab[x[1]]))
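The new `skip_tokens` argument keeps the listed pieces out of the merge table entirely, both as whole merges and as left/right halves, which matters when added or special tokens live inside the SentencePiece vocab. A simplified standalone re-implementation of the filtering idea (not the transformers function itself, which also weighs `vocab_scores`):

def generate_merges_simplified(vocab: dict[str, int], skip_tokens: frozenset[str] = frozenset()) -> list[tuple[str, str]]:
    merges = []
    for merge in vocab:
        if merge in skip_tokens:
            continue
        local = []
        for index in range(1, len(merge)):
            piece_l, piece_r = merge[:index], merge[index:]
            if piece_l in skip_tokens or piece_r in skip_tokens:
                continue
            if piece_l in vocab and piece_r in vocab:
                local.append((piece_l, piece_r))
        # keep the split whose halves have the earliest vocab ranks
        local = sorted(local, key=lambda x: (vocab[x[0]], vocab[x[1]]))
        if local:
            merges.append(local[0])
    return merges


vocab = {"<mask>": 0, "a": 1, "b": 2, "ab": 3, "<mask>ab": 4}
print(generate_merges_simplified(vocab, frozenset({"<mask>"})))  # [('a', 'b')] -- no merge touches "<mask>"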
@@ -87,22 +151,49 @@ class SentencePieceExtractor:
 
     def __init__(self, model: str):
         requires_backends(self, "sentencepiece")
-
+        requires_backends(self, "protobuf")
+
+        # from .utils import sentencepiece_model_pb2 as model_pb2
+        model_pb2 = import_protobuf()
 
-
-
+        m = model_pb2.ModelProto()
+        with open(model, "rb") as f:
+            m.ParseFromString(f.read())
+        self.proto = m
 
-    def extract(self,
+    def extract(self, model_type, **kwargs) -> tuple[dict[str, int], list[tuple]]:
         """
         By default will return vocab and merges with respect to their order, by sending `vocab_scores` we're going to
         order the merges with respect to the piece scores instead.
         """
-
-
+        self.proto.trainer_spec.unk_id
+        if model_type is None:
+            from tokenizers.models import BPE, Unigram
 
-
+            model_type = Unigram if self.proto.trainer_spec.model_type == 2 else BPE
+        vocab = [(piece.piece, piece.score) for piece in self.proto.pieces]
 
-
+        if model_type.__name__ != "BPE":
+            kwargs["unk_id"] = self.proto.trainer_spec.unk_id
+            kwargs["vocab"] = vocab
+        else:
+            from .tokenization_utils_base import generate_merges
+
+            vocab = {word: i for i, (word, score) in enumerate(vocab)}
+            merges = generate_merges(vocab)
+            kwargs["vocab"] = vocab
+            kwargs["merges"] = merges
+
+        # control tokens are special
+        # user defined symbols are not
+        # both user and control tokens are AddedTokens
+        # Add user defined symbols (type == 4) from sentencepiece (https://github.com/google/sentencepiece/blob/6225e08edb2577757163b3f5dbba4c0b670ef445/src/sentencepiece_model.proto#L299C29-L299C33)
+        spm_added_tokens = [(id, p.piece, p.type == 3) for id, p in enumerate(self.proto.pieces) if p.type in [3, 4]]
+        kwargs["additional_special_tokens"] = [
+            AddedToken(token, normalized=False, special=special)
+            for id, token, special in sorted(spm_added_tokens, key=lambda x: x[0])
+        ]
+        return kwargs
 
 
 class GemmaSentencePieceExtractor(SentencePieceExtractor):
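`SentencePieceExtractor.extract` now parses the `.model` protobuf itself and returns constructor kwargs: (piece, score) pairs plus `unk_id` for Unigram, a token-to-id vocab plus merges for BPE, and the control/user-defined pieces as `AddedToken`s. The per-model `convert_from_spm` hooks in the following hunks then adjust those kwargs. A hedged sketch of how kwargs of that shape map onto `tokenizers` model classes, built by hand here rather than from a real SentencePiece file, and not the exact consumption path inside transformers:

from tokenizers import Tokenizer
from tokenizers.models import BPE, Unigram

# Unigram-shaped kwargs: (piece, score) pairs plus the unk id.
unigram_kwargs = {"vocab": [("<unk>", 0.0), ("▁hello", -2.5), ("▁world", -3.1)], "unk_id": 0}
unigram_tok = Tokenizer(Unigram(unigram_kwargs["vocab"], unk_id=unigram_kwargs["unk_id"]))

# BPE-shaped kwargs: token-to-id vocab plus merges such as those produced by generate_merges.
bpe_kwargs = {"vocab": {"a": 0, "b": 1, "ab": 2}, "merges": [("a", "b")]}
bpe_tok = Tokenizer(BPE(vocab=bpe_kwargs["vocab"], merges=bpe_kwargs["merges"]))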
@@ -549,6 +640,16 @@ class SpmConverter(Converter):
     SpmExtractor = SentencePieceExtractor
     special_tokens = {}
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        """
+        Hook used when converting directly from a SentencePiece model without a slow tokenizer instance.
+        By default, return kwargs unchanged.
+        """
+        if vocab is not None:
+            kwargs["vocab"] = vocab
+        return kwargs
+
     def __init__(self, *args):
         requires_backends(self, "protobuf")
 
@@ -754,6 +855,25 @@ class CamembertConverter(SpmConverter):
         ],
     )
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        pad_token = str(kwargs.get("pad_token", "<pad>"))
+        unk_token = str(kwargs.get("unk_token", "<unk>"))
+        mask_token = str(kwargs.get("mask_token", "<mask>"))
+
+        vocab_list = [
+            ("<s>NOTUSED", 0.0),
+            (pad_token, 0.0),
+            ("</s>NOTUSED", 0.0),
+            (unk_token, 0.0),
+            ("<unk>NOTUSED", -100.0),
+        ]
+        if vocab is not None:
+            vocab_list.extend(list(vocab)[1:])
+        vocab_list.append((mask_token, 0.0))
+        kwargs["vocab"] = vocab_list
+        return kwargs
+
 
 class DebertaV2Converter(SpmConverter):
     def pre_tokenizer(self, replacement, add_prefix_space):
@@ -840,6 +960,27 @@ class MBartConverter(SpmConverter):
         ],
     )
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        bos_token = str(kwargs.get("bos_token", "<s>"))
+        pad_token = str(kwargs.get("pad_token", "<pad>"))
+        eos_token = str(kwargs.get("eos_token", "</s>"))
+        unk_token = str(kwargs.get("unk_token", "<unk>"))
+        mask_token = str(kwargs.get("mask_token", "<mask>"))
+
+        vocab_list = [
+            (bos_token, 0.0),
+            (pad_token, 0.0),
+            (eos_token, 0.0),
+            (unk_token, 0.0),
+        ]
+        if vocab is not None:
+            vocab_list.extend(list(vocab)[3:])
+        vocab_list.extend((lang_code, 0.0) for lang_code in MBART_LANGUAGES)
+        vocab_list.append((mask_token, 0.0))
+        kwargs["vocab"] = vocab_list
+        return kwargs
+
 
 class MBart50Converter(SpmConverter):
     def vocab(self, proto):
@@ -867,6 +1008,27 @@ class MBart50Converter(SpmConverter):
         ],
     )
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        cls_token = str(kwargs.get("cls_token", "<s>"))
+        pad_token = str(kwargs.get("pad_token", "<pad>"))
+        eos_token = str(kwargs.get("eos_token", "</s>"))
+        unk_token = str(kwargs.get("unk_token", "<unk>"))
+        mask_token = str(kwargs.get("mask_token", "<mask>"))
+
+        vocab_list = [
+            (cls_token, 0.0),
+            (pad_token, 0.0),
+            (eos_token, 0.0),
+            (unk_token, 0.0),
+        ]
+        if vocab is not None:
+            vocab_list.extend(list(vocab)[3:])
+        vocab_list.extend((lang_code, 0.0) for lang_code in MBART50_LANGUAGES)
+        vocab_list.append((mask_token, 0.0))
+        kwargs["vocab"] = vocab_list
+        return kwargs
+
 
 class NllbConverter(SpmConverter):
     def vocab(self, proto):
@@ -892,6 +1054,28 @@ class NllbConverter(SpmConverter):
         ],
     )
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        bos_token = str(kwargs.get("bos_token", "<s>"))
+        pad_token = str(kwargs.get("pad_token", "<pad>"))
+        eos_token = str(kwargs.get("eos_token", "</s>"))
+        unk_token = str(kwargs.get("unk_token", "<unk>"))
+
+        reordered_vocab = {
+            bos_token: 0,
+            pad_token: 1,
+            eos_token: 2,
+            unk_token: 3,
+        }
+        if vocab is not None:
+            tokens = vocab.keys() if isinstance(vocab, dict) else [tok for tok, _ in vocab]
+            for token in tokens:
+                if token in reordered_vocab:
+                    continue
+                reordered_vocab[token] = len(reordered_vocab)
+        kwargs["vocab"] = reordered_vocab
+        return kwargs
+
 
 class SeamlessM4TConverter(SpmConverter):
     def vocab(self, proto):
@@ -944,6 +1128,26 @@ class XLMRobertaConverter(SpmConverter):
         ],
     )
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        bos_token = str(kwargs.get("bos_token", "<s>"))
+        pad_token = str(kwargs.get("pad_token", "<pad>"))
+        eos_token = str(kwargs.get("eos_token", "</s>"))
+        unk_token = str(kwargs.get("unk_token", "<unk>"))
+        mask_token = str(kwargs.get("mask_token", "<mask>"))
+
+        vocab_list = [
+            (bos_token, 0.0),
+            (pad_token, 0.0),
+            (eos_token, 0.0),
+            (unk_token, 0.0),
+        ]
+        if vocab is not None:
+            vocab_list.extend(list(vocab)[3:])
+        vocab_list.append((mask_token, 0.0))
+        kwargs["vocab"] = vocab_list
+        return kwargs
+
 
 class XLNetConverter(SpmConverter):
     def vocab(self, proto):
@@ -1078,6 +1282,17 @@ class T5Converter(SpmConverter):
         ],
     )
 
+    @classmethod
+    def convert_from_spm(cls, vocab=None, **kwargs):
+        extra_ids = kwargs.get("extra_ids", 100)
+        extra_tokens = [f"<extra_id_{i}>" for i in range(extra_ids - 1, -1, -1)]
+        vocab_list = list(vocab) if vocab is not None else []
+        vocab_list.extend((token, 0.0) for token in extra_tokens)
+
+        kwargs.setdefault("additional_special_tokens", extra_tokens)
+        kwargs["vocab"] = vocab_list
+        return kwargs
+
 
 class UdopConverter(SpmConverter):
     def post_processor(self):
@@ -1171,7 +1386,7 @@ class CLIPConverter(Converter):
         )
         tokenizer.decoder = decoders.ByteLevel()
 
-        # Hack to have a ByteLevel and
+        # Hack to have a ByteLevel and TemplateProcessor
         tokenizer.post_processor = processors.RobertaProcessing(
             sep=(self.original_tokenizer.eos_token, self.original_tokenizer.eos_token_id),
             cls=(self.original_tokenizer.bos_token, self.original_tokenizer.bos_token_id),