transformers 5.0.0rc3__py3-none-any.whl → 5.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +4 -11
- transformers/activations.py +2 -2
- transformers/backbone_utils.py +326 -0
- transformers/cache_utils.py +11 -2
- transformers/cli/serve.py +11 -8
- transformers/configuration_utils.py +1 -69
- transformers/conversion_mapping.py +146 -26
- transformers/convert_slow_tokenizer.py +6 -4
- transformers/core_model_loading.py +207 -118
- transformers/dependency_versions_check.py +0 -1
- transformers/dependency_versions_table.py +7 -8
- transformers/file_utils.py +0 -2
- transformers/generation/candidate_generator.py +1 -2
- transformers/generation/continuous_batching/cache.py +40 -38
- transformers/generation/continuous_batching/cache_manager.py +3 -16
- transformers/generation/continuous_batching/continuous_api.py +94 -406
- transformers/generation/continuous_batching/input_ouputs.py +464 -0
- transformers/generation/continuous_batching/requests.py +54 -17
- transformers/generation/continuous_batching/scheduler.py +77 -95
- transformers/generation/logits_process.py +10 -5
- transformers/generation/stopping_criteria.py +1 -2
- transformers/generation/utils.py +75 -95
- transformers/image_processing_utils.py +0 -3
- transformers/image_processing_utils_fast.py +17 -18
- transformers/image_transforms.py +44 -13
- transformers/image_utils.py +0 -5
- transformers/initialization.py +57 -0
- transformers/integrations/__init__.py +10 -24
- transformers/integrations/accelerate.py +47 -11
- transformers/integrations/deepspeed.py +145 -3
- transformers/integrations/executorch.py +2 -6
- transformers/integrations/finegrained_fp8.py +142 -7
- transformers/integrations/flash_attention.py +2 -7
- transformers/integrations/hub_kernels.py +18 -7
- transformers/integrations/moe.py +226 -106
- transformers/integrations/mxfp4.py +47 -34
- transformers/integrations/peft.py +488 -176
- transformers/integrations/tensor_parallel.py +641 -581
- transformers/masking_utils.py +153 -9
- transformers/modeling_flash_attention_utils.py +1 -2
- transformers/modeling_utils.py +359 -358
- transformers/models/__init__.py +6 -0
- transformers/models/afmoe/configuration_afmoe.py +14 -4
- transformers/models/afmoe/modeling_afmoe.py +8 -8
- transformers/models/afmoe/modular_afmoe.py +7 -7
- transformers/models/aimv2/configuration_aimv2.py +2 -7
- transformers/models/aimv2/modeling_aimv2.py +26 -24
- transformers/models/aimv2/modular_aimv2.py +8 -12
- transformers/models/albert/configuration_albert.py +8 -1
- transformers/models/albert/modeling_albert.py +3 -3
- transformers/models/align/configuration_align.py +8 -5
- transformers/models/align/modeling_align.py +22 -24
- transformers/models/altclip/configuration_altclip.py +4 -6
- transformers/models/altclip/modeling_altclip.py +30 -26
- transformers/models/apertus/configuration_apertus.py +5 -7
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/apertus/modular_apertus.py +8 -10
- transformers/models/arcee/configuration_arcee.py +5 -7
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/configuration_aria.py +11 -21
- transformers/models/aria/modeling_aria.py +39 -36
- transformers/models/aria/modular_aria.py +33 -39
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +3 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +39 -30
- transformers/models/audioflamingo3/modular_audioflamingo3.py +41 -27
- transformers/models/auto/auto_factory.py +8 -6
- transformers/models/auto/configuration_auto.py +22 -0
- transformers/models/auto/image_processing_auto.py +17 -13
- transformers/models/auto/modeling_auto.py +15 -0
- transformers/models/auto/processing_auto.py +9 -18
- transformers/models/auto/tokenization_auto.py +17 -15
- transformers/models/autoformer/modeling_autoformer.py +2 -1
- transformers/models/aya_vision/configuration_aya_vision.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +29 -62
- transformers/models/aya_vision/modular_aya_vision.py +20 -45
- transformers/models/bamba/configuration_bamba.py +17 -7
- transformers/models/bamba/modeling_bamba.py +23 -55
- transformers/models/bamba/modular_bamba.py +19 -54
- transformers/models/bark/configuration_bark.py +2 -1
- transformers/models/bark/modeling_bark.py +24 -10
- transformers/models/bart/configuration_bart.py +9 -4
- transformers/models/bart/modeling_bart.py +9 -12
- transformers/models/beit/configuration_beit.py +2 -4
- transformers/models/beit/image_processing_beit_fast.py +3 -3
- transformers/models/beit/modeling_beit.py +14 -9
- transformers/models/bert/configuration_bert.py +12 -1
- transformers/models/bert/modeling_bert.py +6 -30
- transformers/models/bert_generation/configuration_bert_generation.py +17 -1
- transformers/models/bert_generation/modeling_bert_generation.py +6 -6
- transformers/models/big_bird/configuration_big_bird.py +12 -8
- transformers/models/big_bird/modeling_big_bird.py +0 -15
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -8
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +9 -7
- transformers/models/biogpt/configuration_biogpt.py +8 -1
- transformers/models/biogpt/modeling_biogpt.py +4 -8
- transformers/models/biogpt/modular_biogpt.py +1 -5
- transformers/models/bit/configuration_bit.py +2 -4
- transformers/models/bit/modeling_bit.py +6 -5
- transformers/models/bitnet/configuration_bitnet.py +5 -7
- transformers/models/bitnet/modeling_bitnet.py +3 -4
- transformers/models/bitnet/modular_bitnet.py +3 -4
- transformers/models/blenderbot/configuration_blenderbot.py +8 -4
- transformers/models/blenderbot/modeling_blenderbot.py +4 -4
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +8 -4
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +4 -4
- transformers/models/blip/configuration_blip.py +9 -9
- transformers/models/blip/modeling_blip.py +55 -37
- transformers/models/blip_2/configuration_blip_2.py +2 -1
- transformers/models/blip_2/modeling_blip_2.py +81 -56
- transformers/models/bloom/configuration_bloom.py +5 -1
- transformers/models/bloom/modeling_bloom.py +2 -1
- transformers/models/blt/configuration_blt.py +23 -12
- transformers/models/blt/modeling_blt.py +20 -14
- transformers/models/blt/modular_blt.py +70 -10
- transformers/models/bridgetower/configuration_bridgetower.py +7 -1
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +6 -6
- transformers/models/bridgetower/modeling_bridgetower.py +29 -15
- transformers/models/bros/configuration_bros.py +24 -17
- transformers/models/camembert/configuration_camembert.py +8 -1
- transformers/models/camembert/modeling_camembert.py +6 -6
- transformers/models/canine/configuration_canine.py +4 -1
- transformers/models/chameleon/configuration_chameleon.py +5 -7
- transformers/models/chameleon/image_processing_chameleon_fast.py +5 -5
- transformers/models/chameleon/modeling_chameleon.py +82 -36
- transformers/models/chinese_clip/configuration_chinese_clip.py +10 -7
- transformers/models/chinese_clip/modeling_chinese_clip.py +28 -29
- transformers/models/clap/configuration_clap.py +4 -8
- transformers/models/clap/modeling_clap.py +21 -22
- transformers/models/clip/configuration_clip.py +4 -1
- transformers/models/clip/image_processing_clip_fast.py +9 -0
- transformers/models/clip/modeling_clip.py +25 -22
- transformers/models/clipseg/configuration_clipseg.py +4 -1
- transformers/models/clipseg/modeling_clipseg.py +27 -25
- transformers/models/clipseg/processing_clipseg.py +11 -3
- transformers/models/clvp/configuration_clvp.py +14 -2
- transformers/models/clvp/modeling_clvp.py +19 -30
- transformers/models/codegen/configuration_codegen.py +4 -3
- transformers/models/codegen/modeling_codegen.py +2 -1
- transformers/models/cohere/configuration_cohere.py +5 -7
- transformers/models/cohere/modeling_cohere.py +4 -4
- transformers/models/cohere/modular_cohere.py +3 -3
- transformers/models/cohere2/configuration_cohere2.py +6 -8
- transformers/models/cohere2/modeling_cohere2.py +4 -4
- transformers/models/cohere2/modular_cohere2.py +9 -11
- transformers/models/cohere2_vision/configuration_cohere2_vision.py +5 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +3 -3
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +24 -25
- transformers/models/cohere2_vision/modular_cohere2_vision.py +20 -20
- transformers/models/colqwen2/modeling_colqwen2.py +7 -6
- transformers/models/colqwen2/modular_colqwen2.py +7 -6
- transformers/models/conditional_detr/configuration_conditional_detr.py +19 -46
- transformers/models/conditional_detr/image_processing_conditional_detr.py +3 -4
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +28 -14
- transformers/models/conditional_detr/modeling_conditional_detr.py +794 -942
- transformers/models/conditional_detr/modular_conditional_detr.py +901 -3
- transformers/models/convbert/configuration_convbert.py +11 -7
- transformers/models/convnext/configuration_convnext.py +2 -4
- transformers/models/convnext/image_processing_convnext_fast.py +2 -2
- transformers/models/convnext/modeling_convnext.py +7 -6
- transformers/models/convnextv2/configuration_convnextv2.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +7 -6
- transformers/models/cpmant/configuration_cpmant.py +4 -0
- transformers/models/csm/configuration_csm.py +9 -15
- transformers/models/csm/modeling_csm.py +3 -3
- transformers/models/ctrl/configuration_ctrl.py +16 -0
- transformers/models/ctrl/modeling_ctrl.py +13 -25
- transformers/models/cwm/configuration_cwm.py +5 -7
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/configuration_d_fine.py +10 -56
- transformers/models/d_fine/modeling_d_fine.py +728 -868
- transformers/models/d_fine/modular_d_fine.py +335 -412
- transformers/models/dab_detr/configuration_dab_detr.py +22 -48
- transformers/models/dab_detr/modeling_dab_detr.py +11 -7
- transformers/models/dac/modeling_dac.py +1 -1
- transformers/models/data2vec/configuration_data2vec_audio.py +4 -1
- transformers/models/data2vec/configuration_data2vec_text.py +11 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +3 -3
- transformers/models/data2vec/modeling_data2vec_text.py +6 -6
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -2
- transformers/models/dbrx/configuration_dbrx.py +11 -3
- transformers/models/dbrx/modeling_dbrx.py +6 -6
- transformers/models/dbrx/modular_dbrx.py +6 -6
- transformers/models/deberta/configuration_deberta.py +6 -0
- transformers/models/deberta_v2/configuration_deberta_v2.py +6 -0
- transformers/models/decision_transformer/configuration_decision_transformer.py +3 -1
- transformers/models/decision_transformer/modeling_decision_transformer.py +3 -3
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +7 -10
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -8
- transformers/models/deepseek_v2/modular_deepseek_v2.py +8 -10
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +7 -10
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +7 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -5
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +4 -0
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +2 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +5 -5
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +17 -12
- transformers/models/deepseek_vl/modular_deepseek_vl.py +4 -0
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +4 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +2 -2
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +6 -6
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +68 -24
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +70 -19
- transformers/models/deformable_detr/configuration_deformable_detr.py +22 -45
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +25 -11
- transformers/models/deformable_detr/modeling_deformable_detr.py +410 -607
- transformers/models/deformable_detr/modular_deformable_detr.py +1385 -3
- transformers/models/deit/modeling_deit.py +11 -7
- transformers/models/depth_anything/configuration_depth_anything.py +12 -42
- transformers/models/depth_anything/modeling_depth_anything.py +5 -3
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +2 -2
- transformers/models/depth_pro/modeling_depth_pro.py +8 -4
- transformers/models/detr/configuration_detr.py +18 -49
- transformers/models/detr/image_processing_detr_fast.py +11 -11
- transformers/models/detr/modeling_detr.py +695 -734
- transformers/models/dia/configuration_dia.py +4 -7
- transformers/models/dia/generation_dia.py +8 -17
- transformers/models/dia/modeling_dia.py +7 -7
- transformers/models/dia/modular_dia.py +4 -4
- transformers/models/diffllama/configuration_diffllama.py +5 -7
- transformers/models/diffllama/modeling_diffllama.py +3 -8
- transformers/models/diffllama/modular_diffllama.py +2 -7
- transformers/models/dinat/configuration_dinat.py +2 -4
- transformers/models/dinat/modeling_dinat.py +7 -6
- transformers/models/dinov2/configuration_dinov2.py +2 -4
- transformers/models/dinov2/modeling_dinov2.py +9 -8
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +2 -4
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +9 -8
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +6 -7
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +2 -4
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +2 -3
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +2 -4
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +2 -2
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -6
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -6
- transformers/models/distilbert/configuration_distilbert.py +8 -1
- transformers/models/distilbert/modeling_distilbert.py +3 -3
- transformers/models/doge/configuration_doge.py +17 -7
- transformers/models/doge/modeling_doge.py +4 -4
- transformers/models/doge/modular_doge.py +20 -10
- transformers/models/donut/image_processing_donut_fast.py +4 -4
- transformers/models/dots1/configuration_dots1.py +16 -7
- transformers/models/dots1/modeling_dots1.py +4 -4
- transformers/models/dpr/configuration_dpr.py +19 -1
- transformers/models/dpt/configuration_dpt.py +23 -65
- transformers/models/dpt/image_processing_dpt_fast.py +5 -5
- transformers/models/dpt/modeling_dpt.py +19 -15
- transformers/models/dpt/modular_dpt.py +4 -4
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +53 -53
- transformers/models/edgetam/modular_edgetam.py +5 -7
- transformers/models/edgetam_video/modeling_edgetam_video.py +55 -56
- transformers/models/edgetam_video/modular_edgetam_video.py +9 -9
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +4 -3
- transformers/models/efficientloftr/modeling_efficientloftr.py +19 -9
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +2 -2
- transformers/models/electra/configuration_electra.py +13 -2
- transformers/models/electra/modeling_electra.py +6 -6
- transformers/models/emu3/configuration_emu3.py +12 -10
- transformers/models/emu3/modeling_emu3.py +84 -47
- transformers/models/emu3/modular_emu3.py +77 -39
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +12 -1
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +20 -24
- transformers/models/eomt/configuration_eomt.py +12 -13
- transformers/models/eomt/image_processing_eomt_fast.py +3 -3
- transformers/models/eomt/modeling_eomt.py +3 -3
- transformers/models/eomt/modular_eomt.py +17 -17
- transformers/models/eomt_dinov3/__init__.py +28 -0
- transformers/models/eomt_dinov3/configuration_eomt_dinov3.py +204 -0
- transformers/models/eomt_dinov3/modeling_eomt_dinov3.py +1376 -0
- transformers/models/eomt_dinov3/modular_eomt_dinov3.py +454 -0
- transformers/models/ernie/configuration_ernie.py +24 -2
- transformers/models/ernie/modeling_ernie.py +6 -30
- transformers/models/ernie4_5/configuration_ernie4_5.py +5 -7
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +7 -10
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +4 -4
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +17 -6
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +229 -188
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +79 -55
- transformers/models/esm/configuration_esm.py +9 -11
- transformers/models/esm/modeling_esm.py +3 -3
- transformers/models/esm/modeling_esmfold.py +1 -6
- transformers/models/esm/openfold_utils/protein.py +2 -3
- transformers/models/evolla/configuration_evolla.py +21 -8
- transformers/models/evolla/modeling_evolla.py +11 -7
- transformers/models/evolla/modular_evolla.py +5 -1
- transformers/models/exaone4/configuration_exaone4.py +8 -5
- transformers/models/exaone4/modeling_exaone4.py +4 -4
- transformers/models/exaone4/modular_exaone4.py +11 -8
- transformers/models/exaone_moe/__init__.py +27 -0
- transformers/models/exaone_moe/configuration_exaone_moe.py +235 -0
- transformers/models/exaone_moe/modeling_exaone_moe.py +665 -0
- transformers/models/exaone_moe/modular_exaone_moe.py +373 -0
- transformers/models/falcon/configuration_falcon.py +9 -1
- transformers/models/falcon/modeling_falcon.py +3 -8
- transformers/models/falcon_h1/configuration_falcon_h1.py +17 -8
- transformers/models/falcon_h1/modeling_falcon_h1.py +22 -54
- transformers/models/falcon_h1/modular_falcon_h1.py +21 -52
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +5 -1
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +18 -26
- transformers/models/falcon_mamba/modular_falcon_mamba.py +4 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +10 -1
- transformers/models/fast_vlm/modeling_fast_vlm.py +37 -64
- transformers/models/fast_vlm/modular_fast_vlm.py +146 -35
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +0 -1
- transformers/models/flaubert/configuration_flaubert.py +10 -4
- transformers/models/flaubert/modeling_flaubert.py +1 -1
- transformers/models/flava/configuration_flava.py +4 -3
- transformers/models/flava/image_processing_flava_fast.py +4 -4
- transformers/models/flava/modeling_flava.py +36 -28
- transformers/models/flex_olmo/configuration_flex_olmo.py +11 -14
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -4
- transformers/models/flex_olmo/modular_flex_olmo.py +11 -14
- transformers/models/florence2/configuration_florence2.py +4 -0
- transformers/models/florence2/modeling_florence2.py +57 -32
- transformers/models/florence2/modular_florence2.py +48 -26
- transformers/models/fnet/configuration_fnet.py +6 -1
- transformers/models/focalnet/configuration_focalnet.py +2 -4
- transformers/models/focalnet/modeling_focalnet.py +10 -7
- transformers/models/fsmt/configuration_fsmt.py +12 -16
- transformers/models/funnel/configuration_funnel.py +8 -0
- transformers/models/fuyu/configuration_fuyu.py +5 -8
- transformers/models/fuyu/image_processing_fuyu_fast.py +5 -4
- transformers/models/fuyu/modeling_fuyu.py +24 -23
- transformers/models/gemma/configuration_gemma.py +5 -7
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/modular_gemma.py +5 -7
- transformers/models/gemma2/configuration_gemma2.py +5 -7
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +8 -10
- transformers/models/gemma3/configuration_gemma3.py +28 -22
- transformers/models/gemma3/image_processing_gemma3_fast.py +2 -2
- transformers/models/gemma3/modeling_gemma3.py +37 -33
- transformers/models/gemma3/modular_gemma3.py +46 -42
- transformers/models/gemma3n/configuration_gemma3n.py +35 -22
- transformers/models/gemma3n/modeling_gemma3n.py +86 -58
- transformers/models/gemma3n/modular_gemma3n.py +112 -75
- transformers/models/git/configuration_git.py +5 -7
- transformers/models/git/modeling_git.py +31 -41
- transformers/models/glm/configuration_glm.py +7 -9
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/configuration_glm4.py +7 -9
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm46v/configuration_glm46v.py +4 -0
- transformers/models/glm46v/image_processing_glm46v.py +5 -2
- transformers/models/glm46v/image_processing_glm46v_fast.py +2 -2
- transformers/models/glm46v/modeling_glm46v.py +91 -46
- transformers/models/glm46v/modular_glm46v.py +4 -0
- transformers/models/glm4_moe/configuration_glm4_moe.py +17 -7
- transformers/models/glm4_moe/modeling_glm4_moe.py +4 -4
- transformers/models/glm4_moe/modular_glm4_moe.py +17 -7
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +8 -10
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +7 -7
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +8 -10
- transformers/models/glm4v/configuration_glm4v.py +12 -8
- transformers/models/glm4v/image_processing_glm4v.py +5 -2
- transformers/models/glm4v/image_processing_glm4v_fast.py +2 -2
- transformers/models/glm4v/modeling_glm4v.py +120 -63
- transformers/models/glm4v/modular_glm4v.py +82 -50
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +18 -6
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +115 -63
- transformers/models/glm4v_moe/modular_glm4v_moe.py +23 -12
- transformers/models/glm_image/configuration_glm_image.py +26 -20
- transformers/models/glm_image/image_processing_glm_image.py +1 -1
- transformers/models/glm_image/image_processing_glm_image_fast.py +5 -7
- transformers/models/glm_image/modeling_glm_image.py +337 -236
- transformers/models/glm_image/modular_glm_image.py +415 -255
- transformers/models/glm_image/processing_glm_image.py +65 -17
- transformers/{pipelines/deprecated → models/glm_ocr}/__init__.py +15 -2
- transformers/models/glm_ocr/configuration_glm_ocr.py +312 -0
- transformers/models/glm_ocr/modeling_glm_ocr.py +1633 -0
- transformers/models/glm_ocr/modular_glm_ocr.py +428 -0
- transformers/models/glmasr/modeling_glmasr.py +34 -28
- transformers/models/glmasr/modular_glmasr.py +23 -11
- transformers/models/glpn/image_processing_glpn_fast.py +3 -3
- transformers/models/glpn/modeling_glpn.py +4 -2
- transformers/models/got_ocr2/configuration_got_ocr2.py +6 -6
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +3 -3
- transformers/models/got_ocr2/modeling_got_ocr2.py +31 -37
- transformers/models/got_ocr2/modular_got_ocr2.py +30 -19
- transformers/models/gpt2/configuration_gpt2.py +13 -1
- transformers/models/gpt2/modeling_gpt2.py +5 -5
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +7 -1
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +5 -4
- transformers/models/gpt_neo/configuration_gpt_neo.py +9 -1
- transformers/models/gpt_neo/modeling_gpt_neo.py +3 -7
- transformers/models/gpt_neox/configuration_gpt_neox.py +8 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +4 -4
- transformers/models/gpt_neox/modular_gpt_neox.py +4 -4
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +9 -1
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +2 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +10 -6
- transformers/models/gpt_oss/modeling_gpt_oss.py +46 -79
- transformers/models/gpt_oss/modular_gpt_oss.py +45 -78
- transformers/models/gptj/configuration_gptj.py +4 -4
- transformers/models/gptj/modeling_gptj.py +3 -7
- transformers/models/granite/configuration_granite.py +5 -7
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granite_speech/modeling_granite_speech.py +63 -37
- transformers/models/granitemoe/configuration_granitemoe.py +5 -7
- transformers/models/granitemoe/modeling_granitemoe.py +4 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +17 -7
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +22 -54
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +39 -45
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +6 -7
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -4
- transformers/models/grounding_dino/configuration_grounding_dino.py +10 -45
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +11 -11
- transformers/models/grounding_dino/modeling_grounding_dino.py +68 -86
- transformers/models/groupvit/configuration_groupvit.py +4 -1
- transformers/models/groupvit/modeling_groupvit.py +29 -22
- transformers/models/helium/configuration_helium.py +5 -7
- transformers/models/helium/modeling_helium.py +4 -4
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +2 -4
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -5
- transformers/models/hgnet_v2/modular_hgnet_v2.py +7 -8
- transformers/models/hiera/configuration_hiera.py +2 -4
- transformers/models/hiera/modeling_hiera.py +11 -8
- transformers/models/hubert/configuration_hubert.py +4 -1
- transformers/models/hubert/modeling_hubert.py +7 -4
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +5 -7
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +28 -4
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +28 -6
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +6 -8
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +22 -9
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +22 -8
- transformers/models/ibert/configuration_ibert.py +4 -1
- transformers/models/idefics/configuration_idefics.py +5 -7
- transformers/models/idefics/modeling_idefics.py +3 -4
- transformers/models/idefics/vision.py +5 -4
- transformers/models/idefics2/configuration_idefics2.py +1 -2
- transformers/models/idefics2/image_processing_idefics2_fast.py +1 -0
- transformers/models/idefics2/modeling_idefics2.py +72 -50
- transformers/models/idefics3/configuration_idefics3.py +1 -3
- transformers/models/idefics3/image_processing_idefics3_fast.py +29 -3
- transformers/models/idefics3/modeling_idefics3.py +63 -40
- transformers/models/ijepa/modeling_ijepa.py +3 -3
- transformers/models/imagegpt/configuration_imagegpt.py +9 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +2 -2
- transformers/models/imagegpt/modeling_imagegpt.py +8 -4
- transformers/models/informer/modeling_informer.py +3 -3
- transformers/models/instructblip/configuration_instructblip.py +2 -1
- transformers/models/instructblip/modeling_instructblip.py +65 -39
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +60 -57
- transformers/models/instructblipvideo/modular_instructblipvideo.py +43 -32
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +2 -2
- transformers/models/internvl/configuration_internvl.py +5 -0
- transformers/models/internvl/modeling_internvl.py +35 -55
- transformers/models/internvl/modular_internvl.py +26 -38
- transformers/models/internvl/video_processing_internvl.py +2 -2
- transformers/models/jais2/configuration_jais2.py +5 -7
- transformers/models/jais2/modeling_jais2.py +4 -4
- transformers/models/jamba/configuration_jamba.py +5 -7
- transformers/models/jamba/modeling_jamba.py +4 -4
- transformers/models/jamba/modular_jamba.py +3 -3
- transformers/models/janus/image_processing_janus.py +2 -2
- transformers/models/janus/image_processing_janus_fast.py +8 -8
- transformers/models/janus/modeling_janus.py +63 -146
- transformers/models/janus/modular_janus.py +62 -20
- transformers/models/jetmoe/configuration_jetmoe.py +6 -4
- transformers/models/jetmoe/modeling_jetmoe.py +3 -3
- transformers/models/jetmoe/modular_jetmoe.py +3 -3
- transformers/models/kosmos2/configuration_kosmos2.py +10 -8
- transformers/models/kosmos2/modeling_kosmos2.py +56 -34
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +8 -8
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +54 -63
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +8 -3
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +44 -40
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +1 -1
- transformers/models/lasr/configuration_lasr.py +2 -4
- transformers/models/lasr/modeling_lasr.py +3 -3
- transformers/models/lasr/modular_lasr.py +3 -3
- transformers/models/layoutlm/configuration_layoutlm.py +14 -1
- transformers/models/layoutlm/modeling_layoutlm.py +3 -3
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +14 -16
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +2 -2
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +16 -18
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +2 -2
- transformers/models/layoutxlm/configuration_layoutxlm.py +14 -16
- transformers/models/led/configuration_led.py +7 -8
- transformers/models/levit/image_processing_levit_fast.py +4 -4
- transformers/models/lfm2/configuration_lfm2.py +5 -7
- transformers/models/lfm2/modeling_lfm2.py +4 -4
- transformers/models/lfm2/modular_lfm2.py +3 -3
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +5 -7
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -4
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +9 -15
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +42 -28
- transformers/models/lfm2_vl/modular_lfm2_vl.py +42 -27
- transformers/models/lightglue/image_processing_lightglue_fast.py +4 -3
- transformers/models/lightglue/modeling_lightglue.py +3 -3
- transformers/models/lightglue/modular_lightglue.py +3 -3
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +31 -28
- transformers/models/lighton_ocr/modular_lighton_ocr.py +19 -18
- transformers/models/lilt/configuration_lilt.py +6 -1
- transformers/models/llama/configuration_llama.py +5 -7
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama4/configuration_llama4.py +67 -47
- transformers/models/llama4/image_processing_llama4_fast.py +3 -3
- transformers/models/llama4/modeling_llama4.py +46 -44
- transformers/models/llava/configuration_llava.py +10 -0
- transformers/models/llava/image_processing_llava_fast.py +3 -3
- transformers/models/llava/modeling_llava.py +38 -65
- transformers/models/llava_next/configuration_llava_next.py +2 -1
- transformers/models/llava_next/image_processing_llava_next_fast.py +6 -6
- transformers/models/llava_next/modeling_llava_next.py +61 -60
- transformers/models/llava_next_video/configuration_llava_next_video.py +10 -6
- transformers/models/llava_next_video/modeling_llava_next_video.py +115 -100
- transformers/models/llava_next_video/modular_llava_next_video.py +110 -101
- transformers/models/llava_onevision/configuration_llava_onevision.py +10 -6
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +8 -7
- transformers/models/llava_onevision/modeling_llava_onevision.py +111 -105
- transformers/models/llava_onevision/modular_llava_onevision.py +106 -101
- transformers/models/longcat_flash/configuration_longcat_flash.py +7 -10
- transformers/models/longcat_flash/modeling_longcat_flash.py +7 -7
- transformers/models/longcat_flash/modular_longcat_flash.py +6 -5
- transformers/models/longformer/configuration_longformer.py +4 -1
- transformers/models/longt5/configuration_longt5.py +9 -6
- transformers/models/longt5/modeling_longt5.py +2 -1
- transformers/models/luke/configuration_luke.py +8 -1
- transformers/models/lw_detr/configuration_lw_detr.py +19 -31
- transformers/models/lw_detr/modeling_lw_detr.py +43 -44
- transformers/models/lw_detr/modular_lw_detr.py +36 -38
- transformers/models/lxmert/configuration_lxmert.py +16 -0
- transformers/models/m2m_100/configuration_m2m_100.py +7 -8
- transformers/models/m2m_100/modeling_m2m_100.py +3 -3
- transformers/models/mamba/configuration_mamba.py +5 -2
- transformers/models/mamba/modeling_mamba.py +18 -26
- transformers/models/mamba2/configuration_mamba2.py +5 -7
- transformers/models/mamba2/modeling_mamba2.py +22 -33
- transformers/models/marian/configuration_marian.py +10 -4
- transformers/models/marian/modeling_marian.py +4 -4
- transformers/models/markuplm/configuration_markuplm.py +4 -6
- transformers/models/markuplm/modeling_markuplm.py +3 -3
- transformers/models/mask2former/configuration_mask2former.py +12 -47
- transformers/models/mask2former/image_processing_mask2former_fast.py +8 -8
- transformers/models/mask2former/modeling_mask2former.py +18 -12
- transformers/models/maskformer/configuration_maskformer.py +14 -45
- transformers/models/maskformer/configuration_maskformer_swin.py +2 -4
- transformers/models/maskformer/image_processing_maskformer_fast.py +8 -8
- transformers/models/maskformer/modeling_maskformer.py +15 -9
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -3
- transformers/models/mbart/configuration_mbart.py +9 -4
- transformers/models/mbart/modeling_mbart.py +9 -6
- transformers/models/megatron_bert/configuration_megatron_bert.py +13 -2
- transformers/models/megatron_bert/modeling_megatron_bert.py +0 -15
- transformers/models/metaclip_2/configuration_metaclip_2.py +4 -1
- transformers/models/metaclip_2/modeling_metaclip_2.py +49 -42
- transformers/models/metaclip_2/modular_metaclip_2.py +41 -25
- transformers/models/mgp_str/modeling_mgp_str.py +4 -2
- transformers/models/mimi/configuration_mimi.py +4 -0
- transformers/models/mimi/modeling_mimi.py +40 -36
- transformers/models/minimax/configuration_minimax.py +8 -11
- transformers/models/minimax/modeling_minimax.py +5 -5
- transformers/models/minimax/modular_minimax.py +9 -12
- transformers/models/minimax_m2/configuration_minimax_m2.py +8 -31
- transformers/models/minimax_m2/modeling_minimax_m2.py +4 -4
- transformers/models/minimax_m2/modular_minimax_m2.py +8 -31
- transformers/models/ministral/configuration_ministral.py +5 -7
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral/modular_ministral.py +5 -8
- transformers/models/ministral3/configuration_ministral3.py +4 -4
- transformers/models/ministral3/modeling_ministral3.py +4 -4
- transformers/models/ministral3/modular_ministral3.py +3 -3
- transformers/models/mistral/configuration_mistral.py +5 -7
- transformers/models/mistral/modeling_mistral.py +4 -4
- transformers/models/mistral/modular_mistral.py +3 -3
- transformers/models/mistral3/configuration_mistral3.py +4 -0
- transformers/models/mistral3/modeling_mistral3.py +36 -40
- transformers/models/mistral3/modular_mistral3.py +31 -32
- transformers/models/mixtral/configuration_mixtral.py +8 -11
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mlcd/modeling_mlcd.py +7 -5
- transformers/models/mlcd/modular_mlcd.py +7 -5
- transformers/models/mllama/configuration_mllama.py +5 -7
- transformers/models/mllama/image_processing_mllama_fast.py +6 -5
- transformers/models/mllama/modeling_mllama.py +19 -19
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +10 -45
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +66 -84
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +10 -45
- transformers/models/mobilebert/configuration_mobilebert.py +4 -1
- transformers/models/mobilebert/modeling_mobilebert.py +3 -3
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +4 -4
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +4 -2
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +4 -4
- transformers/models/mobilevit/modeling_mobilevit.py +4 -2
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -2
- transformers/models/modernbert/configuration_modernbert.py +46 -21
- transformers/models/modernbert/modeling_modernbert.py +146 -899
- transformers/models/modernbert/modular_modernbert.py +185 -908
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +21 -13
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -17
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +24 -23
- transformers/models/moonshine/configuration_moonshine.py +12 -7
- transformers/models/moonshine/modeling_moonshine.py +7 -7
- transformers/models/moonshine/modular_moonshine.py +19 -13
- transformers/models/moshi/configuration_moshi.py +28 -2
- transformers/models/moshi/modeling_moshi.py +4 -9
- transformers/models/mpnet/configuration_mpnet.py +6 -1
- transformers/models/mpt/configuration_mpt.py +16 -0
- transformers/models/mra/configuration_mra.py +8 -1
- transformers/models/mt5/configuration_mt5.py +9 -5
- transformers/models/mt5/modeling_mt5.py +5 -8
- transformers/models/musicgen/configuration_musicgen.py +12 -7
- transformers/models/musicgen/modeling_musicgen.py +6 -5
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +15 -7
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -17
- transformers/models/mvp/configuration_mvp.py +8 -4
- transformers/models/mvp/modeling_mvp.py +6 -4
- transformers/models/nanochat/configuration_nanochat.py +5 -7
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nanochat/modular_nanochat.py +4 -4
- transformers/models/nemotron/configuration_nemotron.py +5 -7
- transformers/models/nemotron/modeling_nemotron.py +4 -14
- transformers/models/nllb/tokenization_nllb.py +7 -5
- transformers/models/nllb_moe/configuration_nllb_moe.py +7 -9
- transformers/models/nllb_moe/modeling_nllb_moe.py +3 -3
- transformers/models/nougat/image_processing_nougat_fast.py +8 -8
- transformers/models/nystromformer/configuration_nystromformer.py +8 -1
- transformers/models/olmo/configuration_olmo.py +5 -7
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +3 -3
- transformers/models/olmo2/configuration_olmo2.py +9 -11
- transformers/models/olmo2/modeling_olmo2.py +4 -4
- transformers/models/olmo2/modular_olmo2.py +7 -7
- transformers/models/olmo3/configuration_olmo3.py +10 -11
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmo3/modular_olmo3.py +13 -14
- transformers/models/olmoe/configuration_olmoe.py +5 -7
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/olmoe/modular_olmoe.py +3 -3
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +14 -49
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +22 -18
- transformers/models/oneformer/configuration_oneformer.py +9 -46
- transformers/models/oneformer/image_processing_oneformer_fast.py +8 -8
- transformers/models/oneformer/modeling_oneformer.py +14 -9
- transformers/models/openai/configuration_openai.py +16 -0
- transformers/models/opt/configuration_opt.py +6 -6
- transformers/models/opt/modeling_opt.py +5 -5
- transformers/models/ovis2/configuration_ovis2.py +4 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +3 -3
- transformers/models/ovis2/modeling_ovis2.py +58 -99
- transformers/models/ovis2/modular_ovis2.py +52 -13
- transformers/models/owlv2/configuration_owlv2.py +4 -1
- transformers/models/owlv2/image_processing_owlv2_fast.py +5 -5
- transformers/models/owlv2/modeling_owlv2.py +40 -27
- transformers/models/owlv2/modular_owlv2.py +5 -5
- transformers/models/owlvit/configuration_owlvit.py +4 -1
- transformers/models/owlvit/modeling_owlvit.py +40 -27
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +9 -10
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +88 -87
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +82 -53
- transformers/models/paligemma/configuration_paligemma.py +4 -0
- transformers/models/paligemma/modeling_paligemma.py +30 -26
- transformers/models/parakeet/configuration_parakeet.py +2 -4
- transformers/models/parakeet/modeling_parakeet.py +3 -3
- transformers/models/parakeet/modular_parakeet.py +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +3 -3
- transformers/models/patchtst/modeling_patchtst.py +3 -3
- transformers/models/pe_audio/modeling_pe_audio.py +4 -4
- transformers/models/pe_audio/modular_pe_audio.py +1 -1
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +4 -4
- transformers/models/pe_audio_video/modular_pe_audio_video.py +4 -4
- transformers/models/pe_video/modeling_pe_video.py +36 -24
- transformers/models/pe_video/modular_pe_video.py +36 -23
- transformers/models/pegasus/configuration_pegasus.py +8 -5
- transformers/models/pegasus/modeling_pegasus.py +4 -4
- transformers/models/pegasus_x/configuration_pegasus_x.py +5 -3
- transformers/models/pegasus_x/modeling_pegasus_x.py +3 -3
- transformers/models/perceiver/image_processing_perceiver_fast.py +2 -2
- transformers/models/perceiver/modeling_perceiver.py +17 -9
- transformers/models/perception_lm/modeling_perception_lm.py +26 -27
- transformers/models/perception_lm/modular_perception_lm.py +27 -25
- transformers/models/persimmon/configuration_persimmon.py +5 -7
- transformers/models/persimmon/modeling_persimmon.py +5 -5
- transformers/models/phi/configuration_phi.py +8 -6
- transformers/models/phi/modeling_phi.py +4 -4
- transformers/models/phi/modular_phi.py +3 -3
- transformers/models/phi3/configuration_phi3.py +9 -11
- transformers/models/phi3/modeling_phi3.py +4 -4
- transformers/models/phi3/modular_phi3.py +3 -3
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +11 -13
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +4 -4
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +46 -61
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +44 -30
- transformers/models/phimoe/configuration_phimoe.py +5 -7
- transformers/models/phimoe/modeling_phimoe.py +15 -39
- transformers/models/phimoe/modular_phimoe.py +12 -7
- transformers/models/pix2struct/configuration_pix2struct.py +12 -9
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +5 -5
- transformers/models/pix2struct/modeling_pix2struct.py +14 -7
- transformers/models/pixio/configuration_pixio.py +2 -4
- transformers/models/pixio/modeling_pixio.py +9 -8
- transformers/models/pixio/modular_pixio.py +4 -2
- transformers/models/pixtral/image_processing_pixtral_fast.py +5 -5
- transformers/models/pixtral/modeling_pixtral.py +9 -12
- transformers/models/plbart/configuration_plbart.py +8 -5
- transformers/models/plbart/modeling_plbart.py +9 -7
- transformers/models/plbart/modular_plbart.py +1 -1
- transformers/models/poolformer/image_processing_poolformer_fast.py +7 -7
- transformers/models/pop2piano/configuration_pop2piano.py +7 -6
- transformers/models/pop2piano/modeling_pop2piano.py +2 -1
- transformers/models/pp_doclayout_v3/__init__.py +30 -0
- transformers/models/pp_doclayout_v3/configuration_pp_doclayout_v3.py +277 -0
- transformers/models/pp_doclayout_v3/image_processing_pp_doclayout_v3_fast.py +305 -0
- transformers/models/pp_doclayout_v3/modeling_pp_doclayout_v3.py +2083 -0
- transformers/models/pp_doclayout_v3/modular_pp_doclayout_v3.py +1549 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +12 -46
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +6 -6
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +8 -6
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +12 -10
- transformers/models/prophetnet/configuration_prophetnet.py +11 -10
- transformers/models/prophetnet/modeling_prophetnet.py +12 -23
- transformers/models/pvt/image_processing_pvt.py +7 -7
- transformers/models/pvt/image_processing_pvt_fast.py +1 -1
- transformers/models/pvt_v2/configuration_pvt_v2.py +2 -4
- transformers/models/pvt_v2/modeling_pvt_v2.py +6 -5
- transformers/models/qwen2/configuration_qwen2.py +14 -4
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/modular_qwen2.py +3 -3
- transformers/models/qwen2/tokenization_qwen2.py +0 -4
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +17 -5
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +108 -88
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +115 -87
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +7 -10
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +98 -53
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +18 -6
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +12 -12
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +14 -4
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_moe/modular_qwen2_moe.py +3 -3
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +7 -10
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +4 -6
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +97 -53
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +4 -6
- transformers/models/qwen3/configuration_qwen3.py +15 -5
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3/modular_qwen3.py +3 -3
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +20 -7
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/configuration_qwen3_next.py +16 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +5 -5
- transformers/models/qwen3_next/modular_qwen3_next.py +4 -4
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +55 -19
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +161 -98
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +107 -34
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +7 -6
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +115 -49
- transformers/models/qwen3_vl/modular_qwen3_vl.py +88 -37
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +7 -6
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +173 -99
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +23 -7
- transformers/models/rag/configuration_rag.py +6 -6
- transformers/models/rag/modeling_rag.py +3 -3
- transformers/models/rag/retrieval_rag.py +1 -1
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +8 -6
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +4 -5
- transformers/models/reformer/configuration_reformer.py +7 -7
- transformers/models/rembert/configuration_rembert.py +8 -1
- transformers/models/rembert/modeling_rembert.py +0 -22
- transformers/models/resnet/configuration_resnet.py +2 -4
- transformers/models/resnet/modeling_resnet.py +6 -5
- transformers/models/roberta/configuration_roberta.py +11 -2
- transformers/models/roberta/modeling_roberta.py +6 -6
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +11 -2
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +6 -6
- transformers/models/roc_bert/configuration_roc_bert.py +8 -1
- transformers/models/roc_bert/modeling_roc_bert.py +6 -41
- transformers/models/roformer/configuration_roformer.py +13 -2
- transformers/models/roformer/modeling_roformer.py +0 -14
- transformers/models/rt_detr/configuration_rt_detr.py +8 -49
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +2 -4
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +24 -11
- transformers/models/rt_detr/modeling_rt_detr.py +578 -737
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +2 -3
- transformers/models/rt_detr/modular_rt_detr.py +1508 -6
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +12 -57
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +318 -453
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +25 -66
- transformers/models/rwkv/configuration_rwkv.py +2 -3
- transformers/models/rwkv/modeling_rwkv.py +0 -23
- transformers/models/sam/configuration_sam.py +2 -0
- transformers/models/sam/image_processing_sam_fast.py +4 -4
- transformers/models/sam/modeling_sam.py +13 -8
- transformers/models/sam/processing_sam.py +3 -3
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +56 -52
- transformers/models/sam2/modular_sam2.py +47 -55
- transformers/models/sam2_video/modeling_sam2_video.py +50 -51
- transformers/models/sam2_video/modular_sam2_video.py +12 -10
- transformers/models/sam3/modeling_sam3.py +43 -47
- transformers/models/sam3/processing_sam3.py +8 -4
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +1 -2
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +50 -49
- transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/processing_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +50 -49
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +10 -22
- transformers/models/sam3_video/modeling_sam3_video.py +27 -14
- transformers/models/sam_hq/configuration_sam_hq.py +2 -0
- transformers/models/sam_hq/modeling_sam_hq.py +13 -9
- transformers/models/sam_hq/modular_sam_hq.py +6 -6
- transformers/models/sam_hq/processing_sam_hq.py +7 -6
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +8 -9
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +8 -9
- transformers/models/seed_oss/configuration_seed_oss.py +7 -9
- transformers/models/seed_oss/modeling_seed_oss.py +4 -4
- transformers/models/seed_oss/modular_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +4 -4
- transformers/models/segformer/modeling_segformer.py +4 -2
- transformers/models/segformer/modular_segformer.py +3 -3
- transformers/models/seggpt/modeling_seggpt.py +20 -8
- transformers/models/sew/configuration_sew.py +4 -1
- transformers/models/sew/modeling_sew.py +9 -5
- transformers/models/sew/modular_sew.py +2 -1
- transformers/models/sew_d/configuration_sew_d.py +4 -1
- transformers/models/sew_d/modeling_sew_d.py +4 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +4 -4
- transformers/models/siglip/configuration_siglip.py +4 -1
- transformers/models/siglip/modeling_siglip.py +27 -71
- transformers/models/siglip2/__init__.py +1 -0
- transformers/models/siglip2/configuration_siglip2.py +4 -2
- transformers/models/siglip2/image_processing_siglip2_fast.py +2 -2
- transformers/models/siglip2/modeling_siglip2.py +37 -78
- transformers/models/siglip2/modular_siglip2.py +74 -25
- transformers/models/siglip2/tokenization_siglip2.py +95 -0
- transformers/models/smollm3/configuration_smollm3.py +6 -6
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smollm3/modular_smollm3.py +9 -9
- transformers/models/smolvlm/configuration_smolvlm.py +1 -3
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +29 -3
- transformers/models/smolvlm/modeling_smolvlm.py +75 -46
- transformers/models/smolvlm/modular_smolvlm.py +36 -23
- transformers/models/smolvlm/video_processing_smolvlm.py +9 -9
- transformers/models/solar_open/__init__.py +27 -0
- transformers/models/solar_open/configuration_solar_open.py +184 -0
- transformers/models/solar_open/modeling_solar_open.py +642 -0
- transformers/models/solar_open/modular_solar_open.py +224 -0
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +6 -4
- transformers/models/speech_to_text/configuration_speech_to_text.py +9 -8
- transformers/models/speech_to_text/modeling_speech_to_text.py +3 -3
- transformers/models/speecht5/configuration_speecht5.py +7 -8
- transformers/models/splinter/configuration_splinter.py +6 -6
- transformers/models/splinter/modeling_splinter.py +8 -3
- transformers/models/squeezebert/configuration_squeezebert.py +14 -1
- transformers/models/stablelm/configuration_stablelm.py +8 -6
- transformers/models/stablelm/modeling_stablelm.py +5 -5
- transformers/models/starcoder2/configuration_starcoder2.py +11 -5
- transformers/models/starcoder2/modeling_starcoder2.py +5 -5
- transformers/models/starcoder2/modular_starcoder2.py +4 -4
- transformers/models/superglue/configuration_superglue.py +4 -0
- transformers/models/superglue/image_processing_superglue_fast.py +4 -3
- transformers/models/superglue/modeling_superglue.py +9 -4
- transformers/models/superpoint/image_processing_superpoint_fast.py +3 -4
- transformers/models/superpoint/modeling_superpoint.py +4 -2
- transformers/models/swin/configuration_swin.py +2 -4
- transformers/models/swin/modeling_swin.py +11 -8
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +2 -2
- transformers/models/swin2sr/modeling_swin2sr.py +4 -2
- transformers/models/swinv2/configuration_swinv2.py +2 -4
- transformers/models/swinv2/modeling_swinv2.py +10 -7
- transformers/models/switch_transformers/configuration_switch_transformers.py +11 -6
- transformers/models/switch_transformers/modeling_switch_transformers.py +3 -3
- transformers/models/switch_transformers/modular_switch_transformers.py +3 -3
- transformers/models/t5/configuration_t5.py +9 -8
- transformers/models/t5/modeling_t5.py +5 -8
- transformers/models/t5gemma/configuration_t5gemma.py +10 -25
- transformers/models/t5gemma/modeling_t5gemma.py +9 -9
- transformers/models/t5gemma/modular_t5gemma.py +11 -24
- transformers/models/t5gemma2/configuration_t5gemma2.py +35 -48
- transformers/models/t5gemma2/modeling_t5gemma2.py +143 -100
- transformers/models/t5gemma2/modular_t5gemma2.py +152 -136
- transformers/models/table_transformer/configuration_table_transformer.py +18 -49
- transformers/models/table_transformer/modeling_table_transformer.py +27 -53
- transformers/models/tapas/configuration_tapas.py +12 -1
- transformers/models/tapas/modeling_tapas.py +1 -1
- transformers/models/tapas/tokenization_tapas.py +1 -0
- transformers/models/textnet/configuration_textnet.py +4 -6
- transformers/models/textnet/image_processing_textnet_fast.py +3 -3
- transformers/models/textnet/modeling_textnet.py +15 -14
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +3 -3
- transformers/models/timesfm/modeling_timesfm.py +5 -6
- transformers/models/timesfm/modular_timesfm.py +5 -6
- transformers/models/timm_backbone/configuration_timm_backbone.py +33 -7
- transformers/models/timm_backbone/modeling_timm_backbone.py +21 -24
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +9 -4
- transformers/models/trocr/configuration_trocr.py +11 -7
- transformers/models/trocr/modeling_trocr.py +4 -2
- transformers/models/tvp/configuration_tvp.py +10 -35
- transformers/models/tvp/image_processing_tvp_fast.py +6 -5
- transformers/models/tvp/modeling_tvp.py +1 -1
- transformers/models/udop/configuration_udop.py +16 -7
- transformers/models/udop/modeling_udop.py +10 -6
- transformers/models/umt5/configuration_umt5.py +8 -6
- transformers/models/umt5/modeling_umt5.py +7 -3
- transformers/models/unispeech/configuration_unispeech.py +4 -1
- transformers/models/unispeech/modeling_unispeech.py +7 -4
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +4 -1
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +7 -4
- transformers/models/upernet/configuration_upernet.py +8 -35
- transformers/models/upernet/modeling_upernet.py +1 -1
- transformers/models/vaultgemma/configuration_vaultgemma.py +5 -7
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/video_llama_3/configuration_video_llama_3.py +4 -0
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +4 -6
- transformers/models/video_llama_3/modeling_video_llama_3.py +85 -48
- transformers/models/video_llama_3/modular_video_llama_3.py +56 -43
- transformers/models/video_llama_3/video_processing_video_llama_3.py +29 -8
- transformers/models/video_llava/configuration_video_llava.py +4 -0
- transformers/models/video_llava/modeling_video_llava.py +87 -89
- transformers/models/videomae/modeling_videomae.py +4 -5
- transformers/models/vilt/configuration_vilt.py +4 -1
- transformers/models/vilt/image_processing_vilt_fast.py +6 -6
- transformers/models/vilt/modeling_vilt.py +27 -12
- transformers/models/vipllava/configuration_vipllava.py +4 -0
- transformers/models/vipllava/modeling_vipllava.py +57 -31
- transformers/models/vipllava/modular_vipllava.py +50 -24
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +10 -6
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +27 -20
- transformers/models/visual_bert/configuration_visual_bert.py +6 -1
- transformers/models/vit/configuration_vit.py +2 -2
- transformers/models/vit/modeling_vit.py +7 -5
- transformers/models/vit_mae/modeling_vit_mae.py +11 -7
- transformers/models/vit_msn/modeling_vit_msn.py +11 -7
- transformers/models/vitdet/configuration_vitdet.py +2 -4
- transformers/models/vitdet/modeling_vitdet.py +2 -3
- transformers/models/vitmatte/configuration_vitmatte.py +6 -35
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +2 -2
- transformers/models/vitmatte/modeling_vitmatte.py +1 -1
- transformers/models/vitpose/configuration_vitpose.py +6 -43
- transformers/models/vitpose/modeling_vitpose.py +5 -3
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +2 -4
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +5 -6
- transformers/models/vits/configuration_vits.py +4 -0
- transformers/models/vits/modeling_vits.py +9 -7
- transformers/models/vivit/modeling_vivit.py +4 -4
- transformers/models/vjepa2/modeling_vjepa2.py +9 -9
- transformers/models/voxtral/configuration_voxtral.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +25 -24
- transformers/models/voxtral/modular_voxtral.py +26 -20
- transformers/models/wav2vec2/configuration_wav2vec2.py +4 -1
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -4
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +4 -1
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +4 -1
- transformers/models/wavlm/configuration_wavlm.py +4 -1
- transformers/models/wavlm/modeling_wavlm.py +4 -1
- transformers/models/whisper/configuration_whisper.py +6 -4
- transformers/models/whisper/generation_whisper.py +0 -1
- transformers/models/whisper/modeling_whisper.py +3 -3
- transformers/models/x_clip/configuration_x_clip.py +4 -1
- transformers/models/x_clip/modeling_x_clip.py +26 -27
- transformers/models/xglm/configuration_xglm.py +9 -7
- transformers/models/xlm/configuration_xlm.py +10 -7
- transformers/models/xlm/modeling_xlm.py +1 -1
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +11 -2
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +6 -6
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +10 -1
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +6 -6
- transformers/models/xlnet/configuration_xlnet.py +3 -1
- transformers/models/xlstm/configuration_xlstm.py +5 -7
- transformers/models/xlstm/modeling_xlstm.py +0 -32
- transformers/models/xmod/configuration_xmod.py +11 -2
- transformers/models/xmod/modeling_xmod.py +13 -16
- transformers/models/yolos/image_processing_yolos_fast.py +25 -28
- transformers/models/yolos/modeling_yolos.py +7 -7
- transformers/models/yolos/modular_yolos.py +16 -16
- transformers/models/yoso/configuration_yoso.py +8 -1
- transformers/models/youtu/__init__.py +27 -0
- transformers/models/youtu/configuration_youtu.py +194 -0
- transformers/models/youtu/modeling_youtu.py +619 -0
- transformers/models/youtu/modular_youtu.py +254 -0
- transformers/models/zamba/configuration_zamba.py +5 -7
- transformers/models/zamba/modeling_zamba.py +25 -56
- transformers/models/zamba2/configuration_zamba2.py +8 -13
- transformers/models/zamba2/modeling_zamba2.py +53 -78
- transformers/models/zamba2/modular_zamba2.py +36 -29
- transformers/models/zoedepth/configuration_zoedepth.py +17 -40
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +9 -9
- transformers/models/zoedepth/modeling_zoedepth.py +5 -3
- transformers/pipelines/__init__.py +1 -61
- transformers/pipelines/any_to_any.py +1 -1
- transformers/pipelines/automatic_speech_recognition.py +0 -2
- transformers/pipelines/base.py +1 -1
- transformers/pipelines/image_text_to_text.py +1 -1
- transformers/pipelines/text_to_audio.py +5 -1
- transformers/processing_utils.py +35 -44
- transformers/pytorch_utils.py +2 -26
- transformers/quantizers/quantizer_compressed_tensors.py +7 -5
- transformers/quantizers/quantizer_fbgemm_fp8.py +20 -23
- transformers/quantizers/quantizer_finegrained_fp8.py +14 -20
- transformers/quantizers/quantizer_mxfp4.py +1 -1
- transformers/quantizers/quantizer_torchao.py +0 -16
- transformers/safetensors_conversion.py +11 -4
- transformers/testing_utils.py +3 -28
- transformers/tokenization_mistral_common.py +9 -0
- transformers/tokenization_python.py +6 -4
- transformers/tokenization_utils_base.py +119 -219
- transformers/tokenization_utils_tokenizers.py +31 -2
- transformers/trainer.py +25 -33
- transformers/trainer_seq2seq.py +1 -1
- transformers/training_args.py +411 -417
- transformers/utils/__init__.py +1 -4
- transformers/utils/auto_docstring.py +15 -18
- transformers/utils/backbone_utils.py +13 -373
- transformers/utils/doc.py +4 -36
- transformers/utils/generic.py +69 -33
- transformers/utils/import_utils.py +72 -75
- transformers/utils/loading_report.py +133 -105
- transformers/utils/quantization_config.py +0 -21
- transformers/video_processing_utils.py +5 -5
- transformers/video_utils.py +3 -1
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/METADATA +118 -237
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/RECORD +1019 -994
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/WHEEL +1 -1
- transformers/pipelines/deprecated/text2text_generation.py +0 -408
- transformers/pipelines/image_to_text.py +0 -189
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,8 @@ def spawn_conversion(token: str, private: bool, model_id: str):
|
|
|
28
28
|
|
|
29
29
|
def start(_sse_connection):
|
|
30
30
|
for line in _sse_connection.iter_lines():
|
|
31
|
-
|
|
31
|
+
if not isinstance(line, str):
|
|
32
|
+
line = line.decode()
|
|
32
33
|
if line.startswith("event:"):
|
|
33
34
|
status = line[7:]
|
|
34
35
|
logger.debug(f"Safetensors conversion status: {status}")
|
|
@@ -83,7 +84,13 @@ def get_conversion_pr_reference(api: HfApi, model_id: str, **kwargs):
|
|
|
83
84
|
return sha
|
|
84
85
|
|
|
85
86
|
|
|
86
|
-
def auto_conversion(
|
|
87
|
+
def auto_conversion(
|
|
88
|
+
pretrained_model_name_or_path: str,
|
|
89
|
+
ignore_errors_during_conversion: bool = False,
|
|
90
|
+
safe_weights_name: str = "model.safetensors",
|
|
91
|
+
safe_weights_index_name: str = "model.safetensors.index.json",
|
|
92
|
+
**cached_file_kwargs,
|
|
93
|
+
):
|
|
87
94
|
try:
|
|
88
95
|
api = HfApi(token=cached_file_kwargs.get("token"), headers={"user-agent": http_user_agent()})
|
|
89
96
|
sha = get_conversion_pr_reference(api, pretrained_model_name_or_path, **cached_file_kwargs)
|
|
@@ -97,11 +104,11 @@ def auto_conversion(pretrained_model_name_or_path: str, ignore_errors_during_con
|
|
|
97
104
|
# description.
|
|
98
105
|
sharded = api.file_exists(
|
|
99
106
|
pretrained_model_name_or_path,
|
|
100
|
-
|
|
107
|
+
safe_weights_index_name,
|
|
101
108
|
revision=sha,
|
|
102
109
|
token=cached_file_kwargs.get("token"),
|
|
103
110
|
)
|
|
104
|
-
filename =
|
|
111
|
+
filename = safe_weights_index_name if sharded else safe_weights_name
|
|
105
112
|
|
|
106
113
|
resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
|
|
107
114
|
return resolved_archive_file, sha, sharded
|
transformers/testing_utils.py
CHANGED
|
@@ -48,7 +48,6 @@ from unittest import mock
|
|
|
48
48
|
from unittest.mock import patch
|
|
49
49
|
|
|
50
50
|
import httpx
|
|
51
|
-
import urllib3
|
|
52
51
|
from huggingface_hub import create_repo, delete_repo
|
|
53
52
|
from packaging import version
|
|
54
53
|
|
|
@@ -97,7 +96,6 @@ from .utils import (
|
|
|
97
96
|
is_flute_available,
|
|
98
97
|
is_fp_quant_available,
|
|
99
98
|
is_fsdp_available,
|
|
100
|
-
is_ftfy_available,
|
|
101
99
|
is_g2p_en_available,
|
|
102
100
|
is_galore_torch_available,
|
|
103
101
|
is_gguf_available,
|
|
@@ -106,7 +104,6 @@ from .utils import (
|
|
|
106
104
|
is_hadamard_available,
|
|
107
105
|
is_hqq_available,
|
|
108
106
|
is_huggingface_hub_greater_or_equal,
|
|
109
|
-
is_ipex_available,
|
|
110
107
|
is_jinja_available,
|
|
111
108
|
is_jmespath_available,
|
|
112
109
|
is_jumanpp_available,
|
|
@@ -678,21 +675,6 @@ def require_torchcodec(test_case):
|
|
|
678
675
|
return unittest.skipUnless(is_torchcodec_available(), "test requires Torchcodec")(test_case)
|
|
679
676
|
|
|
680
677
|
|
|
681
|
-
def require_intel_extension_for_pytorch(test_case):
|
|
682
|
-
"""
|
|
683
|
-
Decorator marking a test that requires Intel Extension for PyTorch.
|
|
684
|
-
|
|
685
|
-
These tests are skipped when Intel Extension for PyTorch isn't installed or it does not match current PyTorch
|
|
686
|
-
version.
|
|
687
|
-
|
|
688
|
-
"""
|
|
689
|
-
return unittest.skipUnless(
|
|
690
|
-
is_ipex_available(),
|
|
691
|
-
"test requires Intel Extension for PyTorch to be installed and match current PyTorch version, see"
|
|
692
|
-
" https://github.com/intel/intel-extension-for-pytorch",
|
|
693
|
-
)(test_case)
|
|
694
|
-
|
|
695
|
-
|
|
696
678
|
def require_torchaudio(test_case):
|
|
697
679
|
"""
|
|
698
680
|
Decorator marking a test that requires torchaudio. These tests are skipped when torchaudio isn't installed.
|
|
@@ -767,13 +749,6 @@ def require_vision(test_case):
|
|
|
767
749
|
return unittest.skipUnless(is_vision_available(), "test requires vision")(test_case)
|
|
768
750
|
|
|
769
751
|
|
|
770
|
-
def require_ftfy(test_case):
|
|
771
|
-
"""
|
|
772
|
-
Decorator marking a test that requires ftfy. These tests are skipped when ftfy isn't installed.
|
|
773
|
-
"""
|
|
774
|
-
return unittest.skipUnless(is_ftfy_available(), "test requires ftfy")(test_case)
|
|
775
|
-
|
|
776
|
-
|
|
777
752
|
def require_spacy(test_case):
|
|
778
753
|
"""
|
|
779
754
|
Decorator marking a test that requires SpaCy. These tests are skipped when SpaCy isn't installed.
|
|
@@ -903,9 +878,7 @@ def require_torch_xpu(test_case):
|
|
|
903
878
|
"""
|
|
904
879
|
Decorator marking a test that requires XPU (in PyTorch).
|
|
905
880
|
|
|
906
|
-
These tests are skipped when XPU backend is not available.
|
|
907
|
-
PyTorch (>=2.4) or via Intel Extension for PyTorch. In the latter case, if IPEX is installed, its version
|
|
908
|
-
must match match current PyTorch version.
|
|
881
|
+
These tests are skipped when XPU backend is not available.
|
|
909
882
|
"""
|
|
910
883
|
return unittest.skipUnless(is_torch_xpu_available(), "test requires XPU device")(test_case)
|
|
911
884
|
|
|
@@ -2515,6 +2488,8 @@ class RequestCounter:
|
|
|
2515
2488
|
|
|
2516
2489
|
return wrap
|
|
2517
2490
|
|
|
2491
|
+
import urllib3
|
|
2492
|
+
|
|
2518
2493
|
self.patcher = patch.object(
|
|
2519
2494
|
urllib3.connectionpool.log, "debug", side_effect=patched_with_thread_info(urllib3.connectionpool.log.debug)
|
|
2520
2495
|
)
|
|
@@ -268,6 +268,15 @@ class MistralCommonBackend(PreTrainedTokenizerBase):
|
|
|
268
268
|
if kwargs and not set(kwargs.keys()).issubset(_VALID_INIT_KWARGS):
|
|
269
269
|
raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported to init `MistralCommonBackend`.")
|
|
270
270
|
|
|
271
|
+
self.init_kwargs = {
|
|
272
|
+
"tokenizer_path": tokenizer_path,
|
|
273
|
+
"mode": mode,
|
|
274
|
+
"model_max_length": model_max_length,
|
|
275
|
+
"padding_side": padding_side,
|
|
276
|
+
"truncation_side": truncation_side,
|
|
277
|
+
"model_input_names": model_input_names,
|
|
278
|
+
"clean_up_tokenization_spaces": clean_up_tokenization_spaces,
|
|
279
|
+
}
|
|
271
280
|
self._tokenizer_path = Path(tokenizer_path)
|
|
272
281
|
self._mode = self._get_validation_mode(mode)
|
|
273
282
|
|
|
@@ -415,6 +415,9 @@ class PythonBackend(PreTrainedTokenizerBase):
|
|
|
415
415
|
|
|
416
416
|
self.tokens_trie = Trie()
|
|
417
417
|
|
|
418
|
+
# Initialize total_vocab_size early to avoid issues if get_vocab() is called early (custom tokenizers)
|
|
419
|
+
self.total_vocab_size = 0
|
|
420
|
+
|
|
418
421
|
# 2. init `_added_tokens_decoder` if child class did not
|
|
419
422
|
if not hasattr(self, "_added_tokens_decoder"):
|
|
420
423
|
self._added_tokens_decoder: dict[int, AddedToken] = {}
|
|
@@ -439,9 +442,6 @@ class PythonBackend(PreTrainedTokenizerBase):
|
|
|
439
442
|
# 7. init the parent class
|
|
440
443
|
super().__init__(**kwargs)
|
|
441
444
|
|
|
442
|
-
if self._added_tokens_decoder:
|
|
443
|
-
self._update_total_vocab_size()
|
|
444
|
-
|
|
445
445
|
# 4. If some of the special tokens are not part of the vocab, we add them, at the end.
|
|
446
446
|
# V5: the order of addition follows self.SPECIAL_TOKENS_ATTRIBUTES, then extra special tokens
|
|
447
447
|
# Note: _add_tokens will automatically skip tokens that are already in the base vocab
|
|
@@ -449,7 +449,6 @@ class PythonBackend(PreTrainedTokenizerBase):
|
|
|
449
449
|
[token for token in self.all_special_tokens if token not in self._added_tokens_encoder],
|
|
450
450
|
special_tokens=True,
|
|
451
451
|
)
|
|
452
|
-
self._update_total_vocab_size()
|
|
453
452
|
|
|
454
453
|
@property
|
|
455
454
|
def is_fast(self) -> bool:
|
|
@@ -501,6 +500,9 @@ class PythonBackend(PreTrainedTokenizerBase):
|
|
|
501
500
|
"""
|
|
502
501
|
Size of the full vocabulary with the added tokens.
|
|
503
502
|
"""
|
|
503
|
+
# Lazy evaluation: compute if not already set (e.g., during initialization)
|
|
504
|
+
if self.total_vocab_size == 0:
|
|
505
|
+
self._update_total_vocab_size()
|
|
504
506
|
return self.total_vocab_size
|
|
505
507
|
|
|
506
508
|
def _update_total_vocab_size(self):
|
|
@@ -990,14 +990,13 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
990
990
|
if hasattr(self, key) and callable(getattr(self, key)):
|
|
991
991
|
raise AttributeError(f"{key} conflicts with the method {key} in {self.__class__.__name__}")
|
|
992
992
|
|
|
993
|
+
# V5: Convert deprecated additional_special_tokens to extra_special_tokens before storing init_kwargs
|
|
994
|
+
if "additional_special_tokens" in kwargs and "extra_special_tokens" not in kwargs:
|
|
995
|
+
kwargs["extra_special_tokens"] = kwargs.pop("additional_special_tokens")
|
|
996
|
+
|
|
993
997
|
self.init_kwargs = copy.deepcopy(kwargs)
|
|
994
998
|
self.name_or_path = kwargs.pop("name_or_path", "")
|
|
995
999
|
self._processor_class = kwargs.pop("processor_class", None)
|
|
996
|
-
# Store additional_special_tokens in init_kwargs before conversion for backward compatibility
|
|
997
|
-
additional_special_tokens_value = kwargs.pop("additional_special_tokens", None)
|
|
998
|
-
if "additional_special_tokens" not in self.init_kwargs:
|
|
999
|
-
self.init_kwargs["additional_special_tokens"] = additional_special_tokens_value
|
|
1000
|
-
kwargs.setdefault("extra_special_tokens", additional_special_tokens_value)
|
|
1001
1000
|
|
|
1002
1001
|
self._pad_token_type_id = 0
|
|
1003
1002
|
self.verbose = kwargs.pop("verbose", False)
|
|
@@ -1025,21 +1024,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1025
1024
|
else:
|
|
1026
1025
|
raise TypeError(f"Special token {key} has to be either str or AddedToken but got: {type(value)}")
|
|
1027
1026
|
elif key == "extra_special_tokens":
|
|
1028
|
-
# V5: Support extra_special_tokens in __init__
|
|
1029
1027
|
value = kwargs.pop(key)
|
|
1030
1028
|
if value is None:
|
|
1031
1029
|
continue
|
|
1032
|
-
# If dict: treat as model specific named special tokens (attributes)
|
|
1033
1030
|
if isinstance(value, dict):
|
|
1034
1031
|
self._set_model_specific_special_tokens(special_tokens=value)
|
|
1035
|
-
|
|
1036
|
-
if not isinstance(value, (list, tuple)) or not all(
|
|
1037
|
-
isinstance(t, (str, AddedToken)) for t in value
|
|
1038
|
-
):
|
|
1039
|
-
raise TypeError(
|
|
1040
|
-
"extra_special_tokens must be a list/tuple of str or AddedToken, or a dict mapping names to tokens"
|
|
1041
|
-
)
|
|
1032
|
+
elif isinstance(value, (list, tuple)):
|
|
1042
1033
|
self._extra_special_tokens = list(value)
|
|
1034
|
+
else:
|
|
1035
|
+
raise TypeError("extra_special_tokens must be a list/tuple of tokens or a dict of named tokens")
|
|
1043
1036
|
elif (
|
|
1044
1037
|
key.endswith("_token")
|
|
1045
1038
|
and key not in self.SPECIAL_TOKENS_ATTRIBUTES
|
|
@@ -1163,8 +1156,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1163
1156
|
# V5: Allowed keys are SPECIAL_TOKENS_ATTRIBUTES + "extra_special_tokens"
|
|
1164
1157
|
# Backward compatibility: convert "additional_special_tokens" to "extra_special_tokens"
|
|
1165
1158
|
special_tokens_dict = dict(special_tokens_dict)
|
|
1166
|
-
if "additional_special_tokens" in special_tokens_dict
|
|
1167
|
-
special_tokens_dict
|
|
1159
|
+
if "additional_special_tokens" in special_tokens_dict:
|
|
1160
|
+
special_tokens_dict.setdefault(
|
|
1161
|
+
"extra_special_tokens", special_tokens_dict.pop("additional_special_tokens")
|
|
1162
|
+
)
|
|
1168
1163
|
|
|
1169
1164
|
allowed_keys = set(self.SPECIAL_TOKENS_ATTRIBUTES) | {"extra_special_tokens"}
|
|
1170
1165
|
tokens_to_add = []
|
|
@@ -1251,81 +1246,50 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1251
1246
|
return self._pad_token_type_id
|
|
1252
1247
|
|
|
1253
1248
|
def __setattr__(self, key, value):
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1249
|
+
# Handle _id/_ids suffix (eg. bos_token_id -> bos_token)
|
|
1250
|
+
key_without_id = key.removesuffix("_ids").removesuffix("_id") if key.endswith(("_id", "_ids")) else key
|
|
1251
|
+
|
|
1252
|
+
# Named special tokens (bos_token, eos_token, etc.)
|
|
1253
|
+
if key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES:
|
|
1254
|
+
if key != key_without_id and value is not None:
|
|
1255
|
+
value = self.convert_ids_to_tokens(value)
|
|
1256
|
+
if value is not None and not isinstance(value, (str, AddedToken)):
|
|
1257
|
+
raise ValueError(f"Cannot set a non-string value as the {key_without_id}")
|
|
1258
|
+
self._special_tokens_map[key_without_id] = value
|
|
1259
|
+
return
|
|
1258
1260
|
|
|
1259
|
-
#
|
|
1260
|
-
if
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
key = key_without_id
|
|
1268
|
-
|
|
1269
|
-
if not isinstance(value, (str, AddedToken)) and value is not None:
|
|
1270
|
-
raise ValueError(f"Cannot set a non-string value as the {key}")
|
|
1271
|
-
self._special_tokens_map[key] = value
|
|
1272
|
-
# Check if this is extra_special_tokens or extra_special_tokens_ids
|
|
1273
|
-
elif self.__dict__.get("_extra_special_tokens", None) is not None and key_without_id == "extra_special_tokens":
|
|
1274
|
-
if key_is_special_id:
|
|
1275
|
-
if value is not None:
|
|
1276
|
-
value = [self.convert_ids_to_tokens(val) for val in value]
|
|
1277
|
-
key = key_without_id
|
|
1261
|
+
# Extra special tokens: model-specific special tokens without standard names (eg. <mask_1>)
|
|
1262
|
+
if key_without_id == "extra_special_tokens":
|
|
1263
|
+
if key != key_without_id and value is not None and isinstance(value, (list, tuple)):
|
|
1264
|
+
value = [self.convert_ids_to_tokens(v) for v in value]
|
|
1265
|
+
if not isinstance(value, (list, tuple)) and value is not None:
|
|
1266
|
+
raise ValueError(f"extra_special_tokens must be a list or tuple, got {type(value)}")
|
|
1267
|
+
self._extra_special_tokens = [] if value is None else list(value)
|
|
1268
|
+
return
|
|
1278
1269
|
|
|
1279
|
-
|
|
1280
|
-
if value is None:
|
|
1281
|
-
self._extra_special_tokens = []
|
|
1282
|
-
elif isinstance(value, dict):
|
|
1283
|
-
# Dict is treated as model-specific special tokens (such as multimodal tokens)
|
|
1284
|
-
self._set_model_specific_special_tokens(special_tokens=value)
|
|
1285
|
-
elif isinstance(value, (list, tuple)):
|
|
1286
|
-
self._extra_special_tokens = list(value)
|
|
1287
|
-
else:
|
|
1288
|
-
raise ValueError(f"extra_special_tokens must be a list, tuple, or dict, got {type(value)}")
|
|
1289
|
-
else:
|
|
1290
|
-
super().__setattr__(key, value)
|
|
1270
|
+
super().__setattr__(key, value)
|
|
1291
1271
|
|
|
1292
1272
|
def __getattr__(self, key):
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
value = _special_tokens_map[key_without_id]
|
|
1310
|
-
return str(value)
|
|
1311
|
-
else:
|
|
1312
|
-
attr_as_tokens = getattr(self, key_without_id)
|
|
1313
|
-
return self.convert_tokens_to_ids(attr_as_tokens) if attr_as_tokens is not None else None
|
|
1314
|
-
|
|
1315
|
-
# Check if this is extra_special_tokens or extra_special_tokens_ids
|
|
1316
|
-
elif key_without_id == "extra_special_tokens":
|
|
1317
|
-
if self.__dict__.get("_extra_special_tokens", None) is not None:
|
|
1318
|
-
if not key_is_special_id:
|
|
1319
|
-
return [str(tok) for tok in self.__dict__["_extra_special_tokens"]]
|
|
1320
|
-
else:
|
|
1321
|
-
# extra_special_tokens_ids
|
|
1322
|
-
tokens = self.__dict__["_extra_special_tokens"]
|
|
1323
|
-
return self.convert_tokens_to_ids([str(tok) for tok in tokens]) if tokens else []
|
|
1273
|
+
# Handle _id/_ids suffix (eg. bos_token_id -> bos_token)
|
|
1274
|
+
key_without_id = key.removesuffix("_ids").removesuffix("_id") if key.endswith(("_id", "_ids")) else key
|
|
1275
|
+
|
|
1276
|
+
# Named special tokens (bos_token, eos_token, etc.)
|
|
1277
|
+
if key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES:
|
|
1278
|
+
token_value = self._special_tokens_map.get(key_without_id)
|
|
1279
|
+
if token_value is None:
|
|
1280
|
+
if self.verbose:
|
|
1281
|
+
logger.error(f"Using {key}, but it is not set yet.")
|
|
1282
|
+
return None
|
|
1283
|
+
return self.convert_tokens_to_ids(str(token_value)) if key != key_without_id else str(token_value)
|
|
1284
|
+
|
|
1285
|
+
# Extra special tokens
|
|
1286
|
+
if key_without_id == "extra_special_tokens":
|
|
1287
|
+
tokens = [str(tok) for tok in self._extra_special_tokens]
|
|
1288
|
+
return self.convert_tokens_to_ids(tokens) if key != key_without_id else tokens
|
|
1324
1289
|
|
|
1325
1290
|
if key not in self.__dict__:
|
|
1326
1291
|
raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
|
|
1327
|
-
|
|
1328
|
-
return super().__getattr__(key)
|
|
1292
|
+
return super().__getattr__(key)
|
|
1329
1293
|
|
|
1330
1294
|
def get_special_tokens_mask(
|
|
1331
1295
|
self, token_ids_0: list[int], token_ids_1: list[int] | None = None, already_has_special_tokens: bool = False
|
|
@@ -1607,6 +1571,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1607
1571
|
|
|
1608
1572
|
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
|
|
1609
1573
|
vocab_files = {}
|
|
1574
|
+
additional_files_names = {}
|
|
1610
1575
|
init_configuration = {}
|
|
1611
1576
|
|
|
1612
1577
|
is_local = os.path.isdir(pretrained_model_name_or_path)
|
|
@@ -1648,29 +1613,26 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1648
1613
|
# Check for versioned tokenizer files
|
|
1649
1614
|
if "tokenizer_file" in vocab_files:
|
|
1650
1615
|
fast_tokenizer_file = FULL_TOKENIZER_FILE
|
|
1651
|
-
|
|
1652
|
-
|
|
1653
|
-
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
|
|
1672
|
-
except Exception:
|
|
1673
|
-
pass
|
|
1616
|
+
resolved_config_file = cached_file(
|
|
1617
|
+
pretrained_model_name_or_path,
|
|
1618
|
+
TOKENIZER_CONFIG_FILE,
|
|
1619
|
+
cache_dir=cache_dir,
|
|
1620
|
+
force_download=force_download,
|
|
1621
|
+
proxies=proxies,
|
|
1622
|
+
token=token,
|
|
1623
|
+
revision=revision,
|
|
1624
|
+
local_files_only=local_files_only,
|
|
1625
|
+
subfolder=subfolder,
|
|
1626
|
+
user_agent=user_agent,
|
|
1627
|
+
_raise_exceptions_for_missing_entries=False,
|
|
1628
|
+
_commit_hash=commit_hash,
|
|
1629
|
+
)
|
|
1630
|
+
if resolved_config_file is not None:
|
|
1631
|
+
with open(resolved_config_file, encoding="utf-8") as reader:
|
|
1632
|
+
tokenizer_config = json.load(reader)
|
|
1633
|
+
if "fast_tokenizer_files" in tokenizer_config:
|
|
1634
|
+
fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
|
|
1635
|
+
commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
|
|
1674
1636
|
vocab_files["tokenizer_file"] = fast_tokenizer_file
|
|
1675
1637
|
|
|
1676
1638
|
# This block looks for any extra chat template files
|
|
@@ -1819,52 +1781,25 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1819
1781
|
if isinstance(init_kwargs["auto_map"], (tuple, list)):
|
|
1820
1782
|
init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
|
|
1821
1783
|
|
|
1822
|
-
# Preserve extra_special_tokens from tokenizer_config.json before updating with kwargs
|
|
1823
|
-
# extra_special_tokens should be a list (user-defined extra tokens)
|
|
1824
|
-
extra_special_tokens_from_config = init_kwargs.get("extra_special_tokens")
|
|
1825
|
-
if isinstance(extra_special_tokens_from_config, (list, tuple)):
|
|
1826
|
-
extra_special_tokens_from_config = list(extra_special_tokens_from_config)
|
|
1827
|
-
else:
|
|
1828
|
-
extra_special_tokens_from_config = None
|
|
1829
|
-
|
|
1830
1784
|
# Update with newly provided kwargs
|
|
1831
1785
|
init_kwargs.update(kwargs)
|
|
1832
1786
|
|
|
1833
|
-
# V5:
|
|
1834
|
-
if "additional_special_tokens" in init_kwargs
|
|
1835
|
-
init_kwargs
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
)
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
#
|
|
1845
|
-
if
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
for key in list(init_kwargs.keys())
|
|
1850
|
-
if key not in default_attrs
|
|
1851
|
-
and key.endswith("_token")
|
|
1852
|
-
and isinstance(init_kwargs[key], (str, AddedToken))
|
|
1853
|
-
}
|
|
1854
|
-
if model_specific_tokens:
|
|
1855
|
-
# If extra_special_tokens is already a list, we need to preserve it
|
|
1856
|
-
if "extra_special_tokens" in init_kwargs and isinstance(
|
|
1857
|
-
init_kwargs["extra_special_tokens"], (list, tuple)
|
|
1858
|
-
):
|
|
1859
|
-
# Keep the list as is, but also add model-specific tokens as a separate dict
|
|
1860
|
-
# Convert to model_specific_special_tokens so __init__ handles it
|
|
1861
|
-
init_kwargs["model_specific_special_tokens"] = model_specific_tokens
|
|
1862
|
-
else:
|
|
1863
|
-
init_kwargs["extra_special_tokens"] = model_specific_tokens
|
|
1864
|
-
elif isinstance(init_kwargs.get("extra_special_tokens"), dict):
|
|
1865
|
-
# If extra_special_tokens is already a dict, convert it to model_specific_special_tokens
|
|
1866
|
-
# so __init__ handles it properly
|
|
1867
|
-
init_kwargs["model_specific_special_tokens"] = init_kwargs.pop("extra_special_tokens")
|
|
1787
|
+
# V5: Convert deprecated additional_special_tokens to extra_special_tokens
|
|
1788
|
+
if "additional_special_tokens" in init_kwargs:
|
|
1789
|
+
init_kwargs.setdefault("extra_special_tokens", init_kwargs.pop("additional_special_tokens"))
|
|
1790
|
+
|
|
1791
|
+
# V5: Collect model-specific tokens (custom *_token keys not in standard attributes)
|
|
1792
|
+
default_attrs = set(cls.SPECIAL_TOKENS_ATTRIBUTES)
|
|
1793
|
+
model_specific_tokens = {
|
|
1794
|
+
key: init_kwargs.pop(key)
|
|
1795
|
+
for key in list(init_kwargs.keys())
|
|
1796
|
+
if key not in default_attrs and key.endswith("_token") and isinstance(init_kwargs[key], (str, AddedToken))
|
|
1797
|
+
}
|
|
1798
|
+
# If extra_special_tokens is a dict, merge it into model_specific_tokens
|
|
1799
|
+
if isinstance(init_kwargs.get("extra_special_tokens"), dict):
|
|
1800
|
+
model_specific_tokens.update(init_kwargs.pop("extra_special_tokens"))
|
|
1801
|
+
if model_specific_tokens:
|
|
1802
|
+
init_kwargs["model_specific_special_tokens"] = model_specific_tokens
|
|
1868
1803
|
|
|
1869
1804
|
# Merge resolved_vocab_files arguments in init_kwargs.
|
|
1870
1805
|
added_tokens_file = resolved_vocab_files.pop("added_tokens_file", None)
|
|
@@ -1893,82 +1828,45 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1893
1828
|
f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
|
|
1894
1829
|
)
|
|
1895
1830
|
else:
|
|
1896
|
-
#
|
|
1831
|
+
# Legacy: read special_tokens_map.json and merge into init_kwargs
|
|
1897
1832
|
if special_tokens_map_file is not None:
|
|
1898
|
-
with open(special_tokens_map_file, encoding="utf-8") as
|
|
1899
|
-
special_tokens_map = json.load(
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
# Dict format for model-specific tokens - keep as is
|
|
1920
|
-
init_kwargs[key] = value
|
|
1921
|
-
continue
|
|
1922
|
-
elif isinstance(value, list):
|
|
1923
|
-
# List format - merge with existing if present
|
|
1924
|
-
existing = init_kwargs.pop("extra_special_tokens", []) or []
|
|
1925
|
-
if not isinstance(existing, (list, tuple)):
|
|
1926
|
-
existing = []
|
|
1927
|
-
for token in value:
|
|
1928
|
-
if isinstance(token, dict):
|
|
1929
|
-
token = AddedToken(**token, special=True)
|
|
1930
|
-
if token not in existing:
|
|
1931
|
-
existing.append(token)
|
|
1932
|
-
init_kwargs[key] = existing
|
|
1933
|
-
continue
|
|
1934
|
-
init_kwargs[key] = value
|
|
1935
|
-
|
|
1936
|
-
# Restore extra_special_tokens from tokenizer_config.json if not in special_tokens_map.json
|
|
1937
|
-
if (
|
|
1938
|
-
"extra_special_tokens" not in special_tokens_map
|
|
1939
|
-
and extra_special_tokens_before_map is not None
|
|
1940
|
-
):
|
|
1941
|
-
if "extra_special_tokens" not in init_kwargs or not isinstance(
|
|
1942
|
-
init_kwargs.get("extra_special_tokens"), (list, tuple)
|
|
1943
|
-
):
|
|
1944
|
-
init_kwargs["extra_special_tokens"] = extra_special_tokens_before_map
|
|
1945
|
-
|
|
1946
|
-
# Convert extra_special_tokens dict to model_specific_special_tokens if it's a dict
|
|
1947
|
-
if isinstance(init_kwargs.get("extra_special_tokens"), dict):
|
|
1948
|
-
init_kwargs["model_specific_special_tokens"] = init_kwargs.pop("extra_special_tokens")
|
|
1833
|
+
with open(special_tokens_map_file, encoding="utf-8") as f:
|
|
1834
|
+
special_tokens_map = json.load(f)
|
|
1835
|
+
for key, value in special_tokens_map.items():
|
|
1836
|
+
if key in kwargs and kwargs[key]:
|
|
1837
|
+
continue # User-provided kwargs take precedence
|
|
1838
|
+
if isinstance(value, dict) and key != "extra_special_tokens":
|
|
1839
|
+
value = AddedToken(**value, special=True)
|
|
1840
|
+
elif key == "extra_special_tokens" and isinstance(value, list):
|
|
1841
|
+
# Merge list tokens, converting dicts to AddedToken
|
|
1842
|
+
existing = list(init_kwargs.get("extra_special_tokens") or [])
|
|
1843
|
+
for tok in value:
|
|
1844
|
+
tok = AddedToken(**tok, special=True) if isinstance(tok, dict) else tok
|
|
1845
|
+
if tok not in existing:
|
|
1846
|
+
existing.append(tok)
|
|
1847
|
+
value = existing
|
|
1848
|
+
init_kwargs[key] = value
|
|
1849
|
+
# Convert dict extra_special_tokens to model_specific_special_tokens
|
|
1850
|
+
if isinstance(init_kwargs.get("extra_special_tokens"), dict):
|
|
1851
|
+
init_kwargs.setdefault("model_specific_special_tokens", {}).update(
|
|
1852
|
+
init_kwargs.pop("extra_special_tokens")
|
|
1853
|
+
)
|
|
1949
1854
|
|
|
1950
1855
|
# slow -> slow|fast, legacy: convert the `"added_tokens.json"` file to `added_tokens_decoder`.
|
|
1951
1856
|
# this is for legacy purpose. We don't add the tokens after init for efficiency.
|
|
1952
1857
|
if added_tokens_file is not None:
|
|
1953
|
-
special_tokens = []
|
|
1954
1858
|
# V5: Check both named and extra special tokens
|
|
1955
|
-
for
|
|
1956
|
-
|
|
1957
|
-
special_tokens.append(str(init_kwargs[key]))
|
|
1859
|
+
special_tokens = {str(init_kwargs[k]) for k in cls.SPECIAL_TOKENS_ATTRIBUTES if init_kwargs.get(k)}
|
|
1860
|
+
special_tokens.update(str(t) for t in (init_kwargs.get("extra_special_tokens") or []))
|
|
1958
1861
|
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
special_tokens += [str(token) for token in init_kwargs["extra_special_tokens"]]
|
|
1962
|
-
|
|
1963
|
-
with open(added_tokens_file, encoding="utf-8") as added_tokens_handle:
|
|
1964
|
-
added_tok_encoder = json.load(added_tokens_handle)
|
|
1862
|
+
with open(added_tokens_file, encoding="utf-8") as f:
|
|
1863
|
+
added_tok_encoder = json.load(f)
|
|
1965
1864
|
for str_token, index in added_tok_encoder.items():
|
|
1966
|
-
|
|
1967
|
-
special = str_token in special_tokens
|
|
1865
|
+
is_special = str_token in special_tokens
|
|
1968
1866
|
added_tokens_decoder[index] = AddedToken(
|
|
1969
|
-
str_token, rstrip=False, lstrip=False, normalized=not
|
|
1867
|
+
str_token, rstrip=False, lstrip=False, normalized=not is_special, special=is_special
|
|
1970
1868
|
)
|
|
1971
|
-
added_tokens_map[
|
|
1869
|
+
added_tokens_map[str_token] = added_tokens_decoder[index]
|
|
1972
1870
|
|
|
1973
1871
|
# allows converting a fast -> slow: add the `tokenizer.json`'s `"added_tokens"` to the slow tokenizer
|
|
1974
1872
|
# if `tokenizer_config.json` is `None`
|
|
@@ -3450,7 +3348,8 @@ def find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
|
|
|
3450
3348
|
):
|
|
3451
3349
|
return candidate
|
|
3452
3350
|
except Exception:
|
|
3453
|
-
|
|
3351
|
+
# TODO: tighten to OSError / ProxyError
|
|
3352
|
+
continue
|
|
3454
3353
|
|
|
3455
3354
|
subfolder = kwargs.get("subfolder", "")
|
|
3456
3355
|
local_files_only = kwargs.get("local_files_only", False)
|
|
@@ -3480,8 +3379,9 @@ def find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
|
|
|
3480
3379
|
for entry in entries:
|
|
3481
3380
|
if entry.path.endswith(".model"):
|
|
3482
3381
|
return entry.path if not subfolder else entry.path.removeprefix(f"{subfolder}/")
|
|
3483
|
-
except Exception:
|
|
3484
|
-
|
|
3382
|
+
except Exception as e:
|
|
3383
|
+
# TODO: tighten exception class
|
|
3384
|
+
logger.debug(f"Could not list Hub repository files: {e}")
|
|
3485
3385
|
|
|
3486
3386
|
return None
|
|
3487
3387
|
|