transformers-5.0.0rc2-py3-none-any.whl → transformers-5.1.0-py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
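The per-file `+added -removed` listing below can be approximated locally from the two wheels. The following is a minimal sketch, not the registry's own tooling: it assumes both wheel files have already been downloaded to the working directory, and the two filenames are placeholders taken from the header above.

```python
# Sketch: reproduce a file-level "+added -removed" listing for two wheels.
# Assumes the wheels are present locally; paths below are placeholders.
import difflib
import zipfile

OLD_WHEEL = "transformers-5.0.0rc2-py3-none-any.whl"  # assumed local path
NEW_WHEEL = "transformers-5.1.0-py3-none-any.whl"     # assumed local path


def read_lines(wheel: zipfile.ZipFile, name: str) -> list[str]:
    """Return a wheel member decoded as text lines, or [] if the file is absent."""
    if name not in wheel.namelist():
        return []
    return wheel.read(name).decode("utf-8", errors="replace").splitlines()


with zipfile.ZipFile(OLD_WHEEL) as old, zipfile.ZipFile(NEW_WHEEL) as new:
    # Union of Python sources in either wheel, so added and removed files appear too.
    names = sorted(
        n for n in set(old.namelist()) | set(new.namelist()) if n.endswith(".py")
    )
    for name in names:
        added = removed = 0
        diff = difflib.unified_diff(
            read_lines(old, name), read_lines(new, name), lineterm=""
        )
        for line in diff:
            if line.startswith("+") and not line.startswith("+++"):
                added += 1
            elif line.startswith("-") and not line.startswith("---"):
                removed += 1
        if added or removed:
            print(f"- {name} +{added} -{removed}")
```

Exact counts may differ from the registry's figures depending on diff settings (context size, rename detection, handling of non-Python files), so treat the output as an approximation of the listing that follows.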
- transformers/__init__.py +11 -37
- transformers/activations.py +2 -2
- transformers/audio_utils.py +32 -32
- transformers/backbone_utils.py +326 -0
- transformers/cache_utils.py +26 -126
- transformers/cli/chat.py +3 -3
- transformers/cli/serve.py +13 -10
- transformers/cli/transformers.py +2 -1
- transformers/configuration_utils.py +22 -92
- transformers/conversion_mapping.py +150 -26
- transformers/convert_slow_tokenizer.py +9 -12
- transformers/core_model_loading.py +217 -129
- transformers/data/processors/glue.py +0 -1
- transformers/data/processors/utils.py +0 -1
- transformers/data/processors/xnli.py +0 -1
- transformers/dependency_versions_check.py +0 -1
- transformers/dependency_versions_table.py +10 -11
- transformers/distributed/configuration_utils.py +1 -2
- transformers/dynamic_module_utils.py +23 -23
- transformers/feature_extraction_sequence_utils.py +19 -23
- transformers/feature_extraction_utils.py +14 -14
- transformers/file_utils.py +0 -2
- transformers/generation/candidate_generator.py +2 -4
- transformers/generation/configuration_utils.py +54 -39
- transformers/generation/continuous_batching/__init__.py +0 -1
- transformers/generation/continuous_batching/cache.py +74 -44
- transformers/generation/continuous_batching/cache_manager.py +28 -28
- transformers/generation/continuous_batching/continuous_api.py +133 -414
- transformers/generation/continuous_batching/input_ouputs.py +464 -0
- transformers/generation/continuous_batching/requests.py +77 -19
- transformers/generation/continuous_batching/scheduler.py +154 -104
- transformers/generation/logits_process.py +10 -133
- transformers/generation/stopping_criteria.py +1 -2
- transformers/generation/streamers.py +0 -1
- transformers/generation/utils.py +91 -121
- transformers/generation/watermarking.py +2 -3
- transformers/hf_argparser.py +9 -13
- transformers/hyperparameter_search.py +1 -2
- transformers/image_processing_base.py +9 -9
- transformers/image_processing_utils.py +11 -15
- transformers/image_processing_utils_fast.py +70 -71
- transformers/image_transforms.py +73 -42
- transformers/image_utils.py +30 -37
- transformers/initialization.py +57 -0
- transformers/integrations/__init__.py +10 -24
- transformers/integrations/accelerate.py +47 -11
- transformers/integrations/awq.py +1 -3
- transformers/integrations/deepspeed.py +146 -4
- transformers/integrations/eetq.py +0 -1
- transformers/integrations/executorch.py +2 -6
- transformers/integrations/fbgemm_fp8.py +1 -2
- transformers/integrations/finegrained_fp8.py +149 -13
- transformers/integrations/flash_attention.py +3 -8
- transformers/integrations/flex_attention.py +1 -1
- transformers/integrations/fp_quant.py +4 -6
- transformers/integrations/ggml.py +0 -1
- transformers/integrations/hub_kernels.py +18 -7
- transformers/integrations/integration_utils.py +2 -3
- transformers/integrations/moe.py +226 -106
- transformers/integrations/mxfp4.py +52 -40
- transformers/integrations/peft.py +488 -176
- transformers/integrations/quark.py +2 -4
- transformers/integrations/tensor_parallel.py +641 -581
- transformers/integrations/torchao.py +4 -6
- transformers/loss/loss_lw_detr.py +356 -0
- transformers/loss/loss_utils.py +2 -0
- transformers/masking_utils.py +199 -59
- transformers/model_debugging_utils.py +4 -5
- transformers/modelcard.py +14 -192
- transformers/modeling_attn_mask_utils.py +19 -19
- transformers/modeling_flash_attention_utils.py +28 -29
- transformers/modeling_gguf_pytorch_utils.py +5 -5
- transformers/modeling_layers.py +21 -22
- transformers/modeling_outputs.py +242 -253
- transformers/modeling_rope_utils.py +32 -32
- transformers/modeling_utils.py +416 -438
- transformers/models/__init__.py +10 -0
- transformers/models/afmoe/configuration_afmoe.py +40 -33
- transformers/models/afmoe/modeling_afmoe.py +38 -41
- transformers/models/afmoe/modular_afmoe.py +23 -25
- transformers/models/aimv2/configuration_aimv2.py +2 -10
- transformers/models/aimv2/modeling_aimv2.py +46 -45
- transformers/models/aimv2/modular_aimv2.py +13 -19
- transformers/models/albert/configuration_albert.py +8 -2
- transformers/models/albert/modeling_albert.py +70 -72
- transformers/models/albert/tokenization_albert.py +1 -4
- transformers/models/align/configuration_align.py +8 -6
- transformers/models/align/modeling_align.py +83 -86
- transformers/models/align/processing_align.py +2 -30
- transformers/models/altclip/configuration_altclip.py +4 -7
- transformers/models/altclip/modeling_altclip.py +106 -103
- transformers/models/altclip/processing_altclip.py +2 -15
- transformers/models/apertus/__init__.py +0 -1
- transformers/models/apertus/configuration_apertus.py +23 -28
- transformers/models/apertus/modeling_apertus.py +35 -38
- transformers/models/apertus/modular_apertus.py +36 -40
- transformers/models/arcee/configuration_arcee.py +25 -30
- transformers/models/arcee/modeling_arcee.py +35 -38
- transformers/models/arcee/modular_arcee.py +20 -23
- transformers/models/aria/configuration_aria.py +31 -44
- transformers/models/aria/image_processing_aria.py +25 -27
- transformers/models/aria/modeling_aria.py +102 -102
- transformers/models/aria/modular_aria.py +111 -124
- transformers/models/aria/processing_aria.py +28 -35
- transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -1
- transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +3 -6
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +9 -11
- transformers/models/audioflamingo3/__init__.py +0 -1
- transformers/models/audioflamingo3/configuration_audioflamingo3.py +0 -1
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +60 -52
- transformers/models/audioflamingo3/modular_audioflamingo3.py +52 -43
- transformers/models/audioflamingo3/processing_audioflamingo3.py +6 -8
- transformers/models/auto/auto_factory.py +12 -11
- transformers/models/auto/configuration_auto.py +48 -5
- transformers/models/auto/feature_extraction_auto.py +5 -7
- transformers/models/auto/image_processing_auto.py +30 -39
- transformers/models/auto/modeling_auto.py +33 -199
- transformers/models/auto/processing_auto.py +11 -19
- transformers/models/auto/tokenization_auto.py +38 -37
- transformers/models/auto/video_processing_auto.py +7 -8
- transformers/models/autoformer/configuration_autoformer.py +4 -7
- transformers/models/autoformer/modeling_autoformer.py +100 -101
- transformers/models/aya_vision/configuration_aya_vision.py +4 -1
- transformers/models/aya_vision/modeling_aya_vision.py +64 -99
- transformers/models/aya_vision/modular_aya_vision.py +46 -74
- transformers/models/aya_vision/processing_aya_vision.py +25 -53
- transformers/models/bamba/configuration_bamba.py +46 -39
- transformers/models/bamba/modeling_bamba.py +83 -119
- transformers/models/bamba/modular_bamba.py +70 -109
- transformers/models/bark/configuration_bark.py +6 -8
- transformers/models/bark/generation_configuration_bark.py +3 -5
- transformers/models/bark/modeling_bark.py +64 -65
- transformers/models/bark/processing_bark.py +19 -41
- transformers/models/bart/configuration_bart.py +9 -5
- transformers/models/bart/modeling_bart.py +124 -129
- transformers/models/barthez/tokenization_barthez.py +1 -4
- transformers/models/bartpho/tokenization_bartpho.py +6 -7
- transformers/models/beit/configuration_beit.py +2 -15
- transformers/models/beit/image_processing_beit.py +53 -56
- transformers/models/beit/image_processing_beit_fast.py +11 -12
- transformers/models/beit/modeling_beit.py +65 -62
- transformers/models/bert/configuration_bert.py +12 -2
- transformers/models/bert/modeling_bert.py +117 -152
- transformers/models/bert/tokenization_bert.py +2 -4
- transformers/models/bert/tokenization_bert_legacy.py +3 -5
- transformers/models/bert_generation/configuration_bert_generation.py +17 -2
- transformers/models/bert_generation/modeling_bert_generation.py +53 -55
- transformers/models/bert_generation/tokenization_bert_generation.py +2 -3
- transformers/models/bert_japanese/tokenization_bert_japanese.py +5 -6
- transformers/models/bertweet/tokenization_bertweet.py +1 -3
- transformers/models/big_bird/configuration_big_bird.py +12 -9
- transformers/models/big_bird/modeling_big_bird.py +107 -124
- transformers/models/big_bird/tokenization_big_bird.py +1 -4
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -9
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +118 -118
- transformers/models/biogpt/configuration_biogpt.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +73 -79
- transformers/models/biogpt/modular_biogpt.py +60 -66
- transformers/models/biogpt/tokenization_biogpt.py +3 -5
- transformers/models/bit/configuration_bit.py +2 -5
- transformers/models/bit/image_processing_bit.py +21 -24
- transformers/models/bit/image_processing_bit_fast.py +0 -1
- transformers/models/bit/modeling_bit.py +15 -16
- transformers/models/bitnet/configuration_bitnet.py +23 -28
- transformers/models/bitnet/modeling_bitnet.py +34 -38
- transformers/models/bitnet/modular_bitnet.py +7 -10
- transformers/models/blenderbot/configuration_blenderbot.py +8 -5
- transformers/models/blenderbot/modeling_blenderbot.py +68 -99
- transformers/models/blenderbot/tokenization_blenderbot.py +0 -1
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +8 -5
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +70 -72
- transformers/models/blenderbot_small/tokenization_blenderbot_small.py +1 -3
- transformers/models/blip/configuration_blip.py +9 -10
- transformers/models/blip/image_processing_blip.py +17 -20
- transformers/models/blip/image_processing_blip_fast.py +0 -1
- transformers/models/blip/modeling_blip.py +115 -108
- transformers/models/blip/modeling_blip_text.py +63 -65
- transformers/models/blip/processing_blip.py +5 -36
- transformers/models/blip_2/configuration_blip_2.py +2 -2
- transformers/models/blip_2/modeling_blip_2.py +145 -121
- transformers/models/blip_2/processing_blip_2.py +8 -38
- transformers/models/bloom/configuration_bloom.py +5 -2
- transformers/models/bloom/modeling_bloom.py +60 -60
- transformers/models/blt/configuration_blt.py +94 -86
- transformers/models/blt/modeling_blt.py +93 -90
- transformers/models/blt/modular_blt.py +127 -69
- transformers/models/bridgetower/configuration_bridgetower.py +7 -2
- transformers/models/bridgetower/image_processing_bridgetower.py +34 -35
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +13 -14
- transformers/models/bridgetower/modeling_bridgetower.py +136 -124
- transformers/models/bridgetower/processing_bridgetower.py +2 -16
- transformers/models/bros/configuration_bros.py +24 -18
- transformers/models/bros/modeling_bros.py +78 -80
- transformers/models/bros/processing_bros.py +2 -12
- transformers/models/byt5/tokenization_byt5.py +4 -6
- transformers/models/camembert/configuration_camembert.py +8 -2
- transformers/models/camembert/modeling_camembert.py +97 -99
- transformers/models/camembert/modular_camembert.py +51 -54
- transformers/models/camembert/tokenization_camembert.py +1 -4
- transformers/models/canine/configuration_canine.py +4 -2
- transformers/models/canine/modeling_canine.py +73 -75
- transformers/models/canine/tokenization_canine.py +0 -1
- transformers/models/chameleon/configuration_chameleon.py +29 -34
- transformers/models/chameleon/image_processing_chameleon.py +21 -24
- transformers/models/chameleon/image_processing_chameleon_fast.py +5 -6
- transformers/models/chameleon/modeling_chameleon.py +135 -92
- transformers/models/chameleon/processing_chameleon.py +16 -41
- transformers/models/chinese_clip/configuration_chinese_clip.py +10 -8
- transformers/models/chinese_clip/image_processing_chinese_clip.py +21 -24
- transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +0 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +93 -95
- transformers/models/chinese_clip/processing_chinese_clip.py +2 -15
- transformers/models/clap/configuration_clap.py +4 -9
- transformers/models/clap/feature_extraction_clap.py +9 -10
- transformers/models/clap/modeling_clap.py +109 -111
- transformers/models/clap/processing_clap.py +2 -15
- transformers/models/clip/configuration_clip.py +4 -2
- transformers/models/clip/image_processing_clip.py +21 -24
- transformers/models/clip/image_processing_clip_fast.py +9 -1
- transformers/models/clip/modeling_clip.py +70 -68
- transformers/models/clip/processing_clip.py +2 -14
- transformers/models/clip/tokenization_clip.py +2 -5
- transformers/models/clipseg/configuration_clipseg.py +4 -2
- transformers/models/clipseg/modeling_clipseg.py +113 -112
- transformers/models/clipseg/processing_clipseg.py +19 -42
- transformers/models/clvp/configuration_clvp.py +15 -5
- transformers/models/clvp/feature_extraction_clvp.py +7 -10
- transformers/models/clvp/modeling_clvp.py +138 -145
- transformers/models/clvp/number_normalizer.py +1 -2
- transformers/models/clvp/processing_clvp.py +3 -20
- transformers/models/clvp/tokenization_clvp.py +0 -1
- transformers/models/code_llama/tokenization_code_llama.py +3 -6
- transformers/models/codegen/configuration_codegen.py +4 -4
- transformers/models/codegen/modeling_codegen.py +50 -49
- transformers/models/codegen/tokenization_codegen.py +5 -6
- transformers/models/cohere/configuration_cohere.py +25 -30
- transformers/models/cohere/modeling_cohere.py +39 -42
- transformers/models/cohere/modular_cohere.py +27 -31
- transformers/models/cohere/tokenization_cohere.py +5 -6
- transformers/models/cohere2/configuration_cohere2.py +27 -32
- transformers/models/cohere2/modeling_cohere2.py +38 -41
- transformers/models/cohere2/modular_cohere2.py +48 -52
- transformers/models/cohere2_vision/configuration_cohere2_vision.py +5 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +9 -10
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +52 -55
- transformers/models/cohere2_vision/modular_cohere2_vision.py +41 -43
- transformers/models/cohere2_vision/processing_cohere2_vision.py +6 -36
- transformers/models/colpali/configuration_colpali.py +0 -1
- transformers/models/colpali/modeling_colpali.py +14 -16
- transformers/models/colpali/modular_colpali.py +11 -51
- transformers/models/colpali/processing_colpali.py +14 -52
- transformers/models/colqwen2/modeling_colqwen2.py +27 -28
- transformers/models/colqwen2/modular_colqwen2.py +36 -74
- transformers/models/colqwen2/processing_colqwen2.py +16 -52
- transformers/models/conditional_detr/configuration_conditional_detr.py +19 -47
- transformers/models/conditional_detr/image_processing_conditional_detr.py +67 -70
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +50 -36
- transformers/models/conditional_detr/modeling_conditional_detr.py +851 -1001
- transformers/models/conditional_detr/modular_conditional_detr.py +901 -5
- transformers/models/convbert/configuration_convbert.py +11 -8
- transformers/models/convbert/modeling_convbert.py +85 -87
- transformers/models/convbert/tokenization_convbert.py +0 -1
- transformers/models/convnext/configuration_convnext.py +2 -5
- transformers/models/convnext/image_processing_convnext.py +18 -21
- transformers/models/convnext/image_processing_convnext_fast.py +7 -8
- transformers/models/convnext/modeling_convnext.py +12 -14
- transformers/models/convnextv2/configuration_convnextv2.py +2 -5
- transformers/models/convnextv2/modeling_convnextv2.py +12 -14
- transformers/models/cpm/tokenization_cpm.py +6 -7
- transformers/models/cpm/tokenization_cpm_fast.py +3 -5
- transformers/models/cpmant/configuration_cpmant.py +4 -1
- transformers/models/cpmant/modeling_cpmant.py +38 -40
- transformers/models/cpmant/tokenization_cpmant.py +1 -3
- transformers/models/csm/configuration_csm.py +58 -66
- transformers/models/csm/generation_csm.py +13 -14
- transformers/models/csm/modeling_csm.py +81 -84
- transformers/models/csm/modular_csm.py +56 -58
- transformers/models/csm/processing_csm.py +25 -68
- transformers/models/ctrl/configuration_ctrl.py +16 -1
- transformers/models/ctrl/modeling_ctrl.py +51 -66
- transformers/models/ctrl/tokenization_ctrl.py +0 -1
- transformers/models/cvt/configuration_cvt.py +0 -1
- transformers/models/cvt/modeling_cvt.py +13 -15
- transformers/models/cwm/__init__.py +0 -1
- transformers/models/cwm/configuration_cwm.py +8 -12
- transformers/models/cwm/modeling_cwm.py +36 -38
- transformers/models/cwm/modular_cwm.py +10 -12
- transformers/models/d_fine/configuration_d_fine.py +10 -57
- transformers/models/d_fine/modeling_d_fine.py +786 -927
- transformers/models/d_fine/modular_d_fine.py +339 -417
- transformers/models/dab_detr/configuration_dab_detr.py +22 -49
- transformers/models/dab_detr/modeling_dab_detr.py +79 -77
- transformers/models/dac/configuration_dac.py +0 -1
- transformers/models/dac/feature_extraction_dac.py +6 -9
- transformers/models/dac/modeling_dac.py +22 -24
- transformers/models/data2vec/configuration_data2vec_audio.py +4 -2
- transformers/models/data2vec/configuration_data2vec_text.py +11 -3
- transformers/models/data2vec/configuration_data2vec_vision.py +0 -1
- transformers/models/data2vec/modeling_data2vec_audio.py +55 -59
- transformers/models/data2vec/modeling_data2vec_text.py +97 -99
- transformers/models/data2vec/modeling_data2vec_vision.py +45 -44
- transformers/models/data2vec/modular_data2vec_audio.py +6 -1
- transformers/models/data2vec/modular_data2vec_text.py +51 -54
- transformers/models/dbrx/configuration_dbrx.py +29 -22
- transformers/models/dbrx/modeling_dbrx.py +45 -48
- transformers/models/dbrx/modular_dbrx.py +37 -39
- transformers/models/deberta/configuration_deberta.py +6 -1
- transformers/models/deberta/modeling_deberta.py +57 -60
- transformers/models/deberta/tokenization_deberta.py +2 -5
- transformers/models/deberta_v2/configuration_deberta_v2.py +6 -1
- transformers/models/deberta_v2/modeling_deberta_v2.py +63 -65
- transformers/models/deberta_v2/tokenization_deberta_v2.py +1 -4
- transformers/models/decision_transformer/configuration_decision_transformer.py +3 -2
- transformers/models/decision_transformer/modeling_decision_transformer.py +51 -53
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +41 -47
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +39 -41
- transformers/models/deepseek_v2/modular_deepseek_v2.py +48 -52
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +42 -48
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +38 -40
- transformers/models/deepseek_v3/modular_deepseek_v3.py +10 -10
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +6 -3
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +27 -28
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +12 -11
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +48 -43
- transformers/models/deepseek_vl/modular_deepseek_vl.py +15 -43
- transformers/models/deepseek_vl/processing_deepseek_vl.py +10 -41
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +7 -5
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +37 -37
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +22 -22
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +100 -56
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +141 -109
- transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +12 -44
- transformers/models/deformable_detr/configuration_deformable_detr.py +22 -46
- transformers/models/deformable_detr/image_processing_deformable_detr.py +59 -61
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +42 -28
- transformers/models/deformable_detr/modeling_deformable_detr.py +454 -652
- transformers/models/deformable_detr/modular_deformable_detr.py +1385 -5
- transformers/models/deit/configuration_deit.py +0 -1
- transformers/models/deit/image_processing_deit.py +18 -21
- transformers/models/deit/image_processing_deit_fast.py +0 -1
- transformers/models/deit/modeling_deit.py +27 -25
- transformers/models/depth_anything/configuration_depth_anything.py +12 -43
- transformers/models/depth_anything/modeling_depth_anything.py +10 -11
- transformers/models/depth_pro/configuration_depth_pro.py +0 -1
- transformers/models/depth_pro/image_processing_depth_pro.py +22 -23
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +8 -9
- transformers/models/depth_pro/modeling_depth_pro.py +29 -27
- transformers/models/detr/configuration_detr.py +18 -50
- transformers/models/detr/image_processing_detr.py +64 -66
- transformers/models/detr/image_processing_detr_fast.py +33 -34
- transformers/models/detr/modeling_detr.py +748 -789
- transformers/models/dia/configuration_dia.py +9 -15
- transformers/models/dia/feature_extraction_dia.py +6 -9
- transformers/models/dia/generation_dia.py +48 -53
- transformers/models/dia/modeling_dia.py +68 -71
- transformers/models/dia/modular_dia.py +56 -58
- transformers/models/dia/processing_dia.py +39 -29
- transformers/models/dia/tokenization_dia.py +3 -6
- transformers/models/diffllama/configuration_diffllama.py +25 -30
- transformers/models/diffllama/modeling_diffllama.py +45 -53
- transformers/models/diffllama/modular_diffllama.py +18 -25
- transformers/models/dinat/configuration_dinat.py +2 -5
- transformers/models/dinat/modeling_dinat.py +47 -48
- transformers/models/dinov2/configuration_dinov2.py +2 -5
- transformers/models/dinov2/modeling_dinov2.py +20 -21
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +3 -5
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +21 -21
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +11 -14
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +6 -11
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +5 -9
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +7 -12
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +7 -8
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +19 -22
- transformers/models/dinov3_vit/modular_dinov3_vit.py +16 -19
- transformers/models/distilbert/configuration_distilbert.py +8 -2
- transformers/models/distilbert/modeling_distilbert.py +47 -49
- transformers/models/distilbert/tokenization_distilbert.py +0 -1
- transformers/models/doge/__init__.py +0 -1
- transformers/models/doge/configuration_doge.py +42 -35
- transformers/models/doge/modeling_doge.py +46 -49
- transformers/models/doge/modular_doge.py +77 -68
- transformers/models/donut/configuration_donut_swin.py +0 -1
- transformers/models/donut/image_processing_donut.py +26 -29
- transformers/models/donut/image_processing_donut_fast.py +9 -14
- transformers/models/donut/modeling_donut_swin.py +44 -46
- transformers/models/donut/processing_donut.py +5 -26
- transformers/models/dots1/configuration_dots1.py +43 -36
- transformers/models/dots1/modeling_dots1.py +35 -38
- transformers/models/dots1/modular_dots1.py +0 -1
- transformers/models/dpr/configuration_dpr.py +19 -2
- transformers/models/dpr/modeling_dpr.py +37 -39
- transformers/models/dpr/tokenization_dpr.py +7 -9
- transformers/models/dpr/tokenization_dpr_fast.py +7 -9
- transformers/models/dpt/configuration_dpt.py +23 -66
- transformers/models/dpt/image_processing_dpt.py +65 -66
- transformers/models/dpt/image_processing_dpt_fast.py +18 -19
- transformers/models/dpt/modeling_dpt.py +38 -36
- transformers/models/dpt/modular_dpt.py +14 -15
- transformers/models/edgetam/configuration_edgetam.py +1 -2
- transformers/models/edgetam/modeling_edgetam.py +87 -89
- transformers/models/edgetam/modular_edgetam.py +7 -13
- transformers/models/edgetam_video/__init__.py +0 -1
- transformers/models/edgetam_video/configuration_edgetam_video.py +0 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +126 -128
- transformers/models/edgetam_video/modular_edgetam_video.py +25 -27
- transformers/models/efficientloftr/configuration_efficientloftr.py +4 -5
- transformers/models/efficientloftr/image_processing_efficientloftr.py +14 -16
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +8 -7
- transformers/models/efficientloftr/modeling_efficientloftr.py +46 -38
- transformers/models/efficientloftr/modular_efficientloftr.py +1 -3
- transformers/models/efficientnet/configuration_efficientnet.py +0 -1
- transformers/models/efficientnet/image_processing_efficientnet.py +23 -26
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +16 -17
- transformers/models/efficientnet/modeling_efficientnet.py +12 -14
- transformers/models/electra/configuration_electra.py +13 -3
- transformers/models/electra/modeling_electra.py +107 -109
- transformers/models/emu3/configuration_emu3.py +17 -17
- transformers/models/emu3/image_processing_emu3.py +44 -39
- transformers/models/emu3/modeling_emu3.py +143 -109
- transformers/models/emu3/modular_emu3.py +109 -73
- transformers/models/emu3/processing_emu3.py +18 -43
- transformers/models/encodec/configuration_encodec.py +2 -4
- transformers/models/encodec/feature_extraction_encodec.py +10 -13
- transformers/models/encodec/modeling_encodec.py +25 -29
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +12 -2
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +37 -43
- transformers/models/eomt/configuration_eomt.py +12 -14
- transformers/models/eomt/image_processing_eomt.py +53 -55
- transformers/models/eomt/image_processing_eomt_fast.py +18 -19
- transformers/models/eomt/modeling_eomt.py +19 -21
- transformers/models/eomt/modular_eomt.py +28 -30
- transformers/models/eomt_dinov3/__init__.py +28 -0
- transformers/models/eomt_dinov3/configuration_eomt_dinov3.py +204 -0
- transformers/models/eomt_dinov3/modeling_eomt_dinov3.py +1376 -0
- transformers/models/eomt_dinov3/modular_eomt_dinov3.py +454 -0
- transformers/models/ernie/configuration_ernie.py +24 -3
- transformers/models/ernie/modeling_ernie.py +127 -162
- transformers/models/ernie/modular_ernie.py +91 -103
- transformers/models/ernie4_5/configuration_ernie4_5.py +23 -27
- transformers/models/ernie4_5/modeling_ernie4_5.py +35 -37
- transformers/models/ernie4_5/modular_ernie4_5.py +1 -3
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +34 -39
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +40 -42
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +7 -9
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +17 -7
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +34 -35
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +6 -7
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +305 -267
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +163 -142
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +3 -5
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +17 -18
- transformers/models/esm/configuration_esm.py +11 -15
- transformers/models/esm/modeling_esm.py +35 -37
- transformers/models/esm/modeling_esmfold.py +43 -50
- transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
- transformers/models/esm/openfold_utils/loss.py +1 -2
- transformers/models/esm/openfold_utils/protein.py +15 -16
- transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
- transformers/models/esm/tokenization_esm.py +2 -4
- transformers/models/evolla/configuration_evolla.py +50 -40
- transformers/models/evolla/modeling_evolla.py +69 -68
- transformers/models/evolla/modular_evolla.py +50 -48
- transformers/models/evolla/processing_evolla.py +23 -35
- transformers/models/exaone4/configuration_exaone4.py +27 -27
- transformers/models/exaone4/modeling_exaone4.py +36 -39
- transformers/models/exaone4/modular_exaone4.py +51 -50
- transformers/models/exaone_moe/__init__.py +27 -0
- transformers/models/exaone_moe/configuration_exaone_moe.py +235 -0
- transformers/models/exaone_moe/modeling_exaone_moe.py +665 -0
- transformers/models/exaone_moe/modular_exaone_moe.py +373 -0
- transformers/models/falcon/configuration_falcon.py +31 -26
- transformers/models/falcon/modeling_falcon.py +76 -84
- transformers/models/falcon_h1/configuration_falcon_h1.py +57 -51
- transformers/models/falcon_h1/modeling_falcon_h1.py +74 -109
- transformers/models/falcon_h1/modular_falcon_h1.py +68 -100
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +5 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +64 -73
- transformers/models/falcon_mamba/modular_falcon_mamba.py +14 -13
- transformers/models/fast_vlm/configuration_fast_vlm.py +10 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +70 -97
- transformers/models/fast_vlm/modular_fast_vlm.py +148 -38
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +2 -6
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +45 -47
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -3
- transformers/models/flaubert/configuration_flaubert.py +10 -5
- transformers/models/flaubert/modeling_flaubert.py +125 -129
- transformers/models/flaubert/tokenization_flaubert.py +3 -5
- transformers/models/flava/configuration_flava.py +9 -9
- transformers/models/flava/image_processing_flava.py +66 -67
- transformers/models/flava/image_processing_flava_fast.py +46 -47
- transformers/models/flava/modeling_flava.py +144 -135
- transformers/models/flava/processing_flava.py +2 -12
- transformers/models/flex_olmo/__init__.py +0 -1
- transformers/models/flex_olmo/configuration_flex_olmo.py +34 -39
- transformers/models/flex_olmo/modeling_flex_olmo.py +41 -43
- transformers/models/flex_olmo/modular_flex_olmo.py +46 -51
- transformers/models/florence2/configuration_florence2.py +4 -1
- transformers/models/florence2/modeling_florence2.py +96 -72
- transformers/models/florence2/modular_florence2.py +100 -107
- transformers/models/florence2/processing_florence2.py +18 -47
- transformers/models/fnet/configuration_fnet.py +6 -2
- transformers/models/fnet/modeling_fnet.py +69 -80
- transformers/models/fnet/tokenization_fnet.py +0 -1
- transformers/models/focalnet/configuration_focalnet.py +2 -5
- transformers/models/focalnet/modeling_focalnet.py +49 -48
- transformers/models/fsmt/configuration_fsmt.py +12 -17
- transformers/models/fsmt/modeling_fsmt.py +47 -48
- transformers/models/fsmt/tokenization_fsmt.py +3 -5
- transformers/models/funnel/configuration_funnel.py +8 -1
- transformers/models/funnel/modeling_funnel.py +91 -93
- transformers/models/funnel/tokenization_funnel.py +2 -5
- transformers/models/fuyu/configuration_fuyu.py +28 -34
- transformers/models/fuyu/image_processing_fuyu.py +29 -31
- transformers/models/fuyu/image_processing_fuyu_fast.py +17 -17
- transformers/models/fuyu/modeling_fuyu.py +50 -52
- transformers/models/fuyu/processing_fuyu.py +9 -36
- transformers/models/gemma/configuration_gemma.py +25 -30
- transformers/models/gemma/modeling_gemma.py +36 -38
- transformers/models/gemma/modular_gemma.py +33 -36
- transformers/models/gemma/tokenization_gemma.py +3 -6
- transformers/models/gemma2/configuration_gemma2.py +30 -35
- transformers/models/gemma2/modeling_gemma2.py +38 -41
- transformers/models/gemma2/modular_gemma2.py +63 -67
- transformers/models/gemma3/configuration_gemma3.py +53 -48
- transformers/models/gemma3/image_processing_gemma3.py +29 -31
- transformers/models/gemma3/image_processing_gemma3_fast.py +11 -12
- transformers/models/gemma3/modeling_gemma3.py +123 -122
- transformers/models/gemma3/modular_gemma3.py +128 -125
- transformers/models/gemma3/processing_gemma3.py +5 -5
- transformers/models/gemma3n/configuration_gemma3n.py +42 -30
- transformers/models/gemma3n/feature_extraction_gemma3n.py +9 -11
- transformers/models/gemma3n/modeling_gemma3n.py +166 -147
- transformers/models/gemma3n/modular_gemma3n.py +176 -148
- transformers/models/gemma3n/processing_gemma3n.py +12 -26
- transformers/models/git/configuration_git.py +5 -8
- transformers/models/git/modeling_git.py +115 -127
- transformers/models/git/processing_git.py +2 -14
- transformers/models/glm/configuration_glm.py +26 -30
- transformers/models/glm/modeling_glm.py +36 -39
- transformers/models/glm/modular_glm.py +4 -7
- transformers/models/glm4/configuration_glm4.py +26 -30
- transformers/models/glm4/modeling_glm4.py +39 -41
- transformers/models/glm4/modular_glm4.py +8 -10
- transformers/models/glm46v/configuration_glm46v.py +4 -1
- transformers/models/glm46v/image_processing_glm46v.py +40 -38
- transformers/models/glm46v/image_processing_glm46v_fast.py +9 -9
- transformers/models/glm46v/modeling_glm46v.py +138 -93
- transformers/models/glm46v/modular_glm46v.py +5 -3
- transformers/models/glm46v/processing_glm46v.py +7 -41
- transformers/models/glm46v/video_processing_glm46v.py +9 -11
- transformers/models/glm4_moe/configuration_glm4_moe.py +42 -35
- transformers/models/glm4_moe/modeling_glm4_moe.py +36 -39
- transformers/models/glm4_moe/modular_glm4_moe.py +43 -36
- transformers/models/glm4_moe_lite/__init__.py +28 -0
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +233 -0
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +740 -0
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +302 -0
- transformers/models/glm4v/configuration_glm4v.py +25 -24
- transformers/models/glm4v/image_processing_glm4v.py +39 -38
- transformers/models/glm4v/image_processing_glm4v_fast.py +8 -9
- transformers/models/glm4v/modeling_glm4v.py +249 -210
- transformers/models/glm4v/modular_glm4v.py +211 -230
- transformers/models/glm4v/processing_glm4v.py +7 -41
- transformers/models/glm4v/video_processing_glm4v.py +9 -11
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +136 -127
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +348 -356
- transformers/models/glm4v_moe/modular_glm4v_moe.py +76 -174
- transformers/models/glm_image/__init__.py +31 -0
- transformers/models/glm_image/configuration_glm_image.py +358 -0
- transformers/models/glm_image/image_processing_glm_image.py +503 -0
- transformers/models/glm_image/image_processing_glm_image_fast.py +294 -0
- transformers/models/glm_image/modeling_glm_image.py +1691 -0
- transformers/models/glm_image/modular_glm_image.py +1640 -0
- transformers/models/glm_image/processing_glm_image.py +265 -0
- transformers/models/glm_ocr/__init__.py +28 -0
- transformers/models/glm_ocr/configuration_glm_ocr.py +312 -0
- transformers/models/glm_ocr/modeling_glm_ocr.py +1633 -0
- transformers/models/glm_ocr/modular_glm_ocr.py +428 -0
- transformers/models/glmasr/__init__.py +0 -1
- transformers/models/glmasr/configuration_glmasr.py +0 -1
- transformers/models/glmasr/modeling_glmasr.py +51 -46
- transformers/models/glmasr/modular_glmasr.py +39 -29
- transformers/models/glmasr/processing_glmasr.py +7 -8
- transformers/models/glpn/configuration_glpn.py +0 -1
- transformers/models/glpn/image_processing_glpn.py +11 -12
- transformers/models/glpn/image_processing_glpn_fast.py +11 -12
- transformers/models/glpn/modeling_glpn.py +14 -14
- transformers/models/got_ocr2/configuration_got_ocr2.py +10 -13
- transformers/models/got_ocr2/image_processing_got_ocr2.py +22 -24
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +9 -10
- transformers/models/got_ocr2/modeling_got_ocr2.py +69 -77
- transformers/models/got_ocr2/modular_got_ocr2.py +60 -52
- transformers/models/got_ocr2/processing_got_ocr2.py +42 -63
- transformers/models/gpt2/configuration_gpt2.py +13 -2
- transformers/models/gpt2/modeling_gpt2.py +111 -113
- transformers/models/gpt2/tokenization_gpt2.py +6 -9
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +7 -2
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +78 -84
- transformers/models/gpt_neo/configuration_gpt_neo.py +9 -2
- transformers/models/gpt_neo/modeling_gpt_neo.py +66 -71
- transformers/models/gpt_neox/configuration_gpt_neox.py +27 -25
- transformers/models/gpt_neox/modeling_gpt_neox.py +74 -76
- transformers/models/gpt_neox/modular_gpt_neox.py +68 -70
- transformers/models/gpt_neox/tokenization_gpt_neox.py +2 -5
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +24 -19
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +43 -46
- transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +1 -3
- transformers/models/gpt_oss/configuration_gpt_oss.py +31 -30
- transformers/models/gpt_oss/modeling_gpt_oss.py +80 -114
- transformers/models/gpt_oss/modular_gpt_oss.py +62 -97
- transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
- transformers/models/gptj/configuration_gptj.py +4 -5
- transformers/models/gptj/modeling_gptj.py +85 -88
- transformers/models/granite/configuration_granite.py +28 -33
- transformers/models/granite/modeling_granite.py +43 -45
- transformers/models/granite/modular_granite.py +29 -31
- transformers/models/granite_speech/configuration_granite_speech.py +0 -1
- transformers/models/granite_speech/feature_extraction_granite_speech.py +1 -3
- transformers/models/granite_speech/modeling_granite_speech.py +84 -60
- transformers/models/granite_speech/processing_granite_speech.py +11 -4
- transformers/models/granitemoe/configuration_granitemoe.py +31 -36
- transformers/models/granitemoe/modeling_granitemoe.py +39 -41
- transformers/models/granitemoe/modular_granitemoe.py +21 -23
- transformers/models/granitemoehybrid/__init__.py +0 -1
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +55 -48
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +82 -118
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +57 -65
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +33 -37
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +52 -56
- transformers/models/granitemoeshared/modular_granitemoeshared.py +19 -21
- transformers/models/grounding_dino/configuration_grounding_dino.py +10 -46
- transformers/models/grounding_dino/image_processing_grounding_dino.py +60 -62
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +28 -29
- transformers/models/grounding_dino/modeling_grounding_dino.py +161 -181
- transformers/models/grounding_dino/modular_grounding_dino.py +2 -3
- transformers/models/grounding_dino/processing_grounding_dino.py +10 -38
- transformers/models/groupvit/configuration_groupvit.py +4 -2
- transformers/models/groupvit/modeling_groupvit.py +98 -92
- transformers/models/helium/configuration_helium.py +25 -29
- transformers/models/helium/modeling_helium.py +37 -40
- transformers/models/helium/modular_helium.py +3 -7
- transformers/models/herbert/tokenization_herbert.py +4 -6
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +2 -5
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +12 -14
- transformers/models/hgnet_v2/modular_hgnet_v2.py +13 -17
- transformers/models/hiera/configuration_hiera.py +2 -5
- transformers/models/hiera/modeling_hiera.py +71 -70
- transformers/models/hubert/configuration_hubert.py +4 -2
- transformers/models/hubert/modeling_hubert.py +42 -41
- transformers/models/hubert/modular_hubert.py +8 -11
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +26 -31
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +58 -37
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +31 -11
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +31 -36
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +54 -44
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +27 -15
- transformers/models/ibert/configuration_ibert.py +4 -2
- transformers/models/ibert/modeling_ibert.py +60 -62
- transformers/models/ibert/quant_modules.py +0 -1
- transformers/models/idefics/configuration_idefics.py +5 -8
- transformers/models/idefics/image_processing_idefics.py +13 -15
- transformers/models/idefics/modeling_idefics.py +63 -65
- transformers/models/idefics/perceiver.py +1 -3
- transformers/models/idefics/processing_idefics.py +32 -48
- transformers/models/idefics/vision.py +27 -28
- transformers/models/idefics2/configuration_idefics2.py +1 -3
- transformers/models/idefics2/image_processing_idefics2.py +31 -32
- transformers/models/idefics2/image_processing_idefics2_fast.py +8 -8
- transformers/models/idefics2/modeling_idefics2.py +126 -106
- transformers/models/idefics2/processing_idefics2.py +10 -68
- transformers/models/idefics3/configuration_idefics3.py +1 -4
- transformers/models/idefics3/image_processing_idefics3.py +42 -43
- transformers/models/idefics3/image_processing_idefics3_fast.py +40 -15
- transformers/models/idefics3/modeling_idefics3.py +113 -92
- transformers/models/idefics3/processing_idefics3.py +15 -69
- transformers/models/ijepa/configuration_ijepa.py +0 -1
- transformers/models/ijepa/modeling_ijepa.py +13 -14
- transformers/models/ijepa/modular_ijepa.py +5 -7
- transformers/models/imagegpt/configuration_imagegpt.py +9 -2
- transformers/models/imagegpt/image_processing_imagegpt.py +17 -18
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +10 -11
- transformers/models/imagegpt/modeling_imagegpt.py +65 -62
- transformers/models/informer/configuration_informer.py +6 -9
- transformers/models/informer/modeling_informer.py +87 -89
- transformers/models/informer/modular_informer.py +13 -16
- transformers/models/instructblip/configuration_instructblip.py +2 -2
- transformers/models/instructblip/modeling_instructblip.py +104 -79
- transformers/models/instructblip/processing_instructblip.py +10 -36
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -2
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +108 -105
- transformers/models/instructblipvideo/modular_instructblipvideo.py +73 -64
- transformers/models/instructblipvideo/processing_instructblipvideo.py +14 -33
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +6 -7
- transformers/models/internvl/configuration_internvl.py +5 -1
- transformers/models/internvl/modeling_internvl.py +76 -98
- transformers/models/internvl/modular_internvl.py +45 -59
- transformers/models/internvl/processing_internvl.py +12 -45
- transformers/models/internvl/video_processing_internvl.py +10 -11
- transformers/models/jais2/configuration_jais2.py +25 -29
- transformers/models/jais2/modeling_jais2.py +36 -38
- transformers/models/jais2/modular_jais2.py +20 -22
- transformers/models/jamba/configuration_jamba.py +5 -8
- transformers/models/jamba/modeling_jamba.py +47 -50
- transformers/models/jamba/modular_jamba.py +40 -41
- transformers/models/janus/configuration_janus.py +0 -1
- transformers/models/janus/image_processing_janus.py +37 -39
- transformers/models/janus/image_processing_janus_fast.py +20 -21
- transformers/models/janus/modeling_janus.py +103 -188
- transformers/models/janus/modular_janus.py +122 -83
- transformers/models/janus/processing_janus.py +17 -43
- transformers/models/jetmoe/configuration_jetmoe.py +26 -27
- transformers/models/jetmoe/modeling_jetmoe.py +42 -45
- transformers/models/jetmoe/modular_jetmoe.py +33 -36
- transformers/models/kosmos2/configuration_kosmos2.py +10 -9
- transformers/models/kosmos2/modeling_kosmos2.py +199 -178
- transformers/models/kosmos2/processing_kosmos2.py +40 -55
- transformers/models/kosmos2_5/__init__.py +0 -1
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +8 -9
- transformers/models/kosmos2_5/image_processing_kosmos2_5.py +10 -12
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -11
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +162 -172
- transformers/models/kosmos2_5/processing_kosmos2_5.py +8 -29
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +31 -28
- transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +12 -14
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +103 -106
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +20 -22
- transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +2 -8
- transformers/models/lasr/configuration_lasr.py +3 -7
- transformers/models/lasr/feature_extraction_lasr.py +10 -12
- transformers/models/lasr/modeling_lasr.py +21 -24
- transformers/models/lasr/modular_lasr.py +11 -13
- transformers/models/lasr/processing_lasr.py +12 -6
- transformers/models/lasr/tokenization_lasr.py +2 -4
- transformers/models/layoutlm/configuration_layoutlm.py +14 -2
- transformers/models/layoutlm/modeling_layoutlm.py +70 -72
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +14 -17
- transformers/models/layoutlmv2/image_processing_layoutlmv2.py +18 -21
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +7 -8
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +48 -50
- transformers/models/layoutlmv2/processing_layoutlmv2.py +14 -44
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +63 -74
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +16 -19
- transformers/models/layoutlmv3/image_processing_layoutlmv3.py +24 -26
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +9 -10
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +49 -51
- transformers/models/layoutlmv3/processing_layoutlmv3.py +14 -46
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +64 -75
- transformers/models/layoutxlm/configuration_layoutxlm.py +14 -17
- transformers/models/layoutxlm/modular_layoutxlm.py +0 -1
- transformers/models/layoutxlm/processing_layoutxlm.py +14 -44
- transformers/models/layoutxlm/tokenization_layoutxlm.py +65 -76
- transformers/models/led/configuration_led.py +8 -12
- transformers/models/led/modeling_led.py +113 -267
- transformers/models/levit/configuration_levit.py +0 -1
- transformers/models/levit/image_processing_levit.py +19 -21
- transformers/models/levit/image_processing_levit_fast.py +4 -5
- transformers/models/levit/modeling_levit.py +17 -19
- transformers/models/lfm2/configuration_lfm2.py +27 -30
- transformers/models/lfm2/modeling_lfm2.py +46 -48
- transformers/models/lfm2/modular_lfm2.py +32 -32
- transformers/models/lfm2_moe/__init__.py +0 -1
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +6 -9
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +48 -49
- transformers/models/lfm2_moe/modular_lfm2_moe.py +8 -9
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -1
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +43 -20
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +73 -61
- transformers/models/lfm2_vl/modular_lfm2_vl.py +66 -54
- transformers/models/lfm2_vl/processing_lfm2_vl.py +14 -34
- transformers/models/lightglue/image_processing_lightglue.py +16 -15
- transformers/models/lightglue/image_processing_lightglue_fast.py +8 -7
- transformers/models/lightglue/modeling_lightglue.py +31 -33
- transformers/models/lightglue/modular_lightglue.py +31 -31
- transformers/models/lighton_ocr/__init__.py +28 -0
- transformers/models/lighton_ocr/configuration_lighton_ocr.py +128 -0
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +463 -0
- transformers/models/lighton_ocr/modular_lighton_ocr.py +404 -0
- transformers/models/lighton_ocr/processing_lighton_ocr.py +229 -0
- transformers/models/lilt/configuration_lilt.py +6 -2
- transformers/models/lilt/modeling_lilt.py +53 -55
- transformers/models/llama/configuration_llama.py +26 -31
- transformers/models/llama/modeling_llama.py +35 -38
- transformers/models/llama/tokenization_llama.py +2 -4
- transformers/models/llama4/configuration_llama4.py +87 -69
- transformers/models/llama4/image_processing_llama4_fast.py +11 -12
- transformers/models/llama4/modeling_llama4.py +116 -115
- transformers/models/llama4/processing_llama4.py +33 -57
- transformers/models/llava/configuration_llava.py +10 -1
- transformers/models/llava/image_processing_llava.py +25 -28
- transformers/models/llava/image_processing_llava_fast.py +9 -10
- transformers/models/llava/modeling_llava.py +73 -102
- transformers/models/llava/processing_llava.py +18 -51
- transformers/models/llava_next/configuration_llava_next.py +2 -2
- transformers/models/llava_next/image_processing_llava_next.py +43 -45
- transformers/models/llava_next/image_processing_llava_next_fast.py +11 -12
- transformers/models/llava_next/modeling_llava_next.py +103 -104
- transformers/models/llava_next/processing_llava_next.py +18 -47
- transformers/models/llava_next_video/configuration_llava_next_video.py +10 -7
- transformers/models/llava_next_video/modeling_llava_next_video.py +168 -155
- transformers/models/llava_next_video/modular_llava_next_video.py +154 -147
- transformers/models/llava_next_video/processing_llava_next_video.py +21 -63
- transformers/models/llava_next_video/video_processing_llava_next_video.py +0 -1
- transformers/models/llava_onevision/configuration_llava_onevision.py +10 -7
- transformers/models/llava_onevision/image_processing_llava_onevision.py +40 -42
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +14 -14
- transformers/models/llava_onevision/modeling_llava_onevision.py +170 -166
- transformers/models/llava_onevision/modular_llava_onevision.py +156 -152
- transformers/models/llava_onevision/processing_llava_onevision.py +21 -53
- transformers/models/llava_onevision/video_processing_llava_onevision.py +0 -1
- transformers/models/longcat_flash/__init__.py +0 -1
- transformers/models/longcat_flash/configuration_longcat_flash.py +39 -45
- transformers/models/longcat_flash/modeling_longcat_flash.py +37 -38
- transformers/models/longcat_flash/modular_longcat_flash.py +23 -24
- transformers/models/longformer/configuration_longformer.py +5 -5
- transformers/models/longformer/modeling_longformer.py +99 -101
- transformers/models/longt5/configuration_longt5.py +9 -7
- transformers/models/longt5/modeling_longt5.py +45 -45
- transformers/models/luke/configuration_luke.py +8 -2
- transformers/models/luke/modeling_luke.py +179 -181
- transformers/models/luke/tokenization_luke.py +99 -105
- transformers/{pipelines/deprecated → models/lw_detr}/__init__.py +14 -3
- transformers/models/lw_detr/configuration_lw_detr.py +362 -0
- transformers/models/lw_detr/modeling_lw_detr.py +1697 -0
- transformers/models/lw_detr/modular_lw_detr.py +1609 -0
- transformers/models/lxmert/configuration_lxmert.py +16 -1
- transformers/models/lxmert/modeling_lxmert.py +63 -74
- transformers/models/m2m_100/configuration_m2m_100.py +7 -9
- transformers/models/m2m_100/modeling_m2m_100.py +72 -74
- transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
- transformers/models/mamba/configuration_mamba.py +5 -3
- transformers/models/mamba/modeling_mamba.py +61 -70
- transformers/models/mamba2/configuration_mamba2.py +5 -8
- transformers/models/mamba2/modeling_mamba2.py +66 -79
- transformers/models/marian/configuration_marian.py +10 -5
- transformers/models/marian/modeling_marian.py +88 -90
- transformers/models/marian/tokenization_marian.py +6 -6
- transformers/models/markuplm/configuration_markuplm.py +4 -7
- transformers/models/markuplm/feature_extraction_markuplm.py +1 -2
- transformers/models/markuplm/modeling_markuplm.py +63 -65
- transformers/models/markuplm/processing_markuplm.py +31 -38
- transformers/models/markuplm/tokenization_markuplm.py +67 -77
- transformers/models/mask2former/configuration_mask2former.py +14 -52
- transformers/models/mask2former/image_processing_mask2former.py +84 -85
- transformers/models/mask2former/image_processing_mask2former_fast.py +36 -36
- transformers/models/mask2former/modeling_mask2former.py +108 -104
- transformers/models/mask2former/modular_mask2former.py +6 -8
- transformers/models/maskformer/configuration_maskformer.py +17 -51
- transformers/models/maskformer/configuration_maskformer_swin.py +2 -5
- transformers/models/maskformer/image_processing_maskformer.py +84 -85
- transformers/models/maskformer/image_processing_maskformer_fast.py +35 -36
- transformers/models/maskformer/modeling_maskformer.py +71 -67
- transformers/models/maskformer/modeling_maskformer_swin.py +20 -23
- transformers/models/mbart/configuration_mbart.py +9 -5
- transformers/models/mbart/modeling_mbart.py +120 -119
- transformers/models/mbart/tokenization_mbart.py +2 -4
- transformers/models/mbart50/tokenization_mbart50.py +3 -5
- transformers/models/megatron_bert/configuration_megatron_bert.py +13 -3
- transformers/models/megatron_bert/modeling_megatron_bert.py +139 -165
- transformers/models/metaclip_2/configuration_metaclip_2.py +4 -1
- transformers/models/metaclip_2/modeling_metaclip_2.py +94 -87
- transformers/models/metaclip_2/modular_metaclip_2.py +59 -45
- transformers/models/mgp_str/configuration_mgp_str.py +0 -1
- transformers/models/mgp_str/modeling_mgp_str.py +18 -18
- transformers/models/mgp_str/processing_mgp_str.py +3 -20
- transformers/models/mgp_str/tokenization_mgp_str.py +1 -3
- transformers/models/mimi/configuration_mimi.py +42 -40
- transformers/models/mimi/modeling_mimi.py +116 -115
- transformers/models/minimax/__init__.py +0 -1
- transformers/models/minimax/configuration_minimax.py +40 -47
- transformers/models/minimax/modeling_minimax.py +46 -49
- transformers/models/minimax/modular_minimax.py +59 -65
- transformers/models/minimax_m2/__init__.py +28 -0
- transformers/models/minimax_m2/configuration_minimax_m2.py +188 -0
- transformers/models/minimax_m2/modeling_minimax_m2.py +704 -0
- transformers/models/minimax_m2/modular_minimax_m2.py +346 -0
- transformers/models/ministral/configuration_ministral.py +25 -29
- transformers/models/ministral/modeling_ministral.py +35 -37
- transformers/models/ministral/modular_ministral.py +32 -37
- transformers/models/ministral3/configuration_ministral3.py +23 -26
- transformers/models/ministral3/modeling_ministral3.py +35 -37
- transformers/models/ministral3/modular_ministral3.py +7 -8
- transformers/models/mistral/configuration_mistral.py +24 -29
- transformers/models/mistral/modeling_mistral.py +35 -37
- transformers/models/mistral/modular_mistral.py +14 -15
- transformers/models/mistral3/configuration_mistral3.py +4 -1
- transformers/models/mistral3/modeling_mistral3.py +79 -82
- transformers/models/mistral3/modular_mistral3.py +66 -67
- transformers/models/mixtral/configuration_mixtral.py +32 -38
- transformers/models/mixtral/modeling_mixtral.py +39 -42
- transformers/models/mixtral/modular_mixtral.py +26 -29
- transformers/models/mlcd/configuration_mlcd.py +0 -1
- transformers/models/mlcd/modeling_mlcd.py +17 -17
- transformers/models/mlcd/modular_mlcd.py +16 -16
- transformers/models/mllama/configuration_mllama.py +10 -15
- transformers/models/mllama/image_processing_mllama.py +23 -25
- transformers/models/mllama/image_processing_mllama_fast.py +11 -11
- transformers/models/mllama/modeling_mllama.py +100 -103
- transformers/models/mllama/processing_mllama.py +6 -55
- transformers/models/mluke/tokenization_mluke.py +97 -103
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +10 -46
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +159 -179
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +10 -46
- transformers/models/mobilebert/configuration_mobilebert.py +4 -2
- transformers/models/mobilebert/modeling_mobilebert.py +78 -88
- transformers/models/mobilebert/tokenization_mobilebert.py +0 -1
- transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +0 -1
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +20 -23
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +0 -1
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +13 -16
- transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +0 -1
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +48 -51
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +14 -15
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +21 -22
- transformers/models/mobilevit/configuration_mobilevit.py +0 -1
- transformers/models/mobilevit/image_processing_mobilevit.py +41 -44
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +12 -13
- transformers/models/mobilevit/modeling_mobilevit.py +21 -21
- transformers/models/mobilevitv2/configuration_mobilevitv2.py +0 -1
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +21 -22
- transformers/models/modernbert/configuration_modernbert.py +76 -51
- transformers/models/modernbert/modeling_modernbert.py +188 -943
- transformers/models/modernbert/modular_modernbert.py +255 -978
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +50 -44
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +54 -64
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +92 -92
- transformers/models/moonshine/configuration_moonshine.py +34 -31
- transformers/models/moonshine/modeling_moonshine.py +70 -72
- transformers/models/moonshine/modular_moonshine.py +91 -86
- transformers/models/moshi/configuration_moshi.py +46 -23
- transformers/models/moshi/modeling_moshi.py +134 -142
- transformers/models/mpnet/configuration_mpnet.py +6 -2
- transformers/models/mpnet/modeling_mpnet.py +55 -57
- transformers/models/mpnet/tokenization_mpnet.py +1 -4
- transformers/models/mpt/configuration_mpt.py +17 -9
- transformers/models/mpt/modeling_mpt.py +58 -60
- transformers/models/mra/configuration_mra.py +8 -2
- transformers/models/mra/modeling_mra.py +54 -56
- transformers/models/mt5/configuration_mt5.py +9 -6
- transformers/models/mt5/modeling_mt5.py +80 -85
- transformers/models/musicgen/configuration_musicgen.py +12 -8
- transformers/models/musicgen/modeling_musicgen.py +114 -116
- transformers/models/musicgen/processing_musicgen.py +3 -21
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +15 -8
- transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +8 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +113 -126
- transformers/models/musicgen_melody/processing_musicgen_melody.py +3 -22
- transformers/models/mvp/configuration_mvp.py +8 -5
- transformers/models/mvp/modeling_mvp.py +121 -123
- transformers/models/myt5/tokenization_myt5.py +8 -10
- transformers/models/nanochat/configuration_nanochat.py +5 -8
- transformers/models/nanochat/modeling_nanochat.py +36 -39
- transformers/models/nanochat/modular_nanochat.py +16 -18
- transformers/models/nemotron/configuration_nemotron.py +25 -30
- transformers/models/nemotron/modeling_nemotron.py +53 -66
- transformers/models/nllb/tokenization_nllb.py +14 -14
- transformers/models/nllb_moe/configuration_nllb_moe.py +7 -10
- transformers/models/nllb_moe/modeling_nllb_moe.py +70 -72
- transformers/models/nougat/image_processing_nougat.py +29 -32
- transformers/models/nougat/image_processing_nougat_fast.py +12 -13
- transformers/models/nougat/processing_nougat.py +37 -39
- transformers/models/nougat/tokenization_nougat.py +5 -7
- transformers/models/nystromformer/configuration_nystromformer.py +8 -2
- transformers/models/nystromformer/modeling_nystromformer.py +61 -63
- transformers/models/olmo/configuration_olmo.py +23 -28
- transformers/models/olmo/modeling_olmo.py +35 -38
- transformers/models/olmo/modular_olmo.py +8 -12
- transformers/models/olmo2/configuration_olmo2.py +27 -32
- transformers/models/olmo2/modeling_olmo2.py +36 -39
- transformers/models/olmo2/modular_olmo2.py +36 -38
- transformers/models/olmo3/__init__.py +0 -1
- transformers/models/olmo3/configuration_olmo3.py +30 -34
- transformers/models/olmo3/modeling_olmo3.py +35 -38
- transformers/models/olmo3/modular_olmo3.py +44 -47
- transformers/models/olmoe/configuration_olmoe.py +29 -33
- transformers/models/olmoe/modeling_olmoe.py +41 -43
- transformers/models/olmoe/modular_olmoe.py +15 -16
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +14 -50
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +59 -57
- transformers/models/omdet_turbo/processing_omdet_turbo.py +19 -67
- transformers/models/oneformer/configuration_oneformer.py +11 -51
- transformers/models/oneformer/image_processing_oneformer.py +83 -84
- transformers/models/oneformer/image_processing_oneformer_fast.py +41 -42
- transformers/models/oneformer/modeling_oneformer.py +137 -133
- transformers/models/oneformer/processing_oneformer.py +28 -43
- transformers/models/openai/configuration_openai.py +16 -1
- transformers/models/openai/modeling_openai.py +50 -51
- transformers/models/openai/tokenization_openai.py +2 -5
- transformers/models/opt/configuration_opt.py +6 -7
- transformers/models/opt/modeling_opt.py +79 -80
- transformers/models/ovis2/__init__.py +0 -1
- transformers/models/ovis2/configuration_ovis2.py +4 -1
- transformers/models/ovis2/image_processing_ovis2.py +22 -24
- transformers/models/ovis2/image_processing_ovis2_fast.py +9 -10
- transformers/models/ovis2/modeling_ovis2.py +99 -142
- transformers/models/ovis2/modular_ovis2.py +82 -45
- transformers/models/ovis2/processing_ovis2.py +12 -40
- transformers/models/owlv2/configuration_owlv2.py +4 -2
- transformers/models/owlv2/image_processing_owlv2.py +20 -21
- transformers/models/owlv2/image_processing_owlv2_fast.py +12 -13
- transformers/models/owlv2/modeling_owlv2.py +122 -114
- transformers/models/owlv2/modular_owlv2.py +11 -12
- transformers/models/owlv2/processing_owlv2.py +20 -49
- transformers/models/owlvit/configuration_owlvit.py +4 -2
- transformers/models/owlvit/image_processing_owlvit.py +21 -22
- transformers/models/owlvit/image_processing_owlvit_fast.py +2 -3
- transformers/models/owlvit/modeling_owlvit.py +121 -113
- transformers/models/owlvit/processing_owlvit.py +20 -48
- transformers/models/paddleocr_vl/__init__.py +0 -1
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +28 -29
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +34 -35
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +12 -12
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +159 -158
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +148 -119
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +1 -3
- transformers/models/paligemma/configuration_paligemma.py +4 -1
- transformers/models/paligemma/modeling_paligemma.py +81 -79
- transformers/models/paligemma/processing_paligemma.py +13 -66
- transformers/models/parakeet/configuration_parakeet.py +3 -8
- transformers/models/parakeet/feature_extraction_parakeet.py +10 -12
- transformers/models/parakeet/modeling_parakeet.py +21 -25
- transformers/models/parakeet/modular_parakeet.py +19 -21
- transformers/models/parakeet/processing_parakeet.py +12 -5
- transformers/models/parakeet/tokenization_parakeet.py +2 -4
- transformers/models/patchtsmixer/configuration_patchtsmixer.py +5 -8
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +63 -65
- transformers/models/patchtst/configuration_patchtst.py +6 -9
- transformers/models/patchtst/modeling_patchtst.py +75 -77
- transformers/models/pe_audio/__init__.py +0 -1
- transformers/models/pe_audio/configuration_pe_audio.py +14 -16
- transformers/models/pe_audio/feature_extraction_pe_audio.py +6 -8
- transformers/models/pe_audio/modeling_pe_audio.py +30 -31
- transformers/models/pe_audio/modular_pe_audio.py +17 -18
- transformers/models/pe_audio/processing_pe_audio.py +0 -1
- transformers/models/pe_audio_video/__init__.py +0 -1
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +15 -17
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +64 -65
- transformers/models/pe_audio_video/modular_pe_audio_video.py +56 -57
- transformers/models/pe_audio_video/processing_pe_audio_video.py +0 -1
- transformers/models/pe_video/__init__.py +0 -1
- transformers/models/pe_video/configuration_pe_video.py +14 -16
- transformers/models/pe_video/modeling_pe_video.py +57 -46
- transformers/models/pe_video/modular_pe_video.py +47 -35
- transformers/models/pe_video/video_processing_pe_video.py +2 -4
- transformers/models/pegasus/configuration_pegasus.py +8 -6
- transformers/models/pegasus/modeling_pegasus.py +67 -69
- transformers/models/pegasus/tokenization_pegasus.py +1 -4
- transformers/models/pegasus_x/configuration_pegasus_x.py +5 -4
- transformers/models/pegasus_x/modeling_pegasus_x.py +53 -55
- transformers/models/perceiver/configuration_perceiver.py +0 -1
- transformers/models/perceiver/image_processing_perceiver.py +22 -25
- transformers/models/perceiver/image_processing_perceiver_fast.py +7 -8
- transformers/models/perceiver/modeling_perceiver.py +152 -145
- transformers/models/perceiver/tokenization_perceiver.py +3 -6
- transformers/models/perception_lm/configuration_perception_lm.py +0 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +8 -9
- transformers/models/perception_lm/modeling_perception_lm.py +64 -67
- transformers/models/perception_lm/modular_perception_lm.py +58 -58
- transformers/models/perception_lm/processing_perception_lm.py +13 -47
- transformers/models/perception_lm/video_processing_perception_lm.py +0 -1
- transformers/models/persimmon/configuration_persimmon.py +23 -28
- transformers/models/persimmon/modeling_persimmon.py +44 -47
- transformers/models/phi/configuration_phi.py +27 -28
- transformers/models/phi/modeling_phi.py +39 -41
- transformers/models/phi/modular_phi.py +26 -26
- transformers/models/phi3/configuration_phi3.py +32 -37
- transformers/models/phi3/modeling_phi3.py +37 -40
- transformers/models/phi3/modular_phi3.py +16 -20
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +36 -39
- transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +7 -9
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +11 -11
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +100 -117
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +103 -90
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +7 -42
- transformers/models/phimoe/configuration_phimoe.py +31 -36
- transformers/models/phimoe/modeling_phimoe.py +50 -77
- transformers/models/phimoe/modular_phimoe.py +12 -8
- transformers/models/phobert/tokenization_phobert.py +4 -6
- transformers/models/pix2struct/configuration_pix2struct.py +12 -10
- transformers/models/pix2struct/image_processing_pix2struct.py +15 -19
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +12 -15
- transformers/models/pix2struct/modeling_pix2struct.py +56 -52
- transformers/models/pix2struct/processing_pix2struct.py +5 -26
- transformers/models/pixio/__init__.py +0 -1
- transformers/models/pixio/configuration_pixio.py +2 -5
- transformers/models/pixio/modeling_pixio.py +16 -17
- transformers/models/pixio/modular_pixio.py +7 -8
- transformers/models/pixtral/configuration_pixtral.py +11 -14
- transformers/models/pixtral/image_processing_pixtral.py +26 -28
- transformers/models/pixtral/image_processing_pixtral_fast.py +10 -11
- transformers/models/pixtral/modeling_pixtral.py +31 -37
- transformers/models/pixtral/processing_pixtral.py +18 -52
- transformers/models/plbart/configuration_plbart.py +8 -6
- transformers/models/plbart/modeling_plbart.py +109 -109
- transformers/models/plbart/modular_plbart.py +31 -33
- transformers/models/plbart/tokenization_plbart.py +4 -5
- transformers/models/poolformer/configuration_poolformer.py +0 -1
- transformers/models/poolformer/image_processing_poolformer.py +21 -24
- transformers/models/poolformer/image_processing_poolformer_fast.py +13 -14
- transformers/models/poolformer/modeling_poolformer.py +10 -12
- transformers/models/pop2piano/configuration_pop2piano.py +7 -7
- transformers/models/pop2piano/feature_extraction_pop2piano.py +6 -9
- transformers/models/pop2piano/modeling_pop2piano.py +24 -24
- transformers/models/pop2piano/processing_pop2piano.py +25 -33
- transformers/models/pop2piano/tokenization_pop2piano.py +15 -23
- transformers/models/pp_doclayout_v3/__init__.py +30 -0
- transformers/models/pp_doclayout_v3/configuration_pp_doclayout_v3.py +277 -0
- transformers/models/pp_doclayout_v3/image_processing_pp_doclayout_v3_fast.py +305 -0
- transformers/models/pp_doclayout_v3/modeling_pp_doclayout_v3.py +2083 -0
- transformers/models/pp_doclayout_v3/modular_pp_doclayout_v3.py +1549 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +13 -46
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +20 -21
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +17 -16
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +21 -20
- transformers/models/prophetnet/configuration_prophetnet.py +37 -38
- transformers/models/prophetnet/modeling_prophetnet.py +121 -153
- transformers/models/prophetnet/tokenization_prophetnet.py +14 -16
- transformers/models/pvt/configuration_pvt.py +0 -1
- transformers/models/pvt/image_processing_pvt.py +24 -27
- transformers/models/pvt/image_processing_pvt_fast.py +1 -2
- transformers/models/pvt/modeling_pvt.py +19 -21
- transformers/models/pvt_v2/configuration_pvt_v2.py +4 -8
- transformers/models/pvt_v2/modeling_pvt_v2.py +27 -28
- transformers/models/qwen2/configuration_qwen2.py +32 -25
- transformers/models/qwen2/modeling_qwen2.py +35 -37
- transformers/models/qwen2/modular_qwen2.py +14 -15
- transformers/models/qwen2/tokenization_qwen2.py +2 -9
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +36 -27
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +241 -214
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +228 -193
- transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +41 -49
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +28 -34
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +188 -145
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +64 -91
- transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +7 -43
- transformers/models/qwen2_audio/configuration_qwen2_audio.py +0 -1
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +39 -41
- transformers/models/qwen2_audio/processing_qwen2_audio.py +13 -42
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +42 -35
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +40 -43
- transformers/models/qwen2_moe/modular_qwen2_moe.py +10 -13
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +28 -33
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +38 -40
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +12 -15
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +184 -141
- transformers/models/qwen2_vl/processing_qwen2_vl.py +7 -44
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +38 -18
- transformers/models/qwen3/configuration_qwen3.py +34 -27
- transformers/models/qwen3/modeling_qwen3.py +35 -38
- transformers/models/qwen3/modular_qwen3.py +7 -9
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +45 -35
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +40 -43
- transformers/models/qwen3_moe/modular_qwen3_moe.py +10 -13
- transformers/models/qwen3_next/configuration_qwen3_next.py +47 -38
- transformers/models/qwen3_next/modeling_qwen3_next.py +44 -47
- transformers/models/qwen3_next/modular_qwen3_next.py +37 -38
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +139 -106
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +266 -206
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +228 -181
- transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +40 -48
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +22 -24
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +185 -122
- transformers/models/qwen3_vl/modular_qwen3_vl.py +153 -139
- transformers/models/qwen3_vl/processing_qwen3_vl.py +6 -42
- transformers/models/qwen3_vl/video_processing_qwen3_vl.py +10 -12
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +27 -30
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +249 -178
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +55 -42
- transformers/models/rag/configuration_rag.py +6 -7
- transformers/models/rag/modeling_rag.py +119 -121
- transformers/models/rag/retrieval_rag.py +3 -5
- transformers/models/rag/tokenization_rag.py +0 -50
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +29 -30
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +35 -39
- transformers/models/reformer/configuration_reformer.py +7 -8
- transformers/models/reformer/modeling_reformer.py +67 -68
- transformers/models/reformer/tokenization_reformer.py +3 -6
- transformers/models/regnet/configuration_regnet.py +0 -1
- transformers/models/regnet/modeling_regnet.py +7 -9
- transformers/models/rembert/configuration_rembert.py +8 -2
- transformers/models/rembert/modeling_rembert.py +108 -132
- transformers/models/rembert/tokenization_rembert.py +1 -4
- transformers/models/resnet/configuration_resnet.py +2 -5
- transformers/models/resnet/modeling_resnet.py +14 -15
- transformers/models/roberta/configuration_roberta.py +11 -3
- transformers/models/roberta/modeling_roberta.py +97 -99
- transformers/models/roberta/modular_roberta.py +55 -58
- transformers/models/roberta/tokenization_roberta.py +2 -5
- transformers/models/roberta/tokenization_roberta_old.py +2 -4
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +11 -3
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +97 -99
- transformers/models/roc_bert/configuration_roc_bert.py +8 -2
- transformers/models/roc_bert/modeling_roc_bert.py +125 -162
- transformers/models/roc_bert/tokenization_roc_bert.py +88 -94
- transformers/models/roformer/configuration_roformer.py +13 -3
- transformers/models/roformer/modeling_roformer.py +79 -95
- transformers/models/roformer/tokenization_roformer.py +3 -6
- transformers/models/roformer/tokenization_utils.py +0 -1
- transformers/models/rt_detr/configuration_rt_detr.py +8 -50
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +2 -5
- transformers/models/rt_detr/image_processing_rt_detr.py +54 -55
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +39 -26
- transformers/models/rt_detr/modeling_rt_detr.py +643 -804
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +4 -7
- transformers/models/rt_detr/modular_rt_detr.py +1522 -20
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +12 -58
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +384 -521
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +27 -70
- transformers/models/rwkv/configuration_rwkv.py +2 -4
- transformers/models/rwkv/modeling_rwkv.py +29 -54
- transformers/models/sam/configuration_sam.py +2 -1
- transformers/models/sam/image_processing_sam.py +59 -60
- transformers/models/sam/image_processing_sam_fast.py +25 -26
- transformers/models/sam/modeling_sam.py +46 -43
- transformers/models/sam/processing_sam.py +39 -27
- transformers/models/sam2/configuration_sam2.py +1 -2
- transformers/models/sam2/image_processing_sam2_fast.py +14 -15
- transformers/models/sam2/modeling_sam2.py +96 -94
- transformers/models/sam2/modular_sam2.py +85 -94
- transformers/models/sam2/processing_sam2.py +31 -47
- transformers/models/sam2_video/configuration_sam2_video.py +0 -1
- transformers/models/sam2_video/modeling_sam2_video.py +114 -116
- transformers/models/sam2_video/modular_sam2_video.py +72 -89
- transformers/models/sam2_video/processing_sam2_video.py +49 -66
- transformers/models/sam2_video/video_processing_sam2_video.py +1 -4
- transformers/models/sam3/configuration_sam3.py +0 -1
- transformers/models/sam3/image_processing_sam3_fast.py +17 -20
- transformers/models/sam3/modeling_sam3.py +94 -100
- transformers/models/sam3/modular_sam3.py +3 -8
- transformers/models/sam3/processing_sam3.py +37 -52
- transformers/models/sam3_tracker/__init__.py +0 -1
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +1 -3
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +79 -80
- transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -2
- transformers/models/sam3_tracker/processing_sam3_tracker.py +31 -48
- transformers/models/sam3_tracker_video/__init__.py +0 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +0 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +115 -114
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +10 -24
- transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +50 -66
- transformers/models/sam3_video/configuration_sam3_video.py +0 -1
- transformers/models/sam3_video/modeling_sam3_video.py +56 -45
- transformers/models/sam3_video/processing_sam3_video.py +25 -45
- transformers/models/sam_hq/__init__.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +2 -1
- transformers/models/sam_hq/modeling_sam_hq.py +52 -50
- transformers/models/sam_hq/modular_sam_hq.py +23 -25
- transformers/models/sam_hq/{processing_samhq.py → processing_sam_hq.py} +41 -29
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +8 -10
- transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +8 -11
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +180 -182
- transformers/models/seamless_m4t/processing_seamless_m4t.py +18 -39
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +15 -20
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +8 -10
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +193 -195
- transformers/models/seed_oss/configuration_seed_oss.py +30 -34
- transformers/models/seed_oss/modeling_seed_oss.py +34 -36
- transformers/models/seed_oss/modular_seed_oss.py +6 -7
- transformers/models/segformer/configuration_segformer.py +0 -10
- transformers/models/segformer/image_processing_segformer.py +39 -42
- transformers/models/segformer/image_processing_segformer_fast.py +11 -12
- transformers/models/segformer/modeling_segformer.py +28 -28
- transformers/models/segformer/modular_segformer.py +8 -9
- transformers/models/seggpt/configuration_seggpt.py +0 -1
- transformers/models/seggpt/image_processing_seggpt.py +38 -41
- transformers/models/seggpt/modeling_seggpt.py +48 -38
- transformers/models/sew/configuration_sew.py +4 -2
- transformers/models/sew/modeling_sew.py +42 -40
- transformers/models/sew/modular_sew.py +12 -13
- transformers/models/sew_d/configuration_sew_d.py +4 -2
- transformers/models/sew_d/modeling_sew_d.py +32 -31
- transformers/models/shieldgemma2/configuration_shieldgemma2.py +0 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +19 -21
- transformers/models/shieldgemma2/processing_shieldgemma2.py +3 -5
- transformers/models/siglip/configuration_siglip.py +4 -2
- transformers/models/siglip/image_processing_siglip.py +17 -20
- transformers/models/siglip/image_processing_siglip_fast.py +0 -1
- transformers/models/siglip/modeling_siglip.py +65 -110
- transformers/models/siglip/processing_siglip.py +2 -14
- transformers/models/siglip/tokenization_siglip.py +6 -7
- transformers/models/siglip2/__init__.py +1 -0
- transformers/models/siglip2/configuration_siglip2.py +4 -2
- transformers/models/siglip2/image_processing_siglip2.py +15 -16
- transformers/models/siglip2/image_processing_siglip2_fast.py +6 -7
- transformers/models/siglip2/modeling_siglip2.py +89 -130
- transformers/models/siglip2/modular_siglip2.py +95 -48
- transformers/models/siglip2/processing_siglip2.py +2 -14
- transformers/models/siglip2/tokenization_siglip2.py +95 -0
- transformers/models/smollm3/configuration_smollm3.py +29 -32
- transformers/models/smollm3/modeling_smollm3.py +35 -38
- transformers/models/smollm3/modular_smollm3.py +36 -38
- transformers/models/smolvlm/configuration_smolvlm.py +2 -4
- transformers/models/smolvlm/image_processing_smolvlm.py +42 -43
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +41 -15
- transformers/models/smolvlm/modeling_smolvlm.py +124 -96
- transformers/models/smolvlm/modular_smolvlm.py +50 -39
- transformers/models/smolvlm/processing_smolvlm.py +15 -76
- transformers/models/smolvlm/video_processing_smolvlm.py +16 -17
- transformers/models/solar_open/__init__.py +27 -0
- transformers/models/solar_open/configuration_solar_open.py +184 -0
- transformers/models/solar_open/modeling_solar_open.py +642 -0
- transformers/models/solar_open/modular_solar_open.py +224 -0
- transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +0 -1
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +26 -27
- transformers/models/speech_to_text/configuration_speech_to_text.py +9 -9
- transformers/models/speech_to_text/feature_extraction_speech_to_text.py +10 -13
- transformers/models/speech_to_text/modeling_speech_to_text.py +55 -57
- transformers/models/speech_to_text/processing_speech_to_text.py +4 -30
- transformers/models/speech_to_text/tokenization_speech_to_text.py +5 -6
- transformers/models/speecht5/configuration_speecht5.py +7 -9
- transformers/models/speecht5/feature_extraction_speecht5.py +16 -37
- transformers/models/speecht5/modeling_speecht5.py +172 -174
- transformers/models/speecht5/number_normalizer.py +0 -1
- transformers/models/speecht5/processing_speecht5.py +3 -37
- transformers/models/speecht5/tokenization_speecht5.py +4 -5
- transformers/models/splinter/configuration_splinter.py +6 -7
- transformers/models/splinter/modeling_splinter.py +62 -59
- transformers/models/splinter/tokenization_splinter.py +2 -4
- transformers/models/squeezebert/configuration_squeezebert.py +14 -2
- transformers/models/squeezebert/modeling_squeezebert.py +60 -62
- transformers/models/squeezebert/tokenization_squeezebert.py +0 -1
- transformers/models/stablelm/configuration_stablelm.py +28 -29
- transformers/models/stablelm/modeling_stablelm.py +44 -47
- transformers/models/starcoder2/configuration_starcoder2.py +30 -27
- transformers/models/starcoder2/modeling_starcoder2.py +38 -41
- transformers/models/starcoder2/modular_starcoder2.py +17 -19
- transformers/models/superglue/configuration_superglue.py +7 -3
- transformers/models/superglue/image_processing_superglue.py +15 -15
- transformers/models/superglue/image_processing_superglue_fast.py +8 -8
- transformers/models/superglue/modeling_superglue.py +41 -37
- transformers/models/superpoint/image_processing_superpoint.py +15 -15
- transformers/models/superpoint/image_processing_superpoint_fast.py +7 -9
- transformers/models/superpoint/modeling_superpoint.py +17 -16
- transformers/models/swiftformer/configuration_swiftformer.py +0 -1
- transformers/models/swiftformer/modeling_swiftformer.py +12 -14
- transformers/models/swin/configuration_swin.py +2 -5
- transformers/models/swin/modeling_swin.py +69 -78
- transformers/models/swin2sr/configuration_swin2sr.py +0 -1
- transformers/models/swin2sr/image_processing_swin2sr.py +10 -13
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +4 -7
- transformers/models/swin2sr/modeling_swin2sr.py +30 -30
- transformers/models/swinv2/configuration_swinv2.py +2 -5
- transformers/models/swinv2/modeling_swinv2.py +65 -74
- transformers/models/switch_transformers/configuration_switch_transformers.py +11 -7
- transformers/models/switch_transformers/modeling_switch_transformers.py +35 -36
- transformers/models/switch_transformers/modular_switch_transformers.py +32 -33
- transformers/models/t5/configuration_t5.py +9 -9
- transformers/models/t5/modeling_t5.py +80 -85
- transformers/models/t5/tokenization_t5.py +1 -3
- transformers/models/t5gemma/configuration_t5gemma.py +43 -59
- transformers/models/t5gemma/modeling_t5gemma.py +105 -108
- transformers/models/t5gemma/modular_t5gemma.py +128 -142
- transformers/models/t5gemma2/configuration_t5gemma2.py +86 -100
- transformers/models/t5gemma2/modeling_t5gemma2.py +234 -194
- transformers/models/t5gemma2/modular_t5gemma2.py +279 -264
- transformers/models/table_transformer/configuration_table_transformer.py +18 -50
- transformers/models/table_transformer/modeling_table_transformer.py +73 -101
- transformers/models/tapas/configuration_tapas.py +12 -2
- transformers/models/tapas/modeling_tapas.py +65 -67
- transformers/models/tapas/tokenization_tapas.py +116 -153
- transformers/models/textnet/configuration_textnet.py +4 -7
- transformers/models/textnet/image_processing_textnet.py +22 -25
- transformers/models/textnet/image_processing_textnet_fast.py +8 -9
- transformers/models/textnet/modeling_textnet.py +28 -28
- transformers/models/time_series_transformer/configuration_time_series_transformer.py +5 -8
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +82 -84
- transformers/models/timesfm/configuration_timesfm.py +0 -1
- transformers/models/timesfm/modeling_timesfm.py +22 -25
- transformers/models/timesfm/modular_timesfm.py +21 -24
- transformers/models/timesformer/configuration_timesformer.py +0 -1
- transformers/models/timesformer/modeling_timesformer.py +13 -16
- transformers/models/timm_backbone/configuration_timm_backbone.py +33 -8
- transformers/models/timm_backbone/modeling_timm_backbone.py +25 -30
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +2 -3
- transformers/models/timm_wrapper/image_processing_timm_wrapper.py +4 -5
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +22 -19
- transformers/models/trocr/configuration_trocr.py +11 -8
- transformers/models/trocr/modeling_trocr.py +42 -42
- transformers/models/trocr/processing_trocr.py +5 -25
- transformers/models/tvp/configuration_tvp.py +10 -36
- transformers/models/tvp/image_processing_tvp.py +50 -52
- transformers/models/tvp/image_processing_tvp_fast.py +15 -15
- transformers/models/tvp/modeling_tvp.py +26 -28
- transformers/models/tvp/processing_tvp.py +2 -14
- transformers/models/udop/configuration_udop.py +16 -8
- transformers/models/udop/modeling_udop.py +73 -72
- transformers/models/udop/processing_udop.py +7 -26
- transformers/models/udop/tokenization_udop.py +80 -93
- transformers/models/umt5/configuration_umt5.py +8 -7
- transformers/models/umt5/modeling_umt5.py +87 -84
- transformers/models/unispeech/configuration_unispeech.py +4 -2
- transformers/models/unispeech/modeling_unispeech.py +54 -53
- transformers/models/unispeech/modular_unispeech.py +20 -22
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +4 -2
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +70 -69
- transformers/models/unispeech_sat/modular_unispeech_sat.py +21 -23
- transformers/models/univnet/feature_extraction_univnet.py +14 -14
- transformers/models/univnet/modeling_univnet.py +7 -8
- transformers/models/upernet/configuration_upernet.py +8 -36
- transformers/models/upernet/modeling_upernet.py +11 -14
- transformers/models/vaultgemma/__init__.py +0 -1
- transformers/models/vaultgemma/configuration_vaultgemma.py +29 -33
- transformers/models/vaultgemma/modeling_vaultgemma.py +38 -40
- transformers/models/vaultgemma/modular_vaultgemma.py +29 -31
- transformers/models/video_llama_3/configuration_video_llama_3.py +4 -0
- transformers/models/video_llama_3/image_processing_video_llama_3.py +40 -40
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +12 -14
- transformers/models/video_llama_3/modeling_video_llama_3.py +149 -112
- transformers/models/video_llama_3/modular_video_llama_3.py +152 -150
- transformers/models/video_llama_3/processing_video_llama_3.py +5 -39
- transformers/models/video_llama_3/video_processing_video_llama_3.py +45 -24
- transformers/models/video_llava/configuration_video_llava.py +4 -1
- transformers/models/video_llava/image_processing_video_llava.py +35 -38
- transformers/models/video_llava/modeling_video_llava.py +139 -143
- transformers/models/video_llava/processing_video_llava.py +38 -78
- transformers/models/video_llava/video_processing_video_llava.py +0 -1
- transformers/models/videomae/configuration_videomae.py +0 -1
- transformers/models/videomae/image_processing_videomae.py +31 -34
- transformers/models/videomae/modeling_videomae.py +17 -20
- transformers/models/videomae/video_processing_videomae.py +0 -1
- transformers/models/vilt/configuration_vilt.py +4 -2
- transformers/models/vilt/image_processing_vilt.py +29 -30
- transformers/models/vilt/image_processing_vilt_fast.py +15 -16
- transformers/models/vilt/modeling_vilt.py +103 -90
- transformers/models/vilt/processing_vilt.py +2 -14
- transformers/models/vipllava/configuration_vipllava.py +4 -1
- transformers/models/vipllava/modeling_vipllava.py +92 -67
- transformers/models/vipllava/modular_vipllava.py +78 -54
- transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +0 -1
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +28 -27
- transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +0 -1
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +45 -41
- transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +2 -16
- transformers/models/visual_bert/configuration_visual_bert.py +6 -2
- transformers/models/visual_bert/modeling_visual_bert.py +90 -92
- transformers/models/vit/configuration_vit.py +2 -3
- transformers/models/vit/image_processing_vit.py +19 -22
- transformers/models/vit/image_processing_vit_fast.py +0 -1
- transformers/models/vit/modeling_vit.py +20 -20
- transformers/models/vit_mae/configuration_vit_mae.py +0 -1
- transformers/models/vit_mae/modeling_vit_mae.py +32 -30
- transformers/models/vit_msn/configuration_vit_msn.py +0 -1
- transformers/models/vit_msn/modeling_vit_msn.py +21 -19
- transformers/models/vitdet/configuration_vitdet.py +2 -5
- transformers/models/vitdet/modeling_vitdet.py +14 -17
- transformers/models/vitmatte/configuration_vitmatte.py +7 -39
- transformers/models/vitmatte/image_processing_vitmatte.py +15 -18
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +16 -17
- transformers/models/vitmatte/modeling_vitmatte.py +10 -12
- transformers/models/vitpose/configuration_vitpose.py +7 -47
- transformers/models/vitpose/image_processing_vitpose.py +24 -25
- transformers/models/vitpose/image_processing_vitpose_fast.py +9 -10
- transformers/models/vitpose/modeling_vitpose.py +15 -15
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +2 -5
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +13 -16
- transformers/models/vits/configuration_vits.py +4 -1
- transformers/models/vits/modeling_vits.py +43 -42
- transformers/models/vits/tokenization_vits.py +3 -4
- transformers/models/vivit/configuration_vivit.py +0 -1
- transformers/models/vivit/image_processing_vivit.py +36 -39
- transformers/models/vivit/modeling_vivit.py +9 -11
- transformers/models/vjepa2/__init__.py +0 -1
- transformers/models/vjepa2/configuration_vjepa2.py +0 -1
- transformers/models/vjepa2/modeling_vjepa2.py +39 -41
- transformers/models/vjepa2/video_processing_vjepa2.py +0 -1
- transformers/models/voxtral/__init__.py +0 -1
- transformers/models/voxtral/configuration_voxtral.py +0 -2
- transformers/models/voxtral/modeling_voxtral.py +41 -48
- transformers/models/voxtral/modular_voxtral.py +35 -38
- transformers/models/voxtral/processing_voxtral.py +25 -48
- transformers/models/wav2vec2/configuration_wav2vec2.py +4 -2
- transformers/models/wav2vec2/feature_extraction_wav2vec2.py +7 -10
- transformers/models/wav2vec2/modeling_wav2vec2.py +74 -126
- transformers/models/wav2vec2/processing_wav2vec2.py +6 -35
- transformers/models/wav2vec2/tokenization_wav2vec2.py +20 -332
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +4 -2
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +49 -52
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +45 -48
- transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +6 -35
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +4 -2
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +62 -65
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +15 -18
- transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +16 -17
- transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +36 -55
- transformers/models/wavlm/configuration_wavlm.py +4 -2
- transformers/models/wavlm/modeling_wavlm.py +49 -49
- transformers/models/wavlm/modular_wavlm.py +4 -5
- transformers/models/whisper/configuration_whisper.py +6 -5
- transformers/models/whisper/english_normalizer.py +3 -4
- transformers/models/whisper/feature_extraction_whisper.py +9 -24
- transformers/models/whisper/generation_whisper.py +26 -49
- transformers/models/whisper/modeling_whisper.py +71 -73
- transformers/models/whisper/processing_whisper.py +3 -20
- transformers/models/whisper/tokenization_whisper.py +9 -30
- transformers/models/x_clip/configuration_x_clip.py +4 -2
- transformers/models/x_clip/modeling_x_clip.py +94 -96
- transformers/models/x_clip/processing_x_clip.py +2 -14
- transformers/models/xcodec/configuration_xcodec.py +4 -6
- transformers/models/xcodec/modeling_xcodec.py +15 -17
- transformers/models/xglm/configuration_xglm.py +9 -8
- transformers/models/xglm/modeling_xglm.py +49 -55
- transformers/models/xglm/tokenization_xglm.py +1 -4
- transformers/models/xlm/configuration_xlm.py +10 -8
- transformers/models/xlm/modeling_xlm.py +127 -131
- transformers/models/xlm/tokenization_xlm.py +3 -5
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +11 -3
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +96 -98
- transformers/models/xlm_roberta/modular_xlm_roberta.py +50 -53
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +1 -4
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +10 -2
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +97 -99
- transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +67 -70
- transformers/models/xlnet/configuration_xlnet.py +3 -12
- transformers/models/xlnet/modeling_xlnet.py +149 -162
- transformers/models/xlnet/tokenization_xlnet.py +1 -4
- transformers/models/xlstm/configuration_xlstm.py +8 -12
- transformers/models/xlstm/modeling_xlstm.py +61 -96
- transformers/models/xmod/configuration_xmod.py +11 -3
- transformers/models/xmod/modeling_xmod.py +111 -116
- transformers/models/yolos/configuration_yolos.py +0 -1
- transformers/models/yolos/image_processing_yolos.py +60 -62
- transformers/models/yolos/image_processing_yolos_fast.py +42 -45
- transformers/models/yolos/modeling_yolos.py +19 -21
- transformers/models/yolos/modular_yolos.py +17 -19
- transformers/models/yoso/configuration_yoso.py +8 -2
- transformers/models/yoso/modeling_yoso.py +60 -62
- transformers/models/youtu/__init__.py +27 -0
- transformers/models/youtu/configuration_youtu.py +194 -0
- transformers/models/youtu/modeling_youtu.py +619 -0
- transformers/models/youtu/modular_youtu.py +254 -0
- transformers/models/zamba/configuration_zamba.py +5 -8
- transformers/models/zamba/modeling_zamba.py +93 -125
- transformers/models/zamba2/configuration_zamba2.py +44 -50
- transformers/models/zamba2/modeling_zamba2.py +137 -165
- transformers/models/zamba2/modular_zamba2.py +79 -74
- transformers/models/zoedepth/configuration_zoedepth.py +17 -41
- transformers/models/zoedepth/image_processing_zoedepth.py +28 -29
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +20 -21
- transformers/models/zoedepth/modeling_zoedepth.py +19 -19
- transformers/pipelines/__init__.py +47 -106
- transformers/pipelines/any_to_any.py +15 -23
- transformers/pipelines/audio_utils.py +1 -2
- transformers/pipelines/automatic_speech_recognition.py +0 -2
- transformers/pipelines/base.py +13 -17
- transformers/pipelines/image_text_to_text.py +1 -2
- transformers/pipelines/question_answering.py +4 -43
- transformers/pipelines/text_classification.py +1 -14
- transformers/pipelines/text_to_audio.py +5 -1
- transformers/pipelines/token_classification.py +1 -22
- transformers/pipelines/video_classification.py +1 -9
- transformers/pipelines/zero_shot_audio_classification.py +0 -1
- transformers/pipelines/zero_shot_classification.py +0 -6
- transformers/pipelines/zero_shot_image_classification.py +0 -7
- transformers/processing_utils.py +128 -137
- transformers/pytorch_utils.py +2 -26
- transformers/quantizers/base.py +10 -0
- transformers/quantizers/quantizer_compressed_tensors.py +7 -5
- transformers/quantizers/quantizer_fbgemm_fp8.py +20 -23
- transformers/quantizers/quantizer_finegrained_fp8.py +14 -20
- transformers/quantizers/quantizer_mxfp4.py +1 -1
- transformers/quantizers/quantizer_quark.py +0 -1
- transformers/quantizers/quantizer_torchao.py +3 -19
- transformers/safetensors_conversion.py +11 -4
- transformers/testing_utils.py +6 -65
- transformers/tokenization_mistral_common.py +563 -903
- transformers/tokenization_python.py +6 -4
- transformers/tokenization_utils_base.py +228 -341
- transformers/tokenization_utils_sentencepiece.py +5 -6
- transformers/tokenization_utils_tokenizers.py +36 -7
- transformers/trainer.py +30 -41
- transformers/trainer_jit_checkpoint.py +1 -2
- transformers/trainer_seq2seq.py +1 -1
- transformers/training_args.py +414 -420
- transformers/utils/__init__.py +1 -4
- transformers/utils/attention_visualizer.py +1 -1
- transformers/utils/auto_docstring.py +567 -18
- transformers/utils/backbone_utils.py +13 -373
- transformers/utils/doc.py +4 -36
- transformers/utils/dummy_pt_objects.py +0 -42
- transformers/utils/generic.py +70 -34
- transformers/utils/import_utils.py +72 -75
- transformers/utils/loading_report.py +135 -107
- transformers/utils/quantization_config.py +8 -31
- transformers/video_processing_utils.py +24 -25
- transformers/video_utils.py +21 -23
- {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/METADATA +120 -239
- transformers-5.1.0.dist-info/RECORD +2092 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/WHEEL +1 -1
- transformers/pipelines/deprecated/text2text_generation.py +0 -408
- transformers/pipelines/image_to_text.py +0 -229
- transformers-5.0.0rc2.dist-info/RECORD +0 -2042
- {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,4 @@
 # base
-# coding=utf-8
 # Copyright 2020 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,7 +29,7 @@ from collections import OrderedDict, UserDict
 from collections.abc import Callable, Collection, Mapping, Sequence, Sized
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, NamedTuple,
+from typing import TYPE_CHECKING, Any, NamedTuple, Union
 
 import numpy as np
 from huggingface_hub import create_repo, is_offline_mode, list_repo_files
@@ -219,11 +218,11 @@ class BatchEncoding(UserDict):
 
     def __init__(
         self,
-        data:
-        encoding:
-        tensor_type:
+        data: dict[str, Any] | None = None,
+        encoding: EncodingFast | Sequence[EncodingFast] | None = None,
+        tensor_type: None | str | TensorType = None,
         prepend_batch_axis: bool = False,
-        n_sequences:
+        n_sequences: int | None = None,
     ):
         super().__init__(data)
 
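The `BatchEncoding.__init__` hunk above only modernizes the annotations to PEP 604 unions (`X | None` instead of `Optional[X]`/`Union[...]`); the constructor's runtime behavior is unchanged. A minimal construction sketch, assuming the `bert-base-uncased` checkpoint and an installed `torch`:

    # Minimal sketch; assumes torch and the bert-base-uncased checkpoint.
    from transformers import AutoTokenizer, BatchEncoding

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    batch: BatchEncoding = tokenizer("Hello world", return_tensors="pt")
    print(batch["input_ids"].shape)  # BatchEncoding behaves like a dict

    # A BatchEncoding can also be built directly from a plain dict, matching the signature above.
    manual = BatchEncoding(data={"input_ids": [[101, 102]], "attention_mask": [[1, 1]]})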
@@ -241,7 +240,7 @@ class BatchEncoding(UserDict):
         self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
 
     @property
-    def n_sequences(self) ->
+    def n_sequences(self) -> int | None:
         """
         `Optional[int]`: The number of sequences used to generate each sample from the batch encoded in this
         [`BatchEncoding`]. Currently can be one of `None` (unknown), `1` (a single sentence) or `2` (a pair of
@@ -249,7 +248,7 @@ class BatchEncoding(UserDict):
         """
         return self._n_sequences
 
-    def __getitem__(self, item:
+    def __getitem__(self, item: int | str) -> Any | EncodingFast:
         """
         If the key is a string, returns the value of the dict associated to `key` ('input_ids', 'attention_mask',
         etc.).
@@ -299,7 +298,7 @@ class BatchEncoding(UserDict):
         return self._encodings is not None
 
     @property
-    def encodings(self) ->
+    def encodings(self) -> list[EncodingFast] | None:
         """
         `Optional[list[tokenizers.Encoding]]`: The list all encodings from the tokenization process. Returns `None` if
         the input was tokenized through Python (i.e., not a fast) tokenizer.
@@ -324,7 +323,7 @@ class BatchEncoding(UserDict):
             )
         return self._encodings[batch_index].tokens
 
-    def sequence_ids(self, batch_index: int = 0) -> list[
+    def sequence_ids(self, batch_index: int = 0) -> list[int | None]:
         """
         Return a list mapping the tokens to the id of their original sentences:
 
@@ -348,7 +347,7 @@ class BatchEncoding(UserDict):
             )
         return self._encodings[batch_index].sequence_ids
 
-    def word_ids(self, batch_index: int = 0) -> list[
+    def word_ids(self, batch_index: int = 0) -> list[int | None]:
         """
         Return a list mapping the tokens to their actual word in the initial sentence for a fast tokenizer.
 
@@ -367,7 +366,7 @@ class BatchEncoding(UserDict):
             )
         return self._encodings[batch_index].word_ids
 
-    def token_to_sequence(self, batch_or_token_index: int, token_index:
+    def token_to_sequence(self, batch_or_token_index: int, token_index: int | None = None) -> int:
         """
         Get the index of the sequence represented by the given token. In the general use case, this method returns `0`
         for a single sequence or the first sequence of a pair, and `1` for the second sequence of a pair
@@ -406,7 +405,7 @@ class BatchEncoding(UserDict):
             token_index = self._seq_len + token_index
         return self._encodings[batch_index].token_to_sequence(token_index)
 
-    def token_to_word(self, batch_or_token_index: int, token_index:
+    def token_to_word(self, batch_or_token_index: int, token_index: int | None = None) -> int:
         """
         Get the index of the word corresponding (i.e. comprising) to an encoded token in a sequence of the batch.
 
@@ -445,8 +444,8 @@ class BatchEncoding(UserDict):
         return self._encodings[batch_index].token_to_word(token_index)
 
     def word_to_tokens(
-        self, batch_or_word_index: int, word_index:
-    ) ->
+        self, batch_or_word_index: int, word_index: int | None = None, sequence_index: int = 0
+    ) -> TokenSpan | None:
         """
         Get the encoded token span corresponding to a word in a sequence of the batch.
 
@@ -497,7 +496,7 @@ class BatchEncoding(UserDict):
         span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
         return TokenSpan(*span) if span is not None else None
 
-    def token_to_chars(self, batch_or_token_index: int, token_index:
+    def token_to_chars(self, batch_or_token_index: int, token_index: int | None = None) -> CharSpan | None:
         """
         Get the character span corresponding to an encoded token in a sequence of the batch.
 
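The alignment helpers above (`sequence_ids`, `word_ids`, `token_to_sequence`, `token_to_word`, `word_to_tokens`, `token_to_chars`) now spell out `| None` in their signatures because positions occupied by special tokens have no source word or character span. A short usage sketch, assuming a fast tokenizer:

    # Usage sketch; assumes the bert-base-uncased fast tokenizer.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    enc = tokenizer("Hugging Face")

    print(enc.word_ids())         # e.g. [None, 0, 1, None] -- None marks [CLS]/[SEP]
    span = enc.token_to_chars(1)  # CharSpan for a regular token; may be None for a special token
    if span is not None:
        print(span.start, span.end)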
@@ -536,9 +535,7 @@ class BatchEncoding(UserDict):
 
         return CharSpan(*span_indices) if span_indices is not None else None
 
-    def char_to_token(
-        self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0
-    ) -> int:
+    def char_to_token(self, batch_or_char_index: int, char_index: int | None = None, sequence_index: int = 0) -> int:
         """
         Get the index of the token in the encoded output comprising a character in the original string for a sequence
         of the batch.
@@ -579,7 +576,7 @@ class BatchEncoding(UserDict):
         return self._encodings[batch_index].char_to_token(char_index, sequence_index)
 
     def word_to_chars(
-        self, batch_or_word_index: int, word_index:
+        self, batch_or_word_index: int, word_index: int | None = None, sequence_index: int = 0
     ) -> CharSpan:
         """
         Get the character span in the original string corresponding to given word in a sequence of the batch.
@@ -623,7 +620,7 @@ class BatchEncoding(UserDict):
             word_index = batch_or_word_index
         return CharSpan(*(self._encodings[batch_index].word_to_chars(word_index, sequence_index)))
 
-    def char_to_word(self, batch_or_char_index: int, char_index:
+    def char_to_word(self, batch_or_char_index: int, char_index: int | None = None, sequence_index: int = 0) -> int:
         """
         Get the word in the original string corresponding to a character in the original string of a sequence of the
         batch.
@@ -662,9 +659,7 @@ class BatchEncoding(UserDict):
             char_index = batch_or_char_index
         return self._encodings[batch_index].char_to_word(char_index, sequence_index)
 
-    def convert_to_tensors(
-        self, tensor_type: Optional[Union[str, TensorType]] = None, prepend_batch_axis: bool = False
-    ):
+    def convert_to_tensors(self, tensor_type: str | TensorType | None = None, prepend_batch_axis: bool = False):
         """
         Convert the inner content to tensors.
 
@@ -758,7 +753,7 @@ class BatchEncoding(UserDict):
 
         return self
 
-    def to(self, device:
+    def to(self, device: str | torch.device, *, non_blocking: bool = False) -> BatchEncoding:
         """
         Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only).
 
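`BatchEncoding.to()` now takes a keyword-only `non_blocking` flag that is forwarded to each tensor's `.to()` call. A hedged example of the intended use, assuming CUDA is available and the encoding holds PyTorch tensors:

    # Sketch; assumes a CUDA device and return_tensors="pt".
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    batch = tokenizer(["first text", "second text"], padding=True, return_tensors="pt")

    # non_blocking=True lets the host-to-device copy overlap with compute
    # when the source tensors live in pinned memory.
    batch = batch.to("cuda", non_blocking=True)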
@@ -968,7 +963,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
 
     vocab_files_names: dict[str, str] = {}
     pretrained_vocab_files_map: dict[str, dict[str, str]] = {}
-    _auto_class:
+    _auto_class: str | None = None
 
     # first name has to correspond to main model input name
     # to make sure `tokenizer.pad(...)` works correctly
@@ -995,14 +990,13 @@ class PreTrainedTokenizerBase(PushToHubMixin):
             if hasattr(self, key) and callable(getattr(self, key)):
                 raise AttributeError(f"{key} conflicts with the method {key} in {self.__class__.__name__}")
 
+        # V5: Convert deprecated additional_special_tokens to extra_special_tokens before storing init_kwargs
+        if "additional_special_tokens" in kwargs and "extra_special_tokens" not in kwargs:
+            kwargs["extra_special_tokens"] = kwargs.pop("additional_special_tokens")
+
         self.init_kwargs = copy.deepcopy(kwargs)
         self.name_or_path = kwargs.pop("name_or_path", "")
         self._processor_class = kwargs.pop("processor_class", None)
-        # Store additional_special_tokens in init_kwargs before conversion for backward compatibility
-        additional_special_tokens_value = kwargs.pop("additional_special_tokens", None)
-        if "additional_special_tokens" not in self.init_kwargs:
-            self.init_kwargs["additional_special_tokens"] = additional_special_tokens_value
-        kwargs.setdefault("extra_special_tokens", additional_special_tokens_value)
 
         self._pad_token_type_id = 0
         self.verbose = kwargs.pop("verbose", False)
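With this hunk, a deprecated `additional_special_tokens=...` keyword reaching the base `__init__` is remapped to `extra_special_tokens` before `init_kwargs` is stored, so both spellings land on the same attribute. Roughly (the `<ctx>` token is invented for illustration):

    # Illustrative sketch of the v5 remapping; <ctx> is a hypothetical extra token.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(
        "bert-base-uncased",
        additional_special_tokens=["<ctx>"],  # legacy v4 spelling
    )
    # In 5.1.0 the kwarg is converted on the way in, so the token is expected to show up as:
    print(tok.extra_special_tokens)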
@@ -1030,21 +1024,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
                 else:
                     raise TypeError(f"Special token {key} has to be either str or AddedToken but got: {type(value)}")
             elif key == "extra_special_tokens":
-                # V5: Support extra_special_tokens in __init__
                 value = kwargs.pop(key)
                 if value is None:
                     continue
-                # If dict: treat as model specific named special tokens (attributes)
                 if isinstance(value, dict):
                     self._set_model_specific_special_tokens(special_tokens=value)
-
-                if not isinstance(value, (list, tuple)) or not all(
-                    isinstance(t, (str, AddedToken)) for t in value
-                ):
-                    raise TypeError(
-                        "extra_special_tokens must be a list/tuple of str or AddedToken, or a dict mapping names to tokens"
-                    )
+                elif isinstance(value, (list, tuple)):
                     self._extra_special_tokens = list(value)
+                else:
+                    raise TypeError("extra_special_tokens must be a list/tuple of tokens or a dict of named tokens")
             elif (
                 key.endswith("_token")
                 and key not in self.SPECIAL_TOKENS_ATTRIBUTES
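The rewritten branch accepts `extra_special_tokens` either as a list/tuple of tokens or as a dict of named, model-specific tokens (installed via `_set_model_specific_special_tokens`); anything else raises `TypeError`. A sketch of the two accepted shapes, with made-up token strings:

    # Hypothetical tokens, for illustration only.
    from transformers import AutoTokenizer

    # 1) list/tuple of plain extra special tokens
    tok = AutoTokenizer.from_pretrained("bert-base-uncased", extra_special_tokens=["<mask_1>", "<mask_2>"])

    # 2) dict of named, model-specific special tokens (e.g. multimodal placeholders),
    #    which models that define them expose as attributes such as tok.image_token.
    tok = AutoTokenizer.from_pretrained("bert-base-uncased", extra_special_tokens={"image_token": "<image>"})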
@@ -1104,7 +1092,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
     # ---- Special tokens API (moved from SpecialTokensMixin) ----
     def add_special_tokens(
         self,
-        special_tokens_dict: dict[str,
+        special_tokens_dict: dict[str, str | AddedToken | Sequence[str | AddedToken]],
         replace_extra_special_tokens=True,
     ) -> int:
         """
@@ -1168,8 +1156,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         # V5: Allowed keys are SPECIAL_TOKENS_ATTRIBUTES + "extra_special_tokens"
         # Backward compatibility: convert "additional_special_tokens" to "extra_special_tokens"
         special_tokens_dict = dict(special_tokens_dict)
-        if "additional_special_tokens" in special_tokens_dict
-            special_tokens_dict
+        if "additional_special_tokens" in special_tokens_dict:
+            special_tokens_dict.setdefault(
+                "extra_special_tokens", special_tokens_dict.pop("additional_special_tokens")
+            )
 
         allowed_keys = set(self.SPECIAL_TOKENS_ATTRIBUTES) | {"extra_special_tokens"}
         tokens_to_add = []
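`add_special_tokens` keeps accepting the legacy `"additional_special_tokens"` key and now folds it into `"extra_special_tokens"` via `setdefault`, so the legacy key only fills the slot when the new one is absent. For example (tokens invented for illustration):

    # Sketch; both calls are expected to behave the same in 5.1.0.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("bert-base-uncased")
    tok.add_special_tokens({"extra_special_tokens": ["<obs>", "<act>"]})        # v5 spelling
    tok.add_special_tokens({"additional_special_tokens": ["<obs>", "<act>"]})   # legacy spelling, remapped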
@@ -1208,7 +1198,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         return self.add_tokens(tokens_to_add, special_tokens=True)
 
     def add_tokens(
-        self, new_tokens:
+        self, new_tokens: str | AddedToken | Sequence[str | AddedToken], special_tokens: bool = False
     ) -> int:
         """
         #TODO remove this from here! PreTrainedTOkeniuzerBase should be agnostic of AddedToken.
@@ -1248,7 +1238,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
             new_tokens = [new_tokens]
         return self._add_tokens(new_tokens, special_tokens=special_tokens)
 
-    def _add_tokens(self, new_tokens:
+    def _add_tokens(self, new_tokens: list[str] | list[AddedToken], special_tokens: bool = False) -> int:
         raise NotImplementedError
 
     @property
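`add_tokens` now spells its accepted input as `str | AddedToken | Sequence[...]`, with `_add_tokens` still left to the concrete subclasses. Typical use is unchanged; remember to resize the model's embeddings afterwards:

    # Sketch of adding regular (non-special) tokens; the new tokens are illustrative.
    from transformers import AutoModelForMaskedLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

    num_added = tokenizer.add_tokens(["genomics", "transcriptomics"])
    if num_added > 0:
        model.resize_token_embeddings(len(tokenizer))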
@@ -1256,84 +1246,53 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         return self._pad_token_type_id
 
     def __setattr__(self, key, value):
-
-
-
-
+        # Handle _id/_ids suffix (eg. bos_token_id -> bos_token)
+        key_without_id = key.removesuffix("_ids").removesuffix("_id") if key.endswith(("_id", "_ids")) else key
+
+        # Named special tokens (bos_token, eos_token, etc.)
+        if key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES:
+            if key != key_without_id and value is not None:
+                value = self.convert_ids_to_tokens(value)
+            if value is not None and not isinstance(value, (str, AddedToken)):
+                raise ValueError(f"Cannot set a non-string value as the {key_without_id}")
+            self._special_tokens_map[key_without_id] = value
+            return
 
-        #
-        if
-
-
-
-
-
-
-            key = key_without_id
-
-        if not isinstance(value, (str, AddedToken)) and value is not None:
-            raise ValueError(f"Cannot set a non-string value as the {key}")
-        self._special_tokens_map[key] = value
-        # Check if this is extra_special_tokens or extra_special_tokens_ids
-        elif self.__dict__.get("_extra_special_tokens", None) is not None and key_without_id == "extra_special_tokens":
-            if key_is_special_id:
-                if value is not None:
-                    value = [self.convert_ids_to_tokens(val) for val in value]
-                key = key_without_id
+        # Extra special tokens: model-specific special tokens without standard names (eg. <mask_1>)
+        if key_without_id == "extra_special_tokens":
+            if key != key_without_id and value is not None and isinstance(value, (list, tuple)):
+                value = [self.convert_ids_to_tokens(v) for v in value]
+            if not isinstance(value, (list, tuple)) and value is not None:
+                raise ValueError(f"extra_special_tokens must be a list or tuple, got {type(value)}")
+            self._extra_special_tokens = [] if value is None else list(value)
+            return
 
-
-            if value is None:
-                self._extra_special_tokens = []
-            elif isinstance(value, dict):
-                # Dict is treated as model-specific special tokens (such as multimodal tokens)
-                self._set_model_specific_special_tokens(special_tokens=value)
-            elif isinstance(value, (list, tuple)):
-                self._extra_special_tokens = list(value)
-            else:
-                raise ValueError(f"extra_special_tokens must be a list, tuple, or dict, got {type(value)}")
-        else:
-            super().__setattr__(key, value)
+        super().__setattr__(key, value)
 
     def __getattr__(self, key):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                value = _special_tokens_map[key_without_id]
-                return str(value)
-            else:
-                attr_as_tokens = getattr(self, key_without_id)
-                return self.convert_tokens_to_ids(attr_as_tokens) if attr_as_tokens is not None else None
-
-        # Check if this is extra_special_tokens or extra_special_tokens_ids
-        elif key_without_id == "extra_special_tokens":
-            if self.__dict__.get("_extra_special_tokens", None) is not None:
-                if not key_is_special_id:
-                    return [str(tok) for tok in self.__dict__["_extra_special_tokens"]]
-                else:
-                    # extra_special_tokens_ids
-                    tokens = self.__dict__["_extra_special_tokens"]
-                    return self.convert_tokens_to_ids([str(tok) for tok in tokens]) if tokens else []
+        # Handle _id/_ids suffix (eg. bos_token_id -> bos_token)
+        key_without_id = key.removesuffix("_ids").removesuffix("_id") if key.endswith(("_id", "_ids")) else key
+
+        # Named special tokens (bos_token, eos_token, etc.)
|
|
1277
|
+
if key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES:
|
|
1278
|
+
token_value = self._special_tokens_map.get(key_without_id)
|
|
1279
|
+
if token_value is None:
|
|
1280
|
+
if self.verbose:
|
|
1281
|
+
logger.error(f"Using {key}, but it is not set yet.")
|
|
1282
|
+
return None
|
|
1283
|
+
return self.convert_tokens_to_ids(str(token_value)) if key != key_without_id else str(token_value)
|
|
1284
|
+
|
|
1285
|
+
# Extra special tokens
|
|
1286
|
+
if key_without_id == "extra_special_tokens":
|
|
1287
|
+
tokens = [str(tok) for tok in self._extra_special_tokens]
|
|
1288
|
+
return self.convert_tokens_to_ids(tokens) if key != key_without_id else tokens
|
|
1329
1289
|
|
|
1330
1290
|
if key not in self.__dict__:
|
|
1331
1291
|
raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
|
|
1332
|
-
|
|
1333
|
-
return super().__getattr__(key)
|
|
1292
|
+
return super().__getattr__(key)
|
|
1334
1293
|
|
|
1335
1294
|
def get_special_tokens_mask(
|
|
1336
|
-
self, token_ids_0: list[int], token_ids_1:
|
|
1295
|
+
self, token_ids_0: list[int], token_ids_1: list[int] | None = None, already_has_special_tokens: bool = False
|
|
1337
1296
|
) -> list[int]:
|
|
1338
1297
|
"""
|
|
1339
1298
|
Retrieve sequence ids from a token list that has no special tokens added.
|
|
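The reworked `__setattr__`/`__getattr__` above route every `*_token` / `*_token_id` attribute through the named special-tokens map, converting between tokens and ids on the fly. A small sketch of the resulting round-trip, not part of the diff; `tok` stands for any already-loaded tokenizer instance:

tok.eos_token = "</s>"                    # stored in the named special-tokens map
eos_id = tok.eos_token_id                 # *_id reads go through convert_tokens_to_ids
tok.eos_token_id = eos_id                 # *_id writes are converted back with convert_ids_to_tokens
extra = tok.extra_special_tokens          # list[str]
extra_ids = tok.extra_special_tokens_ids  # the same tokens as vocabulary ids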
@@ -1422,7 +1381,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         """
         return self.convert_tokens_to_ids(self.all_special_tokens)

-    def _set_model_specific_special_tokens(self, special_tokens: dict[str,
+    def _set_model_specific_special_tokens(self, special_tokens: dict[str, str | AddedToken]):
         """
         Adds new model-specific special tokens (e.g., for multimodal models).

@@ -1475,7 +1434,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         """
         raise NotImplementedError()

-    def convert_tokens_to_ids(self, tokens:
+    def convert_tokens_to_ids(self, tokens: str | list[str]) -> int | list[int]:
         """
         Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
         vocabulary.
@@ -1491,9 +1450,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):

         return [self._convert_token_to_id_with_added_voc(token) for token in tokens]

-    def convert_ids_to_tokens(
-        self, ids: Union[int, list[int]], skip_special_tokens: bool = False
-    ) -> Union[str, list[str]]:
+    def convert_ids_to_tokens(self, ids: int | list[int], skip_special_tokens: bool = False) -> str | list[str]:
         """
         Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
         added tokens.
@@ -1512,12 +1469,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
     @classmethod
     def from_pretrained(
         cls,
-        pretrained_model_name_or_path:
+        pretrained_model_name_or_path: str | os.PathLike,
         *init_inputs,
-        cache_dir:
+        cache_dir: str | os.PathLike | None = None,
         force_download: bool = False,
         local_files_only: bool = False,
-        token:
+        token: str | bool | None = None,
         revision: str = "main",
         trust_remote_code=False,
         **kwargs,
@@ -1614,6 +1571,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):

         pretrained_model_name_or_path = str(pretrained_model_name_or_path)
         vocab_files = {}
+        additional_files_names = {}
         init_configuration = {}

         is_local = os.path.isdir(pretrained_model_name_or_path)
@@ -1655,29 +1613,26 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         # Check for versioned tokenizer files
         if "tokenizer_file" in vocab_files:
             fast_tokenizer_file = FULL_TOKENIZER_FILE
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
-            except Exception:
-                pass
+            resolved_config_file = cached_file(
+                pretrained_model_name_or_path,
+                TOKENIZER_CONFIG_FILE,
+                cache_dir=cache_dir,
+                force_download=force_download,
+                proxies=proxies,
+                token=token,
+                revision=revision,
+                local_files_only=local_files_only,
+                subfolder=subfolder,
+                user_agent=user_agent,
+                _raise_exceptions_for_missing_entries=False,
+                _commit_hash=commit_hash,
+            )
+            if resolved_config_file is not None:
+                with open(resolved_config_file, encoding="utf-8") as reader:
+                    tokenizer_config = json.load(reader)
+                if "fast_tokenizer_files" in tokenizer_config:
+                    fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
+                commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
             vocab_files["tokenizer_file"] = fast_tokenizer_file

         # This block looks for any extra chat template files
@@ -1826,52 +1781,25 @@ class PreTrainedTokenizerBase(PushToHubMixin):
             if isinstance(init_kwargs["auto_map"], (tuple, list)):
                 init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}

-        # Preserve extra_special_tokens from tokenizer_config.json before updating with kwargs
-        # extra_special_tokens should be a list (user-defined extra tokens)
-        extra_special_tokens_from_config = init_kwargs.get("extra_special_tokens")
-        if isinstance(extra_special_tokens_from_config, (list, tuple)):
-            extra_special_tokens_from_config = list(extra_special_tokens_from_config)
-        else:
-            extra_special_tokens_from_config = None
-
         # Update with newly provided kwargs
         init_kwargs.update(kwargs)

-        # V5:
-        if "additional_special_tokens" in init_kwargs
-            init_kwargs
-
-
-
-
-        )
-
-
-
-        #
-        if
-
-
-
-            for key in list(init_kwargs.keys())
-            if key not in default_attrs
-            and key.endswith("_token")
-            and isinstance(init_kwargs[key], (str, AddedToken))
-        }
-        if model_specific_tokens:
-            # If extra_special_tokens is already a list, we need to preserve it
-            if "extra_special_tokens" in init_kwargs and isinstance(
-                init_kwargs["extra_special_tokens"], (list, tuple)
-            ):
-                # Keep the list as is, but also add model-specific tokens as a separate dict
-                # Convert to model_specific_special_tokens so __init__ handles it
-                init_kwargs["model_specific_special_tokens"] = model_specific_tokens
-            else:
-                init_kwargs["extra_special_tokens"] = model_specific_tokens
-        elif isinstance(init_kwargs.get("extra_special_tokens"), dict):
-            # If extra_special_tokens is already a dict, convert it to model_specific_special_tokens
-            # so __init__ handles it properly
-            init_kwargs["model_specific_special_tokens"] = init_kwargs.pop("extra_special_tokens")
+        # V5: Convert deprecated additional_special_tokens to extra_special_tokens
+        if "additional_special_tokens" in init_kwargs:
+            init_kwargs.setdefault("extra_special_tokens", init_kwargs.pop("additional_special_tokens"))
+
+        # V5: Collect model-specific tokens (custom *_token keys not in standard attributes)
+        default_attrs = set(cls.SPECIAL_TOKENS_ATTRIBUTES)
+        model_specific_tokens = {
+            key: init_kwargs.pop(key)
+            for key in list(init_kwargs.keys())
+            if key not in default_attrs and key.endswith("_token") and isinstance(init_kwargs[key], (str, AddedToken))
+        }
+        # If extra_special_tokens is a dict, merge it into model_specific_tokens
+        if isinstance(init_kwargs.get("extra_special_tokens"), dict):
+            model_specific_tokens.update(init_kwargs.pop("extra_special_tokens"))
+        if model_specific_tokens:
+            init_kwargs["model_specific_special_tokens"] = model_specific_tokens

         # Merge resolved_vocab_files arguments in init_kwargs.
         added_tokens_file = resolved_vocab_files.pop("added_tokens_file", None)
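In `from_pretrained`, the replacement block above rewrites the deprecated `additional_special_tokens` kwarg as `extra_special_tokens` and collects any leftover `*_token` keyword not in `SPECIAL_TOKENS_ATTRIBUTES` into `model_specific_special_tokens`. A hedged sketch of what that means at load time, not part of the diff; the checkpoint and token names are placeholders:

tok = AutoTokenizer.from_pretrained(
    "org/some-checkpoint",                # placeholder repository id
    additional_special_tokens=["<obs>"],  # deprecated spelling, folded into extra_special_tokens
    image_token="<image>",                # custom *_token kwarg, routed to model_specific_special_tokens
)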
@@ -1900,82 +1828,45 @@ class PreTrainedTokenizerBase(PushToHubMixin):
                        f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
                    )
        else:
-            #
+            # Legacy: read special_tokens_map.json and merge into init_kwargs
            if special_tokens_map_file is not None:
-                with open(special_tokens_map_file, encoding="utf-8") as
-                    special_tokens_map = json.load(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            # Dict format for model-specific tokens - keep as is
-                            init_kwargs[key] = value
-                            continue
-                        elif isinstance(value, list):
-                            # List format - merge with existing if present
-                            existing = init_kwargs.pop("extra_special_tokens", []) or []
-                            if not isinstance(existing, (list, tuple)):
-                                existing = []
-                            for token in value:
-                                if isinstance(token, dict):
-                                    token = AddedToken(**token, special=True)
-                                if token not in existing:
-                                    existing.append(token)
-                            init_kwargs[key] = existing
-                            continue
-                    init_kwargs[key] = value
-
-                # Restore extra_special_tokens from tokenizer_config.json if not in special_tokens_map.json
-                if (
-                    "extra_special_tokens" not in special_tokens_map
-                    and extra_special_tokens_before_map is not None
-                ):
-                    if "extra_special_tokens" not in init_kwargs or not isinstance(
-                        init_kwargs.get("extra_special_tokens"), (list, tuple)
-                    ):
-                        init_kwargs["extra_special_tokens"] = extra_special_tokens_before_map
-
-                # Convert extra_special_tokens dict to model_specific_special_tokens if it's a dict
-                if isinstance(init_kwargs.get("extra_special_tokens"), dict):
-                    init_kwargs["model_specific_special_tokens"] = init_kwargs.pop("extra_special_tokens")
+                with open(special_tokens_map_file, encoding="utf-8") as f:
+                    special_tokens_map = json.load(f)
+                for key, value in special_tokens_map.items():
+                    if key in kwargs and kwargs[key]:
+                        continue  # User-provided kwargs take precedence
+                    if isinstance(value, dict) and key != "extra_special_tokens":
+                        value = AddedToken(**value, special=True)
+                    elif key == "extra_special_tokens" and isinstance(value, list):
+                        # Merge list tokens, converting dicts to AddedToken
+                        existing = list(init_kwargs.get("extra_special_tokens") or [])
+                        for tok in value:
+                            tok = AddedToken(**tok, special=True) if isinstance(tok, dict) else tok
+                            if tok not in existing:
+                                existing.append(tok)
+                        value = existing
+                    init_kwargs[key] = value
+                # Convert dict extra_special_tokens to model_specific_special_tokens
+                if isinstance(init_kwargs.get("extra_special_tokens"), dict):
+                    init_kwargs.setdefault("model_specific_special_tokens", {}).update(
+                        init_kwargs.pop("extra_special_tokens")
+                    )

        # slow -> slow|fast, legacy: convert the `"added_tokens.json"` file to `added_tokens_decoder`.
        # this is for legacy purpose. We don't add the tokens after init for efficiency.
        if added_tokens_file is not None:
-            special_tokens = []
            # V5: Check both named and extra special tokens
-            for
-
-                    special_tokens.append(str(init_kwargs[key]))
-
-            # Handle extra_special_tokens
-            if "extra_special_tokens" in init_kwargs and init_kwargs["extra_special_tokens"] is not None:
-                special_tokens += [str(token) for token in init_kwargs["extra_special_tokens"]]
+            special_tokens = {str(init_kwargs[k]) for k in cls.SPECIAL_TOKENS_ATTRIBUTES if init_kwargs.get(k)}
+            special_tokens.update(str(t) for t in (init_kwargs.get("extra_special_tokens") or []))

-            with open(added_tokens_file, encoding="utf-8") as
-                added_tok_encoder = json.load(
+            with open(added_tokens_file, encoding="utf-8") as f:
+                added_tok_encoder = json.load(f)
            for str_token, index in added_tok_encoder.items():
-
-                special = str_token in special_tokens
+                is_special = str_token in special_tokens
                added_tokens_decoder[index] = AddedToken(
-                    str_token, rstrip=False, lstrip=False, normalized=not
+                    str_token, rstrip=False, lstrip=False, normalized=not is_special, special=is_special
                )
-                added_tokens_map[
+                added_tokens_map[str_token] = added_tokens_decoder[index]

            # allows converting a fast -> slow: add the `tokenizer.json`'s `"added_tokens"` to the slow tokenizer
            # if `tokenizer_config.json` is `None`
@@ -2032,7 +1923,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
        return kwargs

    @classmethod
-    def convert_added_tokens(cls, obj:
+    def convert_added_tokens(cls, obj: AddedToken | Any, save=False, add_type_field=True):
        if isinstance(obj, dict) and "__type" in obj and obj["__type"] == "AddedToken":
            obj.pop("__type")
            return AddedToken(**obj)
@@ -2052,9 +1943,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def save_pretrained(
        self,
-        save_directory:
-        legacy_format:
-        filename_prefix:
+        save_directory: str | os.PathLike,
+        legacy_format: bool | None = None,
+        filename_prefix: str | None = None,
        push_to_hub: bool = False,
        **kwargs,
    ) -> tuple[str, ...]:
@@ -2210,10 +2101,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def _save_pretrained(
        self,
-        save_directory:
+        save_directory: str | os.PathLike,
        file_names: tuple[str, ...],
-        legacy_format:
-        filename_prefix:
+        legacy_format: bool | None = None,
+        filename_prefix: str | None = None,
    ) -> tuple[str, ...]:
        """
        Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens.
@@ -2243,7 +2134,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):

        return file_names + vocab_files + (added_tokens_file,)

-    def save_vocabulary(self, save_directory: str, filename_prefix:
+    def save_vocabulary(self, save_directory: str, filename_prefix: str | None = None) -> tuple[str, ...]:
        """
        Save only the vocabulary of the tokenizer (vocabulary + added tokens).

@@ -2261,7 +2152,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
        """
        raise NotImplementedError

-    def tokenize(self, text: str, pair:
+    def tokenize(self, text: str, pair: str | None = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
        """
        Converts a string into a sequence of tokens, replacing unknown tokens with the `unk_token`.

@@ -2293,15 +2184,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
    )
    def encode(
        self,
-        text:
-        text_pair:
+        text: TextInput | PreTokenizedInput | EncodedInput,
+        text_pair: TextInput | PreTokenizedInput | EncodedInput | None = None,
        add_special_tokens: bool = True,
-        padding:
-        truncation:
-        max_length:
+        padding: bool | str | PaddingStrategy = False,
+        truncation: bool | str | TruncationStrategy | None = None,
+        max_length: int | None = None,
        stride: int = 0,
-        padding_side:
-        return_tensors:
+        padding_side: str | None = None,
+        return_tensors: str | TensorType | None = None,
        **kwargs,
    ) -> list[int]:
        """
@@ -2319,15 +2210,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
        the `tokenize` method) or a list of integers (tokenized string ids using the `convert_tokens_to_ids`
        method).
        """
-        padding_strategy, truncation_strategy, max_length,
+        padding_strategy, truncation_strategy, max_length, kwargs_updated = self._get_padding_truncation_strategies(
            padding=padding,
            truncation=truncation,
            max_length=max_length,
-            pad_to_multiple_of=kwargs.get("pad_to_multiple_of"),
-            verbose=kwargs.get("verbose", True),
            **kwargs,
        )

+        kwargs.update(kwargs_updated)
+
        encoded_inputs = self._encode_plus(
            text,
            text_pair=text_pair,
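With the annotations above, `encode` spells out that `padding`/`truncation` accept plain bools, strings, or the `PaddingStrategy`/`TruncationStrategy` enums, and that kwargs normalized by `_get_padding_truncation_strategies` are merged back before `_encode_plus`. A minimal usage sketch, not part of the diff; `tok` is an already-loaded tokenizer:

ids = tok.encode(
    "Hello world",
    padding="max_length",  # bool | str | PaddingStrategy
    truncation=True,       # bool | str | TruncationStrategy | None
    max_length=16,
)                          # -> list[int]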
@@ -2470,29 +2361,27 @@ class PreTrainedTokenizerBase(PushToHubMixin):
    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
    def __call__(
        self,
-        text:
-        text_pair:
-        text_target:
-        text_pair_target:
-            Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
-        ] = None,
+        text: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
+        text_pair: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
+        text_target: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
+        text_pair_target: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
        add_special_tokens: bool = True,
-        padding:
-        truncation:
-        max_length:
+        padding: bool | str | PaddingStrategy = False,
+        truncation: bool | str | TruncationStrategy | None = None,
+        max_length: int | None = None,
        stride: int = 0,
        is_split_into_words: bool = False,
-        pad_to_multiple_of:
-        padding_side:
-        return_tensors:
-        return_token_type_ids:
-        return_attention_mask:
+        pad_to_multiple_of: int | None = None,
+        padding_side: str | None = None,
+        return_tensors: str | TensorType | None = None,
+        return_token_type_ids: bool | None = None,
+        return_attention_mask: bool | None = None,
        return_overflowing_tokens: bool = False,
        return_special_tokens_mask: bool = False,
        return_offsets_mapping: bool = False,
        return_length: bool = False,
        verbose: bool = True,
-        tokenizer_kwargs:
+        tokenizer_kwargs: dict[str, Any] | None = None,
        **kwargs,
    ) -> BatchEncoding:
        """
@@ -2597,19 +2486,19 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def _encode_plus(
        self,
-        text:
-        text_pair:
+        text: TextInput | PreTokenizedInput | EncodedInput,
+        text_pair: TextInput | PreTokenizedInput | EncodedInput | None = None,
        add_special_tokens: bool = True,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
-        max_length:
+        max_length: int | None = None,
        stride: int = 0,
        is_split_into_words: bool = False,
-        pad_to_multiple_of:
-        padding_side:
-        return_tensors:
-        return_token_type_ids:
-        return_attention_mask:
+        pad_to_multiple_of: int | None = None,
+        padding_side: str | None = None,
+        return_tensors: str | TensorType | None = None,
+        return_token_type_ids: bool | None = None,
+        return_attention_mask: bool | None = None,
        return_overflowing_tokens: bool = False,
        return_special_tokens_mask: bool = False,
        return_offsets_mapping: bool = False,
@@ -2622,19 +2511,17 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def pad(
        self,
-        encoded_inputs:
-
-
-
-
-
-
-
-
-
-
-        return_attention_mask: Optional[bool] = None,
-        return_tensors: Optional[Union[str, TensorType]] = None,
+        encoded_inputs: BatchEncoding
+        | list[BatchEncoding]
+        | dict[str, EncodedInput]
+        | dict[str, list[EncodedInput]]
+        | list[dict[str, EncodedInput]],
+        padding: bool | str | PaddingStrategy = True,
+        max_length: int | None = None,
+        pad_to_multiple_of: int | None = None,
+        padding_side: str | None = None,
+        return_attention_mask: bool | None = None,
+        return_tensors: str | TensorType | None = None,
        verbose: bool = True,
    ) -> BatchEncoding:
        """
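The `pad` signature above makes explicit that a list of feature dicts (or `BatchEncoding`s) can be padded in one call, which is how it is typically used as a collate step. Sketch, not part of the diff; `tok` is an already-loaded tokenizer:

features = [{"input_ids": [1, 2, 3]}, {"input_ids": [4, 5]}]
batch = tok.pad(features, padding=True, return_tensors="pt")
# batch["input_ids"] and batch["attention_mask"] are now rectangular tensors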
@@ -2795,12 +2682,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def _pad(
        self,
-        encoded_inputs:
-        max_length:
+        encoded_inputs: dict[str, EncodedInput] | BatchEncoding,
+        max_length: int | None = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
-        pad_to_multiple_of:
-        padding_side:
-        return_attention_mask:
+        pad_to_multiple_of: int | None = None,
+        padding_side: str | None = None,
+        return_attention_mask: bool | None = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -2890,10 +2777,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def decode(
        self,
-        token_ids:
+        token_ids: int | list[int] | list[list[int]] | np.ndarray | torch.Tensor,
        skip_special_tokens: bool = False,
        **kwargs,
-    ) ->
+    ) -> str | list[str]:
        """
        Converts a sequence of ids into a string, or a list of sequences into a list of strings,
        using the tokenizer and vocabulary with options to remove special tokens and clean up
@@ -2938,9 +2825,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def batch_decode(
        self,
-        sequences:
+        sequences: list[int] | list[list[int]] | np.ndarray | torch.Tensor,
        skip_special_tokens: bool = False,
-        clean_up_tokenization_spaces:
+        clean_up_tokenization_spaces: bool | None = None,
        **kwargs,
    ) -> list[str]:
        """
@@ -2977,14 +2864,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def _decode(
        self,
-        token_ids:
+        token_ids: int | list[int],
        skip_special_tokens: bool = False,
-        clean_up_tokenization_spaces:
+        clean_up_tokenization_spaces: bool | None = None,
        **kwargs,
    ) -> str:
        raise NotImplementedError

-    def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length:
+    def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: int | None, verbose: bool):
        """
        Depending on the input and internal state we might trigger a warning about a sequence that is too long for its
        corresponding model
@@ -3026,22 +2913,22 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def apply_chat_template(
        self,
-        conversation:
-        tools:
-        documents:
-        chat_template:
+        conversation: list[dict[str, str]] | list[list[dict[str, str]]],
+        tools: list[dict | Callable] | None = None,
+        documents: list[dict[str, str]] | None = None,
+        chat_template: str | None = None,
        add_generation_prompt: bool = False,
        continue_final_message: bool = False,
        tokenize: bool = True,
-        padding:
+        padding: bool | str | PaddingStrategy = False,
        truncation: bool = False,
-        max_length:
-        return_tensors:
+        max_length: int | None = None,
+        return_tensors: str | TensorType | None = None,
        return_dict: bool = True,
        return_assistant_tokens_mask: bool = False,
-        tokenizer_kwargs:
+        tokenizer_kwargs: dict[str, Any] | None = None,
        **kwargs,
-    ) ->
+    ) -> str | list[int] | list[str] | list[list[int]] | BatchEncoding:
        """
        Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token
        ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
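Since `return_dict` defaults to `True` in `apply_chat_template`, the tokenized path returns a `BatchEncoding` rather than a bare id list. Sketch, not part of the diff; `tok` is an already-loaded tokenizer with a chat template:

messages = [{"role": "user", "content": "What is the capital of France?"}]
out = tok.apply_chat_template(messages, add_generation_prompt=True, return_dict=True)
prompt_ids = out["input_ids"]
prompt_text = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)  # plain string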
@@ -3095,7 +2982,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
                values are:
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.
-            return_dict (`bool`, defaults to `
+            return_dict (`bool`, defaults to `True`):
                Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
            tokenizer_kwargs (`dict[str: Any]`, *optional*): Additional kwargs to pass to the tokenizer.
            return_assistant_tokens_mask (`bool`, defaults to `False`):
@@ -3199,7 +3086,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
    def encode_message_with_chat_template(
        self,
        message: dict[str, str],
-        conversation_history:
+        conversation_history: list[dict[str, str]] | None = None,
        **kwargs,
    ) -> list[int]:
        """
@@ -3256,7 +3143,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
            return tokens[i:]
        return tokens[min_len:]

-    def get_chat_template(self, chat_template:
+    def get_chat_template(self, chat_template: str | None = None, tools: list[dict] | None = None) -> str:
        """
        Retrieve the chat template string used for tokenizing chat messages. This template is used
        internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
@@ -3312,9 +3199,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):

    def save_chat_templates(
        self,
-        save_directory:
+        save_directory: str | os.PathLike,
        tokenizer_config: dict,
-        filename_prefix:
+        filename_prefix: str | None,
        save_jinja_files: bool,
    ):
        """
@@ -3461,7 +3348,8 @@ def find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
            ):
                return candidate
        except Exception:
-
+            # TODO: tighten to OSError / ProxyError
+            continue

    subfolder = kwargs.get("subfolder", "")
    local_files_only = kwargs.get("local_files_only", False)
@@ -3491,8 +3379,9 @@ def find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
        for entry in entries:
            if entry.path.endswith(".model"):
                return entry.path if not subfolder else entry.path.removeprefix(f"{subfolder}/")
-    except Exception:
-
+    except Exception as e:
+        # TODO: tighten exception class
+        logger.debug(f"Could not list Hub repository files: {e}")

    return None

@@ -3613,9 +3502,7 @@ def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
    return prepend_scheme


-def generate_merges(
-    vocab, vocab_scores: Optional[dict[str, float]] = None, skip_tokens: Optional[Collection[str]] = None
-):
+def generate_merges(vocab, vocab_scores: dict[str, float] | None = None, skip_tokens: Collection[str] | None = None):
    skip_tokens = set(skip_tokens) if skip_tokens is not None else set()
    reverse = vocab_scores is not None
    vocab_scores = dict(vocab_scores) if reverse else vocab