transformers 5.0.0__py3-none-any.whl → 5.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only; it reflects the changes between the package versions as they appear in their respective public registries.
- transformers/__init__.py +36 -55
- transformers/activations.py +1 -1
- transformers/audio_utils.py +33 -32
- transformers/cache_utils.py +139 -32
- transformers/cli/chat.py +3 -3
- transformers/cli/serve.py +19 -49
- transformers/cli/transformers.py +1 -2
- transformers/configuration_utils.py +155 -129
- transformers/conversion_mapping.py +22 -158
- transformers/convert_slow_tokenizer.py +17 -227
- transformers/core_model_loading.py +185 -528
- transformers/data/data_collator.py +4 -12
- transformers/data/processors/glue.py +1 -0
- transformers/data/processors/utils.py +1 -0
- transformers/data/processors/xnli.py +1 -0
- transformers/dependency_versions_check.py +1 -0
- transformers/dependency_versions_table.py +7 -5
- transformers/distributed/configuration_utils.py +2 -1
- transformers/dynamic_module_utils.py +25 -24
- transformers/feature_extraction_sequence_utils.py +23 -19
- transformers/feature_extraction_utils.py +33 -64
- transformers/file_utils.py +1 -0
- transformers/generation/__init__.py +1 -11
- transformers/generation/candidate_generator.py +33 -80
- transformers/generation/configuration_utils.py +133 -189
- transformers/generation/continuous_batching/__init__.py +1 -4
- transformers/generation/continuous_batching/cache.py +25 -83
- transformers/generation/continuous_batching/cache_manager.py +45 -155
- transformers/generation/continuous_batching/continuous_api.py +147 -270
- transformers/generation/continuous_batching/requests.py +3 -51
- transformers/generation/continuous_batching/scheduler.py +105 -160
- transformers/generation/logits_process.py +128 -0
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/streamers.py +1 -0
- transformers/generation/utils.py +123 -122
- transformers/generation/watermarking.py +6 -8
- transformers/hf_argparser.py +13 -9
- transformers/hyperparameter_search.py +2 -1
- transformers/image_processing_base.py +23 -12
- transformers/image_processing_utils.py +15 -11
- transformers/image_processing_utils_fast.py +75 -85
- transformers/image_transforms.py +42 -73
- transformers/image_utils.py +32 -30
- transformers/initialization.py +0 -37
- transformers/integrations/__init__.py +2 -16
- transformers/integrations/accelerate.py +113 -58
- transformers/integrations/aqlm.py +66 -36
- transformers/integrations/awq.py +516 -45
- transformers/integrations/bitnet.py +105 -47
- transformers/integrations/bitsandbytes.py +202 -91
- transformers/integrations/deepspeed.py +4 -161
- transformers/integrations/eetq.py +82 -84
- transformers/integrations/executorch.py +1 -1
- transformers/integrations/fbgemm_fp8.py +145 -190
- transformers/integrations/finegrained_fp8.py +215 -249
- transformers/integrations/flash_attention.py +3 -3
- transformers/integrations/flex_attention.py +1 -1
- transformers/integrations/fp_quant.py +0 -90
- transformers/integrations/ggml.py +2 -11
- transformers/integrations/higgs.py +62 -37
- transformers/integrations/hub_kernels.py +8 -65
- transformers/integrations/integration_utils.py +3 -47
- transformers/integrations/mistral.py +0 -12
- transformers/integrations/mxfp4.py +80 -33
- transformers/integrations/peft.py +191 -483
- transformers/integrations/quanto.py +56 -77
- transformers/integrations/spqr.py +90 -42
- transformers/integrations/tensor_parallel.py +221 -167
- transformers/integrations/torchao.py +43 -35
- transformers/integrations/vptq.py +59 -40
- transformers/kernels/__init__.py +0 -0
- transformers/{models/pe_audio_video/processing_pe_audio_video.py → kernels/falcon_mamba/__init__.py} +3 -12
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +529 -0
- transformers/loss/loss_utils.py +0 -2
- transformers/masking_utils.py +55 -51
- transformers/model_debugging_utils.py +5 -4
- transformers/modelcard.py +194 -15
- transformers/modeling_attn_mask_utils.py +19 -19
- transformers/modeling_flash_attention_utils.py +27 -27
- transformers/modeling_gguf_pytorch_utils.py +24 -79
- transformers/modeling_layers.py +22 -21
- transformers/modeling_outputs.py +253 -242
- transformers/modeling_rope_utils.py +117 -138
- transformers/modeling_utils.py +739 -850
- transformers/models/__init__.py +0 -27
- transformers/models/afmoe/configuration_afmoe.py +33 -40
- transformers/models/afmoe/modeling_afmoe.py +54 -42
- transformers/models/afmoe/modular_afmoe.py +33 -23
- transformers/models/aimv2/configuration_aimv2.py +10 -2
- transformers/models/aimv2/modeling_aimv2.py +42 -47
- transformers/models/aimv2/modular_aimv2.py +19 -17
- transformers/models/albert/configuration_albert.py +2 -8
- transformers/models/albert/modeling_albert.py +69 -70
- transformers/models/albert/tokenization_albert.py +14 -5
- transformers/models/align/configuration_align.py +6 -8
- transformers/models/align/modeling_align.py +89 -94
- transformers/models/align/processing_align.py +30 -2
- transformers/models/altclip/configuration_altclip.py +7 -4
- transformers/models/altclip/modeling_altclip.py +103 -114
- transformers/models/altclip/processing_altclip.py +15 -2
- transformers/models/apertus/__init__.py +1 -0
- transformers/models/apertus/configuration_apertus.py +28 -23
- transformers/models/apertus/modeling_apertus.py +40 -39
- transformers/models/apertus/modular_apertus.py +38 -37
- transformers/models/arcee/configuration_arcee.py +30 -25
- transformers/models/arcee/modeling_arcee.py +39 -36
- transformers/models/arcee/modular_arcee.py +23 -20
- transformers/models/aria/configuration_aria.py +44 -31
- transformers/models/aria/image_processing_aria.py +27 -25
- transformers/models/aria/modeling_aria.py +106 -110
- transformers/models/aria/modular_aria.py +127 -118
- transformers/models/aria/processing_aria.py +35 -28
- transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +1 -0
- transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +6 -3
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +8 -6
- transformers/models/audioflamingo3/__init__.py +1 -0
- transformers/models/audioflamingo3/configuration_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +49 -58
- transformers/models/audioflamingo3/modular_audioflamingo3.py +43 -53
- transformers/models/audioflamingo3/processing_audioflamingo3.py +30 -33
- transformers/models/auto/auto_factory.py +7 -6
- transformers/models/auto/configuration_auto.py +5 -66
- transformers/models/auto/feature_extraction_auto.py +10 -14
- transformers/models/auto/image_processing_auto.py +41 -32
- transformers/models/auto/modeling_auto.py +188 -46
- transformers/models/auto/processing_auto.py +11 -24
- transformers/models/auto/tokenization_auto.py +588 -171
- transformers/models/auto/video_processing_auto.py +10 -12
- transformers/models/autoformer/configuration_autoformer.py +7 -4
- transformers/models/autoformer/modeling_autoformer.py +101 -104
- transformers/models/aya_vision/configuration_aya_vision.py +1 -4
- transformers/models/aya_vision/modeling_aya_vision.py +102 -71
- transformers/models/aya_vision/modular_aya_vision.py +74 -46
- transformers/models/aya_vision/processing_aya_vision.py +53 -25
- transformers/models/bamba/configuration_bamba.py +39 -34
- transformers/models/bamba/modeling_bamba.py +86 -82
- transformers/models/bamba/modular_bamba.py +72 -70
- transformers/models/bark/configuration_bark.py +8 -6
- transformers/models/bark/generation_configuration_bark.py +5 -3
- transformers/models/bark/modeling_bark.py +57 -54
- transformers/models/bark/processing_bark.py +41 -19
- transformers/models/bart/configuration_bart.py +6 -9
- transformers/models/bart/modeling_bart.py +126 -135
- transformers/models/barthez/tokenization_barthez.py +11 -3
- transformers/models/bartpho/tokenization_bartpho.py +7 -6
- transformers/models/beit/configuration_beit.py +11 -0
- transformers/models/beit/image_processing_beit.py +56 -53
- transformers/models/beit/image_processing_beit_fast.py +12 -10
- transformers/models/beit/modeling_beit.py +60 -69
- transformers/models/bert/configuration_bert.py +2 -12
- transformers/models/bert/modeling_bert.py +122 -114
- transformers/models/bert/tokenization_bert.py +23 -8
- transformers/models/bert/tokenization_bert_legacy.py +5 -3
- transformers/models/bert_generation/configuration_bert_generation.py +2 -17
- transformers/models/bert_generation/modeling_bert_generation.py +49 -49
- transformers/models/bert_generation/tokenization_bert_generation.py +3 -2
- transformers/models/bert_japanese/tokenization_bert_japanese.py +6 -5
- transformers/models/bertweet/tokenization_bertweet.py +3 -1
- transformers/models/big_bird/configuration_big_bird.py +9 -12
- transformers/models/big_bird/modeling_big_bird.py +109 -116
- transformers/models/big_bird/tokenization_big_bird.py +43 -16
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -9
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +117 -130
- transformers/models/biogpt/configuration_biogpt.py +2 -8
- transformers/models/biogpt/modeling_biogpt.py +76 -72
- transformers/models/biogpt/modular_biogpt.py +66 -62
- transformers/models/biogpt/tokenization_biogpt.py +5 -3
- transformers/models/bit/configuration_bit.py +1 -0
- transformers/models/bit/image_processing_bit.py +24 -21
- transformers/models/bit/image_processing_bit_fast.py +1 -0
- transformers/models/bit/modeling_bit.py +12 -25
- transformers/models/bitnet/configuration_bitnet.py +28 -23
- transformers/models/bitnet/modeling_bitnet.py +39 -36
- transformers/models/bitnet/modular_bitnet.py +6 -4
- transformers/models/blenderbot/configuration_blenderbot.py +5 -8
- transformers/models/blenderbot/modeling_blenderbot.py +96 -77
- transformers/models/blenderbot/tokenization_blenderbot.py +24 -18
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +5 -8
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +69 -79
- transformers/models/blenderbot_small/tokenization_blenderbot_small.py +3 -1
- transformers/models/blip/configuration_blip.py +10 -9
- transformers/models/blip/image_processing_blip.py +20 -17
- transformers/models/blip/image_processing_blip_fast.py +1 -0
- transformers/models/blip/modeling_blip.py +108 -117
- transformers/models/blip/modeling_blip_text.py +65 -73
- transformers/models/blip/processing_blip.py +36 -5
- transformers/models/blip_2/configuration_blip_2.py +2 -2
- transformers/models/blip_2/modeling_blip_2.py +118 -146
- transformers/models/blip_2/processing_blip_2.py +38 -8
- transformers/models/bloom/configuration_bloom.py +2 -5
- transformers/models/bloom/modeling_bloom.py +104 -77
- transformers/models/blt/configuration_blt.py +86 -94
- transformers/models/blt/modeling_blt.py +81 -238
- transformers/models/blt/modular_blt.py +65 -228
- transformers/models/bridgetower/configuration_bridgetower.py +2 -7
- transformers/models/bridgetower/image_processing_bridgetower.py +35 -34
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +16 -13
- transformers/models/bridgetower/modeling_bridgetower.py +119 -141
- transformers/models/bridgetower/processing_bridgetower.py +16 -2
- transformers/models/bros/configuration_bros.py +18 -24
- transformers/models/bros/modeling_bros.py +80 -90
- transformers/models/bros/processing_bros.py +12 -2
- transformers/models/byt5/tokenization_byt5.py +6 -4
- transformers/models/camembert/configuration_camembert.py +2 -8
- transformers/models/camembert/modeling_camembert.py +195 -196
- transformers/models/camembert/modular_camembert.py +54 -51
- transformers/models/camembert/tokenization_camembert.py +13 -6
- transformers/models/canine/configuration_canine.py +2 -4
- transformers/models/canine/modeling_canine.py +75 -84
- transformers/models/canine/tokenization_canine.py +1 -2
- transformers/models/chameleon/configuration_chameleon.py +34 -29
- transformers/models/chameleon/image_processing_chameleon.py +24 -21
- transformers/models/chameleon/image_processing_chameleon_fast.py +6 -5
- transformers/models/chameleon/modeling_chameleon.py +93 -142
- transformers/models/chameleon/processing_chameleon.py +41 -16
- transformers/models/chinese_clip/configuration_chinese_clip.py +8 -10
- transformers/models/chinese_clip/image_processing_chinese_clip.py +24 -21
- transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +1 -0
- transformers/models/chinese_clip/modeling_chinese_clip.py +92 -96
- transformers/models/chinese_clip/processing_chinese_clip.py +15 -2
- transformers/models/clap/configuration_clap.py +9 -4
- transformers/models/clap/feature_extraction_clap.py +12 -11
- transformers/models/clap/modeling_clap.py +123 -136
- transformers/models/clap/processing_clap.py +15 -2
- transformers/models/clip/configuration_clip.py +2 -4
- transformers/models/clip/image_processing_clip.py +24 -21
- transformers/models/clip/image_processing_clip_fast.py +1 -9
- transformers/models/clip/modeling_clip.py +65 -65
- transformers/models/clip/processing_clip.py +14 -2
- transformers/models/clip/tokenization_clip.py +46 -21
- transformers/models/clipseg/configuration_clipseg.py +2 -4
- transformers/models/clipseg/modeling_clipseg.py +109 -119
- transformers/models/clipseg/processing_clipseg.py +42 -19
- transformers/models/clvp/configuration_clvp.py +5 -15
- transformers/models/clvp/feature_extraction_clvp.py +10 -7
- transformers/models/clvp/modeling_clvp.py +146 -155
- transformers/models/clvp/number_normalizer.py +2 -1
- transformers/models/clvp/processing_clvp.py +20 -3
- transformers/models/clvp/tokenization_clvp.py +64 -1
- transformers/models/code_llama/tokenization_code_llama.py +44 -18
- transformers/models/codegen/configuration_codegen.py +4 -4
- transformers/models/codegen/modeling_codegen.py +53 -63
- transformers/models/codegen/tokenization_codegen.py +47 -17
- transformers/models/cohere/configuration_cohere.py +30 -25
- transformers/models/cohere/modeling_cohere.py +42 -40
- transformers/models/cohere/modular_cohere.py +29 -26
- transformers/models/cohere/tokenization_cohere.py +46 -15
- transformers/models/cohere2/configuration_cohere2.py +32 -31
- transformers/models/cohere2/modeling_cohere2.py +44 -42
- transformers/models/cohere2/modular_cohere2.py +54 -54
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +14 -13
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +58 -59
- transformers/models/cohere2_vision/modular_cohere2_vision.py +46 -45
- transformers/models/cohere2_vision/processing_cohere2_vision.py +36 -6
- transformers/models/colpali/configuration_colpali.py +1 -0
- transformers/models/colpali/modeling_colpali.py +16 -14
- transformers/models/colpali/modular_colpali.py +51 -11
- transformers/models/colpali/processing_colpali.py +52 -14
- transformers/models/colqwen2/modeling_colqwen2.py +28 -28
- transformers/models/colqwen2/modular_colqwen2.py +74 -37
- transformers/models/colqwen2/processing_colqwen2.py +52 -16
- transformers/models/conditional_detr/configuration_conditional_detr.py +2 -1
- transformers/models/conditional_detr/image_processing_conditional_detr.py +70 -67
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +36 -36
- transformers/models/conditional_detr/modeling_conditional_detr.py +87 -99
- transformers/models/conditional_detr/modular_conditional_detr.py +3 -49
- transformers/models/convbert/configuration_convbert.py +8 -11
- transformers/models/convbert/modeling_convbert.py +87 -94
- transformers/models/convbert/tokenization_convbert.py +1 -0
- transformers/models/convnext/configuration_convnext.py +1 -0
- transformers/models/convnext/image_processing_convnext.py +23 -20
- transformers/models/convnext/image_processing_convnext_fast.py +21 -16
- transformers/models/convnext/modeling_convnext.py +12 -9
- transformers/models/convnextv2/configuration_convnextv2.py +1 -0
- transformers/models/convnextv2/modeling_convnextv2.py +12 -9
- transformers/models/cpm/tokenization_cpm.py +7 -6
- transformers/models/cpm/tokenization_cpm_fast.py +5 -3
- transformers/models/cpmant/configuration_cpmant.py +1 -4
- transformers/models/cpmant/modeling_cpmant.py +40 -38
- transformers/models/cpmant/tokenization_cpmant.py +3 -1
- transformers/models/csm/configuration_csm.py +66 -58
- transformers/models/csm/generation_csm.py +35 -31
- transformers/models/csm/modeling_csm.py +85 -85
- transformers/models/csm/modular_csm.py +58 -58
- transformers/models/csm/processing_csm.py +68 -25
- transformers/models/ctrl/configuration_ctrl.py +1 -16
- transformers/models/ctrl/modeling_ctrl.py +44 -54
- transformers/models/ctrl/tokenization_ctrl.py +1 -0
- transformers/models/cvt/configuration_cvt.py +1 -0
- transformers/models/cvt/modeling_cvt.py +16 -20
- transformers/models/cwm/__init__.py +1 -0
- transformers/models/cwm/configuration_cwm.py +12 -8
- transformers/models/cwm/modeling_cwm.py +39 -37
- transformers/models/cwm/modular_cwm.py +12 -10
- transformers/models/d_fine/configuration_d_fine.py +5 -7
- transformers/models/d_fine/modeling_d_fine.py +128 -138
- transformers/models/d_fine/modular_d_fine.py +18 -33
- transformers/models/dab_detr/configuration_dab_detr.py +3 -6
- transformers/models/dab_detr/modeling_dab_detr.py +75 -81
- transformers/models/dac/configuration_dac.py +1 -0
- transformers/models/dac/feature_extraction_dac.py +9 -6
- transformers/models/dac/modeling_dac.py +26 -24
- transformers/models/data2vec/configuration_data2vec_audio.py +2 -4
- transformers/models/data2vec/configuration_data2vec_text.py +3 -11
- transformers/models/data2vec/configuration_data2vec_vision.py +1 -0
- transformers/models/data2vec/modeling_data2vec_audio.py +56 -57
- transformers/models/data2vec/modeling_data2vec_text.py +93 -98
- transformers/models/data2vec/modeling_data2vec_vision.py +45 -49
- transformers/models/data2vec/modular_data2vec_audio.py +1 -6
- transformers/models/data2vec/modular_data2vec_text.py +54 -58
- transformers/models/dbrx/configuration_dbrx.py +22 -36
- transformers/models/dbrx/modeling_dbrx.py +45 -42
- transformers/models/dbrx/modular_dbrx.py +33 -31
- transformers/models/deberta/configuration_deberta.py +1 -6
- transformers/models/deberta/modeling_deberta.py +60 -64
- transformers/models/deberta/tokenization_deberta.py +21 -9
- transformers/models/deberta_v2/configuration_deberta_v2.py +1 -6
- transformers/models/deberta_v2/modeling_deberta_v2.py +65 -71
- transformers/models/deberta_v2/tokenization_deberta_v2.py +29 -11
- transformers/models/decision_transformer/configuration_decision_transformer.py +2 -3
- transformers/models/decision_transformer/modeling_decision_transformer.py +56 -60
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +44 -39
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +43 -43
- transformers/models/deepseek_v2/modular_deepseek_v2.py +49 -48
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +45 -40
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +42 -45
- transformers/models/deepseek_v3/modular_deepseek_v3.py +9 -14
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +3 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +26 -25
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +10 -10
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +48 -57
- transformers/models/deepseek_vl/modular_deepseek_vl.py +43 -14
- transformers/models/deepseek_vl/processing_deepseek_vl.py +41 -10
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +5 -3
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +35 -35
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +24 -20
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +61 -109
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +118 -146
- transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +44 -12
- transformers/models/deformable_detr/configuration_deformable_detr.py +3 -2
- transformers/models/deformable_detr/image_processing_deformable_detr.py +61 -59
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +28 -28
- transformers/models/deformable_detr/modeling_deformable_detr.py +82 -88
- transformers/models/deformable_detr/modular_deformable_detr.py +3 -1
- transformers/models/deit/configuration_deit.py +1 -0
- transformers/models/deit/image_processing_deit.py +21 -18
- transformers/models/deit/image_processing_deit_fast.py +1 -0
- transformers/models/deit/modeling_deit.py +22 -24
- transformers/models/depth_anything/configuration_depth_anything.py +4 -2
- transformers/models/depth_anything/modeling_depth_anything.py +10 -10
- transformers/models/depth_pro/configuration_depth_pro.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro.py +23 -22
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +10 -8
- transformers/models/depth_pro/modeling_depth_pro.py +27 -31
- transformers/models/detr/configuration_detr.py +2 -1
- transformers/models/detr/image_processing_detr.py +66 -64
- transformers/models/detr/image_processing_detr_fast.py +34 -33
- transformers/models/detr/modeling_detr.py +79 -95
- transformers/models/dia/configuration_dia.py +15 -9
- transformers/models/dia/feature_extraction_dia.py +9 -6
- transformers/models/dia/generation_dia.py +50 -48
- transformers/models/dia/modeling_dia.py +69 -78
- transformers/models/dia/modular_dia.py +56 -64
- transformers/models/dia/processing_dia.py +29 -39
- transformers/models/dia/tokenization_dia.py +6 -3
- transformers/models/diffllama/configuration_diffllama.py +30 -25
- transformers/models/diffllama/modeling_diffllama.py +49 -46
- transformers/models/diffllama/modular_diffllama.py +19 -17
- transformers/models/dinat/configuration_dinat.py +1 -0
- transformers/models/dinat/modeling_dinat.py +44 -47
- transformers/models/dinov2/configuration_dinov2.py +1 -0
- transformers/models/dinov2/modeling_dinov2.py +15 -15
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +1 -1
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +15 -16
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +9 -9
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +7 -4
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +6 -3
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +8 -5
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +9 -7
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +18 -19
- transformers/models/dinov3_vit/modular_dinov3_vit.py +15 -16
- transformers/models/distilbert/configuration_distilbert.py +2 -8
- transformers/models/distilbert/modeling_distilbert.py +55 -55
- transformers/models/distilbert/tokenization_distilbert.py +1 -13
- transformers/models/doge/__init__.py +1 -0
- transformers/models/doge/configuration_doge.py +32 -39
- transformers/models/doge/modeling_doge.py +49 -45
- transformers/models/doge/modular_doge.py +63 -71
- transformers/models/donut/configuration_donut_swin.py +1 -0
- transformers/models/donut/image_processing_donut.py +29 -26
- transformers/models/donut/image_processing_donut_fast.py +15 -9
- transformers/models/donut/modeling_donut_swin.py +58 -62
- transformers/models/donut/processing_donut.py +26 -5
- transformers/models/dots1/configuration_dots1.py +33 -41
- transformers/models/dots1/modeling_dots1.py +45 -54
- transformers/models/dots1/modular_dots1.py +4 -5
- transformers/models/dpr/configuration_dpr.py +2 -19
- transformers/models/dpr/modeling_dpr.py +39 -42
- transformers/models/dpr/tokenization_dpr.py +9 -19
- transformers/models/dpr/tokenization_dpr_fast.py +9 -7
- transformers/models/dpt/configuration_dpt.py +2 -1
- transformers/models/dpt/image_processing_dpt.py +66 -65
- transformers/models/dpt/image_processing_dpt_fast.py +20 -18
- transformers/models/dpt/modeling_dpt.py +30 -32
- transformers/models/dpt/modular_dpt.py +17 -15
- transformers/models/edgetam/configuration_edgetam.py +3 -2
- transformers/models/edgetam/modeling_edgetam.py +86 -86
- transformers/models/edgetam/modular_edgetam.py +26 -21
- transformers/models/edgetam_video/__init__.py +1 -0
- transformers/models/edgetam_video/configuration_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modeling_edgetam_video.py +158 -169
- transformers/models/edgetam_video/modular_edgetam_video.py +37 -30
- transformers/models/efficientloftr/configuration_efficientloftr.py +5 -4
- transformers/models/efficientloftr/image_processing_efficientloftr.py +16 -14
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +9 -9
- transformers/models/efficientloftr/modeling_efficientloftr.py +38 -59
- transformers/models/efficientloftr/modular_efficientloftr.py +3 -1
- transformers/models/efficientnet/configuration_efficientnet.py +1 -0
- transformers/models/efficientnet/image_processing_efficientnet.py +32 -28
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +19 -17
- transformers/models/efficientnet/modeling_efficientnet.py +15 -19
- transformers/models/electra/configuration_electra.py +3 -13
- transformers/models/electra/modeling_electra.py +103 -108
- transformers/models/emu3/configuration_emu3.py +17 -13
- transformers/models/emu3/image_processing_emu3.py +39 -44
- transformers/models/emu3/modeling_emu3.py +108 -148
- transformers/models/emu3/modular_emu3.py +73 -115
- transformers/models/emu3/processing_emu3.py +43 -18
- transformers/models/encodec/configuration_encodec.py +4 -2
- transformers/models/encodec/feature_extraction_encodec.py +13 -10
- transformers/models/encodec/modeling_encodec.py +29 -39
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +2 -12
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +43 -37
- transformers/models/eomt/configuration_eomt.py +1 -0
- transformers/models/eomt/image_processing_eomt.py +56 -66
- transformers/models/eomt/image_processing_eomt_fast.py +33 -76
- transformers/models/eomt/modeling_eomt.py +18 -23
- transformers/models/eomt/modular_eomt.py +13 -18
- transformers/models/ernie/configuration_ernie.py +3 -24
- transformers/models/ernie/modeling_ernie.py +132 -127
- transformers/models/ernie/modular_ernie.py +103 -97
- transformers/models/ernie4_5/configuration_ernie4_5.py +27 -23
- transformers/models/ernie4_5/modeling_ernie4_5.py +38 -36
- transformers/models/ernie4_5/modular_ernie4_5.py +4 -3
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +36 -32
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +55 -56
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +46 -18
- transformers/models/esm/configuration_esm.py +15 -11
- transformers/models/esm/modeling_esm.py +34 -38
- transformers/models/esm/modeling_esmfold.py +49 -53
- transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
- transformers/models/esm/openfold_utils/loss.py +2 -1
- transformers/models/esm/openfold_utils/protein.py +16 -15
- transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
- transformers/models/esm/tokenization_esm.py +4 -2
- transformers/models/evolla/configuration_evolla.py +40 -50
- transformers/models/evolla/modeling_evolla.py +66 -71
- transformers/models/evolla/modular_evolla.py +47 -53
- transformers/models/evolla/processing_evolla.py +35 -23
- transformers/models/exaone4/configuration_exaone4.py +25 -23
- transformers/models/exaone4/modeling_exaone4.py +38 -35
- transformers/models/exaone4/modular_exaone4.py +46 -44
- transformers/models/falcon/configuration_falcon.py +26 -31
- transformers/models/falcon/modeling_falcon.py +80 -82
- transformers/models/falcon_h1/configuration_falcon_h1.py +51 -45
- transformers/models/falcon_h1/modeling_falcon_h1.py +82 -85
- transformers/models/falcon_h1/modular_falcon_h1.py +51 -56
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +2 -1
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +82 -75
- transformers/models/falcon_mamba/modular_falcon_mamba.py +45 -28
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +6 -2
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +60 -76
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +3 -2
- transformers/models/flaubert/configuration_flaubert.py +5 -10
- transformers/models/flaubert/modeling_flaubert.py +143 -145
- transformers/models/flaubert/tokenization_flaubert.py +5 -3
- transformers/models/flava/configuration_flava.py +6 -5
- transformers/models/flava/image_processing_flava.py +67 -66
- transformers/models/flava/image_processing_flava_fast.py +49 -46
- transformers/models/flava/modeling_flava.py +136 -153
- transformers/models/flava/processing_flava.py +12 -2
- transformers/models/flex_olmo/__init__.py +1 -0
- transformers/models/flex_olmo/configuration_flex_olmo.py +32 -28
- transformers/models/flex_olmo/modeling_flex_olmo.py +47 -47
- transformers/models/flex_olmo/modular_flex_olmo.py +44 -40
- transformers/models/florence2/configuration_florence2.py +1 -0
- transformers/models/florence2/modeling_florence2.py +69 -111
- transformers/models/florence2/modular_florence2.py +101 -104
- transformers/models/florence2/processing_florence2.py +47 -18
- transformers/models/fnet/configuration_fnet.py +2 -6
- transformers/models/fnet/modeling_fnet.py +80 -83
- transformers/models/fnet/tokenization_fnet.py +1 -0
- transformers/models/focalnet/configuration_focalnet.py +1 -0
- transformers/models/focalnet/modeling_focalnet.py +45 -51
- transformers/models/fsmt/configuration_fsmt.py +17 -12
- transformers/models/fsmt/modeling_fsmt.py +48 -49
- transformers/models/fsmt/tokenization_fsmt.py +5 -3
- transformers/models/funnel/configuration_funnel.py +1 -8
- transformers/models/funnel/modeling_funnel.py +93 -99
- transformers/models/funnel/tokenization_funnel.py +27 -17
- transformers/models/fuyu/configuration_fuyu.py +34 -28
- transformers/models/fuyu/image_processing_fuyu.py +31 -29
- transformers/models/fuyu/image_processing_fuyu_fast.py +17 -17
- transformers/models/fuyu/modeling_fuyu.py +53 -53
- transformers/models/fuyu/processing_fuyu.py +34 -23
- transformers/models/gemma/configuration_gemma.py +30 -25
- transformers/models/gemma/modeling_gemma.py +50 -46
- transformers/models/gemma/modular_gemma.py +47 -42
- transformers/models/gemma/tokenization_gemma.py +30 -10
- transformers/models/gemma2/configuration_gemma2.py +35 -30
- transformers/models/gemma2/modeling_gemma2.py +42 -39
- transformers/models/gemma2/modular_gemma2.py +66 -63
- transformers/models/gemma3/configuration_gemma3.py +44 -44
- transformers/models/gemma3/image_processing_gemma3.py +31 -29
- transformers/models/gemma3/image_processing_gemma3_fast.py +13 -11
- transformers/models/gemma3/modeling_gemma3.py +207 -159
- transformers/models/gemma3/modular_gemma3.py +204 -153
- transformers/models/gemma3/processing_gemma3.py +5 -5
- transformers/models/gemma3n/configuration_gemma3n.py +26 -36
- transformers/models/gemma3n/feature_extraction_gemma3n.py +11 -9
- transformers/models/gemma3n/modeling_gemma3n.py +356 -222
- transformers/models/gemma3n/modular_gemma3n.py +207 -230
- transformers/models/gemma3n/processing_gemma3n.py +26 -12
- transformers/models/git/configuration_git.py +8 -5
- transformers/models/git/modeling_git.py +204 -266
- transformers/models/git/processing_git.py +14 -2
- transformers/models/glm/configuration_glm.py +28 -24
- transformers/models/glm/modeling_glm.py +40 -37
- transformers/models/glm/modular_glm.py +7 -4
- transformers/models/glm4/configuration_glm4.py +28 -24
- transformers/models/glm4/modeling_glm4.py +42 -40
- transformers/models/glm4/modular_glm4.py +10 -8
- transformers/models/glm46v/configuration_glm46v.py +1 -0
- transformers/models/glm46v/image_processing_glm46v.py +40 -35
- transformers/models/glm46v/image_processing_glm46v_fast.py +9 -9
- transformers/models/glm46v/modeling_glm46v.py +90 -137
- transformers/models/glm46v/modular_glm46v.py +3 -4
- transformers/models/glm46v/processing_glm46v.py +41 -7
- transformers/models/glm46v/video_processing_glm46v.py +11 -9
- transformers/models/glm4_moe/configuration_glm4_moe.py +32 -40
- transformers/models/glm4_moe/modeling_glm4_moe.py +42 -45
- transformers/models/glm4_moe/modular_glm4_moe.py +34 -42
- transformers/models/glm4v/configuration_glm4v.py +20 -18
- transformers/models/glm4v/image_processing_glm4v.py +40 -34
- transformers/models/glm4v/image_processing_glm4v_fast.py +9 -8
- transformers/models/glm4v/modeling_glm4v.py +205 -254
- transformers/models/glm4v/modular_glm4v.py +224 -210
- transformers/models/glm4v/processing_glm4v.py +41 -7
- transformers/models/glm4v/video_processing_glm4v.py +11 -9
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +125 -136
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +368 -377
- transformers/models/glm4v_moe/modular_glm4v_moe.py +169 -83
- transformers/models/glpn/configuration_glpn.py +1 -0
- transformers/models/glpn/image_processing_glpn.py +12 -11
- transformers/models/glpn/image_processing_glpn_fast.py +13 -11
- transformers/models/glpn/modeling_glpn.py +14 -16
- transformers/models/got_ocr2/configuration_got_ocr2.py +12 -4
- transformers/models/got_ocr2/image_processing_got_ocr2.py +24 -22
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +11 -9
- transformers/models/got_ocr2/modeling_got_ocr2.py +80 -77
- transformers/models/got_ocr2/modular_got_ocr2.py +51 -54
- transformers/models/got_ocr2/processing_got_ocr2.py +63 -42
- transformers/models/gpt2/configuration_gpt2.py +2 -13
- transformers/models/gpt2/modeling_gpt2.py +115 -120
- transformers/models/gpt2/tokenization_gpt2.py +46 -15
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +2 -5
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +89 -79
- transformers/models/gpt_neo/configuration_gpt_neo.py +2 -9
- transformers/models/gpt_neo/modeling_gpt_neo.py +67 -83
- transformers/models/gpt_neox/configuration_gpt_neox.py +25 -25
- transformers/models/gpt_neox/modeling_gpt_neox.py +75 -76
- transformers/models/gpt_neox/modular_gpt_neox.py +66 -67
- transformers/models/gpt_neox/tokenization_gpt_neox.py +51 -9
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +19 -24
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +47 -46
- transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/configuration_gpt_oss.py +28 -46
- transformers/models/gpt_oss/modeling_gpt_oss.py +121 -83
- transformers/models/gpt_oss/modular_gpt_oss.py +103 -64
- transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
- transformers/models/gptj/configuration_gptj.py +4 -4
- transformers/models/gptj/modeling_gptj.py +87 -101
- transformers/models/granite/configuration_granite.py +33 -28
- transformers/models/granite/modeling_granite.py +46 -44
- transformers/models/granite/modular_granite.py +31 -29
- transformers/models/granite_speech/configuration_granite_speech.py +1 -0
- transformers/models/granite_speech/feature_extraction_granite_speech.py +3 -1
- transformers/models/granite_speech/modeling_granite_speech.py +52 -82
- transformers/models/granite_speech/processing_granite_speech.py +4 -11
- transformers/models/granitemoe/configuration_granitemoe.py +36 -31
- transformers/models/granitemoe/modeling_granitemoe.py +46 -41
- transformers/models/granitemoe/modular_granitemoe.py +27 -22
- transformers/models/granitemoehybrid/__init__.py +1 -0
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +47 -46
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +93 -97
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +21 -54
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +37 -33
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +61 -54
- transformers/models/granitemoeshared/modular_granitemoeshared.py +21 -19
- transformers/models/grounding_dino/configuration_grounding_dino.py +4 -6
- transformers/models/grounding_dino/image_processing_grounding_dino.py +62 -60
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +29 -28
- transformers/models/grounding_dino/modeling_grounding_dino.py +140 -155
- transformers/models/grounding_dino/modular_grounding_dino.py +3 -2
- transformers/models/grounding_dino/processing_grounding_dino.py +38 -10
- transformers/models/groupvit/configuration_groupvit.py +2 -4
- transformers/models/groupvit/modeling_groupvit.py +93 -107
- transformers/models/helium/configuration_helium.py +29 -25
- transformers/models/helium/modeling_helium.py +40 -38
- transformers/models/helium/modular_helium.py +7 -3
- transformers/models/herbert/tokenization_herbert.py +28 -10
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +1 -0
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -24
- transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -24
- transformers/models/hiera/configuration_hiera.py +1 -0
- transformers/models/hiera/modeling_hiera.py +66 -72
- transformers/models/hubert/configuration_hubert.py +2 -4
- transformers/models/hubert/modeling_hubert.py +37 -42
- transformers/models/hubert/modular_hubert.py +11 -13
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +31 -26
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +38 -35
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +6 -4
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +36 -31
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +42 -47
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +9 -9
- transformers/models/ibert/configuration_ibert.py +2 -4
- transformers/models/ibert/modeling_ibert.py +62 -82
- transformers/models/ibert/quant_modules.py +1 -0
- transformers/models/idefics/configuration_idefics.py +8 -5
- transformers/models/idefics/image_processing_idefics.py +15 -13
- transformers/models/idefics/modeling_idefics.py +82 -75
- transformers/models/idefics/perceiver.py +3 -1
- transformers/models/idefics/processing_idefics.py +48 -32
- transformers/models/idefics/vision.py +25 -24
- transformers/models/idefics2/configuration_idefics2.py +3 -1
- transformers/models/idefics2/image_processing_idefics2.py +32 -31
- transformers/models/idefics2/image_processing_idefics2_fast.py +8 -8
- transformers/models/idefics2/modeling_idefics2.py +101 -127
- transformers/models/idefics2/processing_idefics2.py +68 -10
- transformers/models/idefics3/configuration_idefics3.py +4 -1
- transformers/models/idefics3/image_processing_idefics3.py +43 -42
- transformers/models/idefics3/image_processing_idefics3_fast.py +15 -40
- transformers/models/idefics3/modeling_idefics3.py +90 -115
- transformers/models/idefics3/processing_idefics3.py +69 -15
- transformers/models/ijepa/configuration_ijepa.py +1 -0
- transformers/models/ijepa/modeling_ijepa.py +11 -10
- transformers/models/ijepa/modular_ijepa.py +7 -5
- transformers/models/imagegpt/configuration_imagegpt.py +2 -9
- transformers/models/imagegpt/image_processing_imagegpt.py +18 -17
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +16 -11
- transformers/models/imagegpt/modeling_imagegpt.py +65 -76
- transformers/models/informer/configuration_informer.py +9 -6
- transformers/models/informer/modeling_informer.py +86 -88
- transformers/models/informer/modular_informer.py +16 -14
- transformers/models/instructblip/configuration_instructblip.py +2 -2
- transformers/models/instructblip/modeling_instructblip.py +63 -103
- transformers/models/instructblip/processing_instructblip.py +36 -10
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -2
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +139 -157
- transformers/models/instructblipvideo/modular_instructblipvideo.py +64 -73
- transformers/models/instructblipvideo/processing_instructblipvideo.py +33 -14
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +8 -6
- transformers/models/internvl/configuration_internvl.py +1 -0
- transformers/models/internvl/modeling_internvl.py +106 -85
- transformers/models/internvl/modular_internvl.py +67 -47
- transformers/models/internvl/processing_internvl.py +45 -12
- transformers/models/internvl/video_processing_internvl.py +12 -10
- transformers/models/jamba/configuration_jamba.py +8 -5
- transformers/models/jamba/modeling_jamba.py +66 -68
- transformers/models/jamba/modular_jamba.py +55 -54
- transformers/models/janus/configuration_janus.py +1 -0
- transformers/models/janus/image_processing_janus.py +37 -35
- transformers/models/janus/image_processing_janus_fast.py +20 -18
- transformers/models/janus/modeling_janus.py +191 -115
- transformers/models/janus/modular_janus.py +84 -133
- transformers/models/janus/processing_janus.py +43 -17
- transformers/models/jetmoe/configuration_jetmoe.py +26 -24
- transformers/models/jetmoe/modeling_jetmoe.py +46 -43
- transformers/models/jetmoe/modular_jetmoe.py +33 -31
- transformers/models/kosmos2/configuration_kosmos2.py +9 -10
- transformers/models/kosmos2/modeling_kosmos2.py +173 -208
- transformers/models/kosmos2/processing_kosmos2.py +55 -40
- transformers/models/kosmos2_5/__init__.py +1 -0
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +9 -8
- transformers/models/kosmos2_5/image_processing_kosmos2_5.py +12 -10
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +13 -4
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +118 -132
- transformers/models/kosmos2_5/processing_kosmos2_5.py +29 -8
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +28 -31
- transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +14 -12
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +100 -110
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +22 -28
- transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +8 -2
- transformers/models/layoutlm/configuration_layoutlm.py +2 -14
- transformers/models/layoutlm/modeling_layoutlm.py +72 -77
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +17 -14
- transformers/models/layoutlmv2/image_processing_layoutlmv2.py +21 -18
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +9 -7
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +50 -64
- transformers/models/layoutlmv2/processing_layoutlmv2.py +44 -14
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +126 -73
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +19 -16
- transformers/models/layoutlmv3/image_processing_layoutlmv3.py +26 -24
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +11 -9
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +56 -82
- transformers/models/layoutlmv3/processing_layoutlmv3.py +46 -14
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +134 -74
- transformers/models/layoutxlm/configuration_layoutxlm.py +17 -14
- transformers/models/layoutxlm/modular_layoutxlm.py +1 -0
- transformers/models/layoutxlm/processing_layoutxlm.py +44 -14
- transformers/models/layoutxlm/tokenization_layoutxlm.py +113 -77
- transformers/models/led/configuration_led.py +12 -8
- transformers/models/led/modeling_led.py +266 -124
- transformers/models/levit/configuration_levit.py +1 -0
- transformers/models/levit/image_processing_levit.py +21 -19
- transformers/models/levit/image_processing_levit_fast.py +5 -4
- transformers/models/levit/modeling_levit.py +19 -38
- transformers/models/lfm2/configuration_lfm2.py +30 -27
- transformers/models/lfm2/modeling_lfm2.py +50 -47
- transformers/models/lfm2/modular_lfm2.py +30 -29
- transformers/models/lfm2_moe/__init__.py +1 -0
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +9 -6
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +53 -61
- transformers/models/lfm2_moe/modular_lfm2_moe.py +37 -13
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +1 -4
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +12 -41
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +66 -84
- transformers/models/lfm2_vl/modular_lfm2_vl.py +56 -70
- transformers/models/lfm2_vl/processing_lfm2_vl.py +76 -96
- transformers/models/lightglue/image_processing_lightglue.py +15 -16
- transformers/models/lightglue/image_processing_lightglue_fast.py +9 -9
- transformers/models/lightglue/modeling_lightglue.py +31 -31
- transformers/models/lightglue/modular_lightglue.py +28 -29
- transformers/models/lilt/configuration_lilt.py +2 -6
- transformers/models/lilt/modeling_lilt.py +70 -76
- transformers/models/llama/configuration_llama.py +31 -26
- transformers/models/llama/modeling_llama.py +39 -36
- transformers/models/llama/tokenization_llama.py +44 -14
- transformers/models/llama4/configuration_llama4.py +30 -27
- transformers/models/llama4/image_processing_llama4_fast.py +14 -12
- transformers/models/llama4/modeling_llama4.py +113 -120
- transformers/models/llama4/processing_llama4.py +57 -33
- transformers/models/llava/configuration_llava.py +1 -10
- transformers/models/llava/image_processing_llava.py +28 -25
- transformers/models/llava/image_processing_llava_fast.py +11 -9
- transformers/models/llava/modeling_llava.py +109 -85
- transformers/models/llava/processing_llava.py +51 -18
- transformers/models/llava_next/configuration_llava_next.py +2 -2
- transformers/models/llava_next/image_processing_llava_next.py +45 -43
- transformers/models/llava_next/image_processing_llava_next_fast.py +13 -11
- transformers/models/llava_next/modeling_llava_next.py +107 -110
- transformers/models/llava_next/processing_llava_next.py +47 -18
- transformers/models/llava_next_video/configuration_llava_next_video.py +7 -4
- transformers/models/llava_next_video/modeling_llava_next_video.py +158 -175
- transformers/models/llava_next_video/modular_llava_next_video.py +150 -155
- transformers/models/llava_next_video/processing_llava_next_video.py +63 -21
- transformers/models/llava_next_video/video_processing_llava_next_video.py +1 -0
- transformers/models/llava_onevision/configuration_llava_onevision.py +7 -4
- transformers/models/llava_onevision/image_processing_llava_onevision.py +42 -40
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +15 -14
- transformers/models/llava_onevision/modeling_llava_onevision.py +169 -177
- transformers/models/llava_onevision/modular_llava_onevision.py +156 -163
- transformers/models/llava_onevision/processing_llava_onevision.py +53 -21
- transformers/models/llava_onevision/video_processing_llava_onevision.py +1 -0
- transformers/models/longcat_flash/__init__.py +1 -0
- transformers/models/longcat_flash/configuration_longcat_flash.py +42 -37
- transformers/models/longcat_flash/modeling_longcat_flash.py +36 -36
- transformers/models/longcat_flash/modular_longcat_flash.py +21 -21
- transformers/models/longformer/configuration_longformer.py +5 -5
- transformers/models/longformer/modeling_longformer.py +101 -105
- transformers/models/longt5/configuration_longt5.py +7 -9
- transformers/models/longt5/modeling_longt5.py +49 -49
- transformers/models/luke/configuration_luke.py +2 -8
- transformers/models/luke/modeling_luke.py +181 -188
- transformers/models/luke/tokenization_luke.py +140 -107
- transformers/models/lxmert/configuration_lxmert.py +1 -16
- transformers/models/lxmert/modeling_lxmert.py +74 -65
- transformers/models/m2m_100/configuration_m2m_100.py +9 -7
- transformers/models/m2m_100/modeling_m2m_100.py +71 -83
- transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
- transformers/models/mamba/configuration_mamba.py +2 -1
- transformers/models/mamba/modeling_mamba.py +66 -58
- transformers/models/mamba2/configuration_mamba2.py +8 -5
- transformers/models/mamba2/modeling_mamba2.py +69 -68
- transformers/models/marian/configuration_marian.py +5 -10
- transformers/models/marian/modeling_marian.py +87 -93
- transformers/models/marian/tokenization_marian.py +6 -6
- transformers/models/markuplm/configuration_markuplm.py +7 -4
- transformers/models/markuplm/feature_extraction_markuplm.py +2 -1
- transformers/models/markuplm/modeling_markuplm.py +70 -69
- transformers/models/markuplm/processing_markuplm.py +38 -31
- transformers/models/markuplm/tokenization_markuplm.py +136 -93
- transformers/models/mask2former/configuration_mask2former.py +8 -5
- transformers/models/mask2former/image_processing_mask2former.py +85 -84
- transformers/models/mask2former/image_processing_mask2former_fast.py +40 -37
- transformers/models/mask2former/modeling_mask2former.py +103 -118
- transformers/models/mask2former/modular_mask2former.py +8 -6
- transformers/models/maskformer/configuration_maskformer.py +9 -6
- transformers/models/maskformer/configuration_maskformer_swin.py +1 -0
- transformers/models/maskformer/image_processing_maskformer.py +85 -84
- transformers/models/maskformer/image_processing_maskformer_fast.py +40 -36
- transformers/models/maskformer/modeling_maskformer.py +65 -79
- transformers/models/maskformer/modeling_maskformer_swin.py +32 -36
- transformers/models/mbart/configuration_mbart.py +4 -9
- transformers/models/mbart/modeling_mbart.py +116 -131
- transformers/models/mbart/tokenization_mbart.py +54 -11
- transformers/models/mbart50/tokenization_mbart50.py +13 -8
- transformers/models/megatron_bert/configuration_megatron_bert.py +3 -13
- transformers/models/megatron_bert/modeling_megatron_bert.py +150 -148
- transformers/models/metaclip_2/configuration_metaclip_2.py +1 -4
- transformers/models/metaclip_2/modeling_metaclip_2.py +84 -91
- transformers/models/metaclip_2/modular_metaclip_2.py +45 -61
- transformers/models/mgp_str/configuration_mgp_str.py +1 -0
- transformers/models/mgp_str/modeling_mgp_str.py +18 -20
- transformers/models/mgp_str/processing_mgp_str.py +20 -3
- transformers/models/mgp_str/tokenization_mgp_str.py +3 -1
- transformers/models/mimi/configuration_mimi.py +40 -42
- transformers/models/mimi/modeling_mimi.py +113 -142
- transformers/models/minimax/__init__.py +1 -0
- transformers/models/minimax/configuration_minimax.py +43 -37
- transformers/models/minimax/modeling_minimax.py +51 -61
- transformers/models/minimax/modular_minimax.py +62 -68
- transformers/models/ministral/configuration_ministral.py +29 -25
- transformers/models/ministral/modeling_ministral.py +38 -36
- transformers/models/ministral/modular_ministral.py +37 -32
- transformers/models/ministral3/configuration_ministral3.py +27 -24
- transformers/models/ministral3/modeling_ministral3.py +37 -36
- transformers/models/ministral3/modular_ministral3.py +5 -4
- transformers/models/mistral/configuration_mistral.py +29 -24
- transformers/models/mistral/modeling_mistral.py +37 -36
- transformers/models/mistral/modular_mistral.py +12 -11
- transformers/models/mistral3/configuration_mistral3.py +1 -4
- transformers/models/mistral3/modeling_mistral3.py +86 -89
- transformers/models/mistral3/modular_mistral3.py +68 -69
- transformers/models/mixtral/configuration_mixtral.py +34 -29
- transformers/models/mixtral/modeling_mixtral.py +45 -50
- transformers/models/mixtral/modular_mixtral.py +31 -32
- transformers/models/mlcd/configuration_mlcd.py +1 -0
- transformers/models/mlcd/modeling_mlcd.py +14 -20
- transformers/models/mlcd/modular_mlcd.py +13 -17
- transformers/models/mllama/configuration_mllama.py +15 -10
- transformers/models/mllama/image_processing_mllama.py +25 -23
- transformers/models/mllama/image_processing_mllama_fast.py +11 -11
- transformers/models/mllama/modeling_mllama.py +94 -105
- transformers/models/mllama/processing_mllama.py +55 -6
- transformers/models/mluke/tokenization_mluke.py +107 -101
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +3 -5
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +140 -155
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +3 -5
- transformers/models/mobilebert/configuration_mobilebert.py +2 -4
- transformers/models/mobilebert/modeling_mobilebert.py +85 -77
- transformers/models/mobilebert/tokenization_mobilebert.py +1 -0
- transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +1 -0
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +23 -20
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +1 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +16 -15
- transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +1 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +51 -48
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +15 -13
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +22 -24
- transformers/models/mobilevit/configuration_mobilevit.py +1 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +49 -46
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +14 -12
- transformers/models/mobilevit/modeling_mobilevit.py +21 -28
- transformers/models/mobilevitv2/configuration_mobilevitv2.py +1 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +22 -28
- transformers/models/modernbert/configuration_modernbert.py +42 -44
- transformers/models/modernbert/modeling_modernbert.py +133 -145
- transformers/models/modernbert/modular_modernbert.py +170 -186
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +40 -40
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +57 -62
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +86 -94
- transformers/models/moonshine/configuration_moonshine.py +31 -34
- transformers/models/moonshine/modeling_moonshine.py +71 -71
- transformers/models/moonshine/modular_moonshine.py +83 -88
- transformers/models/moshi/configuration_moshi.py +23 -46
- transformers/models/moshi/modeling_moshi.py +187 -157
- transformers/models/mpnet/configuration_mpnet.py +2 -6
- transformers/models/mpnet/modeling_mpnet.py +57 -62
- transformers/models/mpnet/tokenization_mpnet.py +15 -4
- transformers/models/mpt/configuration_mpt.py +9 -5
- transformers/models/mpt/modeling_mpt.py +60 -60
- transformers/models/mra/configuration_mra.py +2 -8
- transformers/models/mra/modeling_mra.py +57 -64
- transformers/models/mt5/configuration_mt5.py +8 -10
- transformers/models/mt5/modeling_mt5.py +95 -87
- transformers/models/musicgen/configuration_musicgen.py +8 -12
- transformers/models/musicgen/modeling_musicgen.py +122 -118
- transformers/models/musicgen/processing_musicgen.py +21 -3
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +8 -15
- transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +9 -8
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +123 -117
- transformers/models/musicgen_melody/processing_musicgen_melody.py +22 -3
- transformers/models/mvp/configuration_mvp.py +5 -8
- transformers/models/mvp/modeling_mvp.py +123 -135
- transformers/models/myt5/tokenization_myt5.py +10 -8
- transformers/models/nanochat/configuration_nanochat.py +8 -5
- transformers/models/nanochat/modeling_nanochat.py +40 -37
- transformers/models/nanochat/modular_nanochat.py +14 -12
- transformers/models/nemotron/configuration_nemotron.py +30 -25
- transformers/models/nemotron/modeling_nemotron.py +57 -56
- transformers/models/nllb/tokenization_nllb.py +28 -12
- transformers/models/nllb_moe/configuration_nllb_moe.py +9 -7
- transformers/models/nllb_moe/modeling_nllb_moe.py +69 -77
- transformers/models/nougat/image_processing_nougat.py +32 -29
- transformers/models/nougat/image_processing_nougat_fast.py +14 -12
- transformers/models/nougat/processing_nougat.py +39 -37
- transformers/models/nougat/tokenization_nougat.py +73 -18
- transformers/models/nystromformer/configuration_nystromformer.py +2 -8
- transformers/models/nystromformer/modeling_nystromformer.py +63 -74
- transformers/models/olmo/configuration_olmo.py +28 -23
- transformers/models/olmo/modeling_olmo.py +39 -36
- transformers/models/olmo/modular_olmo.py +11 -7
- transformers/models/olmo2/configuration_olmo2.py +28 -23
- transformers/models/olmo2/modeling_olmo2.py +41 -37
- transformers/models/olmo2/modular_olmo2.py +32 -29
- transformers/models/olmo3/__init__.py +1 -0
- transformers/models/olmo3/configuration_olmo3.py +30 -26
- transformers/models/olmo3/modeling_olmo3.py +39 -36
- transformers/models/olmo3/modular_olmo3.py +40 -37
- transformers/models/olmoe/configuration_olmoe.py +33 -29
- transformers/models/olmoe/modeling_olmoe.py +46 -52
- transformers/models/olmoe/modular_olmoe.py +15 -16
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +4 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +47 -53
- transformers/models/omdet_turbo/processing_omdet_turbo.py +67 -19
- transformers/models/oneformer/configuration_oneformer.py +8 -5
- transformers/models/oneformer/image_processing_oneformer.py +84 -83
- transformers/models/oneformer/image_processing_oneformer_fast.py +42 -41
- transformers/models/oneformer/modeling_oneformer.py +171 -147
- transformers/models/oneformer/processing_oneformer.py +43 -28
- transformers/models/openai/configuration_openai.py +1 -16
- transformers/models/openai/modeling_openai.py +51 -65
- transformers/models/openai/tokenization_openai.py +47 -8
- transformers/models/opt/configuration_opt.py +7 -6
- transformers/models/opt/modeling_opt.py +76 -78
- transformers/models/ovis2/__init__.py +1 -0
- transformers/models/ovis2/configuration_ovis2.py +1 -0
- transformers/models/ovis2/image_processing_ovis2.py +24 -22
- transformers/models/ovis2/image_processing_ovis2_fast.py +11 -9
- transformers/models/ovis2/modeling_ovis2.py +142 -111
- transformers/models/ovis2/modular_ovis2.py +45 -90
- transformers/models/ovis2/processing_ovis2.py +40 -12
- transformers/models/owlv2/configuration_owlv2.py +2 -4
- transformers/models/owlv2/image_processing_owlv2.py +21 -20
- transformers/models/owlv2/image_processing_owlv2_fast.py +15 -12
- transformers/models/owlv2/modeling_owlv2.py +117 -133
- transformers/models/owlv2/modular_owlv2.py +14 -11
- transformers/models/owlv2/processing_owlv2.py +49 -20
- transformers/models/owlvit/configuration_owlvit.py +2 -4
- transformers/models/owlvit/image_processing_owlvit.py +22 -21
- transformers/models/owlvit/image_processing_owlvit_fast.py +3 -2
- transformers/models/owlvit/modeling_owlvit.py +116 -132
- transformers/models/owlvit/processing_owlvit.py +48 -20
- transformers/models/paligemma/configuration_paligemma.py +1 -4
- transformers/models/paligemma/modeling_paligemma.py +93 -103
- transformers/models/paligemma/processing_paligemma.py +66 -13
- transformers/models/parakeet/configuration_parakeet.py +14 -7
- transformers/models/parakeet/feature_extraction_parakeet.py +12 -10
- transformers/models/parakeet/modeling_parakeet.py +28 -32
- transformers/models/parakeet/modular_parakeet.py +20 -23
- transformers/models/parakeet/processing_parakeet.py +5 -13
- transformers/models/parakeet/{tokenization_parakeet.py → tokenization_parakeet_fast.py} +7 -5
- transformers/models/patchtsmixer/configuration_patchtsmixer.py +8 -5
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +62 -70
- transformers/models/patchtst/configuration_patchtst.py +9 -6
- transformers/models/patchtst/modeling_patchtst.py +80 -97
- transformers/models/pegasus/configuration_pegasus.py +5 -8
- transformers/models/pegasus/modeling_pegasus.py +66 -72
- transformers/models/pegasus/tokenization_pegasus.py +45 -15
- transformers/models/pegasus_x/configuration_pegasus_x.py +4 -5
- transformers/models/pegasus_x/modeling_pegasus_x.py +52 -55
- transformers/models/perceiver/configuration_perceiver.py +1 -0
- transformers/models/perceiver/image_processing_perceiver.py +25 -22
- transformers/models/perceiver/image_processing_perceiver_fast.py +9 -7
- transformers/models/perceiver/modeling_perceiver.py +146 -165
- transformers/models/perceiver/tokenization_perceiver.py +6 -3
- transformers/models/perception_lm/configuration_perception_lm.py +1 -0
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +10 -8
- transformers/models/perception_lm/modeling_perception_lm.py +70 -71
- transformers/models/perception_lm/modular_perception_lm.py +61 -65
- transformers/models/perception_lm/processing_perception_lm.py +47 -13
- transformers/models/perception_lm/video_processing_perception_lm.py +1 -0
- transformers/models/persimmon/configuration_persimmon.py +28 -23
- transformers/models/persimmon/modeling_persimmon.py +45 -43
- transformers/models/phi/configuration_phi.py +28 -23
- transformers/models/phi/modeling_phi.py +43 -40
- transformers/models/phi/modular_phi.py +24 -23
- transformers/models/phi3/configuration_phi3.py +33 -28
- transformers/models/phi3/modeling_phi3.py +38 -36
- transformers/models/phi3/modular_phi3.py +17 -13
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +33 -30
- transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +9 -7
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +11 -11
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +78 -95
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +80 -98
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +44 -7
- transformers/models/phimoe/configuration_phimoe.py +36 -31
- transformers/models/phimoe/modeling_phimoe.py +45 -50
- transformers/models/phimoe/modular_phimoe.py +4 -3
- transformers/models/phobert/tokenization_phobert.py +6 -4
- transformers/models/pix2struct/configuration_pix2struct.py +10 -12
- transformers/models/pix2struct/image_processing_pix2struct.py +19 -15
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +15 -12
- transformers/models/pix2struct/modeling_pix2struct.py +52 -58
- transformers/models/pix2struct/processing_pix2struct.py +30 -5
- transformers/models/pixtral/configuration_pixtral.py +14 -11
- transformers/models/pixtral/image_processing_pixtral.py +28 -26
- transformers/models/pixtral/image_processing_pixtral_fast.py +11 -10
- transformers/models/pixtral/modeling_pixtral.py +34 -28
- transformers/models/pixtral/processing_pixtral.py +53 -21
- transformers/models/plbart/configuration_plbart.py +5 -8
- transformers/models/plbart/modeling_plbart.py +106 -119
- transformers/models/plbart/modular_plbart.py +33 -39
- transformers/models/plbart/tokenization_plbart.py +7 -4
- transformers/models/poolformer/configuration_poolformer.py +1 -0
- transformers/models/poolformer/image_processing_poolformer.py +24 -21
- transformers/models/poolformer/image_processing_poolformer_fast.py +15 -13
- transformers/models/poolformer/modeling_poolformer.py +13 -23
- transformers/models/pop2piano/configuration_pop2piano.py +8 -7
- transformers/models/pop2piano/feature_extraction_pop2piano.py +9 -6
- transformers/models/pop2piano/modeling_pop2piano.py +24 -26
- transformers/models/pop2piano/processing_pop2piano.py +33 -25
- transformers/models/pop2piano/tokenization_pop2piano.py +23 -15
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +3 -3
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +21 -20
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +13 -16
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +13 -16
- transformers/models/prophetnet/configuration_prophetnet.py +38 -37
- transformers/models/prophetnet/modeling_prophetnet.py +131 -114
- transformers/models/prophetnet/tokenization_prophetnet.py +16 -14
- transformers/models/pvt/configuration_pvt.py +1 -0
- transformers/models/pvt/image_processing_pvt.py +27 -24
- transformers/models/pvt/image_processing_pvt_fast.py +2 -1
- transformers/models/pvt/modeling_pvt.py +21 -21
- transformers/models/pvt_v2/configuration_pvt_v2.py +4 -2
- transformers/models/pvt_v2/modeling_pvt_v2.py +25 -28
- transformers/models/qwen2/configuration_qwen2.py +25 -32
- transformers/models/qwen2/modeling_qwen2.py +38 -36
- transformers/models/qwen2/modular_qwen2.py +12 -11
- transformers/models/qwen2/tokenization_qwen2.py +23 -12
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +26 -32
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +277 -340
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +211 -278
- transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +49 -41
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +35 -29
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +148 -203
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +118 -93
- transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +43 -7
- transformers/models/qwen2_audio/configuration_qwen2_audio.py +1 -0
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +40 -40
- transformers/models/qwen2_audio/processing_qwen2_audio.py +42 -13
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +35 -42
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +46 -51
- transformers/models/qwen2_moe/modular_qwen2_moe.py +10 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +34 -29
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +42 -41
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +15 -12
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +153 -199
- transformers/models/qwen2_vl/processing_qwen2_vl.py +44 -7
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +18 -38
- transformers/models/qwen3/configuration_qwen3.py +27 -34
- transformers/models/qwen3/modeling_qwen3.py +39 -36
- transformers/models/qwen3/modular_qwen3.py +6 -4
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +32 -39
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +46 -51
- transformers/models/qwen3_moe/modular_qwen3_moe.py +13 -10
- transformers/models/qwen3_next/configuration_qwen3_next.py +35 -45
- transformers/models/qwen3_next/modeling_qwen3_next.py +51 -47
- transformers/models/qwen3_next/modular_qwen3_next.py +35 -34
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +101 -135
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +252 -355
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +196 -250
- transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +48 -40
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +29 -27
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +155 -233
- transformers/models/qwen3_vl/modular_qwen3_vl.py +179 -206
- transformers/models/qwen3_vl/processing_qwen3_vl.py +42 -6
- transformers/models/qwen3_vl/video_processing_qwen3_vl.py +12 -10
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +30 -23
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +303 -358
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +124 -87
- transformers/models/rag/configuration_rag.py +15 -6
- transformers/models/rag/modeling_rag.py +130 -127
- transformers/models/rag/retrieval_rag.py +5 -3
- transformers/models/rag/tokenization_rag.py +50 -0
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +30 -29
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +42 -53
- transformers/models/reformer/configuration_reformer.py +8 -7
- transformers/models/reformer/modeling_reformer.py +69 -80
- transformers/models/reformer/tokenization_reformer.py +31 -11
- transformers/models/regnet/configuration_regnet.py +1 -0
- transformers/models/regnet/modeling_regnet.py +8 -15
- transformers/models/rembert/configuration_rembert.py +2 -8
- transformers/models/rembert/modeling_rembert.py +111 -121
- transformers/models/rembert/tokenization_rembert.py +12 -2
- transformers/models/resnet/configuration_resnet.py +1 -0
- transformers/models/resnet/modeling_resnet.py +13 -27
- transformers/models/roberta/configuration_roberta.py +3 -11
- transformers/models/roberta/modeling_roberta.py +93 -94
- transformers/models/roberta/modular_roberta.py +58 -58
- transformers/models/roberta/tokenization_roberta.py +29 -17
- transformers/models/roberta/tokenization_roberta_old.py +4 -2
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +3 -11
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +93 -94
- transformers/models/roc_bert/configuration_roc_bert.py +2 -8
- transformers/models/roc_bert/modeling_roc_bert.py +121 -122
- transformers/models/roc_bert/tokenization_roc_bert.py +94 -88
- transformers/models/roformer/configuration_roformer.py +3 -13
- transformers/models/roformer/modeling_roformer.py +81 -85
- transformers/models/roformer/tokenization_roformer.py +412 -74
- transformers/models/roformer/tokenization_roformer_fast.py +160 -0
- transformers/models/roformer/tokenization_utils.py +1 -0
- transformers/models/rt_detr/configuration_rt_detr.py +2 -1
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +1 -0
- transformers/models/rt_detr/image_processing_rt_detr.py +55 -54
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +26 -26
- transformers/models/rt_detr/modeling_rt_detr.py +90 -99
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +6 -13
- transformers/models/rt_detr/modular_rt_detr.py +16 -16
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +4 -6
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +90 -101
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +12 -19
- transformers/models/rwkv/configuration_rwkv.py +4 -2
- transformers/models/rwkv/modeling_rwkv.py +32 -31
- transformers/models/sam/configuration_sam.py +1 -3
- transformers/models/sam/image_processing_sam.py +60 -59
- transformers/models/sam/image_processing_sam_fast.py +27 -25
- transformers/models/sam/modeling_sam.py +41 -47
- transformers/models/sam/processing_sam.py +27 -39
- transformers/models/sam2/configuration_sam2.py +3 -2
- transformers/models/sam2/image_processing_sam2_fast.py +15 -14
- transformers/models/sam2/modeling_sam2.py +90 -96
- transformers/models/sam2/modular_sam2.py +91 -86
- transformers/models/sam2/processing_sam2.py +47 -31
- transformers/models/sam2_video/configuration_sam2_video.py +1 -0
- transformers/models/sam2_video/modeling_sam2_video.py +144 -151
- transformers/models/sam2_video/modular_sam2_video.py +104 -101
- transformers/models/sam2_video/processing_sam2_video.py +66 -49
- transformers/models/sam2_video/video_processing_sam2_video.py +4 -1
- transformers/models/sam3/configuration_sam3.py +2 -21
- transformers/models/sam3/image_processing_sam3_fast.py +20 -17
- transformers/models/sam3/modeling_sam3.py +170 -184
- transformers/models/sam3/modular_sam3.py +8 -3
- transformers/models/sam3/processing_sam3.py +52 -37
- transformers/models/sam3_tracker/__init__.py +1 -0
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +3 -1
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +77 -82
- transformers/models/sam3_tracker/modular_sam3_tracker.py +3 -8
- transformers/models/sam3_tracker/processing_sam3_tracker.py +48 -31
- transformers/models/sam3_tracker_video/__init__.py +1 -0
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +1 -25
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +122 -135
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +26 -35
- transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +66 -50
- transformers/models/sam3_video/configuration_sam3_video.py +1 -14
- transformers/models/sam3_video/modeling_sam3_video.py +34 -33
- transformers/models/sam3_video/processing_sam3_video.py +46 -26
- transformers/models/sam_hq/__init__.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -3
- transformers/models/sam_hq/modeling_sam_hq.py +69 -74
- transformers/models/sam_hq/modular_sam_hq.py +25 -23
- transformers/models/sam_hq/{processing_sam_hq.py → processing_samhq.py} +29 -41
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +10 -8
- transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +11 -8
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +194 -212
- transformers/models/seamless_m4t/processing_seamless_m4t.py +39 -18
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +77 -40
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +10 -8
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +196 -204
- transformers/models/seed_oss/configuration_seed_oss.py +32 -28
- transformers/models/seed_oss/modeling_seed_oss.py +35 -33
- transformers/models/seed_oss/modular_seed_oss.py +4 -3
- transformers/models/segformer/configuration_segformer.py +10 -0
- transformers/models/segformer/image_processing_segformer.py +42 -39
- transformers/models/segformer/image_processing_segformer_fast.py +12 -10
- transformers/models/segformer/modeling_segformer.py +31 -34
- transformers/models/segformer/modular_segformer.py +10 -8
- transformers/models/seggpt/configuration_seggpt.py +1 -0
- transformers/models/seggpt/image_processing_seggpt.py +41 -38
- transformers/models/seggpt/modeling_seggpt.py +38 -50
- transformers/models/sew/configuration_sew.py +2 -4
- transformers/models/sew/modeling_sew.py +36 -38
- transformers/models/sew/modular_sew.py +13 -13
- transformers/models/sew_d/configuration_sew_d.py +2 -4
- transformers/models/sew_d/modeling_sew_d.py +30 -31
- transformers/models/shieldgemma2/configuration_shieldgemma2.py +1 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +17 -16
- transformers/models/shieldgemma2/processing_shieldgemma2.py +5 -3
- transformers/models/siglip/configuration_siglip.py +2 -4
- transformers/models/siglip/image_processing_siglip.py +20 -17
- transformers/models/siglip/image_processing_siglip_fast.py +1 -0
- transformers/models/siglip/modeling_siglip.py +75 -84
- transformers/models/siglip/processing_siglip.py +14 -2
- transformers/models/siglip/tokenization_siglip.py +7 -6
- transformers/models/siglip2/configuration_siglip2.py +2 -5
- transformers/models/siglip2/image_processing_siglip2.py +16 -15
- transformers/models/siglip2/image_processing_siglip2_fast.py +7 -6
- transformers/models/siglip2/modeling_siglip2.py +129 -143
- transformers/models/siglip2/modular_siglip2.py +46 -47
- transformers/models/siglip2/processing_siglip2.py +14 -2
- transformers/models/smollm3/configuration_smollm3.py +32 -29
- transformers/models/smollm3/modeling_smollm3.py +39 -36
- transformers/models/smollm3/modular_smollm3.py +35 -33
- transformers/models/smolvlm/configuration_smolvlm.py +4 -2
- transformers/models/smolvlm/image_processing_smolvlm.py +43 -42
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +15 -41
- transformers/models/smolvlm/modeling_smolvlm.py +94 -126
- transformers/models/smolvlm/modular_smolvlm.py +39 -50
- transformers/models/smolvlm/processing_smolvlm.py +83 -15
- transformers/models/smolvlm/video_processing_smolvlm.py +18 -16
- transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +1 -0
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +27 -26
- transformers/models/speech_to_text/configuration_speech_to_text.py +9 -9
- transformers/models/speech_to_text/feature_extraction_speech_to_text.py +13 -10
- transformers/models/speech_to_text/modeling_speech_to_text.py +54 -66
- transformers/models/speech_to_text/processing_speech_to_text.py +30 -4
- transformers/models/speech_to_text/tokenization_speech_to_text.py +6 -5
- transformers/models/speecht5/configuration_speecht5.py +9 -7
- transformers/models/speecht5/feature_extraction_speecht5.py +37 -16
- transformers/models/speecht5/modeling_speecht5.py +175 -213
- transformers/models/speecht5/number_normalizer.py +1 -0
- transformers/models/speecht5/processing_speecht5.py +37 -3
- transformers/models/speecht5/tokenization_speecht5.py +5 -4
- transformers/models/splinter/configuration_splinter.py +7 -6
- transformers/models/splinter/modeling_splinter.py +59 -71
- transformers/models/splinter/tokenization_splinter.py +30 -9
- transformers/models/squeezebert/configuration_squeezebert.py +2 -14
- transformers/models/squeezebert/modeling_squeezebert.py +62 -68
- transformers/models/squeezebert/tokenization_squeezebert.py +1 -0
- transformers/models/stablelm/configuration_stablelm.py +29 -24
- transformers/models/stablelm/modeling_stablelm.py +45 -44
- transformers/models/starcoder2/configuration_starcoder2.py +27 -30
- transformers/models/starcoder2/modeling_starcoder2.py +41 -39
- transformers/models/starcoder2/modular_starcoder2.py +16 -14
- transformers/models/superglue/configuration_superglue.py +3 -7
- transformers/models/superglue/image_processing_superglue.py +15 -15
- transformers/models/superglue/image_processing_superglue_fast.py +10 -9
- transformers/models/superglue/modeling_superglue.py +37 -42
- transformers/models/superpoint/image_processing_superpoint.py +15 -15
- transformers/models/superpoint/image_processing_superpoint_fast.py +11 -8
- transformers/models/superpoint/modeling_superpoint.py +16 -18
- transformers/models/swiftformer/configuration_swiftformer.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +14 -18
- transformers/models/swin/configuration_swin.py +1 -0
- transformers/models/swin/modeling_swin.py +86 -86
- transformers/models/swin2sr/configuration_swin2sr.py +1 -0
- transformers/models/swin2sr/image_processing_swin2sr.py +13 -10
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +8 -4
- transformers/models/swin2sr/modeling_swin2sr.py +63 -81
- transformers/models/swinv2/configuration_swinv2.py +1 -0
- transformers/models/swinv2/modeling_swinv2.py +104 -108
- transformers/models/switch_transformers/configuration_switch_transformers.py +7 -11
- transformers/models/switch_transformers/modeling_switch_transformers.py +44 -37
- transformers/models/switch_transformers/modular_switch_transformers.py +41 -34
- transformers/models/t5/configuration_t5.py +8 -14
- transformers/models/t5/modeling_t5.py +92 -88
- transformers/models/t5/tokenization_t5.py +9 -3
- transformers/models/t5gemma/configuration_t5gemma.py +41 -43
- transformers/models/t5gemma/modeling_t5gemma.py +107 -104
- transformers/models/t5gemma/modular_t5gemma.py +120 -124
- transformers/models/t5gemma2/configuration_t5gemma2.py +120 -80
- transformers/models/t5gemma2/modeling_t5gemma2.py +125 -141
- transformers/models/t5gemma2/modular_t5gemma2.py +104 -393
- transformers/models/table_transformer/configuration_table_transformer.py +2 -1
- transformers/models/table_transformer/modeling_table_transformer.py +49 -51
- transformers/models/tapas/configuration_tapas.py +2 -12
- transformers/models/tapas/modeling_tapas.py +67 -68
- transformers/models/tapas/tokenization_tapas.py +153 -115
- transformers/models/textnet/configuration_textnet.py +1 -0
- transformers/models/textnet/image_processing_textnet.py +25 -22
- transformers/models/textnet/image_processing_textnet_fast.py +10 -8
- transformers/models/textnet/modeling_textnet.py +16 -28
- transformers/models/time_series_transformer/configuration_time_series_transformer.py +8 -5
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +81 -83
- transformers/models/timesfm/configuration_timesfm.py +1 -0
- transformers/models/timesfm/modeling_timesfm.py +22 -33
- transformers/models/timesfm/modular_timesfm.py +21 -32
- transformers/models/timesformer/configuration_timesformer.py +1 -0
- transformers/models/timesformer/modeling_timesformer.py +16 -15
- transformers/models/timm_backbone/configuration_timm_backbone.py +1 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +15 -17
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -5
- transformers/models/timm_wrapper/image_processing_timm_wrapper.py +5 -4
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +29 -34
- transformers/models/trocr/configuration_trocr.py +8 -11
- transformers/models/trocr/modeling_trocr.py +44 -45
- transformers/models/trocr/processing_trocr.py +25 -5
- transformers/models/tvp/configuration_tvp.py +2 -5
- transformers/models/tvp/image_processing_tvp.py +52 -50
- transformers/models/tvp/image_processing_tvp_fast.py +15 -15
- transformers/models/tvp/modeling_tvp.py +27 -27
- transformers/models/tvp/processing_tvp.py +14 -2
- transformers/models/udop/configuration_udop.py +7 -16
- transformers/models/udop/modeling_udop.py +73 -71
- transformers/models/udop/processing_udop.py +26 -7
- transformers/models/udop/tokenization_udop.py +105 -84
- transformers/models/umt5/configuration_umt5.py +7 -8
- transformers/models/umt5/modeling_umt5.py +90 -94
- transformers/models/unispeech/configuration_unispeech.py +2 -4
- transformers/models/unispeech/modeling_unispeech.py +49 -51
- transformers/models/unispeech/modular_unispeech.py +22 -22
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +2 -4
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +65 -69
- transformers/models/unispeech_sat/modular_unispeech_sat.py +23 -23
- transformers/models/univnet/feature_extraction_univnet.py +14 -14
- transformers/models/univnet/modeling_univnet.py +8 -8
- transformers/models/upernet/configuration_upernet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +13 -11
- transformers/models/vaultgemma/__init__.py +1 -0
- transformers/models/vaultgemma/configuration_vaultgemma.py +33 -29
- transformers/models/vaultgemma/modeling_vaultgemma.py +41 -39
- transformers/models/vaultgemma/modular_vaultgemma.py +31 -29
- transformers/models/video_llama_3/configuration_video_llama_3.py +0 -4
- transformers/models/video_llama_3/image_processing_video_llama_3.py +42 -43
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +14 -12
- transformers/models/video_llama_3/modeling_video_llama_3.py +109 -157
- transformers/models/video_llama_3/modular_video_llama_3.py +146 -155
- transformers/models/video_llama_3/processing_video_llama_3.py +39 -5
- transformers/models/video_llama_3/video_processing_video_llama_3.py +23 -42
- transformers/models/video_llava/configuration_video_llava.py +1 -4
- transformers/models/video_llava/image_processing_video_llava.py +38 -35
- transformers/models/video_llava/modeling_video_llava.py +146 -146
- transformers/models/video_llava/processing_video_llava.py +78 -38
- transformers/models/video_llava/video_processing_video_llava.py +1 -0
- transformers/models/videomae/configuration_videomae.py +1 -0
- transformers/models/videomae/image_processing_videomae.py +34 -31
- transformers/models/videomae/modeling_videomae.py +17 -14
- transformers/models/videomae/video_processing_videomae.py +1 -0
- transformers/models/vilt/configuration_vilt.py +4 -6
- transformers/models/vilt/image_processing_vilt.py +30 -29
- transformers/models/vilt/image_processing_vilt_fast.py +16 -15
- transformers/models/vilt/modeling_vilt.py +90 -116
- transformers/models/vilt/processing_vilt.py +14 -2
- transformers/models/vipllava/configuration_vipllava.py +1 -4
- transformers/models/vipllava/modeling_vipllava.py +70 -99
- transformers/models/vipllava/modular_vipllava.py +54 -78
- transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +1 -0
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +27 -28
- transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +1 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +41 -46
- transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +16 -2
- transformers/models/visual_bert/configuration_visual_bert.py +2 -6
- transformers/models/visual_bert/modeling_visual_bert.py +92 -98
- transformers/models/vit/configuration_vit.py +1 -0
- transformers/models/vit/image_processing_vit.py +22 -19
- transformers/models/vit/image_processing_vit_fast.py +1 -0
- transformers/models/vit/modeling_vit.py +17 -17
- transformers/models/vit_mae/configuration_vit_mae.py +1 -0
- transformers/models/vit_mae/modeling_vit_mae.py +27 -29
- transformers/models/vit_msn/configuration_vit_msn.py +1 -0
- transformers/models/vit_msn/modeling_vit_msn.py +16 -18
- transformers/models/vitdet/configuration_vitdet.py +1 -0
- transformers/models/vitdet/modeling_vitdet.py +14 -14
- transformers/models/vitmatte/configuration_vitmatte.py +5 -2
- transformers/models/vitmatte/image_processing_vitmatte.py +18 -15
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +18 -16
- transformers/models/vitmatte/modeling_vitmatte.py +11 -14
- transformers/models/vitpose/configuration_vitpose.py +7 -4
- transformers/models/vitpose/image_processing_vitpose.py +25 -24
- transformers/models/vitpose/image_processing_vitpose_fast.py +11 -9
- transformers/models/vitpose/modeling_vitpose.py +14 -14
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +1 -0
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +10 -8
- transformers/models/vits/configuration_vits.py +1 -4
- transformers/models/vits/modeling_vits.py +42 -44
- transformers/models/vits/tokenization_vits.py +4 -3
- transformers/models/vivit/configuration_vivit.py +1 -0
- transformers/models/vivit/image_processing_vivit.py +39 -36
- transformers/models/vivit/modeling_vivit.py +8 -6
- transformers/models/vjepa2/__init__.py +1 -0
- transformers/models/vjepa2/configuration_vjepa2.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +32 -31
- transformers/models/vjepa2/video_processing_vjepa2.py +1 -0
- transformers/models/voxtral/__init__.py +1 -0
- transformers/models/voxtral/configuration_voxtral.py +2 -0
- transformers/models/voxtral/modeling_voxtral.py +47 -40
- transformers/models/voxtral/modular_voxtral.py +40 -37
- transformers/models/voxtral/processing_voxtral.py +48 -25
- transformers/models/wav2vec2/configuration_wav2vec2.py +2 -4
- transformers/models/wav2vec2/feature_extraction_wav2vec2.py +10 -7
- transformers/models/wav2vec2/modeling_wav2vec2.py +121 -73
- transformers/models/wav2vec2/processing_wav2vec2.py +35 -6
- transformers/models/wav2vec2/tokenization_wav2vec2.py +332 -20
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +2 -4
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +62 -70
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +48 -57
- transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +35 -6
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +2 -4
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +77 -90
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +30 -37
- transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +17 -16
- transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +55 -36
- transformers/models/wavlm/configuration_wavlm.py +2 -4
- transformers/models/wavlm/modeling_wavlm.py +48 -50
- transformers/models/wavlm/modular_wavlm.py +5 -4
- transformers/models/whisper/configuration_whisper.py +5 -6
- transformers/models/whisper/english_normalizer.py +4 -3
- transformers/models/whisper/feature_extraction_whisper.py +24 -9
- transformers/models/whisper/generation_whisper.py +48 -26
- transformers/models/whisper/modeling_whisper.py +73 -79
- transformers/models/whisper/processing_whisper.py +20 -3
- transformers/models/whisper/tokenization_whisper.py +43 -11
- transformers/models/x_clip/configuration_x_clip.py +2 -4
- transformers/models/x_clip/modeling_x_clip.py +93 -96
- transformers/models/x_clip/processing_x_clip.py +14 -2
- transformers/models/xcodec/configuration_xcodec.py +6 -4
- transformers/models/xcodec/modeling_xcodec.py +17 -20
- transformers/models/xglm/configuration_xglm.py +8 -9
- transformers/models/xglm/modeling_xglm.py +55 -60
- transformers/models/xglm/tokenization_xglm.py +11 -3
- transformers/models/xlm/configuration_xlm.py +8 -10
- transformers/models/xlm/modeling_xlm.py +144 -144
- transformers/models/xlm/tokenization_xlm.py +5 -3
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +3 -11
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +194 -195
- transformers/models/xlm_roberta/modular_xlm_roberta.py +53 -50
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +18 -8
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +2 -10
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +93 -94
- transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +70 -67
- transformers/models/xlnet/configuration_xlnet.py +12 -3
- transformers/models/xlnet/modeling_xlnet.py +163 -152
- transformers/models/xlnet/tokenization_xlnet.py +9 -2
- transformers/models/xlstm/configuration_xlstm.py +12 -8
- transformers/models/xlstm/modeling_xlstm.py +65 -62
- transformers/models/xmod/configuration_xmod.py +3 -11
- transformers/models/xmod/modeling_xmod.py +110 -108
- transformers/models/yolos/configuration_yolos.py +1 -0
- transformers/models/yolos/image_processing_yolos.py +62 -60
- transformers/models/yolos/image_processing_yolos_fast.py +45 -42
- transformers/models/yolos/modeling_yolos.py +16 -16
- transformers/models/yolos/modular_yolos.py +19 -17
- transformers/models/yoso/configuration_yoso.py +2 -8
- transformers/models/yoso/modeling_yoso.py +63 -70
- transformers/models/zamba/configuration_zamba.py +8 -5
- transformers/models/zamba/modeling_zamba.py +78 -81
- transformers/models/zamba2/configuration_zamba2.py +50 -44
- transformers/models/zamba2/modeling_zamba2.py +97 -97
- transformers/models/zamba2/modular_zamba2.py +48 -46
- transformers/models/zoedepth/configuration_zoedepth.py +2 -1
- transformers/models/zoedepth/image_processing_zoedepth.py +29 -28
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +24 -21
- transformers/models/zoedepth/modeling_zoedepth.py +18 -26
- transformers/pipelines/__init__.py +114 -57
- transformers/pipelines/any_to_any.py +22 -14
- transformers/pipelines/audio_utils.py +2 -1
- transformers/pipelines/automatic_speech_recognition.py +12 -20
- transformers/pipelines/base.py +27 -15
- transformers/{models/pe_audio/processing_pe_audio.py → pipelines/deprecated/__init__.py} +3 -10
- transformers/pipelines/deprecated/text2text_generation.py +408 -0
- transformers/pipelines/document_question_answering.py +2 -4
- transformers/pipelines/image_text_to_text.py +1 -0
- transformers/pipelines/image_to_text.py +229 -0
- transformers/pipelines/question_answering.py +44 -5
- transformers/pipelines/text_classification.py +14 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/pipelines/token_classification.py +22 -1
- transformers/pipelines/video_classification.py +9 -1
- transformers/pipelines/zero_shot_audio_classification.py +1 -0
- transformers/pipelines/zero_shot_classification.py +6 -0
- transformers/pipelines/zero_shot_image_classification.py +7 -0
- transformers/processing_utils.py +145 -230
- transformers/quantizers/auto.py +4 -2
- transformers/quantizers/base.py +173 -53
- transformers/quantizers/quantizer_aqlm.py +23 -2
- transformers/quantizers/quantizer_auto_round.py +12 -2
- transformers/quantizers/quantizer_awq.py +89 -20
- transformers/quantizers/quantizer_bitnet.py +14 -4
- transformers/quantizers/quantizer_bnb_4bit.py +155 -18
- transformers/quantizers/quantizer_bnb_8bit.py +110 -24
- transformers/quantizers/quantizer_compressed_tensors.py +9 -2
- transformers/quantizers/quantizer_eetq.py +74 -16
- transformers/quantizers/quantizer_fbgemm_fp8.py +138 -38
- transformers/quantizers/quantizer_finegrained_fp8.py +113 -26
- transformers/quantizers/quantizer_fp_quant.py +82 -52
- transformers/quantizers/quantizer_gptq.py +28 -8
- transformers/quantizers/quantizer_higgs.py +60 -42
- transformers/quantizers/quantizer_hqq.py +153 -144
- transformers/quantizers/quantizer_mxfp4.py +194 -14
- transformers/quantizers/quantizer_quanto.py +79 -35
- transformers/quantizers/quantizer_quark.py +18 -36
- transformers/quantizers/quantizer_spqr.py +12 -4
- transformers/quantizers/quantizer_torchao.py +325 -50
- transformers/quantizers/quantizer_vptq.py +27 -4
- transformers/quantizers/quantizers_utils.py +0 -20
- transformers/safetensors_conversion.py +3 -9
- transformers/testing_utils.py +82 -326
- transformers/tokenization_mistral_common.py +903 -568
- transformers/tokenization_utils_base.py +340 -220
- transformers/tokenization_utils_sentencepiece.py +6 -5
- transformers/tokenization_utils_tokenizers.py +113 -226
- transformers/trainer.py +53 -60
- transformers/trainer_callback.py +0 -8
- transformers/trainer_seq2seq.py +1 -5
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +41 -77
- transformers/utils/__init__.py +4 -8
- transformers/utils/attention_visualizer.py +5 -5
- transformers/utils/auto_docstring.py +37 -599
- transformers/utils/doc.py +36 -4
- transformers/utils/dummy_pt_objects.py +42 -0
- transformers/utils/generic.py +28 -111
- transformers/utils/hub.py +15 -5
- transformers/utils/import_utils.py +32 -165
- transformers/utils/kernel_config.py +19 -74
- transformers/utils/loading_report.py +15 -25
- transformers/utils/quantization_config.py +241 -72
- transformers/video_processing_utils.py +39 -41
- transformers/video_utils.py +22 -18
- {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/METADATA +236 -284
- transformers-5.0.0rc0.dist-info/RECORD +1987 -0
- {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/WHEEL +1 -1
- transformers/integrations/moe.py +0 -360
- transformers/integrations/quark.py +0 -53
- transformers/loss/loss_lw_detr.py +0 -356
- transformers/models/ernie4_5_vl_moe/__init__.py +0 -31
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +0 -340
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +0 -455
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +0 -231
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +0 -1936
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +0 -1925
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +0 -249
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +0 -593
- transformers/models/fast_vlm/__init__.py +0 -27
- transformers/models/fast_vlm/configuration_fast_vlm.py +0 -137
- transformers/models/fast_vlm/modeling_fast_vlm.py +0 -432
- transformers/models/fast_vlm/modular_fast_vlm.py +0 -373
- transformers/models/glm4_moe_lite/__init__.py +0 -28
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +0 -233
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +0 -740
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +0 -302
- transformers/models/glm_image/__init__.py +0 -31
- transformers/models/glm_image/configuration_glm_image.py +0 -351
- transformers/models/glm_image/image_processing_glm_image.py +0 -503
- transformers/models/glm_image/image_processing_glm_image_fast.py +0 -294
- transformers/models/glm_image/modeling_glm_image.py +0 -1642
- transformers/models/glm_image/modular_glm_image.py +0 -1531
- transformers/models/glm_image/processing_glm_image.py +0 -217
- transformers/models/glmasr/__init__.py +0 -29
- transformers/models/glmasr/configuration_glmasr.py +0 -196
- transformers/models/glmasr/modeling_glmasr.py +0 -517
- transformers/models/glmasr/modular_glmasr.py +0 -443
- transformers/models/glmasr/processing_glmasr.py +0 -331
- transformers/models/jais2/__init__.py +0 -27
- transformers/models/jais2/configuration_jais2.py +0 -148
- transformers/models/jais2/modeling_jais2.py +0 -484
- transformers/models/jais2/modular_jais2.py +0 -194
- transformers/models/lasr/__init__.py +0 -29
- transformers/models/lasr/configuration_lasr.py +0 -244
- transformers/models/lasr/feature_extraction_lasr.py +0 -275
- transformers/models/lasr/modeling_lasr.py +0 -727
- transformers/models/lasr/modular_lasr.py +0 -574
- transformers/models/lasr/processing_lasr.py +0 -100
- transformers/models/lasr/tokenization_lasr.py +0 -184
- transformers/models/lighton_ocr/__init__.py +0 -28
- transformers/models/lighton_ocr/configuration_lighton_ocr.py +0 -128
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +0 -463
- transformers/models/lighton_ocr/modular_lighton_ocr.py +0 -404
- transformers/models/lighton_ocr/processing_lighton_ocr.py +0 -229
- transformers/models/lw_detr/__init__.py +0 -27
- transformers/models/lw_detr/configuration_lw_detr.py +0 -374
- transformers/models/lw_detr/modeling_lw_detr.py +0 -1702
- transformers/models/lw_detr/modular_lw_detr.py +0 -1615
- transformers/models/minimax_m2/__init__.py +0 -28
- transformers/models/minimax_m2/configuration_minimax_m2.py +0 -188
- transformers/models/minimax_m2/modeling_minimax_m2.py +0 -704
- transformers/models/minimax_m2/modular_minimax_m2.py +0 -346
- transformers/models/paddleocr_vl/__init__.py +0 -31
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +0 -335
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +0 -503
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +0 -209
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +0 -1683
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +0 -1380
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +0 -133
- transformers/models/pe_audio/__init__.py +0 -29
- transformers/models/pe_audio/configuration_pe_audio.py +0 -204
- transformers/models/pe_audio/feature_extraction_pe_audio.py +0 -160
- transformers/models/pe_audio/modeling_pe_audio.py +0 -819
- transformers/models/pe_audio/modular_pe_audio.py +0 -298
- transformers/models/pe_audio_video/__init__.py +0 -28
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +0 -223
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +0 -971
- transformers/models/pe_audio_video/modular_pe_audio_video.py +0 -763
- transformers/models/pe_video/__init__.py +0 -29
- transformers/models/pe_video/configuration_pe_video.py +0 -209
- transformers/models/pe_video/modeling_pe_video.py +0 -647
- transformers/models/pe_video/modular_pe_video.py +0 -231
- transformers/models/pe_video/processing_pe_video.py +0 -10
- transformers/models/pe_video/video_processing_pe_video.py +0 -64
- transformers/models/pixio/__init__.py +0 -29
- transformers/models/pixio/configuration_pixio.py +0 -150
- transformers/models/pixio/modeling_pixio.py +0 -507
- transformers/models/pixio/modular_pixio.py +0 -403
- transformers/models/solar_open/__init__.py +0 -27
- transformers/models/solar_open/configuration_solar_open.py +0 -184
- transformers/models/solar_open/modeling_solar_open.py +0 -642
- transformers/models/solar_open/modular_solar_open.py +0 -224
- transformers/trainer_jit_checkpoint.py +0 -125
- transformers-5.0.0.dist-info/RECORD +0 -2068
- {transformers-5.0.0.dist-info/licenses → transformers-5.0.0rc0.dist-info}/LICENSE +0 -0
- {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# base
|
|
2
|
+
# coding=utf-8
|
|
2
3
|
# Copyright 2020 The HuggingFace Inc. team.
|
|
3
4
|
#
|
|
4
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -18,21 +19,19 @@ fronting encoding methods) Special token mixing (host the special tokens logic)
|
|
|
18
19
|
of output with special method for the Fast tokenizers)
|
|
19
20
|
"""
|
|
20
21
|
|
|
21
|
-
from __future__ import annotations
|
|
22
|
-
|
|
23
22
|
import copy
|
|
24
23
|
import json
|
|
25
24
|
import os
|
|
26
25
|
import re
|
|
27
26
|
import warnings
|
|
28
27
|
from collections import OrderedDict, UserDict
|
|
29
|
-
from collections.abc import Callable,
|
|
28
|
+
from collections.abc import Callable, Mapping, Sequence, Sized
|
|
30
29
|
from dataclasses import dataclass
|
|
31
30
|
from pathlib import Path
|
|
32
|
-
from typing import TYPE_CHECKING, Any, NamedTuple, Union
|
|
31
|
+
from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
|
|
33
32
|
|
|
34
33
|
import numpy as np
|
|
35
|
-
from huggingface_hub import create_repo,
|
|
34
|
+
from huggingface_hub import create_repo, list_repo_files
|
|
36
35
|
from packaging import version
|
|
37
36
|
|
|
38
37
|
from . import __version__
|
|
@@ -50,6 +49,7 @@ from .utils import (
|
|
|
50
49
|
extract_commit_hash,
|
|
51
50
|
is_mlx_available,
|
|
52
51
|
is_numpy_array,
|
|
52
|
+
is_offline_mode,
|
|
53
53
|
is_protobuf_available,
|
|
54
54
|
is_tokenizers_available,
|
|
55
55
|
is_torch_available,
|
|
@@ -60,7 +60,6 @@ from .utils import (
|
|
|
60
60
|
requires_backends,
|
|
61
61
|
to_py_obj,
|
|
62
62
|
)
|
|
63
|
-
from .utils.chat_parsing_utils import recursive_parse
|
|
64
63
|
from .utils.chat_template_utils import render_jinja_template
|
|
65
64
|
from .utils.import_utils import PROTOBUF_IMPORT_ERROR
|
|
66
65
|
|
|
@@ -218,11 +217,11 @@ class BatchEncoding(UserDict):
|
|
|
218
217
|
|
|
219
218
|
def __init__(
|
|
220
219
|
self,
|
|
221
|
-
data: dict[str, Any]
|
|
222
|
-
encoding: EncodingFast
|
|
223
|
-
tensor_type: None
|
|
220
|
+
data: Optional[dict[str, Any]] = None,
|
|
221
|
+
encoding: Optional[Union[EncodingFast, Sequence[EncodingFast]]] = None,
|
|
222
|
+
tensor_type: Union[None, str, TensorType] = None,
|
|
224
223
|
prepend_batch_axis: bool = False,
|
|
225
|
-
n_sequences: int
|
|
224
|
+
n_sequences: Optional[int] = None,
|
|
226
225
|
):
|
|
227
226
|
super().__init__(data)
|
|
228
227
|
|
|
@@ -240,7 +239,7 @@ class BatchEncoding(UserDict):
|
|
|
240
239
|
self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
|
|
241
240
|
|
|
242
241
|
@property
|
|
243
|
-
def n_sequences(self) -> int
|
|
242
|
+
def n_sequences(self) -> Optional[int]:
|
|
244
243
|
"""
|
|
245
244
|
`Optional[int]`: The number of sequences used to generate each sample from the batch encoded in this
|
|
246
245
|
[`BatchEncoding`]. Currently can be one of `None` (unknown), `1` (a single sentence) or `2` (a pair of
|
|
@@ -248,7 +247,7 @@ class BatchEncoding(UserDict):
|
|
|
248
247
|
"""
|
|
249
248
|
return self._n_sequences
|
|
250
249
|
|
|
251
|
-
def __getitem__(self, item: int
|
|
250
|
+
def __getitem__(self, item: Union[int, str]) -> Union[Any, EncodingFast]:
|
|
252
251
|
"""
|
|
253
252
|
If the key is a string, returns the value of the dict associated to `key` ('input_ids', 'attention_mask',
|
|
254
253
|
etc.).
|
|
@@ -298,7 +297,7 @@ class BatchEncoding(UserDict):
|
|
|
298
297
|
return self._encodings is not None
|
|
299
298
|
|
|
300
299
|
@property
|
|
301
|
-
def encodings(self) -> list[EncodingFast]
|
|
300
|
+
def encodings(self) -> Optional[list[EncodingFast]]:
|
|
302
301
|
"""
|
|
303
302
|
`Optional[list[tokenizers.Encoding]]`: The list all encodings from the tokenization process. Returns `None` if
|
|
304
303
|
the input was tokenized through Python (i.e., not a fast) tokenizer.
|
|
@@ -323,7 +322,7 @@ class BatchEncoding(UserDict):
|
|
|
323
322
|
)
|
|
324
323
|
return self._encodings[batch_index].tokens
|
|
325
324
|
|
|
326
|
-
def sequence_ids(self, batch_index: int = 0) -> list[int
|
|
325
|
+
def sequence_ids(self, batch_index: int = 0) -> list[Optional[int]]:
|
|
327
326
|
"""
|
|
328
327
|
Return a list mapping the tokens to the id of their original sentences:
|
|
329
328
|
|
|
@@ -347,7 +346,7 @@ class BatchEncoding(UserDict):
|
|
|
347
346
|
)
|
|
348
347
|
return self._encodings[batch_index].sequence_ids
|
|
349
348
|
|
|
350
|
-
def word_ids(self, batch_index: int = 0) -> list[int
|
|
349
|
+
def word_ids(self, batch_index: int = 0) -> list[Optional[int]]:
|
|
351
350
|
"""
|
|
352
351
|
Return a list mapping the tokens to their actual word in the initial sentence for a fast tokenizer.
|
|
353
352
|
|
|
@@ -366,7 +365,7 @@ class BatchEncoding(UserDict):
|
|
|
366
365
|
)
|
|
367
366
|
return self._encodings[batch_index].word_ids
|
|
368
367
|
|
|
369
|
-
def token_to_sequence(self, batch_or_token_index: int, token_index: int
|
|
368
|
+
def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
|
|
370
369
|
"""
|
|
371
370
|
Get the index of the sequence represented by the given token. In the general use case, this method returns `0`
|
|
372
371
|
for a single sequence or the first sequence of a pair, and `1` for the second sequence of a pair
|
|
@@ -405,7 +404,7 @@ class BatchEncoding(UserDict):
|
|
|
405
404
|
token_index = self._seq_len + token_index
|
|
406
405
|
return self._encodings[batch_index].token_to_sequence(token_index)
|
|
407
406
|
|
|
408
|
-
def token_to_word(self, batch_or_token_index: int, token_index: int
|
|
407
|
+
def token_to_word(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
|
|
409
408
|
"""
|
|
410
409
|
Get the index of the word corresponding (i.e. comprising) to an encoded token in a sequence of the batch.
|
|
411
410
|
|
|
@@ -444,8 +443,8 @@ class BatchEncoding(UserDict):
|
|
|
444
443
|
return self._encodings[batch_index].token_to_word(token_index)
|
|
445
444
|
|
|
446
445
|
def word_to_tokens(
|
|
447
|
-
self, batch_or_word_index: int, word_index: int
|
|
448
|
-
) -> TokenSpan
|
|
446
|
+
self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
|
|
447
|
+
) -> Optional[TokenSpan]:
|
|
449
448
|
"""
|
|
450
449
|
Get the encoded token span corresponding to a word in a sequence of the batch.
|
|
451
450
|
|
|
@@ -496,7 +495,7 @@ class BatchEncoding(UserDict):
|
|
|
496
495
|
span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
|
|
497
496
|
return TokenSpan(*span) if span is not None else None
|
|
498
497
|
|
|
499
|
-
def token_to_chars(self, batch_or_token_index: int, token_index: int
|
|
498
|
+
def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> Optional[CharSpan]:
|
|
500
499
|
"""
|
|
501
500
|
Get the character span corresponding to an encoded token in a sequence of the batch.
|
|
502
501
|
|
|
@@ -535,7 +534,9 @@ class BatchEncoding(UserDict):
|
|
|
535
534
|
|
|
536
535
|
return CharSpan(*span_indices) if span_indices is not None else None
|
|
537
536
|
|
|
538
|
-
def char_to_token(
|
|
537
|
+
def char_to_token(
|
|
538
|
+
self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0
|
|
539
|
+
) -> int:
|
|
539
540
|
"""
|
|
540
541
|
Get the index of the token in the encoded output comprising a character in the original string for a sequence
|
|
541
542
|
of the batch.
|
|
@@ -576,7 +577,7 @@ class BatchEncoding(UserDict):
|
|
|
576
577
|
return self._encodings[batch_index].char_to_token(char_index, sequence_index)
|
|
577
578
|
|
|
578
579
|
def word_to_chars(
|
|
579
|
-
self, batch_or_word_index: int, word_index: int
|
|
580
|
+
self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
|
|
580
581
|
) -> CharSpan:
|
|
581
582
|
"""
|
|
582
583
|
Get the character span in the original string corresponding to given word in a sequence of the batch.
|
|
@@ -620,7 +621,7 @@ class BatchEncoding(UserDict):
|
|
|
620
621
|
word_index = batch_or_word_index
|
|
621
622
|
return CharSpan(*(self._encodings[batch_index].word_to_chars(word_index, sequence_index)))
|
|
622
623
|
|
|
623
|
-
def char_to_word(self, batch_or_char_index: int, char_index: int
|
|
624
|
+
def char_to_word(self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0) -> int:
|
|
624
625
|
"""
|
|
625
626
|
Get the word in the original string corresponding to a character in the original string of a sequence of the
|
|
626
627
|
batch.
|
|
@@ -659,7 +660,9 @@ class BatchEncoding(UserDict):
|
|
|
659
660
|
char_index = batch_or_char_index
|
|
660
661
|
return self._encodings[batch_index].char_to_word(char_index, sequence_index)
|
|
661
662
|
|
|
662
|
-
def convert_to_tensors(
|
|
663
|
+
def convert_to_tensors(
|
|
664
|
+
self, tensor_type: Optional[Union[str, TensorType]] = None, prepend_batch_axis: bool = False
|
|
665
|
+
):
|
|
663
666
|
"""
|
|
664
667
|
Convert the inner content to tensors.
|
|
665
668
|
|
|
@@ -753,7 +756,7 @@ class BatchEncoding(UserDict):
|
|
|
753
756
|
|
|
754
757
|
return self
|
|
755
758
|
|
|
756
|
-
def to(self, device: str
|
|
759
|
+
def to(self, device: Union[str, "torch.device"], *, non_blocking: bool = False) -> "BatchEncoding":
|
|
757
760
|
"""
|
|
758
761
|
Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only).
|
|
759
762
|
|
|
@@ -963,11 +966,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
963
966
|
|
|
964
967
|
vocab_files_names: dict[str, str] = {}
|
|
965
968
|
pretrained_vocab_files_map: dict[str, dict[str, str]] = {}
|
|
966
|
-
_auto_class: str
|
|
969
|
+
_auto_class: Optional[str] = None
|
|
967
970
|
|
|
968
971
|
# first name has to correspond to main model input name
|
|
969
972
|
# to make sure `tokenizer.pad(...)` works correctly
|
|
970
|
-
model_input_names: list[str] = ["input_ids", "attention_mask"]
|
|
973
|
+
model_input_names: list[str] = ["input_ids", "token_type_ids", "attention_mask"]
|
|
971
974
|
padding_side: str = "right"
|
|
972
975
|
truncation_side: str = "right"
|
|
973
976
|
slow_tokenizer_class = None
|
|
@@ -1099,7 +1102,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1099
1102
|
# ---- Special tokens API (moved from SpecialTokensMixin) ----
|
|
1100
1103
|
def add_special_tokens(
|
|
1101
1104
|
self,
|
|
1102
|
-
special_tokens_dict: dict[str, str
|
|
1105
|
+
special_tokens_dict: dict[str, Union[str, AddedToken, Sequence[Union[str, AddedToken]]]],
|
|
1103
1106
|
replace_extra_special_tokens=True,
|
|
1104
1107
|
) -> int:
|
|
1105
1108
|
"""
|
|
@@ -1203,7 +1206,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1203
1206
|
return self.add_tokens(tokens_to_add, special_tokens=True)
|
|
1204
1207
|
|
|
1205
1208
|
def add_tokens(
|
|
1206
|
-
self, new_tokens: str
|
|
1209
|
+
self, new_tokens: Union[str, AddedToken, Sequence[Union[str, AddedToken]]], special_tokens: bool = False
|
|
1207
1210
|
) -> int:
|
|
1208
1211
|
"""
|
|
1209
1212
|
#TODO remove this from here! PreTrainedTOkeniuzerBase should be agnostic of AddedToken.
|
|
@@ -1243,7 +1246,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1243
1246
|
new_tokens = [new_tokens]
|
|
1244
1247
|
return self._add_tokens(new_tokens, special_tokens=special_tokens)
|
|
1245
1248
|
|
|
1246
|
-
def _add_tokens(self, new_tokens: list[str]
|
|
1249
|
+
def _add_tokens(self, new_tokens: Union[list[str], list[AddedToken]], special_tokens: bool = False) -> int:
|
|
1247
1250
|
raise NotImplementedError
|
|
1248
1251
|
|
|
1249
1252
|
@property
|
|
@@ -1328,7 +1331,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1328
1331
|
return super().__getattr__(key)
|
|
1329
1332
|
|
|
1330
1333
|
def get_special_tokens_mask(
|
|
1331
|
-
self, token_ids_0: list[int], token_ids_1: list[int]
|
|
1334
|
+
self, token_ids_0: list[int], token_ids_1: Optional[list[int]] = None, already_has_special_tokens: bool = False
|
|
1332
1335
|
) -> list[int]:
|
|
1333
1336
|
"""
|
|
1334
1337
|
Retrieve sequence ids from a token list that has no special tokens added.
|
|
@@ -1417,7 +1420,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1417
1420
|
"""
|
|
1418
1421
|
return self.convert_tokens_to_ids(self.all_special_tokens)
|
|
1419
1422
|
|
|
1420
|
-
def _set_model_specific_special_tokens(self, special_tokens: dict[str, str
|
|
1423
|
+
def _set_model_specific_special_tokens(self, special_tokens: dict[str, Union[str, AddedToken]]):
|
|
1421
1424
|
"""
|
|
1422
1425
|
Adds new model-specific special tokens (e.g., for multimodal models).
|
|
1423
1426
|
|
|
@@ -1470,7 +1473,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1470
1473
|
"""
|
|
1471
1474
|
raise NotImplementedError()
|
|
1472
1475
|
|
|
1473
|
-
def convert_tokens_to_ids(self, tokens: str
|
|
1476
|
+
def convert_tokens_to_ids(self, tokens: Union[str, list[str]]) -> Union[int, list[int]]:
|
|
1474
1477
|
"""
|
|
1475
1478
|
Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
|
|
1476
1479
|
vocabulary.
|
|
@@ -1486,7 +1489,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1486
1489
|
|
|
1487
1490
|
return [self._convert_token_to_id_with_added_voc(token) for token in tokens]
|
|
1488
1491
|
|
|
1489
|
-
def convert_ids_to_tokens(
|
|
1492
|
+
def convert_ids_to_tokens(
|
|
1493
|
+
self, ids: Union[int, list[int]], skip_special_tokens: bool = False
|
|
1494
|
+
) -> Union[str, list[str]]:
|
|
1490
1495
|
"""
|
|
1491
1496
|
Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
|
|
1492
1497
|
added tokens.
|
|
@@ -1505,12 +1510,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1505
1510
|
@classmethod
|
|
1506
1511
|
def from_pretrained(
|
|
1507
1512
|
cls,
|
|
1508
|
-
pretrained_model_name_or_path: str
|
|
1513
|
+
pretrained_model_name_or_path: Union[str, os.PathLike],
|
|
1509
1514
|
*init_inputs,
|
|
1510
|
-
cache_dir: str
|
|
1515
|
+
cache_dir: Optional[Union[str, os.PathLike]] = None,
|
|
1511
1516
|
force_download: bool = False,
|
|
1512
1517
|
local_files_only: bool = False,
|
|
1513
|
-
token: str
|
|
1518
|
+
token: Optional[Union[str, bool]] = None,
|
|
1514
1519
|
revision: str = "main",
|
|
1515
1520
|
trust_remote_code=False,
|
|
1516
1521
|
**kwargs,
|
|
@@ -1607,7 +1612,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1607
1612
|
|
|
1608
1613
|
pretrained_model_name_or_path = str(pretrained_model_name_or_path)
|
|
1609
1614
|
vocab_files = {}
|
|
1610
|
-
additional_files_names = {}
|
|
1611
1615
|
init_configuration = {}
|
|
1612
1616
|
|
|
1613
1617
|
is_local = os.path.isdir(pretrained_model_name_or_path)
|
|
@@ -1625,9 +1629,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1625
1629
|
f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is not "
|
|
1626
1630
|
"supported for this tokenizer. Use a model identifier or the path to a directory instead."
|
|
1627
1631
|
)
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1632
|
+
# Use first vocab file that's not tokenizer_file
|
|
1633
|
+
file_id = list(cls.vocab_files_names.keys())[0]
|
|
1634
|
+
if file_id == "tokenizer_file" and vocab_files_count > 1:
|
|
1635
|
+
file_id = [k for k in cls.vocab_files_names.keys() if k != "tokenizer_file"][0]
|
|
1636
|
+
|
|
1631
1637
|
vocab_files[file_id] = pretrained_model_name_or_path
|
|
1632
1638
|
single_file_id = file_id
|
|
1633
1639
|
else:
|
|
@@ -1645,10 +1651,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1645
1651
|
}
|
|
1646
1652
|
|
|
1647
1653
|
vocab_files = {**cls.vocab_files_names, **additional_files_names}
|
|
1648
|
-
|
|
1649
|
-
# Check for versioned tokenizer files
|
|
1650
1654
|
if "tokenizer_file" in vocab_files:
|
|
1655
|
+
# Try to get the tokenizer config to see if there are versioned tokenizer files.
|
|
1651
1656
|
fast_tokenizer_file = FULL_TOKENIZER_FILE
|
|
1657
|
+
|
|
1652
1658
|
try:
|
|
1653
1659
|
resolved_config_file = cached_file(
|
|
1654
1660
|
pretrained_model_name_or_path,
|
|
@@ -1664,33 +1670,43 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1664
1670
|
_raise_exceptions_for_missing_entries=False,
|
|
1665
1671
|
_commit_hash=commit_hash,
|
|
1666
1672
|
)
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
if "fast_tokenizer_files" in tokenizer_config:
|
|
1671
|
-
fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
|
|
1672
|
-
commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
|
|
1673
|
+
except OSError:
|
|
1674
|
+
# Re-raise any error raised by cached_file in order to get a helpful error message
|
|
1675
|
+
raise
|
|
1673
1676
|
except Exception:
|
|
1674
|
-
|
|
1677
|
+
# For any other exception, we throw a generic error.
|
|
1678
|
+
raise OSError(
|
|
1679
|
+
f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
|
|
1680
|
+
"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
|
|
1681
|
+
f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
|
|
1682
|
+
f"containing all relevant files for a {cls.__name__} tokenizer."
|
|
1683
|
+
)
|
|
1684
|
+
|
|
1685
|
+
commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
|
|
1686
|
+
if resolved_config_file is not None:
|
|
1687
|
+
with open(resolved_config_file, encoding="utf-8") as reader:
|
|
1688
|
+
tokenizer_config = json.load(reader)
|
|
1689
|
+
if "fast_tokenizer_files" in tokenizer_config:
|
|
1690
|
+
fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
|
|
1675
1691
|
vocab_files["tokenizer_file"] = fast_tokenizer_file
|
|
1676
1692
|
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1693
|
+
# This block looks for any extra chat template files
|
|
1694
|
+
if is_local:
|
|
1695
|
+
template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
|
|
1696
|
+
if template_dir.is_dir():
|
|
1697
|
+
for template_file in template_dir.glob("*.jinja"):
|
|
1698
|
+
template_name = template_file.name.removesuffix(".jinja")
|
|
1699
|
+
vocab_files[f"chat_template_{template_name}"] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
|
|
1700
|
+
else:
|
|
1701
|
+
for template in list_repo_templates(
|
|
1702
|
+
pretrained_model_name_or_path,
|
|
1703
|
+
local_files_only=local_files_only,
|
|
1704
|
+
revision=revision,
|
|
1705
|
+
cache_dir=cache_dir,
|
|
1706
|
+
token=token,
|
|
1707
|
+
):
|
|
1708
|
+
template = template.removesuffix(".jinja")
|
|
1709
|
+
vocab_files[f"chat_template_{template}"] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
|
|
1694
1710
|
|
|
1695
1711
|
remote_files = []
|
|
1696
1712
|
if not is_local and not local_files_only:
|
|
@@ -1748,6 +1764,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1748
1764
|
if file_id not in resolved_vocab_files:
|
|
1749
1765
|
continue
|
|
1750
1766
|
|
|
1767
|
+
if is_local:
|
|
1768
|
+
logger.info(f"loading file {file_path}")
|
|
1769
|
+
else:
|
|
1770
|
+
logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
|
|
1771
|
+
|
|
1751
1772
|
return cls._from_pretrained(
|
|
1752
1773
|
resolved_vocab_files,
|
|
1753
1774
|
pretrained_model_name_or_path,
|
|
@@ -1777,6 +1798,29 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1777
1798
|
trust_remote_code=False,
|
|
1778
1799
|
**kwargs,
|
|
1779
1800
|
):
|
|
1801
|
+
# We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
|
|
1802
|
+
# file or if `from_slow` is set to True.
|
|
1803
|
+
from_slow = kwargs.get("from_slow", False)
|
|
1804
|
+
gguf_file = kwargs.get("gguf_file")
|
|
1805
|
+
has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
|
|
1806
|
+
|
|
1807
|
+
# If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be
|
|
1808
|
+
# loaded directly from the GGUF file.
|
|
1809
|
+
if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None and not gguf_file:
|
|
1810
|
+
slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
|
|
1811
|
+
copy.deepcopy(resolved_vocab_files),
|
|
1812
|
+
pretrained_model_name_or_path,
|
|
1813
|
+
copy.deepcopy(init_configuration),
|
|
1814
|
+
*init_inputs,
|
|
1815
|
+
token=token,
|
|
1816
|
+
cache_dir=cache_dir,
|
|
1817
|
+
local_files_only=local_files_only,
|
|
1818
|
+
_commit_hash=_commit_hash,
|
|
1819
|
+
**(copy.deepcopy(kwargs)),
|
|
1820
|
+
)
|
|
1821
|
+
else:
|
|
1822
|
+
slow_tokenizer = None
|
|
1823
|
+
|
|
1780
1824
|
# Prepare tokenizer initialization kwargs
|
|
1781
1825
|
# Did we saved some inputs and kwargs to reload ?
|
|
1782
1826
|
tokenizer_config_file = resolved_vocab_files.pop("tokenizer_config_file", None)
|
|
@@ -1785,16 +1829,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1785
1829
|
init_kwargs = json.load(tokenizer_config_handle)
|
|
1786
1830
|
# used in the past to check if the tokenizer class matches the class in the repo
|
|
1787
1831
|
init_kwargs.pop("tokenizer_class", None)
|
|
1832
|
+
if not has_tokenizer_file:
|
|
1833
|
+
init_kwargs.get("tokenizer_file", None)
|
|
1788
1834
|
saved_init_inputs = init_kwargs.pop("init_inputs", ())
|
|
1789
1835
|
if not init_inputs:
|
|
1790
1836
|
init_inputs = saved_init_inputs
|
|
1791
1837
|
else:
|
|
1792
1838
|
init_kwargs = init_configuration
|
|
1793
1839
|
|
|
1794
|
-
if resolved_vocab_files.get("tokenizer_file", None) is not None:
|
|
1795
|
-
init_kwargs.pop("add_bos_token", None)
|
|
1796
|
-
init_kwargs.pop("add_eos_token", None)
|
|
1797
|
-
|
|
1798
1840
|
# If independent chat template file(s) exist, they take priority over template entries in the tokenizer config
|
|
1799
1841
|
chat_templates = {}
|
|
1800
1842
|
chat_template_file = resolved_vocab_files.pop("chat_template_file", None)
|
|
@@ -1875,6 +1917,8 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1875
1917
|
init_kwargs[args_name] = file_path
|
|
1876
1918
|
tokenizer_file = resolved_vocab_files.get("tokenizer_file", None)
|
|
1877
1919
|
|
|
1920
|
+
if slow_tokenizer is not None:
|
|
1921
|
+
init_kwargs["__slow_tokenizer"] = slow_tokenizer
|
|
1878
1922
|
init_kwargs["name_or_path"] = pretrained_model_name_or_path
|
|
1879
1923
|
init_kwargs["is_local"] = _is_local
|
|
1880
1924
|
|
|
@@ -1993,12 +2037,28 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
1993
2037
|
if key in init_kwargs and added_tokens_map != {} and init_kwargs[key] is not None:
|
|
1994
2038
|
init_kwargs[key] = added_tokens_map.get(str(init_kwargs[key]), init_kwargs[key])
|
|
1995
2039
|
|
|
1996
|
-
#
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2040
|
+
# Track which files were loaded (if not already set by AutoTokenizer)
|
|
2041
|
+
if "files_loaded" not in init_kwargs:
|
|
2042
|
+
files_loaded = []
|
|
2043
|
+
# Check which files this tokenizer class actually uses based on vocab_files_names
|
|
2044
|
+
tokenizer_needs_files = set(cls.vocab_files_names.keys()) if hasattr(cls, "vocab_files_names") else set()
|
|
2045
|
+
|
|
2046
|
+
# If tokenizer_file is in the class's vocab_files_names and exists, prioritize it (TokenizersBackend)
|
|
2047
|
+
if "tokenizer_file" in tokenizer_needs_files and resolved_vocab_files.get("tokenizer_file"):
|
|
2048
|
+
files_loaded.append(os.path.basename(resolved_vocab_files["tokenizer_file"]))
|
|
2049
|
+
else:
|
|
2050
|
+
# Otherwise, add the actual vocab files that were used by this tokenizer class
|
|
2051
|
+
for file_key, file_path in resolved_vocab_files.items():
|
|
2052
|
+
if (
|
|
2053
|
+
file_path
|
|
2054
|
+
and file_key not in ["tokenizer_config_file", "special_tokens_map_file", "added_tokens_file"]
|
|
2055
|
+
and file_key in tokenizer_needs_files
|
|
2056
|
+
):
|
|
2057
|
+
# Extract just the filename from the path
|
|
2058
|
+
files_loaded.append(os.path.basename(file_path))
|
|
2059
|
+
init_kwargs["files_loaded"] = files_loaded
|
|
2001
2060
|
|
|
2061
|
+
# Instantiate the tokenizer.
|
|
2002
2062
|
try:
|
|
2003
2063
|
tokenizer = cls(*init_inputs, **init_kwargs)
|
|
2004
2064
|
except import_protobuf_decode_error():
|
|
@@ -2019,14 +2079,120 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2019
2079
|
"Unable to load vocabulary from file. "
|
|
2020
2080
|
"Please check that the provided vocabulary is accessible and not corrupted."
|
|
2021
2081
|
)
|
|
2022
|
-
return tokenizer
|
|
2023
2082
|
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2083
|
+
# If tokenizer_file exists and tokenizer has a TokenizersBackend, replace the blank tokenizer with tokenizer.json
|
|
2084
|
+
if tokenizer_file is not None and hasattr(tokenizer, "_tokenizer"):
|
|
2085
|
+
from tokenizers import Tokenizer as TokenizerFast
|
|
2086
|
+
|
|
2087
|
+
tokenizer._tokenizer = TokenizerFast.from_file(tokenizer_file)
|
|
2088
|
+
# Re-run post-initialization if the tokenizer has it
|
|
2089
|
+
if hasattr(tokenizer, "_post_init"):
|
|
2090
|
+
tokenizer._post_init()
|
|
2091
|
+
# If only SPM exists, try to get vocab and merges and init to load a tokenizers-backend
|
|
2092
|
+
else:
|
|
2093
|
+
spm_filename = find_sentencepiece_model_file(
|
|
2094
|
+
pretrained_model_name_or_path,
|
|
2095
|
+
revision=kwargs.get("revision"),
|
|
2096
|
+
token=kwargs.get("token"),
|
|
2097
|
+
cache_dir=kwargs.get("cache_dir"),
|
|
2098
|
+
local_files_only=kwargs.get("local_files_only", False),
|
|
2099
|
+
subfolder=kwargs.get("subfolder", ""),
|
|
2100
|
+
)
|
|
2101
|
+
if spm_filename is not None:
|
|
2102
|
+
try:
|
|
2103
|
+
resolved_spm = cached_file(
|
|
2104
|
+
pretrained_model_name_or_path,
|
|
2105
|
+
spm_filename,
|
|
2106
|
+
cache_dir=kwargs.get("cache_dir"),
|
|
2107
|
+
force_download=kwargs.get("force_download", False),
|
|
2108
|
+
proxies=kwargs.get("proxies"),
|
|
2109
|
+
token=kwargs.get("token"),
|
|
2110
|
+
revision=kwargs.get("revision"),
|
|
2111
|
+
local_files_only=kwargs.get("local_files_only", False),
|
|
2112
|
+
subfolder=kwargs.get("subfolder", ""),
|
|
2113
|
+
)
|
|
2114
|
+
except Exception:
|
|
2115
|
+
resolved_spm = None
|
|
2116
|
+
if resolved_spm is not None:
|
|
2117
|
+
try:
|
|
2118
|
+
# Mirror AutoTokenizer fallback: extract vocab/merges from SentencePiece
|
|
2119
|
+
import inspect as _inspect
|
|
2120
|
+
|
|
2121
|
+
from .tokenization_utils_sentencepiece import SentencePieceExtractor
|
|
2122
|
+
|
|
2123
|
+
class_sig = _inspect.signature(getattr(cls, "__init__", cls))
|
|
2124
|
+
vocab_ids, vocab_scores, merges = SentencePieceExtractor(resolved_spm).extract()
|
|
2125
|
+
files_loaded = [spm_filename]
|
|
2126
|
+
init_kwargs["backend"] = "tokenizers"
|
|
2127
|
+
init_kwargs["files_loaded"] = files_loaded
|
|
2128
|
+
# If tokenizer needs merges too (BPE), pass both; unigram models only need vocab
|
|
2129
|
+
if "merges" in class_sig.parameters:
|
|
2130
|
+
return cls.from_pretrained(
|
|
2131
|
+
pretrained_model_name_or_path,
|
|
2132
|
+
*init_inputs,
|
|
2133
|
+
vocab=vocab_scores,
|
|
2134
|
+
merges=merges,
|
|
2135
|
+
**init_kwargs,
|
|
2136
|
+
)
|
|
2137
|
+
elif "vocab" in class_sig.parameters:
|
|
2138
|
+
return cls.from_pretrained(
|
|
2139
|
+
pretrained_model_name_or_path,
|
|
2140
|
+
*init_inputs,
|
|
2141
|
+
vocab=vocab_scores,
|
|
2142
|
+
**init_kwargs,
|
|
2143
|
+
)
|
|
2144
|
+
except Exception as e:
|
|
2145
|
+
logger.warning(
|
|
2146
|
+
f"Could not extract vocab/merges from the SentencePiece model to initialize a Tokenizers backend: {e}. We are falling back so we are falling back to the standard loading method."
|
|
2147
|
+
)
|
|
2148
|
+
pass
|
|
2149
|
+
# Fallback to vocab.json + merges.txt (BPE) or just vocab.json (WordLevel/WordPiece)
|
|
2150
|
+
vocab, merges, files_loaded = load_vocab_and_merges(
|
|
2151
|
+
pretrained_model_name_or_path,
|
|
2152
|
+
cache_dir=kwargs.get("cache_dir"),
|
|
2153
|
+
force_download=kwargs.get("force_download", False),
|
|
2154
|
+
proxies=kwargs.get("proxies"),
|
|
2155
|
+
token=kwargs.get("token"),
|
|
2156
|
+
revision=kwargs.get("revision"),
|
|
2157
|
+
local_files_only=kwargs.get("local_files_only", False),
|
|
2158
|
+
subfolder=kwargs.get("subfolder", ""),
|
|
2159
|
+
)
|
|
2160
|
+
|
|
2161
|
+
if vocab is not None:
|
|
2162
|
+
try:
|
|
2163
|
+
import inspect as _inspect
|
|
2164
|
+
|
|
2165
|
+
class_sig = _inspect.signature(getattr(cls, "__init__", cls))
|
|
2166
|
+
init_kwargs["backend"] = "tokenizers"
|
|
2167
|
+
init_kwargs["files_loaded"] = files_loaded
|
|
2168
|
+
|
|
2169
|
+
if merges is not None and "merges" in class_sig.parameters:
|
|
2170
|
+
return cls.from_pretrained(
|
|
2171
|
+
pretrained_model_name_or_path,
|
|
2172
|
+
*init_inputs,
|
|
2173
|
+
vocab=vocab,
|
|
2174
|
+
merges=merges,
|
|
2175
|
+
**init_kwargs,
|
|
2176
|
+
)
|
|
2177
|
+
elif "vocab" in class_sig.parameters:
|
|
2178
|
+
return cls.from_pretrained(
|
|
2179
|
+
pretrained_model_name_or_path,
|
|
2180
|
+
*init_inputs,
|
|
2181
|
+
vocab=vocab,
|
|
2182
|
+
**init_kwargs,
|
|
2183
|
+
)
|
|
2184
|
+
except Exception:
|
|
2185
|
+
pass
|
|
2186
|
+
if added_tokens_decoder != {} and max(list(added_tokens_decoder.keys())[-1], 0) > tokenizer.vocab_size:
|
|
2187
|
+
logger.info(
|
|
2188
|
+
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are"
|
|
2189
|
+
" fine-tuned or trained."
|
|
2190
|
+
)
|
|
2191
|
+
|
|
2192
|
+
return tokenizer
|
|
2027
2193
|
|
|
2028
2194
|
@classmethod
|
|
2029
|
-
def convert_added_tokens(cls, obj: AddedToken
|
|
2195
|
+
def convert_added_tokens(cls, obj: Union[AddedToken, Any], save=False, add_type_field=True):
|
|
2030
2196
|
if isinstance(obj, dict) and "__type" in obj and obj["__type"] == "AddedToken":
|
|
2031
2197
|
obj.pop("__type")
|
|
2032
2198
|
return AddedToken(**obj)
|
|
@@ -2046,9 +2212,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2046
2212
|
|
|
2047
2213
|
def save_pretrained(
|
|
2048
2214
|
self,
|
|
2049
|
-
save_directory: str
|
|
2050
|
-
legacy_format: bool
|
|
2051
|
-
filename_prefix: str
|
|
2215
|
+
save_directory: Union[str, os.PathLike],
|
|
2216
|
+
legacy_format: Optional[bool] = None,
|
|
2217
|
+
filename_prefix: Optional[str] = None,
|
|
2052
2218
|
push_to_hub: bool = False,
|
|
2053
2219
|
**kwargs,
|
|
2054
2220
|
) -> tuple[str, ...]:
|
|
@@ -2105,13 +2271,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2105
2271
|
)
|
|
2106
2272
|
|
|
2107
2273
|
tokenizer_config = copy.deepcopy(self.init_kwargs)
|
|
2108
|
-
tokenizer_config.pop("add_bos_token", None)
|
|
2109
|
-
tokenizer_config.pop("add_eos_token", None)
|
|
2110
2274
|
|
|
2111
2275
|
# Let's save the init kwargs
|
|
2112
2276
|
target_keys = set(self.init_kwargs.keys())
|
|
2113
|
-
target_keys.discard("add_bos_token")
|
|
2114
|
-
target_keys.discard("add_eos_token")
|
|
2115
2277
|
# Let's save the special tokens map (only the strings)
|
|
2116
2278
|
target_keys.update(["model_max_length"])
|
|
2117
2279
|
|
|
@@ -2146,10 +2308,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2146
2308
|
# Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained
|
|
2147
2309
|
tokenizer_class = self.__class__.__name__
|
|
2148
2310
|
|
|
2149
|
-
# tokenizers backend don't need to save added_tokens_decoder
|
|
2311
|
+
# tokenizers backend don't need to save added_tokens_decoder
|
|
2150
2312
|
if any(base.__name__ == "TokenizersBackend" for base in self.__class__.__mro__):
|
|
2151
2313
|
tokenizer_config.pop("added_tokens_decoder", None)
|
|
2152
|
-
tokenizer_config.pop("additional_special_tokens", None)
|
|
2153
2314
|
|
|
2154
2315
|
# Remove the Fast at the end if we can save the slow tokenizer
|
|
2155
2316
|
if tokenizer_class.endswith("Fast") and getattr(self, "can_save_slow_tokenizer", False):
|
|
@@ -2204,10 +2365,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2204
2365
|
|
|
2205
2366
|
def _save_pretrained(
|
|
2206
2367
|
self,
|
|
2207
|
-
save_directory: str
|
|
2368
|
+
save_directory: Union[str, os.PathLike],
|
|
2208
2369
|
file_names: tuple[str, ...],
|
|
2209
|
-
legacy_format: bool
|
|
2210
|
-
filename_prefix: str
|
|
2370
|
+
legacy_format: Optional[bool] = None,
|
|
2371
|
+
filename_prefix: Optional[str] = None,
|
|
2211
2372
|
) -> tuple[str, ...]:
|
|
2212
2373
|
"""
|
|
2213
2374
|
Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens.
|
|
@@ -2237,7 +2398,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2237
2398
|
|
|
2238
2399
|
return file_names + vocab_files + (added_tokens_file,)
|
|
2239
2400
|
|
|
2240
|
-
def save_vocabulary(self, save_directory: str, filename_prefix: str
|
|
2401
|
+
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str, ...]:
|
|
2241
2402
|
"""
|
|
2242
2403
|
Save only the vocabulary of the tokenizer (vocabulary + added tokens).
|
|
2243
2404
|
|
|
@@ -2255,7 +2416,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2255
2416
|
"""
|
|
2256
2417
|
raise NotImplementedError
|
|
2257
2418
|
|
|
2258
|
-
def tokenize(self, text: str, pair: str
|
|
2419
|
+
def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
|
|
2259
2420
|
"""
|
|
2260
2421
|
Converts a string into a sequence of tokens, replacing unknown tokens with the `unk_token`.
|
|
2261
2422
|
|
|
@@ -2287,15 +2448,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2287
2448
|
)
|
|
2288
2449
|
def encode(
|
|
2289
2450
|
self,
|
|
2290
|
-
text: TextInput
|
|
2291
|
-
text_pair: TextInput
|
|
2451
|
+
text: Union[TextInput, PreTokenizedInput, EncodedInput],
|
|
2452
|
+
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
|
|
2292
2453
|
add_special_tokens: bool = True,
|
|
2293
|
-
padding: bool
|
|
2294
|
-
truncation: bool
|
|
2295
|
-
max_length: int
|
|
2454
|
+
padding: Union[bool, str, PaddingStrategy] = False,
|
|
2455
|
+
truncation: Union[bool, str, TruncationStrategy, None] = None,
|
|
2456
|
+
max_length: Optional[int] = None,
|
|
2296
2457
|
stride: int = 0,
|
|
2297
|
-
padding_side: str
|
|
2298
|
-
return_tensors: str
|
|
2458
|
+
padding_side: Optional[str] = None,
|
|
2459
|
+
return_tensors: Optional[Union[str, TensorType]] = None,
|
|
2299
2460
|
**kwargs,
|
|
2300
2461
|
) -> list[int]:
|
|
2301
2462
|
"""
|
|
@@ -2313,15 +2474,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2313
2474
|
the `tokenize` method) or a list of integers (tokenized string ids using the `convert_tokens_to_ids`
|
|
2314
2475
|
method).
|
|
2315
2476
|
"""
|
|
2316
|
-
padding_strategy, truncation_strategy, max_length,
|
|
2477
|
+
padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
|
|
2317
2478
|
padding=padding,
|
|
2318
2479
|
truncation=truncation,
|
|
2319
2480
|
max_length=max_length,
|
|
2481
|
+
pad_to_multiple_of=kwargs.get("pad_to_multiple_of"),
|
|
2482
|
+
verbose=kwargs.get("verbose", True),
|
|
2320
2483
|
**kwargs,
|
|
2321
2484
|
)
|
|
2322
2485
|
|
|
2323
|
-
kwargs.update(kwargs_updated)
|
|
2324
|
-
|
|
2325
2486
|
encoded_inputs = self._encode_plus(
|
|
2326
2487
|
text,
|
|
2327
2488
|
text_pair=text_pair,
|
|
@@ -2464,27 +2625,29 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2464
2625
|
@add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
|
|
2465
2626
|
def __call__(
|
|
2466
2627
|
self,
|
|
2467
|
-
text: TextInput
|
|
2468
|
-
text_pair: TextInput
|
|
2469
|
-
text_target: TextInput
|
|
2470
|
-
text_pair_target:
|
|
2628
|
+
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput], None] = None,
|
|
2629
|
+
text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
|
|
2630
|
+
text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput], None] = None,
|
|
2631
|
+
text_pair_target: Optional[
|
|
2632
|
+
Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
|
|
2633
|
+
] = None,
|
|
2471
2634
|
add_special_tokens: bool = True,
|
|
2472
|
-
padding: bool
|
|
2473
|
-
truncation: bool
|
|
2474
|
-
max_length: int
|
|
2635
|
+
padding: Union[bool, str, PaddingStrategy] = False,
|
|
2636
|
+
truncation: Union[bool, str, TruncationStrategy, None] = None,
|
|
2637
|
+
max_length: Optional[int] = None,
|
|
2475
2638
|
stride: int = 0,
|
|
2476
2639
|
is_split_into_words: bool = False,
|
|
2477
|
-
pad_to_multiple_of: int
|
|
2478
|
-
padding_side: str
|
|
2479
|
-
return_tensors: str
|
|
2480
|
-
return_token_type_ids: bool
|
|
2481
|
-
return_attention_mask: bool
|
|
2640
|
+
pad_to_multiple_of: Optional[int] = None,
|
|
2641
|
+
padding_side: Optional[str] = None,
|
|
2642
|
+
return_tensors: Optional[Union[str, TensorType]] = None,
|
|
2643
|
+
return_token_type_ids: Optional[bool] = None,
|
|
2644
|
+
return_attention_mask: Optional[bool] = None,
|
|
2482
2645
|
return_overflowing_tokens: bool = False,
|
|
2483
2646
|
return_special_tokens_mask: bool = False,
|
|
2484
2647
|
return_offsets_mapping: bool = False,
|
|
2485
2648
|
return_length: bool = False,
|
|
2486
2649
|
verbose: bool = True,
|
|
2487
|
-
tokenizer_kwargs: dict[str, Any]
|
|
2650
|
+
tokenizer_kwargs: Optional[dict[str, Any]] = None,
|
|
2488
2651
|
**kwargs,
|
|
2489
2652
|
) -> BatchEncoding:
|
|
2490
2653
|
"""
|
|
@@ -2589,19 +2752,19 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2589
2752
|
|
|
2590
2753
|
def _encode_plus(
|
|
2591
2754
|
self,
|
|
2592
|
-
text: TextInput
|
|
2593
|
-
text_pair: TextInput
|
|
2755
|
+
text: Union[TextInput, PreTokenizedInput, EncodedInput],
|
|
2756
|
+
text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
|
|
2594
2757
|
add_special_tokens: bool = True,
|
|
2595
2758
|
padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
|
|
2596
2759
|
truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
|
|
2597
|
-
max_length: int
|
|
2760
|
+
max_length: Optional[int] = None,
|
|
2598
2761
|
stride: int = 0,
|
|
2599
2762
|
is_split_into_words: bool = False,
|
|
2600
|
-
pad_to_multiple_of: int
|
|
2601
|
-
padding_side: str
|
|
2602
|
-
return_tensors: str
|
|
2603
|
-
return_token_type_ids: bool
|
|
2604
|
-
return_attention_mask: bool
|
|
2763
|
+
pad_to_multiple_of: Optional[int] = None,
|
|
2764
|
+
padding_side: Optional[str] = None,
|
|
2765
|
+
return_tensors: Optional[Union[str, TensorType]] = None,
|
|
2766
|
+
return_token_type_ids: Optional[bool] = None,
|
|
2767
|
+
return_attention_mask: Optional[bool] = None,
|
|
2605
2768
|
return_overflowing_tokens: bool = False,
|
|
2606
2769
|
return_special_tokens_mask: bool = False,
|
|
2607
2770
|
return_offsets_mapping: bool = False,
|
|
@@ -2614,17 +2777,19 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2614
2777
|
|
|
2615
2778
|
def pad(
|
|
2616
2779
|
self,
|
|
2617
|
-
encoded_inputs:
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2780
|
+
encoded_inputs: Union[
|
|
2781
|
+
BatchEncoding,
|
|
2782
|
+
list[BatchEncoding],
|
|
2783
|
+
dict[str, EncodedInput],
|
|
2784
|
+
dict[str, list[EncodedInput]],
|
|
2785
|
+
list[dict[str, EncodedInput]],
|
|
2786
|
+
],
|
|
2787
|
+
padding: Union[bool, str, PaddingStrategy] = True,
|
|
2788
|
+
max_length: Optional[int] = None,
|
|
2789
|
+
pad_to_multiple_of: Optional[int] = None,
|
|
2790
|
+
padding_side: Optional[str] = None,
|
|
2791
|
+
return_attention_mask: Optional[bool] = None,
|
|
2792
|
+
return_tensors: Optional[Union[str, TensorType]] = None,
|
|
2628
2793
|
verbose: bool = True,
|
|
2629
2794
|
) -> BatchEncoding:
|
|
2630
2795
|
"""
|
|
@@ -2785,12 +2950,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2785
2950
|
|
|
2786
2951
|
def _pad(
|
|
2787
2952
|
self,
|
|
2788
|
-
encoded_inputs: dict[str, EncodedInput]
|
|
2789
|
-
max_length: int
|
|
2953
|
+
encoded_inputs: Union[dict[str, EncodedInput], BatchEncoding],
|
|
2954
|
+
max_length: Optional[int] = None,
|
|
2790
2955
|
padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
|
|
2791
|
-
pad_to_multiple_of: int
|
|
2792
|
-
padding_side: str
|
|
2793
|
-
return_attention_mask: bool
|
|
2956
|
+
pad_to_multiple_of: Optional[int] = None,
|
|
2957
|
+
padding_side: Optional[str] = None,
|
|
2958
|
+
return_attention_mask: Optional[bool] = None,
|
|
2794
2959
|
) -> dict:
|
|
2795
2960
|
"""
|
|
2796
2961
|
Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
|
|
@@ -2880,10 +3045,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2880
3045
|
|
|
2881
3046
|
def decode(
|
|
2882
3047
|
self,
|
|
2883
|
-
token_ids: int
|
|
3048
|
+
token_ids: Union[int, list[int], list[list[int]], np.ndarray, "torch.Tensor"],
|
|
2884
3049
|
skip_special_tokens: bool = False,
|
|
2885
3050
|
**kwargs,
|
|
2886
|
-
) -> str
|
|
3051
|
+
) -> Union[str, list[str]]:
|
|
2887
3052
|
"""
|
|
2888
3053
|
Converts a sequence of ids into a string, or a list of sequences into a list of strings,
|
|
2889
3054
|
using the tokenizer and vocabulary with options to remove special tokens and clean up
|
|
@@ -2928,9 +3093,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2928
3093
|
|
|
2929
3094
|
def batch_decode(
|
|
2930
3095
|
self,
|
|
2931
|
-
sequences: list[int]
|
|
3096
|
+
sequences: Union[list[int], list[list[int]], np.ndarray, "torch.Tensor"],
|
|
2932
3097
|
skip_special_tokens: bool = False,
|
|
2933
|
-
clean_up_tokenization_spaces: bool
|
|
3098
|
+
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
2934
3099
|
**kwargs,
|
|
2935
3100
|
) -> list[str]:
|
|
2936
3101
|
"""
|
|
@@ -2967,14 +3132,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
2967
3132
|
|
|
2968
3133
|
def _decode(
|
|
2969
3134
|
self,
|
|
2970
|
-
token_ids: int
|
|
3135
|
+
token_ids: Union[int, list[int]],
|
|
2971
3136
|
skip_special_tokens: bool = False,
|
|
2972
|
-
clean_up_tokenization_spaces: bool
|
|
3137
|
+
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
2973
3138
|
**kwargs,
|
|
2974
3139
|
) -> str:
|
|
2975
3140
|
raise NotImplementedError
|
|
2976
3141
|
|
|
2977
|
-
def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: int
|
|
3142
|
+
def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: Optional[int], verbose: bool):
|
|
2978
3143
|
"""
|
|
2979
3144
|
Depending on the input and internal state we might trigger a warning about a sequence that is too long for its
|
|
2980
3145
|
corresponding model
|
|
@@ -3016,22 +3181,22 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3016
3181
|
|
|
3017
3182
|
def apply_chat_template(
|
|
3018
3183
|
self,
|
|
3019
|
-
conversation: list[dict[str, str]]
|
|
3020
|
-
tools: list[dict
|
|
3021
|
-
documents: list[dict[str, str]]
|
|
3022
|
-
chat_template: str
|
|
3184
|
+
conversation: Union[list[dict[str, str]], list[list[dict[str, str]]]],
|
|
3185
|
+
tools: Optional[list[Union[dict, Callable]]] = None,
|
|
3186
|
+
documents: Optional[list[dict[str, str]]] = None,
|
|
3187
|
+
chat_template: Optional[str] = None,
|
|
3023
3188
|
add_generation_prompt: bool = False,
|
|
3024
3189
|
continue_final_message: bool = False,
|
|
3025
3190
|
tokenize: bool = True,
|
|
3026
|
-
padding: bool
|
|
3191
|
+
padding: Union[bool, str, PaddingStrategy] = False,
|
|
3027
3192
|
truncation: bool = False,
|
|
3028
|
-
max_length: int
|
|
3029
|
-
return_tensors: str
|
|
3030
|
-
return_dict: bool =
|
|
3193
|
+
max_length: Optional[int] = None,
|
|
3194
|
+
return_tensors: Optional[Union[str, TensorType]] = None,
|
|
3195
|
+
return_dict: bool = False,
|
|
3031
3196
|
return_assistant_tokens_mask: bool = False,
|
|
3032
|
-
tokenizer_kwargs: dict[str, Any]
|
|
3197
|
+
tokenizer_kwargs: Optional[dict[str, Any]] = None,
|
|
3033
3198
|
**kwargs,
|
|
3034
|
-
) -> str
|
|
3199
|
+
) -> Union[str, list[int], list[str], list[list[int]], BatchEncoding]:
|
|
3035
3200
|
"""
|
|
3036
3201
|
Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token
|
|
3037
3202
|
ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
|
|
@@ -3085,7 +3250,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3085
3250
|
values are:
|
|
3086
3251
|
- `'pt'`: Return PyTorch `torch.Tensor` objects.
|
|
3087
3252
|
- `'np'`: Return NumPy `np.ndarray` objects.
|
|
3088
|
-
return_dict (`bool`, defaults to `
|
|
3253
|
+
return_dict (`bool`, defaults to `False`):
|
|
3089
3254
|
Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
|
|
3090
3255
|
tokenizer_kwargs (`dict[str: Any]`, *optional*): Additional kwargs to pass to the tokenizer.
|
|
3091
3256
|
return_assistant_tokens_mask (`bool`, defaults to `False`):
|
|
@@ -3100,11 +3265,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3100
3265
|
set, will return a dict of tokenizer outputs instead.
|
|
3101
3266
|
"""
|
|
3102
3267
|
|
|
3103
|
-
if not tokenize:
|
|
3104
|
-
|
|
3268
|
+
if return_dict and not tokenize:
|
|
3269
|
+
raise ValueError(
|
|
3270
|
+
"`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
|
|
3271
|
+
"of tokenizer outputs to return."
|
|
3272
|
+
)
|
|
3105
3273
|
|
|
3106
|
-
if return_assistant_tokens_mask and not
|
|
3107
|
-
raise ValueError("`return_assistant_tokens_mask=True`
|
|
3274
|
+
if return_assistant_tokens_mask and not return_dict:
|
|
3275
|
+
raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`")
|
|
3108
3276
|
|
|
3109
3277
|
if tokenizer_kwargs is None:
|
|
3110
3278
|
tokenizer_kwargs = {}
|
|
@@ -3189,7 +3357,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3189
3357
|
def encode_message_with_chat_template(
|
|
3190
3358
|
self,
|
|
3191
3359
|
message: dict[str, str],
|
|
3192
|
-
conversation_history: list[dict[str, str]]
|
|
3360
|
+
conversation_history: Optional[list[dict[str, str]]] = None,
|
|
3193
3361
|
**kwargs,
|
|
3194
3362
|
) -> list[int]:
|
|
3195
3363
|
"""
|
|
@@ -3219,17 +3387,13 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3219
3387
|
)
|
|
3220
3388
|
|
|
3221
3389
|
if conversation_history is None or len(conversation_history) == 0:
|
|
3222
|
-
return self.apply_chat_template(
|
|
3223
|
-
[message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
|
|
3224
|
-
)
|
|
3390
|
+
return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs)
|
|
3225
3391
|
|
|
3226
3392
|
conversation = conversation_history + [message]
|
|
3227
|
-
tokens = self.apply_chat_template(
|
|
3228
|
-
conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
|
|
3229
|
-
)
|
|
3393
|
+
tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs)
|
|
3230
3394
|
|
|
3231
3395
|
prefix_tokens = self.apply_chat_template(
|
|
3232
|
-
conversation_history, add_generation_prompt=False, tokenize=True,
|
|
3396
|
+
conversation_history, add_generation_prompt=False, tokenize=True, **kwargs
|
|
3233
3397
|
)
|
|
3234
3398
|
# It's possible that the prefix tokens are not a prefix of the full list of tokens.
|
|
3235
3399
|
# For example, if the prefix is `<s>User: Hi` and the full conversation is `<s>User: Hi</s><s>Assistant: Hello`.
|
|
@@ -3246,7 +3410,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3246
3410
|
return tokens[i:]
|
|
3247
3411
|
return tokens[min_len:]
|
|
3248
3412
|
|
|
3249
|
-
def get_chat_template(self, chat_template: str
|
|
3413
|
+
def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[list[dict]] = None) -> str:
|
|
3250
3414
|
"""
|
|
3251
3415
|
Retrieve the chat template string used for tokenizing chat messages. This template is used
|
|
3252
3416
|
internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
|
|
@@ -3302,9 +3466,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3302
3466
|
|
|
3303
3467
|
def save_chat_templates(
|
|
3304
3468
|
self,
|
|
3305
|
-
save_directory: str
|
|
3469
|
+
save_directory: Union[str, os.PathLike],
|
|
3306
3470
|
tokenizer_config: dict,
|
|
3307
|
-
filename_prefix: str
|
|
3471
|
+
filename_prefix: Optional[str],
|
|
3308
3472
|
save_jinja_files: bool,
|
|
3309
3473
|
):
|
|
3310
3474
|
"""
|
|
@@ -3355,45 +3519,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
|
|
|
3355
3519
|
tokenizer_config["chat_template"] = self.chat_template
|
|
3356
3520
|
return tokenizer_config, saved_raw_chat_template_files
|
|
3357
3521
|
|
|
3358
|
-
def parse_response(
|
|
3359
|
-
self,
|
|
3360
|
-
response: str | list[str | int | list[int]] | np.ndarray | torch.Tensor,
|
|
3361
|
-
schema: list | dict | None = None,
|
|
3362
|
-
):
|
|
3363
|
-
"""
|
|
3364
|
-
Converts an output string created by generating text from a model into a parsed message dictionary.
|
|
3365
|
-
This method is intended for use with chat models, and will read the tokenizer's `response_schema` attribute to
|
|
3366
|
-
control parsing, although this can be overridden by passing a `response_schema` argument directly.
|
|
3367
|
-
|
|
3368
|
-
This method is currently **highly experimental** and the schema specification is likely to change in future!
|
|
3369
|
-
We recommend not building production code on top of it just yet.
|
|
3370
|
-
|
|
3371
|
-
Args:
|
|
3372
|
-
response (`str`):
|
|
3373
|
-
The output string generated by the model. This can be either a decoded string or list of strings,
|
|
3374
|
-
or token IDs as a list/array.
|
|
3375
|
-
schema (`Union[list, dict]`, *optional*):
|
|
3376
|
-
A response schema that indicates the expected output format and how parsing should be performed.
|
|
3377
|
-
If not provided, the tokenizer's `response_schema` attribute will be used.
|
|
3378
|
-
"""
|
|
3379
|
-
batched = (
|
|
3380
|
-
(isinstance(response, list) and not isinstance(response[0], int))
|
|
3381
|
-
or getattr(response, "ndim", 0) > 1 # For torch/numpy tensors
|
|
3382
|
-
)
|
|
3383
|
-
|
|
3384
|
-
if schema is None:
|
|
3385
|
-
if getattr(self, "response_schema", None) is None:
|
|
3386
|
-
raise AttributeError("This tokenizer does not have a `response_schema` for parsing chat responses!")
|
|
3387
|
-
schema = self.response_schema
|
|
3388
|
-
if batched:
|
|
3389
|
-
if not (isinstance(response, list) and isinstance(response[0], str)):
|
|
3390
|
-
response = self.batch_decode(response)
|
|
3391
|
-
return [recursive_parse(single_response, schema) for single_response in response]
|
|
3392
|
-
else:
|
|
3393
|
-
if not isinstance(response, str):
|
|
3394
|
-
response = self.decode(response)
|
|
3395
|
-
return recursive_parse(response, schema)
|
|
3396
|
-
|
|
3397
3522
|
|
|
3398
3523
|
def get_fast_tokenizer_file(tokenization_files: list[str]) -> str:
|
|
3399
3524
|
"""
|
|
@@ -3603,20 +3728,15 @@ def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
|
|
|
3603
3728
|
return prepend_scheme
|
|
3604
3729
|
|
|
3605
3730
|
|
|
3606
|
-
def generate_merges(vocab, vocab_scores: dict[str, float]
|
|
3607
|
-
skip_tokens = set(skip_tokens) if skip_tokens is not None else set()
|
|
3731
|
+
def generate_merges(vocab, vocab_scores: Optional[dict[str, float]] = None):
|
|
3608
3732
|
reverse = vocab_scores is not None
|
|
3609
3733
|
vocab_scores = dict(vocab_scores) if reverse else vocab
|
|
3610
3734
|
|
|
3611
3735
|
merges = []
|
|
3612
3736
|
for merge, piece_score in vocab_scores.items():
|
|
3613
|
-
if merge in skip_tokens:
|
|
3614
|
-
continue
|
|
3615
3737
|
local = []
|
|
3616
3738
|
for index in range(1, len(merge)):
|
|
3617
3739
|
piece_l, piece_r = merge[:index], merge[index:]
|
|
3618
|
-
if piece_l in skip_tokens or piece_r in skip_tokens:
|
|
3619
|
-
continue
|
|
3620
3740
|
if piece_l in vocab and piece_r in vocab:
|
|
3621
3741
|
local.append((piece_l, piece_r, piece_score))
|
|
3622
3742
|
local = sorted(local, key=lambda x: (vocab[x[0]], vocab[x[1]]))
|