transformers-5.0.0rc2-py3-none-any.whl → transformers-5.0.0rc3-py3-none-any.whl
- transformers/__init__.py +9 -28
- transformers/audio_utils.py +32 -32
- transformers/cache_utils.py +15 -124
- transformers/cli/chat.py +3 -3
- transformers/cli/serve.py +2 -2
- transformers/cli/transformers.py +2 -1
- transformers/configuration_utils.py +31 -33
- transformers/conversion_mapping.py +5 -1
- transformers/convert_slow_tokenizer.py +3 -8
- transformers/core_model_loading.py +14 -15
- transformers/data/processors/glue.py +0 -1
- transformers/data/processors/utils.py +0 -1
- transformers/data/processors/xnli.py +0 -1
- transformers/dependency_versions_table.py +4 -4
- transformers/distributed/configuration_utils.py +1 -2
- transformers/dynamic_module_utils.py +23 -23
- transformers/feature_extraction_sequence_utils.py +19 -23
- transformers/feature_extraction_utils.py +14 -14
- transformers/generation/candidate_generator.py +1 -2
- transformers/generation/configuration_utils.py +54 -39
- transformers/generation/continuous_batching/__init__.py +0 -1
- transformers/generation/continuous_batching/cache.py +34 -6
- transformers/generation/continuous_batching/cache_manager.py +25 -12
- transformers/generation/continuous_batching/continuous_api.py +54 -23
- transformers/generation/continuous_batching/requests.py +25 -4
- transformers/generation/continuous_batching/scheduler.py +117 -49
- transformers/generation/logits_process.py +0 -128
- transformers/generation/streamers.py +0 -1
- transformers/generation/utils.py +16 -26
- transformers/generation/watermarking.py +2 -3
- transformers/hf_argparser.py +9 -13
- transformers/hyperparameter_search.py +1 -2
- transformers/image_processing_base.py +9 -9
- transformers/image_processing_utils.py +11 -12
- transformers/image_processing_utils_fast.py +53 -53
- transformers/image_transforms.py +29 -29
- transformers/image_utils.py +30 -32
- transformers/integrations/awq.py +1 -3
- transformers/integrations/deepspeed.py +1 -1
- transformers/integrations/eetq.py +0 -1
- transformers/integrations/fbgemm_fp8.py +1 -2
- transformers/integrations/finegrained_fp8.py +8 -7
- transformers/integrations/flash_attention.py +1 -1
- transformers/integrations/flex_attention.py +1 -1
- transformers/integrations/fp_quant.py +4 -6
- transformers/integrations/ggml.py +0 -1
- transformers/integrations/integration_utils.py +2 -3
- transformers/integrations/mxfp4.py +5 -6
- transformers/integrations/quark.py +2 -4
- transformers/integrations/torchao.py +4 -6
- transformers/loss/loss_lw_detr.py +356 -0
- transformers/loss/loss_utils.py +2 -0
- transformers/masking_utils.py +47 -51
- transformers/model_debugging_utils.py +4 -5
- transformers/modelcard.py +14 -192
- transformers/modeling_attn_mask_utils.py +19 -19
- transformers/modeling_flash_attention_utils.py +27 -27
- transformers/modeling_gguf_pytorch_utils.py +5 -5
- transformers/modeling_layers.py +21 -22
- transformers/modeling_outputs.py +242 -253
- transformers/modeling_rope_utils.py +32 -32
- transformers/modeling_utils.py +67 -90
- transformers/models/__init__.py +4 -0
- transformers/models/afmoe/configuration_afmoe.py +26 -29
- transformers/models/afmoe/modeling_afmoe.py +30 -33
- transformers/models/afmoe/modular_afmoe.py +16 -18
- transformers/models/aimv2/configuration_aimv2.py +2 -5
- transformers/models/aimv2/modeling_aimv2.py +20 -21
- transformers/models/aimv2/modular_aimv2.py +7 -9
- transformers/models/albert/configuration_albert.py +0 -1
- transformers/models/albert/modeling_albert.py +67 -69
- transformers/models/albert/tokenization_albert.py +1 -4
- transformers/models/align/configuration_align.py +0 -1
- transformers/models/align/modeling_align.py +61 -62
- transformers/models/align/processing_align.py +2 -30
- transformers/models/altclip/configuration_altclip.py +0 -1
- transformers/models/altclip/modeling_altclip.py +76 -77
- transformers/models/altclip/processing_altclip.py +2 -15
- transformers/models/apertus/__init__.py +0 -1
- transformers/models/apertus/configuration_apertus.py +18 -21
- transformers/models/apertus/modeling_apertus.py +31 -34
- transformers/models/apertus/modular_apertus.py +28 -30
- transformers/models/arcee/configuration_arcee.py +20 -23
- transformers/models/arcee/modeling_arcee.py +31 -34
- transformers/models/arcee/modular_arcee.py +20 -23
- transformers/models/aria/configuration_aria.py +20 -23
- transformers/models/aria/image_processing_aria.py +25 -27
- transformers/models/aria/modeling_aria.py +63 -66
- transformers/models/aria/modular_aria.py +78 -85
- transformers/models/aria/processing_aria.py +28 -35
- transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -1
- transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +3 -6
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +6 -8
- transformers/models/audioflamingo3/__init__.py +0 -1
- transformers/models/audioflamingo3/configuration_audioflamingo3.py +0 -1
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +22 -23
- transformers/models/audioflamingo3/modular_audioflamingo3.py +12 -17
- transformers/models/audioflamingo3/processing_audioflamingo3.py +6 -8
- transformers/models/auto/auto_factory.py +4 -5
- transformers/models/auto/configuration_auto.py +26 -5
- transformers/models/auto/feature_extraction_auto.py +5 -7
- transformers/models/auto/image_processing_auto.py +13 -26
- transformers/models/auto/modeling_auto.py +18 -199
- transformers/models/auto/processing_auto.py +2 -1
- transformers/models/auto/tokenization_auto.py +21 -22
- transformers/models/auto/video_processing_auto.py +7 -8
- transformers/models/autoformer/configuration_autoformer.py +4 -7
- transformers/models/autoformer/modeling_autoformer.py +98 -100
- transformers/models/aya_vision/configuration_aya_vision.py +0 -1
- transformers/models/aya_vision/modeling_aya_vision.py +35 -37
- transformers/models/aya_vision/modular_aya_vision.py +26 -29
- transformers/models/aya_vision/processing_aya_vision.py +25 -53
- transformers/models/bamba/configuration_bamba.py +29 -32
- transformers/models/bamba/modeling_bamba.py +60 -64
- transformers/models/bamba/modular_bamba.py +51 -55
- transformers/models/bark/configuration_bark.py +4 -7
- transformers/models/bark/generation_configuration_bark.py +3 -5
- transformers/models/bark/modeling_bark.py +40 -55
- transformers/models/bark/processing_bark.py +19 -41
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +115 -117
- transformers/models/barthez/tokenization_barthez.py +1 -4
- transformers/models/bartpho/tokenization_bartpho.py +6 -7
- transformers/models/beit/configuration_beit.py +0 -11
- transformers/models/beit/image_processing_beit.py +53 -56
- transformers/models/beit/image_processing_beit_fast.py +8 -9
- transformers/models/beit/modeling_beit.py +51 -53
- transformers/models/bert/configuration_bert.py +0 -1
- transformers/models/bert/modeling_bert.py +111 -122
- transformers/models/bert/tokenization_bert.py +2 -4
- transformers/models/bert/tokenization_bert_legacy.py +3 -5
- transformers/models/bert_generation/configuration_bert_generation.py +0 -1
- transformers/models/bert_generation/modeling_bert_generation.py +47 -49
- transformers/models/bert_generation/tokenization_bert_generation.py +2 -3
- transformers/models/bert_japanese/tokenization_bert_japanese.py +5 -6
- transformers/models/bertweet/tokenization_bertweet.py +1 -3
- transformers/models/big_bird/configuration_big_bird.py +0 -1
- transformers/models/big_bird/modeling_big_bird.py +107 -109
- transformers/models/big_bird/tokenization_big_bird.py +1 -4
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +0 -1
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +109 -111
- transformers/models/biogpt/configuration_biogpt.py +0 -1
- transformers/models/biogpt/modeling_biogpt.py +69 -71
- transformers/models/biogpt/modular_biogpt.py +59 -61
- transformers/models/biogpt/tokenization_biogpt.py +3 -5
- transformers/models/bit/configuration_bit.py +0 -1
- transformers/models/bit/image_processing_bit.py +21 -24
- transformers/models/bit/image_processing_bit_fast.py +0 -1
- transformers/models/bit/modeling_bit.py +9 -11
- transformers/models/bitnet/configuration_bitnet.py +18 -21
- transformers/models/bitnet/modeling_bitnet.py +31 -34
- transformers/models/bitnet/modular_bitnet.py +4 -6
- transformers/models/blenderbot/configuration_blenderbot.py +0 -1
- transformers/models/blenderbot/modeling_blenderbot.py +64 -95
- transformers/models/blenderbot/tokenization_blenderbot.py +0 -1
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +0 -1
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +66 -68
- transformers/models/blenderbot_small/tokenization_blenderbot_small.py +1 -3
- transformers/models/blip/configuration_blip.py +0 -1
- transformers/models/blip/image_processing_blip.py +17 -20
- transformers/models/blip/image_processing_blip_fast.py +0 -1
- transformers/models/blip/modeling_blip.py +60 -71
- transformers/models/blip/modeling_blip_text.py +63 -65
- transformers/models/blip/processing_blip.py +5 -36
- transformers/models/blip_2/configuration_blip_2.py +0 -1
- transformers/models/blip_2/modeling_blip_2.py +70 -71
- transformers/models/blip_2/processing_blip_2.py +8 -38
- transformers/models/bloom/configuration_bloom.py +0 -1
- transformers/models/bloom/modeling_bloom.py +58 -59
- transformers/models/blt/configuration_blt.py +71 -74
- transformers/models/blt/modeling_blt.py +73 -76
- transformers/models/blt/modular_blt.py +57 -59
- transformers/models/bridgetower/configuration_bridgetower.py +0 -1
- transformers/models/bridgetower/image_processing_bridgetower.py +34 -35
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +7 -8
- transformers/models/bridgetower/modeling_bridgetower.py +107 -109
- transformers/models/bridgetower/processing_bridgetower.py +2 -16
- transformers/models/bros/configuration_bros.py +0 -1
- transformers/models/bros/modeling_bros.py +78 -80
- transformers/models/bros/processing_bros.py +2 -12
- transformers/models/byt5/tokenization_byt5.py +4 -6
- transformers/models/camembert/configuration_camembert.py +0 -1
- transformers/models/camembert/modeling_camembert.py +91 -93
- transformers/models/camembert/modular_camembert.py +51 -54
- transformers/models/camembert/tokenization_camembert.py +1 -4
- transformers/models/canine/configuration_canine.py +0 -1
- transformers/models/canine/modeling_canine.py +73 -75
- transformers/models/canine/tokenization_canine.py +0 -1
- transformers/models/chameleon/configuration_chameleon.py +24 -27
- transformers/models/chameleon/image_processing_chameleon.py +21 -24
- transformers/models/chameleon/image_processing_chameleon_fast.py +0 -1
- transformers/models/chameleon/modeling_chameleon.py +53 -56
- transformers/models/chameleon/processing_chameleon.py +16 -41
- transformers/models/chinese_clip/configuration_chinese_clip.py +0 -1
- transformers/models/chinese_clip/image_processing_chinese_clip.py +21 -24
- transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +0 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +65 -66
- transformers/models/chinese_clip/processing_chinese_clip.py +2 -15
- transformers/models/clap/configuration_clap.py +0 -1
- transformers/models/clap/feature_extraction_clap.py +9 -10
- transformers/models/clap/modeling_clap.py +88 -89
- transformers/models/clap/processing_clap.py +2 -15
- transformers/models/clip/configuration_clip.py +0 -1
- transformers/models/clip/image_processing_clip.py +21 -24
- transformers/models/clip/image_processing_clip_fast.py +0 -1
- transformers/models/clip/modeling_clip.py +45 -46
- transformers/models/clip/processing_clip.py +2 -14
- transformers/models/clip/tokenization_clip.py +2 -5
- transformers/models/clipseg/configuration_clipseg.py +0 -1
- transformers/models/clipseg/modeling_clipseg.py +86 -87
- transformers/models/clipseg/processing_clipseg.py +8 -39
- transformers/models/clvp/configuration_clvp.py +1 -3
- transformers/models/clvp/feature_extraction_clvp.py +7 -10
- transformers/models/clvp/modeling_clvp.py +119 -115
- transformers/models/clvp/number_normalizer.py +1 -2
- transformers/models/clvp/processing_clvp.py +3 -20
- transformers/models/clvp/tokenization_clvp.py +0 -1
- transformers/models/code_llama/tokenization_code_llama.py +3 -6
- transformers/models/codegen/configuration_codegen.py +0 -1
- transformers/models/codegen/modeling_codegen.py +48 -48
- transformers/models/codegen/tokenization_codegen.py +5 -6
- transformers/models/cohere/configuration_cohere.py +20 -23
- transformers/models/cohere/modeling_cohere.py +35 -38
- transformers/models/cohere/modular_cohere.py +24 -28
- transformers/models/cohere/tokenization_cohere.py +5 -6
- transformers/models/cohere2/configuration_cohere2.py +21 -24
- transformers/models/cohere2/modeling_cohere2.py +34 -37
- transformers/models/cohere2/modular_cohere2.py +39 -41
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +6 -7
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +28 -30
- transformers/models/cohere2_vision/modular_cohere2_vision.py +21 -23
- transformers/models/cohere2_vision/processing_cohere2_vision.py +6 -36
- transformers/models/colpali/configuration_colpali.py +0 -1
- transformers/models/colpali/modeling_colpali.py +14 -16
- transformers/models/colpali/modular_colpali.py +11 -51
- transformers/models/colpali/processing_colpali.py +14 -52
- transformers/models/colqwen2/modeling_colqwen2.py +20 -22
- transformers/models/colqwen2/modular_colqwen2.py +29 -68
- transformers/models/colqwen2/processing_colqwen2.py +16 -52
- transformers/models/conditional_detr/configuration_conditional_detr.py +0 -1
- transformers/models/conditional_detr/image_processing_conditional_detr.py +64 -66
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +22 -22
- transformers/models/conditional_detr/modeling_conditional_detr.py +78 -80
- transformers/models/conditional_detr/modular_conditional_detr.py +1 -3
- transformers/models/convbert/configuration_convbert.py +0 -1
- transformers/models/convbert/modeling_convbert.py +85 -87
- transformers/models/convbert/tokenization_convbert.py +0 -1
- transformers/models/convnext/configuration_convnext.py +0 -1
- transformers/models/convnext/image_processing_convnext.py +18 -21
- transformers/models/convnext/image_processing_convnext_fast.py +5 -6
- transformers/models/convnext/modeling_convnext.py +5 -8
- transformers/models/convnextv2/configuration_convnextv2.py +0 -1
- transformers/models/convnextv2/modeling_convnextv2.py +5 -8
- transformers/models/cpm/tokenization_cpm.py +6 -7
- transformers/models/cpm/tokenization_cpm_fast.py +3 -5
- transformers/models/cpmant/configuration_cpmant.py +0 -1
- transformers/models/cpmant/modeling_cpmant.py +38 -40
- transformers/models/cpmant/tokenization_cpmant.py +1 -3
- transformers/models/csm/configuration_csm.py +49 -51
- transformers/models/csm/generation_csm.py +13 -14
- transformers/models/csm/modeling_csm.py +78 -81
- transformers/models/csm/modular_csm.py +56 -58
- transformers/models/csm/processing_csm.py +25 -68
- transformers/models/ctrl/configuration_ctrl.py +0 -1
- transformers/models/ctrl/modeling_ctrl.py +38 -41
- transformers/models/ctrl/tokenization_ctrl.py +0 -1
- transformers/models/cvt/configuration_cvt.py +0 -1
- transformers/models/cvt/modeling_cvt.py +13 -15
- transformers/models/cwm/__init__.py +0 -1
- transformers/models/cwm/configuration_cwm.py +3 -5
- transformers/models/cwm/modeling_cwm.py +32 -34
- transformers/models/cwm/modular_cwm.py +10 -12
- transformers/models/d_fine/configuration_d_fine.py +0 -1
- transformers/models/d_fine/modeling_d_fine.py +81 -82
- transformers/models/d_fine/modular_d_fine.py +8 -9
- transformers/models/dab_detr/configuration_dab_detr.py +0 -1
- transformers/models/dab_detr/modeling_dab_detr.py +68 -70
- transformers/models/dac/configuration_dac.py +0 -1
- transformers/models/dac/feature_extraction_dac.py +6 -9
- transformers/models/dac/modeling_dac.py +21 -23
- transformers/models/data2vec/configuration_data2vec_audio.py +0 -1
- transformers/models/data2vec/configuration_data2vec_text.py +0 -1
- transformers/models/data2vec/configuration_data2vec_vision.py +0 -1
- transformers/models/data2vec/modeling_data2vec_audio.py +52 -56
- transformers/models/data2vec/modeling_data2vec_text.py +91 -93
- transformers/models/data2vec/modeling_data2vec_vision.py +41 -42
- transformers/models/data2vec/modular_data2vec_audio.py +6 -1
- transformers/models/data2vec/modular_data2vec_text.py +51 -54
- transformers/models/dbrx/configuration_dbrx.py +18 -19
- transformers/models/dbrx/modeling_dbrx.py +39 -42
- transformers/models/dbrx/modular_dbrx.py +31 -33
- transformers/models/deberta/configuration_deberta.py +0 -1
- transformers/models/deberta/modeling_deberta.py +57 -60
- transformers/models/deberta/tokenization_deberta.py +2 -5
- transformers/models/deberta_v2/configuration_deberta_v2.py +0 -1
- transformers/models/deberta_v2/modeling_deberta_v2.py +63 -65
- transformers/models/deberta_v2/tokenization_deberta_v2.py +1 -4
- transformers/models/decision_transformer/configuration_decision_transformer.py +0 -1
- transformers/models/decision_transformer/modeling_decision_transformer.py +48 -50
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +34 -37
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +32 -33
- transformers/models/deepseek_v2/modular_deepseek_v2.py +40 -42
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +35 -38
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +31 -33
- transformers/models/deepseek_v3/modular_deepseek_v3.py +4 -5
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +2 -3
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +25 -26
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +7 -6
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +31 -31
- transformers/models/deepseek_vl/modular_deepseek_vl.py +11 -43
- transformers/models/deepseek_vl/processing_deepseek_vl.py +10 -41
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +3 -5
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +35 -35
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +16 -16
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +33 -33
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +71 -90
- transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +12 -44
- transformers/models/deformable_detr/configuration_deformable_detr.py +0 -1
- transformers/models/deformable_detr/image_processing_deformable_detr.py +59 -61
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +17 -17
- transformers/models/deformable_detr/modeling_deformable_detr.py +66 -67
- transformers/models/deformable_detr/modular_deformable_detr.py +1 -3
- transformers/models/deit/configuration_deit.py +0 -1
- transformers/models/deit/image_processing_deit.py +18 -21
- transformers/models/deit/image_processing_deit_fast.py +0 -1
- transformers/models/deit/modeling_deit.py +16 -18
- transformers/models/depth_anything/configuration_depth_anything.py +0 -1
- transformers/models/depth_anything/modeling_depth_anything.py +5 -8
- transformers/models/depth_pro/configuration_depth_pro.py +0 -1
- transformers/models/depth_pro/image_processing_depth_pro.py +22 -23
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +6 -7
- transformers/models/depth_pro/modeling_depth_pro.py +21 -23
- transformers/models/detr/configuration_detr.py +0 -1
- transformers/models/detr/image_processing_detr.py +64 -66
- transformers/models/detr/image_processing_detr_fast.py +22 -23
- transformers/models/detr/modeling_detr.py +70 -72
- transformers/models/dia/configuration_dia.py +5 -8
- transformers/models/dia/feature_extraction_dia.py +6 -9
- transformers/models/dia/generation_dia.py +40 -36
- transformers/models/dia/modeling_dia.py +61 -64
- transformers/models/dia/modular_dia.py +52 -54
- transformers/models/dia/processing_dia.py +39 -29
- transformers/models/dia/tokenization_dia.py +3 -6
- transformers/models/diffllama/configuration_diffllama.py +20 -23
- transformers/models/diffllama/modeling_diffllama.py +42 -45
- transformers/models/diffllama/modular_diffllama.py +16 -18
- transformers/models/dinat/configuration_dinat.py +0 -1
- transformers/models/dinat/modeling_dinat.py +40 -42
- transformers/models/dinov2/configuration_dinov2.py +0 -1
- transformers/models/dinov2/modeling_dinov2.py +11 -13
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +1 -1
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +12 -13
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +5 -7
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +4 -7
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +3 -6
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +5 -8
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +5 -6
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +14 -16
- transformers/models/dinov3_vit/modular_dinov3_vit.py +11 -13
- transformers/models/distilbert/configuration_distilbert.py +0 -1
- transformers/models/distilbert/modeling_distilbert.py +44 -46
- transformers/models/distilbert/tokenization_distilbert.py +0 -1
- transformers/models/doge/__init__.py +0 -1
- transformers/models/doge/configuration_doge.py +25 -28
- transformers/models/doge/modeling_doge.py +42 -45
- transformers/models/doge/modular_doge.py +57 -58
- transformers/models/donut/configuration_donut_swin.py +0 -1
- transformers/models/donut/image_processing_donut.py +26 -29
- transformers/models/donut/image_processing_donut_fast.py +5 -10
- transformers/models/donut/modeling_donut_swin.py +44 -46
- transformers/models/donut/processing_donut.py +5 -26
- transformers/models/dots1/configuration_dots1.py +27 -29
- transformers/models/dots1/modeling_dots1.py +31 -34
- transformers/models/dots1/modular_dots1.py +0 -1
- transformers/models/dpr/configuration_dpr.py +0 -1
- transformers/models/dpr/modeling_dpr.py +37 -39
- transformers/models/dpr/tokenization_dpr.py +7 -9
- transformers/models/dpr/tokenization_dpr_fast.py +7 -9
- transformers/models/dpt/configuration_dpt.py +0 -1
- transformers/models/dpt/image_processing_dpt.py +65 -66
- transformers/models/dpt/image_processing_dpt_fast.py +13 -14
- transformers/models/dpt/modeling_dpt.py +19 -21
- transformers/models/dpt/modular_dpt.py +10 -11
- transformers/models/edgetam/configuration_edgetam.py +0 -1
- transformers/models/edgetam/modeling_edgetam.py +39 -41
- transformers/models/edgetam/modular_edgetam.py +2 -6
- transformers/models/edgetam_video/__init__.py +0 -1
- transformers/models/edgetam_video/configuration_edgetam_video.py +0 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +76 -77
- transformers/models/edgetam_video/modular_edgetam_video.py +16 -18
- transformers/models/efficientloftr/configuration_efficientloftr.py +4 -5
- transformers/models/efficientloftr/image_processing_efficientloftr.py +14 -16
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +4 -4
- transformers/models/efficientloftr/modeling_efficientloftr.py +27 -29
- transformers/models/efficientloftr/modular_efficientloftr.py +1 -3
- transformers/models/efficientnet/configuration_efficientnet.py +0 -1
- transformers/models/efficientnet/image_processing_efficientnet.py +23 -26
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +14 -15
- transformers/models/efficientnet/modeling_efficientnet.py +12 -14
- transformers/models/electra/configuration_electra.py +0 -1
- transformers/models/electra/modeling_electra.py +101 -103
- transformers/models/emu3/configuration_emu3.py +5 -7
- transformers/models/emu3/image_processing_emu3.py +44 -39
- transformers/models/emu3/modeling_emu3.py +59 -62
- transformers/models/emu3/modular_emu3.py +32 -34
- transformers/models/emu3/processing_emu3.py +18 -43
- transformers/models/encodec/configuration_encodec.py +2 -4
- transformers/models/encodec/feature_extraction_encodec.py +10 -13
- transformers/models/encodec/modeling_encodec.py +25 -29
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +0 -1
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +17 -19
- transformers/models/eomt/configuration_eomt.py +0 -1
- transformers/models/eomt/image_processing_eomt.py +53 -55
- transformers/models/eomt/image_processing_eomt_fast.py +15 -16
- transformers/models/eomt/modeling_eomt.py +16 -18
- transformers/models/eomt/modular_eomt.py +11 -13
- transformers/models/ernie/configuration_ernie.py +0 -1
- transformers/models/ernie/modeling_ernie.py +121 -132
- transformers/models/ernie/modular_ernie.py +91 -103
- transformers/models/ernie4_5/configuration_ernie4_5.py +18 -20
- transformers/models/ernie4_5/modeling_ernie4_5.py +31 -33
- transformers/models/ernie4_5/modular_ernie4_5.py +1 -3
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +27 -29
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +36 -38
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +7 -9
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +0 -1
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +34 -35
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +6 -7
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +84 -87
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +86 -89
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +3 -5
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +17 -18
- transformers/models/esm/configuration_esm.py +2 -4
- transformers/models/esm/modeling_esm.py +32 -34
- transformers/models/esm/modeling_esmfold.py +42 -44
- transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
- transformers/models/esm/openfold_utils/loss.py +1 -2
- transformers/models/esm/openfold_utils/protein.py +13 -13
- transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
- transformers/models/esm/tokenization_esm.py +2 -4
- transformers/models/evolla/configuration_evolla.py +29 -32
- transformers/models/evolla/modeling_evolla.py +58 -61
- transformers/models/evolla/modular_evolla.py +45 -47
- transformers/models/evolla/processing_evolla.py +23 -35
- transformers/models/exaone4/configuration_exaone4.py +19 -22
- transformers/models/exaone4/modeling_exaone4.py +32 -35
- transformers/models/exaone4/modular_exaone4.py +40 -42
- transformers/models/falcon/configuration_falcon.py +22 -25
- transformers/models/falcon/modeling_falcon.py +73 -76
- transformers/models/falcon_h1/configuration_falcon_h1.py +40 -43
- transformers/models/falcon_h1/modeling_falcon_h1.py +52 -55
- transformers/models/falcon_h1/modular_falcon_h1.py +47 -48
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +0 -1
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +46 -47
- transformers/models/falcon_mamba/modular_falcon_mamba.py +10 -13
- transformers/models/fast_vlm/configuration_fast_vlm.py +1 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +36 -36
- transformers/models/fast_vlm/modular_fast_vlm.py +2 -3
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +2 -5
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +45 -47
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -3
- transformers/models/flaubert/configuration_flaubert.py +0 -1
- transformers/models/flaubert/modeling_flaubert.py +124 -128
- transformers/models/flaubert/tokenization_flaubert.py +3 -5
- transformers/models/flava/configuration_flava.py +5 -6
- transformers/models/flava/image_processing_flava.py +66 -67
- transformers/models/flava/image_processing_flava_fast.py +42 -43
- transformers/models/flava/modeling_flava.py +108 -107
- transformers/models/flava/processing_flava.py +2 -12
- transformers/models/flex_olmo/__init__.py +0 -1
- transformers/models/flex_olmo/configuration_flex_olmo.py +23 -25
- transformers/models/flex_olmo/modeling_flex_olmo.py +37 -39
- transformers/models/flex_olmo/modular_flex_olmo.py +35 -37
- transformers/models/florence2/configuration_florence2.py +0 -1
- transformers/models/florence2/modeling_florence2.py +39 -40
- transformers/models/florence2/modular_florence2.py +52 -81
- transformers/models/florence2/processing_florence2.py +18 -47
- transformers/models/fnet/configuration_fnet.py +0 -1
- transformers/models/fnet/modeling_fnet.py +69 -80
- transformers/models/fnet/tokenization_fnet.py +0 -1
- transformers/models/focalnet/configuration_focalnet.py +0 -1
- transformers/models/focalnet/modeling_focalnet.py +39 -41
- transformers/models/fsmt/configuration_fsmt.py +0 -1
- transformers/models/fsmt/modeling_fsmt.py +47 -48
- transformers/models/fsmt/tokenization_fsmt.py +3 -5
- transformers/models/funnel/configuration_funnel.py +0 -1
- transformers/models/funnel/modeling_funnel.py +91 -93
- transformers/models/funnel/tokenization_funnel.py +2 -5
- transformers/models/fuyu/configuration_fuyu.py +23 -26
- transformers/models/fuyu/image_processing_fuyu.py +29 -31
- transformers/models/fuyu/image_processing_fuyu_fast.py +12 -13
- transformers/models/fuyu/modeling_fuyu.py +26 -29
- transformers/models/fuyu/processing_fuyu.py +9 -36
- transformers/models/gemma/configuration_gemma.py +20 -23
- transformers/models/gemma/modeling_gemma.py +32 -34
- transformers/models/gemma/modular_gemma.py +28 -29
- transformers/models/gemma/tokenization_gemma.py +3 -6
- transformers/models/gemma2/configuration_gemma2.py +25 -28
- transformers/models/gemma2/modeling_gemma2.py +34 -37
- transformers/models/gemma2/modular_gemma2.py +55 -57
- transformers/models/gemma3/configuration_gemma3.py +28 -29
- transformers/models/gemma3/image_processing_gemma3.py +29 -31
- transformers/models/gemma3/image_processing_gemma3_fast.py +9 -10
- transformers/models/gemma3/modeling_gemma3.py +86 -89
- transformers/models/gemma3/modular_gemma3.py +85 -86
- transformers/models/gemma3/processing_gemma3.py +5 -5
- transformers/models/gemma3n/configuration_gemma3n.py +9 -10
- transformers/models/gemma3n/feature_extraction_gemma3n.py +9 -11
- transformers/models/gemma3n/modeling_gemma3n.py +80 -89
- transformers/models/gemma3n/modular_gemma3n.py +66 -75
- transformers/models/gemma3n/processing_gemma3n.py +12 -26
- transformers/models/git/configuration_git.py +0 -1
- transformers/models/git/modeling_git.py +84 -86
- transformers/models/git/processing_git.py +2 -14
- transformers/models/glm/configuration_glm.py +19 -21
- transformers/models/glm/modeling_glm.py +32 -35
- transformers/models/glm/modular_glm.py +4 -7
- transformers/models/glm4/configuration_glm4.py +19 -21
- transformers/models/glm4/modeling_glm4.py +35 -37
- transformers/models/glm4/modular_glm4.py +8 -10
- transformers/models/glm46v/configuration_glm46v.py +0 -1
- transformers/models/glm46v/image_processing_glm46v.py +35 -36
- transformers/models/glm46v/image_processing_glm46v_fast.py +7 -7
- transformers/models/glm46v/modeling_glm46v.py +51 -51
- transformers/models/glm46v/modular_glm46v.py +1 -3
- transformers/models/glm46v/processing_glm46v.py +7 -41
- transformers/models/glm46v/video_processing_glm46v.py +9 -11
- transformers/models/glm4_moe/configuration_glm4_moe.py +25 -28
- transformers/models/glm4_moe/modeling_glm4_moe.py +32 -35
- transformers/models/glm4_moe/modular_glm4_moe.py +26 -29
- transformers/models/glm4_moe_lite/__init__.py +28 -0
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +235 -0
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +740 -0
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +304 -0
- transformers/models/glm4v/configuration_glm4v.py +14 -17
- transformers/models/glm4v/image_processing_glm4v.py +34 -36
- transformers/models/glm4v/image_processing_glm4v_fast.py +6 -7
- transformers/models/glm4v/modeling_glm4v.py +133 -151
- transformers/models/glm4v/modular_glm4v.py +131 -182
- transformers/models/glm4v/processing_glm4v.py +7 -41
- transformers/models/glm4v/video_processing_glm4v.py +9 -11
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +119 -122
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +237 -297
- transformers/models/glm4v_moe/modular_glm4v_moe.py +54 -163
- transformers/models/glm_image/__init__.py +31 -0
- transformers/models/glm_image/configuration_glm_image.py +352 -0
- transformers/models/glm_image/image_processing_glm_image.py +503 -0
- transformers/models/glm_image/image_processing_glm_image_fast.py +296 -0
- transformers/models/glm_image/modeling_glm_image.py +1590 -0
- transformers/models/glm_image/modular_glm_image.py +1480 -0
- transformers/models/glm_image/processing_glm_image.py +217 -0
- transformers/models/glmasr/__init__.py +0 -1
- transformers/models/glmasr/configuration_glmasr.py +0 -1
- transformers/models/glmasr/modeling_glmasr.py +17 -18
- transformers/models/glmasr/modular_glmasr.py +16 -18
- transformers/models/glmasr/processing_glmasr.py +7 -8
- transformers/models/glpn/configuration_glpn.py +0 -1
- transformers/models/glpn/image_processing_glpn.py +11 -12
- transformers/models/glpn/image_processing_glpn_fast.py +8 -9
- transformers/models/glpn/modeling_glpn.py +10 -12
- transformers/models/got_ocr2/configuration_got_ocr2.py +5 -8
- transformers/models/got_ocr2/image_processing_got_ocr2.py +22 -24
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +6 -7
- transformers/models/got_ocr2/modeling_got_ocr2.py +40 -42
- transformers/models/got_ocr2/modular_got_ocr2.py +31 -34
- transformers/models/got_ocr2/processing_got_ocr2.py +42 -63
- transformers/models/gpt2/configuration_gpt2.py +0 -1
- transformers/models/gpt2/modeling_gpt2.py +106 -108
- transformers/models/gpt2/tokenization_gpt2.py +6 -9
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +0 -1
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +73 -80
- transformers/models/gpt_neo/configuration_gpt_neo.py +0 -1
- transformers/models/gpt_neo/modeling_gpt_neo.py +63 -64
- transformers/models/gpt_neox/configuration_gpt_neox.py +19 -22
- transformers/models/gpt_neox/modeling_gpt_neox.py +70 -72
- transformers/models/gpt_neox/modular_gpt_neox.py +64 -66
- transformers/models/gpt_neox/tokenization_gpt_neox.py +2 -5
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +15 -18
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +41 -44
- transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +1 -3
- transformers/models/gpt_oss/configuration_gpt_oss.py +21 -24
- transformers/models/gpt_oss/modeling_gpt_oss.py +34 -35
- transformers/models/gpt_oss/modular_gpt_oss.py +17 -19
- transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
- transformers/models/gptj/configuration_gptj.py +0 -1
- transformers/models/gptj/modeling_gptj.py +82 -81
- transformers/models/granite/configuration_granite.py +23 -26
- transformers/models/granite/modeling_granite.py +39 -41
- transformers/models/granite/modular_granite.py +29 -31
- transformers/models/granite_speech/configuration_granite_speech.py +0 -1
- transformers/models/granite_speech/feature_extraction_granite_speech.py +1 -3
- transformers/models/granite_speech/modeling_granite_speech.py +21 -23
- transformers/models/granite_speech/processing_granite_speech.py +11 -4
- transformers/models/granitemoe/configuration_granitemoe.py +26 -29
- transformers/models/granitemoe/modeling_granitemoe.py +35 -37
- transformers/models/granitemoe/modular_granitemoe.py +21 -23
- transformers/models/granitemoehybrid/__init__.py +0 -1
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +38 -41
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +60 -64
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +18 -20
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +27 -30
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +48 -52
- transformers/models/granitemoeshared/modular_granitemoeshared.py +19 -21
- transformers/models/grounding_dino/configuration_grounding_dino.py +0 -1
- transformers/models/grounding_dino/image_processing_grounding_dino.py +60 -62
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +17 -18
- transformers/models/grounding_dino/modeling_grounding_dino.py +94 -96
- transformers/models/grounding_dino/modular_grounding_dino.py +2 -3
- transformers/models/grounding_dino/processing_grounding_dino.py +10 -38
- transformers/models/groupvit/configuration_groupvit.py +0 -1
- transformers/models/groupvit/modeling_groupvit.py +69 -70
- transformers/models/helium/configuration_helium.py +20 -22
- transformers/models/helium/modeling_helium.py +33 -36
- transformers/models/helium/modular_helium.py +3 -7
- transformers/models/herbert/tokenization_herbert.py +4 -6
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +0 -1
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -9
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -9
- transformers/models/hiera/configuration_hiera.py +0 -1
- transformers/models/hiera/modeling_hiera.py +60 -62
- transformers/models/hubert/configuration_hubert.py +0 -1
- transformers/models/hubert/modeling_hubert.py +35 -37
- transformers/models/hubert/modular_hubert.py +8 -11
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +21 -24
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +30 -33
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +3 -5
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +25 -28
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +32 -35
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +5 -7
- transformers/models/ibert/configuration_ibert.py +0 -1
- transformers/models/ibert/modeling_ibert.py +60 -62
- transformers/models/ibert/quant_modules.py +0 -1
- transformers/models/idefics/configuration_idefics.py +0 -1
- transformers/models/idefics/image_processing_idefics.py +13 -15
- transformers/models/idefics/modeling_idefics.py +60 -61
- transformers/models/idefics/perceiver.py +1 -3
- transformers/models/idefics/processing_idefics.py +32 -48
- transformers/models/idefics/vision.py +22 -24
- transformers/models/idefics2/configuration_idefics2.py +0 -1
- transformers/models/idefics2/image_processing_idefics2.py +31 -32
- transformers/models/idefics2/image_processing_idefics2_fast.py +7 -8
- transformers/models/idefics2/modeling_idefics2.py +56 -58
- transformers/models/idefics2/processing_idefics2.py +10 -68
- transformers/models/idefics3/configuration_idefics3.py +0 -1
- transformers/models/idefics3/image_processing_idefics3.py +42 -43
- transformers/models/idefics3/image_processing_idefics3_fast.py +11 -12
- transformers/models/idefics3/modeling_idefics3.py +52 -54
- transformers/models/idefics3/processing_idefics3.py +15 -69
- transformers/models/ijepa/configuration_ijepa.py +0 -1
- transformers/models/ijepa/modeling_ijepa.py +10 -11
- transformers/models/ijepa/modular_ijepa.py +5 -7
- transformers/models/imagegpt/configuration_imagegpt.py +0 -1
- transformers/models/imagegpt/image_processing_imagegpt.py +17 -18
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +8 -9
- transformers/models/imagegpt/modeling_imagegpt.py +57 -58
- transformers/models/informer/configuration_informer.py +6 -9
- transformers/models/informer/modeling_informer.py +84 -86
- transformers/models/informer/modular_informer.py +13 -16
- transformers/models/instructblip/configuration_instructblip.py +0 -1
- transformers/models/instructblip/modeling_instructblip.py +43 -44
- transformers/models/instructblip/processing_instructblip.py +10 -36
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +0 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +55 -55
- transformers/models/instructblipvideo/modular_instructblipvideo.py +34 -36
- transformers/models/instructblipvideo/processing_instructblipvideo.py +14 -33
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +4 -5
- transformers/models/internvl/configuration_internvl.py +0 -1
- transformers/models/internvl/modeling_internvl.py +41 -43
- transformers/models/internvl/modular_internvl.py +19 -21
- transformers/models/internvl/processing_internvl.py +12 -45
- transformers/models/internvl/video_processing_internvl.py +8 -9
- transformers/models/jais2/configuration_jais2.py +20 -22
- transformers/models/jais2/modeling_jais2.py +32 -34
- transformers/models/jais2/modular_jais2.py +20 -22
- transformers/models/jamba/configuration_jamba.py +0 -1
- transformers/models/jamba/modeling_jamba.py +43 -46
- transformers/models/jamba/modular_jamba.py +37 -38
- transformers/models/janus/configuration_janus.py +0 -1
- transformers/models/janus/image_processing_janus.py +35 -37
- transformers/models/janus/image_processing_janus_fast.py +12 -13
- transformers/models/janus/modeling_janus.py +41 -43
- transformers/models/janus/modular_janus.py +60 -63
- transformers/models/janus/processing_janus.py +17 -43
- transformers/models/jetmoe/configuration_jetmoe.py +20 -23
- transformers/models/jetmoe/modeling_jetmoe.py +39 -42
- transformers/models/jetmoe/modular_jetmoe.py +30 -33
- transformers/models/kosmos2/configuration_kosmos2.py +0 -1
- transformers/models/kosmos2/modeling_kosmos2.py +145 -146
- transformers/models/kosmos2/processing_kosmos2.py +40 -55
- transformers/models/kosmos2_5/__init__.py +0 -1
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +0 -1
- transformers/models/kosmos2_5/image_processing_kosmos2_5.py +10 -12
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -11
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +108 -109
- transformers/models/kosmos2_5/processing_kosmos2_5.py +8 -29
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +23 -25
- transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +12 -14
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +59 -66
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +19 -21
- transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +2 -8
- transformers/models/lasr/configuration_lasr.py +1 -3
- transformers/models/lasr/feature_extraction_lasr.py +10 -12
- transformers/models/lasr/modeling_lasr.py +18 -21
- transformers/models/lasr/modular_lasr.py +8 -10
- transformers/models/lasr/processing_lasr.py +12 -6
- transformers/models/lasr/tokenization_lasr.py +2 -4
- transformers/models/layoutlm/configuration_layoutlm.py +0 -1
- transformers/models/layoutlm/modeling_layoutlm.py +67 -69
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +0 -1
- transformers/models/layoutlmv2/image_processing_layoutlmv2.py +18 -21
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +5 -6
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +48 -50
- transformers/models/layoutlmv2/processing_layoutlmv2.py +14 -44
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +63 -74
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +0 -1
- transformers/models/layoutlmv3/image_processing_layoutlmv3.py +24 -26
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +7 -8
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +49 -51
- transformers/models/layoutlmv3/processing_layoutlmv3.py +14 -46
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +64 -75
- transformers/models/layoutxlm/configuration_layoutxlm.py +0 -1
- transformers/models/layoutxlm/modular_layoutxlm.py +0 -1
- transformers/models/layoutxlm/processing_layoutxlm.py +14 -44
- transformers/models/layoutxlm/tokenization_layoutxlm.py +65 -76
- transformers/models/led/configuration_led.py +1 -4
- transformers/models/led/modeling_led.py +113 -267
- transformers/models/levit/configuration_levit.py +0 -1
- transformers/models/levit/image_processing_levit.py +19 -21
- transformers/models/levit/image_processing_levit_fast.py +0 -1
- transformers/models/levit/modeling_levit.py +17 -19
- transformers/models/lfm2/configuration_lfm2.py +22 -23
- transformers/models/lfm2/modeling_lfm2.py +42 -44
- transformers/models/lfm2/modular_lfm2.py +29 -29
- transformers/models/lfm2_moe/__init__.py +0 -1
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +1 -2
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +44 -45
- transformers/models/lfm2_moe/modular_lfm2_moe.py +8 -9
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +0 -1
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +34 -5
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +31 -33
- transformers/models/lfm2_vl/modular_lfm2_vl.py +24 -27
- transformers/models/lfm2_vl/processing_lfm2_vl.py +14 -34
- transformers/models/lightglue/image_processing_lightglue.py +16 -15
- transformers/models/lightglue/image_processing_lightglue_fast.py +4 -4
- transformers/models/lightglue/modeling_lightglue.py +28 -30
- transformers/models/lightglue/modular_lightglue.py +28 -28
- transformers/models/lighton_ocr/__init__.py +28 -0
- transformers/models/lighton_ocr/configuration_lighton_ocr.py +128 -0
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +460 -0
- transformers/models/lighton_ocr/modular_lighton_ocr.py +403 -0
- transformers/models/lighton_ocr/processing_lighton_ocr.py +229 -0
- transformers/models/lilt/configuration_lilt.py +0 -1
- transformers/models/lilt/modeling_lilt.py +53 -55
- transformers/models/llama/configuration_llama.py +21 -24
- transformers/models/llama/modeling_llama.py +31 -34
- transformers/models/llama/tokenization_llama.py +2 -4
- transformers/models/llama4/configuration_llama4.py +20 -22
- transformers/models/llama4/image_processing_llama4_fast.py +8 -9
- transformers/models/llama4/modeling_llama4.py +70 -71
- transformers/models/llama4/processing_llama4.py +33 -57
- transformers/models/llava/configuration_llava.py +0 -1
- transformers/models/llava/image_processing_llava.py +25 -28
- transformers/models/llava/image_processing_llava_fast.py +6 -7
- transformers/models/llava/modeling_llava.py +35 -37
- transformers/models/llava/processing_llava.py +18 -51
- transformers/models/llava_next/configuration_llava_next.py +0 -1
- transformers/models/llava_next/image_processing_llava_next.py +43 -45
- transformers/models/llava_next/image_processing_llava_next_fast.py +5 -6
- transformers/models/llava_next/modeling_llava_next.py +42 -44
- transformers/models/llava_next/processing_llava_next.py +18 -47
- transformers/models/llava_next_video/configuration_llava_next_video.py +0 -1
- transformers/models/llava_next_video/modeling_llava_next_video.py +53 -55
- transformers/models/llava_next_video/modular_llava_next_video.py +44 -46
- transformers/models/llava_next_video/processing_llava_next_video.py +21 -63
- transformers/models/llava_next_video/video_processing_llava_next_video.py +0 -1
- transformers/models/llava_onevision/configuration_llava_onevision.py +0 -1
- transformers/models/llava_onevision/image_processing_llava_onevision.py +40 -42
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +6 -7
- transformers/models/llava_onevision/modeling_llava_onevision.py +60 -62
- transformers/models/llava_onevision/modular_llava_onevision.py +51 -52
- transformers/models/llava_onevision/processing_llava_onevision.py +21 -53
- transformers/models/llava_onevision/video_processing_llava_onevision.py +0 -1
- transformers/models/longcat_flash/__init__.py +0 -1
- transformers/models/longcat_flash/configuration_longcat_flash.py +32 -35
- transformers/models/longcat_flash/modeling_longcat_flash.py +30 -31
- transformers/models/longcat_flash/modular_longcat_flash.py +17 -19
- transformers/models/longformer/configuration_longformer.py +1 -4
- transformers/models/longformer/modeling_longformer.py +99 -101
- transformers/models/longt5/configuration_longt5.py +0 -1
- transformers/models/longt5/modeling_longt5.py +43 -44
- transformers/models/luke/configuration_luke.py +0 -1
- transformers/models/luke/modeling_luke.py +179 -181
- transformers/models/luke/tokenization_luke.py +99 -105
- transformers/models/lw_detr/__init__.py +27 -0
- transformers/models/lw_detr/configuration_lw_detr.py +374 -0
- transformers/models/lw_detr/modeling_lw_detr.py +1698 -0
- transformers/models/lw_detr/modular_lw_detr.py +1611 -0
- transformers/models/lxmert/configuration_lxmert.py +0 -1
- transformers/models/lxmert/modeling_lxmert.py +63 -74
- transformers/models/m2m_100/configuration_m2m_100.py +0 -1
- transformers/models/m2m_100/modeling_m2m_100.py +69 -71
- transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
- transformers/models/mamba/configuration_mamba.py +0 -1
- transformers/models/mamba/modeling_mamba.py +43 -44
- transformers/models/mamba2/configuration_mamba2.py +0 -1
- transformers/models/mamba2/modeling_mamba2.py +44 -46
- transformers/models/marian/configuration_marian.py +0 -1
- transformers/models/marian/modeling_marian.py +84 -86
- transformers/models/marian/tokenization_marian.py +6 -6
- transformers/models/markuplm/configuration_markuplm.py +0 -1
- transformers/models/markuplm/feature_extraction_markuplm.py +1 -2
- transformers/models/markuplm/modeling_markuplm.py +60 -62
- transformers/models/markuplm/processing_markuplm.py +31 -38
- transformers/models/markuplm/tokenization_markuplm.py +67 -77
- transformers/models/mask2former/configuration_mask2former.py +4 -7
- transformers/models/mask2former/image_processing_mask2former.py +84 -85
- transformers/models/mask2former/image_processing_mask2former_fast.py +29 -29
- transformers/models/mask2former/modeling_mask2former.py +90 -92
- transformers/models/mask2former/modular_mask2former.py +6 -8
- transformers/models/maskformer/configuration_maskformer.py +5 -8
- transformers/models/maskformer/configuration_maskformer_swin.py +0 -1
- transformers/models/maskformer/image_processing_maskformer.py +84 -85
- transformers/models/maskformer/image_processing_maskformer_fast.py +28 -29
- transformers/models/maskformer/modeling_maskformer.py +56 -58
- transformers/models/maskformer/modeling_maskformer_swin.py +18 -20
- transformers/models/mbart/configuration_mbart.py +0 -1
- transformers/models/mbart/modeling_mbart.py +111 -113
- transformers/models/mbart/tokenization_mbart.py +2 -4
- transformers/models/mbart50/tokenization_mbart50.py +3 -5
- transformers/models/megatron_bert/configuration_megatron_bert.py +0 -1
- transformers/models/megatron_bert/modeling_megatron_bert.py +139 -150
- transformers/models/metaclip_2/modeling_metaclip_2.py +46 -46
- transformers/models/metaclip_2/modular_metaclip_2.py +19 -21
- transformers/models/mgp_str/configuration_mgp_str.py +0 -1
- transformers/models/mgp_str/modeling_mgp_str.py +14 -16
- transformers/models/mgp_str/processing_mgp_str.py +3 -20
- transformers/models/mgp_str/tokenization_mgp_str.py +1 -3
- transformers/models/mimi/configuration_mimi.py +38 -40
- transformers/models/mimi/modeling_mimi.py +76 -79
- transformers/models/minimax/__init__.py +0 -1
- transformers/models/minimax/configuration_minimax.py +32 -36
- transformers/models/minimax/modeling_minimax.py +41 -44
- transformers/models/minimax/modular_minimax.py +50 -53
- transformers/models/minimax_m2/__init__.py +28 -0
- transformers/models/minimax_m2/configuration_minimax_m2.py +211 -0
- transformers/models/minimax_m2/modeling_minimax_m2.py +704 -0
- transformers/models/minimax_m2/modular_minimax_m2.py +369 -0
- transformers/models/ministral/configuration_ministral.py +20 -22
- transformers/models/ministral/modeling_ministral.py +31 -33
- transformers/models/ministral/modular_ministral.py +27 -29
- transformers/models/ministral3/configuration_ministral3.py +19 -22
- transformers/models/ministral3/modeling_ministral3.py +31 -33
- transformers/models/ministral3/modular_ministral3.py +4 -5
- transformers/models/mistral/configuration_mistral.py +19 -22
- transformers/models/mistral/modeling_mistral.py +31 -33
- transformers/models/mistral/modular_mistral.py +11 -12
- transformers/models/mistral3/configuration_mistral3.py +0 -1
- transformers/models/mistral3/modeling_mistral3.py +43 -42
- transformers/models/mistral3/modular_mistral3.py +35 -35
- transformers/models/mixtral/configuration_mixtral.py +24 -27
- transformers/models/mixtral/modeling_mixtral.py +35 -38
- transformers/models/mixtral/modular_mixtral.py +26 -29
- transformers/models/mlcd/configuration_mlcd.py +0 -1
- transformers/models/mlcd/modeling_mlcd.py +10 -12
- transformers/models/mlcd/modular_mlcd.py +9 -11
- transformers/models/mllama/configuration_mllama.py +5 -8
- transformers/models/mllama/image_processing_mllama.py +23 -25
- transformers/models/mllama/image_processing_mllama_fast.py +5 -6
- transformers/models/mllama/modeling_mllama.py +81 -84
- transformers/models/mllama/processing_mllama.py +6 -55
- transformers/models/mluke/tokenization_mluke.py +97 -103
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +0 -1
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +94 -96
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +0 -1
- transformers/models/mobilebert/configuration_mobilebert.py +0 -1
- transformers/models/mobilebert/modeling_mobilebert.py +75 -85
- transformers/models/mobilebert/tokenization_mobilebert.py +0 -1
- transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +0 -1
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +20 -23
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +0 -1
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +13 -16
- transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +0 -1
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +48 -51
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +10 -11
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +17 -20
- transformers/models/mobilevit/configuration_mobilevit.py +0 -1
- transformers/models/mobilevit/image_processing_mobilevit.py +41 -44
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +8 -9
- transformers/models/mobilevit/modeling_mobilevit.py +17 -19
- transformers/models/mobilevitv2/configuration_mobilevitv2.py +0 -1
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +17 -20
- transformers/models/modernbert/configuration_modernbert.py +34 -34
- transformers/models/modernbert/modeling_modernbert.py +123 -125
- transformers/models/modernbert/modular_modernbert.py +155 -155
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +30 -32
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +45 -47
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +69 -70
- transformers/models/moonshine/configuration_moonshine.py +22 -24
- transformers/models/moonshine/modeling_moonshine.py +63 -65
- transformers/models/moonshine/modular_moonshine.py +72 -73
- transformers/models/moshi/configuration_moshi.py +18 -21
- transformers/models/moshi/modeling_moshi.py +130 -133
- transformers/models/mpnet/configuration_mpnet.py +0 -1
- transformers/models/mpnet/modeling_mpnet.py +55 -57
- transformers/models/mpnet/tokenization_mpnet.py +1 -4
- transformers/models/mpt/configuration_mpt.py +1 -9
- transformers/models/mpt/modeling_mpt.py +58 -60
- transformers/models/mra/configuration_mra.py +0 -1
- transformers/models/mra/modeling_mra.py +54 -56
- transformers/models/mt5/configuration_mt5.py +0 -1
- transformers/models/mt5/modeling_mt5.py +75 -77
- transformers/models/musicgen/configuration_musicgen.py +0 -1
- transformers/models/musicgen/modeling_musicgen.py +108 -111
- transformers/models/musicgen/processing_musicgen.py +3 -21
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +0 -1
- transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +8 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +106 -109
- transformers/models/musicgen_melody/processing_musicgen_melody.py +3 -22
- transformers/models/mvp/configuration_mvp.py +0 -1
- transformers/models/mvp/modeling_mvp.py +115 -119
- transformers/models/myt5/tokenization_myt5.py +8 -10
- transformers/models/nanochat/configuration_nanochat.py +0 -1
- transformers/models/nanochat/modeling_nanochat.py +32 -35
- transformers/models/nanochat/modular_nanochat.py +12 -14
- transformers/models/nemotron/configuration_nemotron.py +20 -23
- transformers/models/nemotron/modeling_nemotron.py +49 -52
- transformers/models/nllb/tokenization_nllb.py +7 -9
- transformers/models/nllb_moe/configuration_nllb_moe.py +0 -1
- transformers/models/nllb_moe/modeling_nllb_moe.py +67 -69
- transformers/models/nougat/image_processing_nougat.py +29 -32
- transformers/models/nougat/image_processing_nougat_fast.py +4 -5
- transformers/models/nougat/processing_nougat.py +37 -39
- transformers/models/nougat/tokenization_nougat.py +5 -7
- transformers/models/nystromformer/configuration_nystromformer.py +0 -1
- transformers/models/nystromformer/modeling_nystromformer.py +61 -63
- transformers/models/olmo/configuration_olmo.py +18 -21
- transformers/models/olmo/modeling_olmo.py +31 -34
- transformers/models/olmo/modular_olmo.py +5 -9
- transformers/models/olmo2/configuration_olmo2.py +18 -21
- transformers/models/olmo2/modeling_olmo2.py +32 -35
- transformers/models/olmo2/modular_olmo2.py +29 -31
- transformers/models/olmo3/__init__.py +0 -1
- transformers/models/olmo3/configuration_olmo3.py +20 -23
- transformers/models/olmo3/modeling_olmo3.py +31 -34
- transformers/models/olmo3/modular_olmo3.py +31 -33
- transformers/models/olmoe/configuration_olmoe.py +24 -26
- transformers/models/olmoe/modeling_olmoe.py +37 -39
- transformers/models/olmoe/modular_olmoe.py +12 -13
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +0 -1
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +38 -40
- transformers/models/omdet_turbo/processing_omdet_turbo.py +19 -67
- transformers/models/oneformer/configuration_oneformer.py +4 -7
- transformers/models/oneformer/image_processing_oneformer.py +83 -84
- transformers/models/oneformer/image_processing_oneformer_fast.py +33 -34
- transformers/models/oneformer/modeling_oneformer.py +123 -124
- transformers/models/oneformer/processing_oneformer.py +28 -43
- transformers/models/openai/configuration_openai.py +0 -1
- transformers/models/openai/modeling_openai.py +50 -51
- transformers/models/openai/tokenization_openai.py +2 -5
- transformers/models/opt/configuration_opt.py +0 -1
- transformers/models/opt/modeling_opt.py +74 -75
- transformers/models/ovis2/__init__.py +0 -1
- transformers/models/ovis2/configuration_ovis2.py +0 -1
- transformers/models/ovis2/image_processing_ovis2.py +22 -24
- transformers/models/ovis2/image_processing_ovis2_fast.py +6 -7
- transformers/models/ovis2/modeling_ovis2.py +43 -45
- transformers/models/ovis2/modular_ovis2.py +30 -32
- transformers/models/ovis2/processing_ovis2.py +12 -40
- transformers/models/owlv2/configuration_owlv2.py +0 -1
- transformers/models/owlv2/image_processing_owlv2.py +20 -21
- transformers/models/owlv2/image_processing_owlv2_fast.py +7 -8
- transformers/models/owlv2/modeling_owlv2.py +82 -87
- transformers/models/owlv2/modular_owlv2.py +6 -7
- transformers/models/owlv2/processing_owlv2.py +20 -49
- transformers/models/owlvit/configuration_owlvit.py +0 -1
- transformers/models/owlvit/image_processing_owlvit.py +21 -22
- transformers/models/owlvit/image_processing_owlvit_fast.py +2 -3
- transformers/models/owlvit/modeling_owlvit.py +81 -86
- transformers/models/owlvit/processing_owlvit.py +20 -48
- transformers/models/paddleocr_vl/__init__.py +0 -1
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +19 -19
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +34 -35
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +12 -12
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +76 -76
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +68 -68
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +1 -3
- transformers/models/paligemma/configuration_paligemma.py +0 -1
- transformers/models/paligemma/modeling_paligemma.py +51 -53
- transformers/models/paligemma/processing_paligemma.py +13 -66
- transformers/models/parakeet/configuration_parakeet.py +1 -4
- transformers/models/parakeet/feature_extraction_parakeet.py +10 -12
- transformers/models/parakeet/modeling_parakeet.py +18 -22
- transformers/models/parakeet/modular_parakeet.py +16 -18
- transformers/models/parakeet/processing_parakeet.py +12 -5
- transformers/models/parakeet/tokenization_parakeet.py +2 -4
- transformers/models/patchtsmixer/configuration_patchtsmixer.py +5 -8
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +60 -62
- transformers/models/patchtst/configuration_patchtst.py +6 -9
- transformers/models/patchtst/modeling_patchtst.py +72 -74
- transformers/models/pe_audio/__init__.py +0 -1
- transformers/models/pe_audio/configuration_pe_audio.py +14 -16
- transformers/models/pe_audio/feature_extraction_pe_audio.py +6 -8
- transformers/models/pe_audio/modeling_pe_audio.py +26 -27
- transformers/models/pe_audio/modular_pe_audio.py +16 -17
- transformers/models/pe_audio/processing_pe_audio.py +0 -1
- transformers/models/pe_audio_video/__init__.py +0 -1
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +15 -17
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +60 -61
- transformers/models/pe_audio_video/modular_pe_audio_video.py +52 -53
- transformers/models/pe_audio_video/processing_pe_audio_video.py +0 -1
- transformers/models/pe_video/__init__.py +0 -1
- transformers/models/pe_video/configuration_pe_video.py +14 -16
- transformers/models/pe_video/modeling_pe_video.py +21 -22
- transformers/models/pe_video/modular_pe_video.py +11 -12
- transformers/models/pe_video/video_processing_pe_video.py +2 -4
- transformers/models/pegasus/configuration_pegasus.py +0 -1
- transformers/models/pegasus/modeling_pegasus.py +63 -65
- transformers/models/pegasus/tokenization_pegasus.py +1 -4
- transformers/models/pegasus_x/configuration_pegasus_x.py +0 -1
- transformers/models/pegasus_x/modeling_pegasus_x.py +50 -52
- transformers/models/perceiver/configuration_perceiver.py +0 -1
- transformers/models/perceiver/image_processing_perceiver.py +22 -25
- transformers/models/perceiver/image_processing_perceiver_fast.py +5 -6
- transformers/models/perceiver/modeling_perceiver.py +135 -136
- transformers/models/perceiver/tokenization_perceiver.py +3 -6
- transformers/models/perception_lm/configuration_perception_lm.py +0 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +8 -9
- transformers/models/perception_lm/modeling_perception_lm.py +38 -40
- transformers/models/perception_lm/modular_perception_lm.py +31 -33
- transformers/models/perception_lm/processing_perception_lm.py +13 -47
- transformers/models/perception_lm/video_processing_perception_lm.py +0 -1
- transformers/models/persimmon/configuration_persimmon.py +18 -21
- transformers/models/persimmon/modeling_persimmon.py +39 -42
- transformers/models/phi/configuration_phi.py +19 -22
- transformers/models/phi/modeling_phi.py +35 -37
- transformers/models/phi/modular_phi.py +23 -23
- transformers/models/phi3/configuration_phi3.py +23 -26
- transformers/models/phi3/modeling_phi3.py +33 -36
- transformers/models/phi3/modular_phi3.py +13 -17
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +25 -26
- transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +7 -9
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +7 -7
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +54 -56
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +59 -60
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +7 -42
- transformers/models/phimoe/configuration_phimoe.py +26 -29
- transformers/models/phimoe/modeling_phimoe.py +35 -38
- transformers/models/phimoe/modular_phimoe.py +0 -1
- transformers/models/phobert/tokenization_phobert.py +4 -6
- transformers/models/pix2struct/configuration_pix2struct.py +0 -1
- transformers/models/pix2struct/image_processing_pix2struct.py +15 -19
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +7 -10
- transformers/models/pix2struct/modeling_pix2struct.py +42 -45
- transformers/models/pix2struct/processing_pix2struct.py +5 -26
- transformers/models/pixio/__init__.py +0 -1
- transformers/models/pixio/configuration_pixio.py +0 -1
- transformers/models/pixio/modeling_pixio.py +7 -9
- transformers/models/pixio/modular_pixio.py +3 -6
- transformers/models/pixtral/configuration_pixtral.py +11 -14
- transformers/models/pixtral/image_processing_pixtral.py +26 -28
- transformers/models/pixtral/image_processing_pixtral_fast.py +5 -6
- transformers/models/pixtral/modeling_pixtral.py +22 -25
- transformers/models/pixtral/processing_pixtral.py +18 -52
- transformers/models/plbart/configuration_plbart.py +0 -1
- transformers/models/plbart/modeling_plbart.py +100 -102
- transformers/models/plbart/modular_plbart.py +30 -32
- transformers/models/plbart/tokenization_plbart.py +4 -5
- transformers/models/poolformer/configuration_poolformer.py +0 -1
- transformers/models/poolformer/image_processing_poolformer.py +21 -24
- transformers/models/poolformer/image_processing_poolformer_fast.py +6 -7
- transformers/models/poolformer/modeling_poolformer.py +10 -12
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/feature_extraction_pop2piano.py +6 -9
- transformers/models/pop2piano/modeling_pop2piano.py +22 -23
- transformers/models/pop2piano/processing_pop2piano.py +25 -33
- transformers/models/pop2piano/tokenization_pop2piano.py +15 -23
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +14 -15
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +9 -10
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +9 -10
- transformers/models/prophetnet/configuration_prophetnet.py +26 -28
- transformers/models/prophetnet/modeling_prophetnet.py +109 -130
- transformers/models/prophetnet/tokenization_prophetnet.py +14 -16
- transformers/models/pvt/configuration_pvt.py +0 -1
- transformers/models/pvt/image_processing_pvt.py +17 -20
- transformers/models/pvt/image_processing_pvt_fast.py +0 -1
- transformers/models/pvt/modeling_pvt.py +19 -21
- transformers/models/pvt_v2/configuration_pvt_v2.py +2 -4
- transformers/models/pvt_v2/modeling_pvt_v2.py +21 -23
- transformers/models/qwen2/configuration_qwen2.py +18 -21
- transformers/models/qwen2/modeling_qwen2.py +31 -33
- transformers/models/qwen2/modular_qwen2.py +11 -12
- transformers/models/qwen2/tokenization_qwen2.py +2 -5
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +20 -23
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +135 -128
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +116 -109
- transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +41 -49
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +22 -25
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +94 -96
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +46 -85
- transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +7 -43
- transformers/models/qwen2_audio/configuration_qwen2_audio.py +0 -1
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +27 -29
- transformers/models/qwen2_audio/processing_qwen2_audio.py +13 -42
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +28 -31
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +36 -39
- transformers/models/qwen2_moe/modular_qwen2_moe.py +7 -10
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +22 -24
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +38 -40
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +8 -9
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +91 -92
- transformers/models/qwen2_vl/processing_qwen2_vl.py +7 -44
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +35 -13
- transformers/models/qwen3/configuration_qwen3.py +20 -23
- transformers/models/qwen3/modeling_qwen3.py +31 -34
- transformers/models/qwen3/modular_qwen3.py +4 -6
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +25 -28
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +36 -39
- transformers/models/qwen3_moe/modular_qwen3_moe.py +10 -13
- transformers/models/qwen3_next/configuration_qwen3_next.py +31 -34
- transformers/models/qwen3_next/modeling_qwen3_next.py +39 -42
- transformers/models/qwen3_next/modular_qwen3_next.py +33 -34
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +85 -88
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +107 -110
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +122 -148
- transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +40 -48
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +16 -19
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +74 -77
- transformers/models/qwen3_vl/modular_qwen3_vl.py +68 -105
- transformers/models/qwen3_vl/processing_qwen3_vl.py +6 -42
- transformers/models/qwen3_vl/video_processing_qwen3_vl.py +10 -12
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +21 -25
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +80 -83
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +33 -36
- transformers/models/rag/configuration_rag.py +0 -1
- transformers/models/rag/modeling_rag.py +116 -118
- transformers/models/rag/retrieval_rag.py +2 -4
- transformers/models/rag/tokenization_rag.py +0 -50
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +21 -24
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +31 -34
- transformers/models/reformer/configuration_reformer.py +0 -1
- transformers/models/reformer/modeling_reformer.py +67 -68
- transformers/models/reformer/tokenization_reformer.py +3 -6
- transformers/models/regnet/configuration_regnet.py +0 -1
- transformers/models/regnet/modeling_regnet.py +7 -9
- transformers/models/rembert/configuration_rembert.py +0 -1
- transformers/models/rembert/modeling_rembert.py +108 -110
- transformers/models/rembert/tokenization_rembert.py +1 -4
- transformers/models/resnet/configuration_resnet.py +0 -1
- transformers/models/resnet/modeling_resnet.py +8 -10
- transformers/models/roberta/configuration_roberta.py +0 -1
- transformers/models/roberta/modeling_roberta.py +91 -93
- transformers/models/roberta/modular_roberta.py +55 -58
- transformers/models/roberta/tokenization_roberta.py +2 -5
- transformers/models/roberta/tokenization_roberta_old.py +2 -4
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +0 -1
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +91 -93
- transformers/models/roc_bert/configuration_roc_bert.py +0 -1
- transformers/models/roc_bert/modeling_roc_bert.py +119 -121
- transformers/models/roc_bert/tokenization_roc_bert.py +88 -94
- transformers/models/roformer/configuration_roformer.py +0 -1
- transformers/models/roformer/modeling_roformer.py +79 -81
- transformers/models/roformer/tokenization_roformer.py +3 -6
- transformers/models/roformer/tokenization_utils.py +0 -1
- transformers/models/rt_detr/configuration_rt_detr.py +0 -1
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +0 -1
- transformers/models/rt_detr/image_processing_rt_detr.py +54 -55
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +15 -15
- transformers/models/rt_detr/modeling_rt_detr.py +80 -82
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +2 -4
- transformers/models/rt_detr/modular_rt_detr.py +14 -14
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +0 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +79 -81
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +2 -4
- transformers/models/rwkv/configuration_rwkv.py +0 -1
- transformers/models/rwkv/modeling_rwkv.py +29 -31
- transformers/models/sam/configuration_sam.py +0 -1
- transformers/models/sam/image_processing_sam.py +59 -60
- transformers/models/sam/image_processing_sam_fast.py +21 -22
- transformers/models/sam/modeling_sam.py +33 -35
- transformers/models/sam/processing_sam.py +39 -27
- transformers/models/sam2/configuration_sam2.py +0 -1
- transformers/models/sam2/image_processing_sam2_fast.py +14 -15
- transformers/models/sam2/modeling_sam2.py +45 -47
- transformers/models/sam2/modular_sam2.py +43 -44
- transformers/models/sam2/processing_sam2.py +31 -47
- transformers/models/sam2_video/configuration_sam2_video.py +0 -1
- transformers/models/sam2_video/modeling_sam2_video.py +69 -70
- transformers/models/sam2_video/modular_sam2_video.py +60 -79
- transformers/models/sam2_video/processing_sam2_video.py +49 -66
- transformers/models/sam2_video/video_processing_sam2_video.py +1 -4
- transformers/models/sam3/configuration_sam3.py +0 -1
- transformers/models/sam3/image_processing_sam3_fast.py +17 -20
- transformers/models/sam3/modeling_sam3.py +54 -56
- transformers/models/sam3/modular_sam3.py +3 -8
- transformers/models/sam3/processing_sam3.py +29 -48
- transformers/models/sam3_tracker/__init__.py +0 -1
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +34 -36
- transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/processing_sam3_tracker.py +31 -47
- transformers/models/sam3_tracker_video/__init__.py +0 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +0 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +70 -70
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +2 -4
- transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +50 -66
- transformers/models/sam3_video/configuration_sam3_video.py +0 -1
- transformers/models/sam3_video/modeling_sam3_video.py +29 -31
- transformers/models/sam3_video/processing_sam3_video.py +25 -45
- transformers/models/sam_hq/__init__.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +0 -1
- transformers/models/sam_hq/modeling_sam_hq.py +39 -41
- transformers/models/sam_hq/modular_sam_hq.py +17 -19
- transformers/models/sam_hq/{processing_samhq.py → processing_sam_hq.py} +39 -28
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +0 -1
- transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +8 -11
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +180 -182
- transformers/models/seamless_m4t/processing_seamless_m4t.py +18 -39
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +15 -20
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +0 -1
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +193 -195
- transformers/models/seed_oss/configuration_seed_oss.py +23 -25
- transformers/models/seed_oss/modeling_seed_oss.py +30 -32
- transformers/models/seed_oss/modular_seed_oss.py +3 -4
- transformers/models/segformer/configuration_segformer.py +0 -10
- transformers/models/segformer/image_processing_segformer.py +39 -42
- transformers/models/segformer/image_processing_segformer_fast.py +7 -8
- transformers/models/segformer/modeling_segformer.py +24 -26
- transformers/models/segformer/modular_segformer.py +5 -6
- transformers/models/seggpt/configuration_seggpt.py +0 -1
- transformers/models/seggpt/image_processing_seggpt.py +38 -41
- transformers/models/seggpt/modeling_seggpt.py +28 -30
- transformers/models/sew/configuration_sew.py +0 -1
- transformers/models/sew/modeling_sew.py +33 -35
- transformers/models/sew/modular_sew.py +10 -12
- transformers/models/sew_d/configuration_sew_d.py +0 -1
- transformers/models/sew_d/modeling_sew_d.py +28 -30
- transformers/models/shieldgemma2/configuration_shieldgemma2.py +0 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +15 -17
- transformers/models/shieldgemma2/processing_shieldgemma2.py +3 -5
- transformers/models/siglip/configuration_siglip.py +0 -1
- transformers/models/siglip/image_processing_siglip.py +17 -20
- transformers/models/siglip/image_processing_siglip_fast.py +0 -1
- transformers/models/siglip/modeling_siglip.py +38 -39
- transformers/models/siglip/processing_siglip.py +2 -14
- transformers/models/siglip/tokenization_siglip.py +6 -7
- transformers/models/siglip2/configuration_siglip2.py +1 -1
- transformers/models/siglip2/image_processing_siglip2.py +15 -16
- transformers/models/siglip2/image_processing_siglip2_fast.py +4 -5
- transformers/models/siglip2/modeling_siglip2.py +54 -54
- transformers/models/siglip2/modular_siglip2.py +23 -25
- transformers/models/siglip2/processing_siglip2.py +2 -14
- transformers/models/smollm3/configuration_smollm3.py +23 -26
- transformers/models/smollm3/modeling_smollm3.py +31 -34
- transformers/models/smollm3/modular_smollm3.py +27 -29
- transformers/models/smolvlm/configuration_smolvlm.py +1 -1
- transformers/models/smolvlm/image_processing_smolvlm.py +42 -43
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +12 -12
- transformers/models/smolvlm/modeling_smolvlm.py +51 -52
- transformers/models/smolvlm/modular_smolvlm.py +15 -17
- transformers/models/smolvlm/processing_smolvlm.py +15 -76
- transformers/models/smolvlm/video_processing_smolvlm.py +7 -8
- transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +0 -1
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +20 -23
- transformers/models/speech_to_text/configuration_speech_to_text.py +0 -1
- transformers/models/speech_to_text/feature_extraction_speech_to_text.py +10 -13
- transformers/models/speech_to_text/modeling_speech_to_text.py +52 -54
- transformers/models/speech_to_text/processing_speech_to_text.py +4 -30
- transformers/models/speech_to_text/tokenization_speech_to_text.py +5 -6
- transformers/models/speecht5/configuration_speecht5.py +0 -1
- transformers/models/speecht5/feature_extraction_speecht5.py +16 -37
- transformers/models/speecht5/modeling_speecht5.py +172 -174
- transformers/models/speecht5/number_normalizer.py +0 -1
- transformers/models/speecht5/processing_speecht5.py +3 -37
- transformers/models/speecht5/tokenization_speecht5.py +4 -5
- transformers/models/splinter/configuration_splinter.py +0 -1
- transformers/models/splinter/modeling_splinter.py +54 -56
- transformers/models/splinter/tokenization_splinter.py +2 -4
- transformers/models/squeezebert/configuration_squeezebert.py +0 -1
- transformers/models/squeezebert/modeling_squeezebert.py +60 -62
- transformers/models/squeezebert/tokenization_squeezebert.py +0 -1
- transformers/models/stablelm/configuration_stablelm.py +20 -23
- transformers/models/stablelm/modeling_stablelm.py +39 -42
- transformers/models/starcoder2/configuration_starcoder2.py +19 -22
- transformers/models/starcoder2/modeling_starcoder2.py +33 -36
- transformers/models/starcoder2/modular_starcoder2.py +13 -15
- transformers/models/superglue/configuration_superglue.py +3 -3
- transformers/models/superglue/image_processing_superglue.py +15 -15
- transformers/models/superglue/image_processing_superglue_fast.py +4 -5
- transformers/models/superglue/modeling_superglue.py +32 -33
- transformers/models/superpoint/image_processing_superpoint.py +15 -15
- transformers/models/superpoint/image_processing_superpoint_fast.py +4 -5
- transformers/models/superpoint/modeling_superpoint.py +13 -14
- transformers/models/swiftformer/configuration_swiftformer.py +0 -1
- transformers/models/swiftformer/modeling_swiftformer.py +12 -14
- transformers/models/swin/configuration_swin.py +0 -1
- transformers/models/swin/modeling_swin.py +58 -70
- transformers/models/swin2sr/configuration_swin2sr.py +0 -1
- transformers/models/swin2sr/image_processing_swin2sr.py +10 -13
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +2 -5
- transformers/models/swin2sr/modeling_swin2sr.py +26 -28
- transformers/models/swinv2/configuration_swinv2.py +0 -1
- transformers/models/swinv2/modeling_swinv2.py +55 -67
- transformers/models/switch_transformers/configuration_switch_transformers.py +0 -1
- transformers/models/switch_transformers/modeling_switch_transformers.py +32 -33
- transformers/models/switch_transformers/modular_switch_transformers.py +29 -30
- transformers/models/t5/configuration_t5.py +0 -1
- transformers/models/t5/modeling_t5.py +75 -77
- transformers/models/t5/tokenization_t5.py +1 -3
- transformers/models/t5gemma/configuration_t5gemma.py +33 -34
- transformers/models/t5gemma/modeling_t5gemma.py +96 -99
- transformers/models/t5gemma/modular_t5gemma.py +117 -118
- transformers/models/t5gemma2/configuration_t5gemma2.py +53 -54
- transformers/models/t5gemma2/modeling_t5gemma2.py +96 -99
- transformers/models/t5gemma2/modular_t5gemma2.py +134 -135
- transformers/models/table_transformer/configuration_table_transformer.py +0 -1
- transformers/models/table_transformer/modeling_table_transformer.py +46 -48
- transformers/models/tapas/configuration_tapas.py +0 -1
- transformers/models/tapas/modeling_tapas.py +64 -66
- transformers/models/tapas/tokenization_tapas.py +115 -153
- transformers/models/textnet/configuration_textnet.py +0 -1
- transformers/models/textnet/image_processing_textnet.py +22 -25
- transformers/models/textnet/image_processing_textnet_fast.py +5 -6
- transformers/models/textnet/modeling_textnet.py +13 -14
- transformers/models/time_series_transformer/configuration_time_series_transformer.py +5 -8
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +79 -81
- transformers/models/timesfm/configuration_timesfm.py +0 -1
- transformers/models/timesfm/modeling_timesfm.py +17 -19
- transformers/models/timesfm/modular_timesfm.py +16 -18
- transformers/models/timesformer/configuration_timesformer.py +0 -1
- transformers/models/timesformer/modeling_timesformer.py +13 -16
- transformers/models/timm_backbone/configuration_timm_backbone.py +0 -1
- transformers/models/timm_backbone/modeling_timm_backbone.py +4 -6
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +2 -3
- transformers/models/timm_wrapper/image_processing_timm_wrapper.py +4 -5
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +13 -15
- transformers/models/trocr/configuration_trocr.py +0 -1
- transformers/models/trocr/modeling_trocr.py +38 -40
- transformers/models/trocr/processing_trocr.py +5 -25
- transformers/models/tvp/configuration_tvp.py +0 -1
- transformers/models/tvp/image_processing_tvp.py +50 -52
- transformers/models/tvp/image_processing_tvp_fast.py +9 -10
- transformers/models/tvp/modeling_tvp.py +25 -27
- transformers/models/tvp/processing_tvp.py +2 -14
- transformers/models/udop/configuration_udop.py +0 -1
- transformers/models/udop/modeling_udop.py +63 -66
- transformers/models/udop/processing_udop.py +7 -26
- transformers/models/udop/tokenization_udop.py +80 -93
- transformers/models/umt5/configuration_umt5.py +0 -1
- transformers/models/umt5/modeling_umt5.py +80 -81
- transformers/models/unispeech/configuration_unispeech.py +0 -1
- transformers/models/unispeech/modeling_unispeech.py +47 -49
- transformers/models/unispeech/modular_unispeech.py +20 -22
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +0 -1
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +63 -65
- transformers/models/unispeech_sat/modular_unispeech_sat.py +21 -23
- transformers/models/univnet/feature_extraction_univnet.py +14 -14
- transformers/models/univnet/modeling_univnet.py +7 -8
- transformers/models/upernet/configuration_upernet.py +0 -1
- transformers/models/upernet/modeling_upernet.py +10 -13
- transformers/models/vaultgemma/__init__.py +0 -1
- transformers/models/vaultgemma/configuration_vaultgemma.py +24 -26
- transformers/models/vaultgemma/modeling_vaultgemma.py +34 -36
- transformers/models/vaultgemma/modular_vaultgemma.py +29 -31
- transformers/models/video_llama_3/image_processing_video_llama_3.py +40 -40
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +8 -8
- transformers/models/video_llama_3/modeling_video_llama_3.py +66 -66
- transformers/models/video_llama_3/modular_video_llama_3.py +101 -112
- transformers/models/video_llama_3/processing_video_llama_3.py +5 -39
- transformers/models/video_llama_3/video_processing_video_llama_3.py +18 -18
- transformers/models/video_llava/configuration_video_llava.py +0 -1
- transformers/models/video_llava/image_processing_video_llava.py +35 -38
- transformers/models/video_llava/modeling_video_llava.py +52 -54
- transformers/models/video_llava/processing_video_llava.py +38 -78
- transformers/models/video_llava/video_processing_video_llava.py +0 -1
- transformers/models/videomae/configuration_videomae.py +0 -1
- transformers/models/videomae/image_processing_videomae.py +31 -34
- transformers/models/videomae/modeling_videomae.py +13 -15
- transformers/models/videomae/video_processing_videomae.py +0 -1
- transformers/models/vilt/configuration_vilt.py +0 -1
- transformers/models/vilt/image_processing_vilt.py +29 -30
- transformers/models/vilt/image_processing_vilt_fast.py +9 -10
- transformers/models/vilt/modeling_vilt.py +76 -78
- transformers/models/vilt/processing_vilt.py +2 -14
- transformers/models/vipllava/configuration_vipllava.py +0 -1
- transformers/models/vipllava/modeling_vipllava.py +38 -39
- transformers/models/vipllava/modular_vipllava.py +30 -32
- transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +0 -1
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +18 -21
- transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +0 -1
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +18 -21
- transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +2 -16
- transformers/models/visual_bert/configuration_visual_bert.py +0 -1
- transformers/models/visual_bert/modeling_visual_bert.py +90 -92
- transformers/models/vit/configuration_vit.py +0 -1
- transformers/models/vit/image_processing_vit.py +19 -22
- transformers/models/vit/image_processing_vit_fast.py +0 -1
- transformers/models/vit/modeling_vit.py +13 -15
- transformers/models/vit_mae/configuration_vit_mae.py +0 -1
- transformers/models/vit_mae/modeling_vit_mae.py +21 -23
- transformers/models/vit_msn/configuration_vit_msn.py +0 -1
- transformers/models/vit_msn/modeling_vit_msn.py +10 -12
- transformers/models/vitdet/configuration_vitdet.py +0 -1
- transformers/models/vitdet/modeling_vitdet.py +12 -14
- transformers/models/vitmatte/configuration_vitmatte.py +1 -4
- transformers/models/vitmatte/image_processing_vitmatte.py +15 -18
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +14 -15
- transformers/models/vitmatte/modeling_vitmatte.py +9 -11
- transformers/models/vitpose/configuration_vitpose.py +3 -6
- transformers/models/vitpose/image_processing_vitpose.py +24 -25
- transformers/models/vitpose/image_processing_vitpose_fast.py +9 -10
- transformers/models/vitpose/modeling_vitpose.py +10 -12
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +0 -1
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +8 -10
- transformers/models/vits/configuration_vits.py +0 -1
- transformers/models/vits/modeling_vits.py +34 -35
- transformers/models/vits/tokenization_vits.py +3 -4
- transformers/models/vivit/configuration_vivit.py +0 -1
- transformers/models/vivit/image_processing_vivit.py +36 -39
- transformers/models/vivit/modeling_vivit.py +5 -7
- transformers/models/vjepa2/__init__.py +0 -1
- transformers/models/vjepa2/configuration_vjepa2.py +0 -1
- transformers/models/vjepa2/modeling_vjepa2.py +30 -32
- transformers/models/vjepa2/video_processing_vjepa2.py +0 -1
- transformers/models/voxtral/__init__.py +0 -1
- transformers/models/voxtral/configuration_voxtral.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +17 -25
- transformers/models/voxtral/modular_voxtral.py +10 -19
- transformers/models/voxtral/processing_voxtral.py +25 -48
- transformers/models/wav2vec2/configuration_wav2vec2.py +0 -1
- transformers/models/wav2vec2/feature_extraction_wav2vec2.py +7 -10
- transformers/models/wav2vec2/modeling_wav2vec2.py +67 -122
- transformers/models/wav2vec2/processing_wav2vec2.py +6 -35
- transformers/models/wav2vec2/tokenization_wav2vec2.py +20 -332
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +0 -1
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +49 -52
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +45 -48
- transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +6 -35
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +0 -1
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +62 -65
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +15 -18
- transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +16 -17
- transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +36 -55
- transformers/models/wavlm/configuration_wavlm.py +0 -1
- transformers/models/wavlm/modeling_wavlm.py +45 -48
- transformers/models/wavlm/modular_wavlm.py +4 -5
- transformers/models/whisper/configuration_whisper.py +0 -1
- transformers/models/whisper/english_normalizer.py +3 -4
- transformers/models/whisper/feature_extraction_whisper.py +9 -24
- transformers/models/whisper/generation_whisper.py +26 -48
- transformers/models/whisper/modeling_whisper.py +68 -70
- transformers/models/whisper/processing_whisper.py +3 -20
- transformers/models/whisper/tokenization_whisper.py +9 -30
- transformers/models/x_clip/configuration_x_clip.py +0 -1
- transformers/models/x_clip/modeling_x_clip.py +68 -69
- transformers/models/x_clip/processing_x_clip.py +2 -14
- transformers/models/xcodec/configuration_xcodec.py +4 -6
- transformers/models/xcodec/modeling_xcodec.py +15 -17
- transformers/models/xglm/configuration_xglm.py +0 -1
- transformers/models/xglm/modeling_xglm.py +49 -55
- transformers/models/xglm/tokenization_xglm.py +1 -4
- transformers/models/xlm/configuration_xlm.py +0 -1
- transformers/models/xlm/modeling_xlm.py +126 -130
- transformers/models/xlm/tokenization_xlm.py +3 -5
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -1
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +90 -92
- transformers/models/xlm_roberta/modular_xlm_roberta.py +50 -53
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +1 -4
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +0 -1
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +91 -93
- transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +67 -70
- transformers/models/xlnet/configuration_xlnet.py +0 -11
- transformers/models/xlnet/modeling_xlnet.py +149 -162
- transformers/models/xlnet/tokenization_xlnet.py +1 -4
- transformers/models/xlstm/configuration_xlstm.py +3 -5
- transformers/models/xlstm/modeling_xlstm.py +62 -65
- transformers/models/xmod/configuration_xmod.py +0 -1
- transformers/models/xmod/modeling_xmod.py +98 -100
- transformers/models/yolos/configuration_yolos.py +0 -1
- transformers/models/yolos/image_processing_yolos.py +60 -62
- transformers/models/yolos/image_processing_yolos_fast.py +18 -18
- transformers/models/yolos/modeling_yolos.py +12 -14
- transformers/models/yolos/modular_yolos.py +2 -4
- transformers/models/yoso/configuration_yoso.py +0 -1
- transformers/models/yoso/modeling_yoso.py +60 -62
- transformers/models/zamba/configuration_zamba.py +0 -1
- transformers/models/zamba/modeling_zamba.py +68 -69
- transformers/models/zamba2/configuration_zamba2.py +36 -37
- transformers/models/zamba2/modeling_zamba2.py +84 -87
- transformers/models/zamba2/modular_zamba2.py +43 -45
- transformers/models/zoedepth/configuration_zoedepth.py +0 -1
- transformers/models/zoedepth/image_processing_zoedepth.py +28 -29
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +11 -12
- transformers/models/zoedepth/modeling_zoedepth.py +14 -16
- transformers/pipelines/__init__.py +50 -49
- transformers/pipelines/any_to_any.py +14 -22
- transformers/pipelines/audio_utils.py +1 -2
- transformers/pipelines/base.py +12 -16
- transformers/pipelines/deprecated/__init__.py +0 -1
- transformers/pipelines/image_text_to_text.py +0 -1
- transformers/pipelines/image_to_text.py +4 -44
- transformers/pipelines/question_answering.py +4 -43
- transformers/pipelines/text_classification.py +1 -14
- transformers/pipelines/token_classification.py +1 -22
- transformers/pipelines/video_classification.py +1 -9
- transformers/pipelines/zero_shot_audio_classification.py +0 -1
- transformers/pipelines/zero_shot_classification.py +0 -6
- transformers/pipelines/zero_shot_image_classification.py +0 -7
- transformers/processing_utils.py +95 -95
- transformers/quantizers/base.py +10 -0
- transformers/quantizers/quantizer_quark.py +0 -1
- transformers/quantizers/quantizer_torchao.py +3 -3
- transformers/testing_utils.py +3 -37
- transformers/tokenization_mistral_common.py +554 -903
- transformers/tokenization_utils_base.py +109 -122
- transformers/tokenization_utils_sentencepiece.py +5 -6
- transformers/tokenization_utils_tokenizers.py +5 -5
- transformers/trainer.py +6 -9
- transformers/trainer_jit_checkpoint.py +1 -2
- transformers/training_args.py +3 -3
- transformers/utils/attention_visualizer.py +1 -1
- transformers/utils/auto_docstring.py +564 -12
- transformers/utils/doc.py +1 -1
- transformers/utils/dummy_pt_objects.py +0 -42
- transformers/utils/generic.py +1 -1
- transformers/utils/loading_report.py +3 -3
- transformers/utils/quantization_config.py +8 -10
- transformers/video_processing_utils.py +19 -20
- transformers/video_utils.py +18 -22
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/METADATA +19 -19
- transformers-5.0.0rc3.dist-info/RECORD +2067 -0
- transformers-5.0.0rc2.dist-info/RECORD +0 -2042
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/WHEEL +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/top_level.txt +0 -0

transformers/tokenization_mistral_common.py

@@ -14,39 +14,39 @@
 import os
 import re
 import shutil
-import
-from collections.abc import Callable, Mapping, Sized
+from collections.abc import Callable, Sequence
 from enum import Enum
 from pathlib import Path
-from typing import Any, Union, overload
+from typing import Any, Literal, Union, overload

 import numpy as np
 from huggingface_hub import create_repo

 from transformers.audio_utils import load_audio_as
 from transformers.tokenization_utils_base import (
-    LARGE_INTEGER,
     VERY_LARGE_INTEGER,
+    AddedToken,
     BatchEncoding,
     EncodedInput,
     PreTokenizedInput,
+    PreTrainedTokenizerBase,
     TextInput,
     TruncationStrategy,
 )
 from transformers.utils import PaddingStrategy, TensorType, add_end_docstrings, logging, to_py_obj
-from transformers.utils.generic import is_torch_tensor
-from transformers.utils.hub import PushToHubMixin
 from transformers.utils.import_utils import is_mistral_common_available, is_torch_available, requires


 if is_mistral_common_available():
     from mistral_common.protocol.instruct.request import ChatCompletionRequest
     from mistral_common.protocol.instruct.validator import ValidationMode
-    from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy,
-    from mistral_common.tokens.tokenizers.image import MultiModalVersion
+    from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy, SpecialTokens
     from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
     from mistral_common.tokens.tokenizers.tekken import Tekkenizer
-    from mistral_common.tokens.tokenizers.utils import
+    from mistral_common.tokens.tokenizers.utils import (
+        download_tokenizer_from_hf_hub,
+        get_one_valid_tokenizer_file,
+    )


 if is_torch_available():
@@ -103,6 +103,10 @@ ENCODE_KWARGS_DOCSTRING = r"""
 """

 ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
+        return_token_type_ids (`bool`, *optional*):
+            Whether to return token type IDs. For `MistralCommonBackend` it returns a list of zeros of the sequence length as only one sequence is supported.
+
+            [What are token type IDs?](../glossary#token-type-ids)
         return_attention_mask (`bool`, *optional*):
             Whether to return the attention mask. If left to the default, will return the attention mask according
             to the specific tokenizer's default, defined by the `return_outputs` attribute.
@@ -118,6 +122,8 @@ ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r"""
             Whether or not to return the lengths of the encoded inputs.
         verbose (`bool`, *optional*, defaults to `True`):
             Whether or not to print more information and warnings.
+        return_offsets_mapping (`Literal[False]`, *optional*): False, kept to match Transformers' signature.
+        split_special_tokens (`Literal[False]`, *optional*): False, kept to match Transformers' signature.
         **kwargs: passed to the `self.tokenize()` method

 Return:
@@ -149,8 +155,35 @@ class MistralTokenizerType(str, Enum):
     tekken = "tekken"


+@overload
+def _maybe_remove_lang(text: str, skip_special_tokens: bool) -> str: ...
+@overload
+def _maybe_remove_lang(text: list[str], skip_special_tokens: bool) -> list[str]: ...
+def _maybe_remove_lang(text: str | list[str], skip_special_tokens: bool) -> str | list[str]:
+    # in the specific case of Voxtral, the added f"lang:xx" (always a two char language code since it follows ISO 639-1 alpha-2 format)
+    # is not considered as a special token by mistral-common and is encoded/ decoded as normal text.
+    # Nevertheless we should remove it to ease users life.
+    if not skip_special_tokens:
+        return text
+
+    if isinstance(text, str):
+        return re.sub(r"^lang:[a-z]{2}", "", text)
+
+    return [re.sub(r"^lang:[a-z]{2}", "", string) for string in text]
+
+
+_MAP_SPECIAL_TOKENS = {
+    "bos_token": SpecialTokens.bos.value,
+    "eos_token": SpecialTokens.eos.value,
+    "pad_token": SpecialTokens.pad.value,
+    "unk_token": SpecialTokens.unk.value,
+}
+
+_VALID_INIT_KWARGS = {"_from_auto", "backend", "files_loaded"}
+
+
 @requires(backends=("mistral-common",))
-class MistralCommonBackend(
+class MistralCommonBackend(PreTrainedTokenizerBase):
     """
     Class to wrap `mistral-common` tokenizers.

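The `_maybe_remove_lang` helper added in the hunk above only strips a leading `lang:xx` prefix (the ISO 639-1 code Voxtral emits) and only when special tokens are being skipped. A minimal standalone sketch of that behaviour; the function name `strip_lang_prefix` is illustrative and not part of the library:

```python
import re


def strip_lang_prefix(text: str, skip_special_tokens: bool) -> str:
    # Mirrors _maybe_remove_lang from the hunk above: the prefix is plain text
    # for mistral-common, so it is only dropped when special tokens are skipped.
    if not skip_special_tokens:
        return text
    return re.sub(r"^lang:[a-z]{2}", "", text)


print(strip_lang_prefix("lang:enHello there.", skip_special_tokens=True))   # "Hello there."
print(strip_lang_prefix("lang:enHello there.", skip_special_tokens=False))  # unchanged
print(strip_lang_prefix("language: en", skip_special_tokens=True))          # unchanged, no exact prefix match
```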
@@ -165,34 +198,13 @@ class MistralCommonBackend(PushToHubMixin):
     For more info on `mistral-common`, see [mistral-common](https://github.com/mistralai/mistral-common).

     This class is a wrapper around a `mistral_common.tokens.tokenizers.mistral.MistralTokenizer`.
-    It provides a Hugging Face compatible interface to tokenize using the official mistral-common tokenizer.
-
-
-
-    -
-    This is a lossy conversion for Tekkenizer as some decoding errors are collapsed into the same token.
-    - [`~MistralCommonBackend.encode`]: Encode a string to a list of integers.
-    - [`~MistralCommonBackend.decode`]: Decode a list of integers to a string.
-    - [`~MistralCommonBackend.batch_decode`]: Decode a batch of list of integers to a list of strings.
-    - [`~MistralCommonBackend.convert_tokens_to_ids`]: Convert a list of tokens to a list of integers.
-    - [`~MistralCommonBackend.convert_ids_to_tokens`]: Convert a list of integers to a list of tokens.
-    - [`~MistralCommonBackend.tokenize`]: Tokenize a string.
-    - [`~MistralCommonBackend.get_special_tokens_mask`]: Get the special tokens mask for a list of tokens.
-    - [`~MistralCommonBackend.prepare_for_model`]: Prepare a list of inputs for the model.
-    - [`~MistralCommonBackend.pad`]: Pad a list of inputs to the same length.
-    - [`~MistralCommonBackend.truncate_sequences`]: Truncate a list of sequences to the same length.
-    - [`~MistralCommonBackend.apply_chat_template`]: Apply a chat template to a list of messages.
-    - [`~MistralCommonBackend.__call__`]: Tokenize a string or a list of strings.
-    - [`~MistralCommonBackend.from_pretrained`]: Download and cache a pretrained tokenizer from the Hugging Face model hub or local directory.
-    - [`~MistralCommonBackend.save_pretrained`]: Save a tokenizer to a directory, so it can be reloaded using the `from_pretrained` class method.
-    - [`~MistralCommonBackend.push_to_hub`]: Upload tokenizer to the Hugging Face model hub.
-
-    Here are the key differences with the `PreTrainedTokenizerBase` class:
-
-    - Pair of sequences are not supported. The signature have been kept for compatibility but all arguments related to pair of sequences are ignored. The return values of pairs are returned as `None`.
+    It provides a Hugging Face compatible interface to tokenize using the official mistral-common tokenizer and inherits from the `PreTrainedTokenizerBase` class.
+
+    Here are the key behavior differences with the `PythonBackend` class:
+
+    - Pair of sequences are not supported. The signature has been kept for compatibility but all arguments related to pair of sequences are ignored. The return values for pairs are returned as `None`.
     - The `is_split_into_words` argument is not supported.
-    -
-    - It is not possible to add new tokens to the tokenizer. Also the special tokens are handled differently from Transformers. In `mistral-common`, special tokens are never encoded directly. This means that: `tokenizer.encode("<s>")` will not return the ID of the `<s>` token. Instead, it will return a list of IDs corresponding to the tokenization of the string `"<s>"`. For more information, see the [mistral-common documentation](https://mistralai.github.io/mistral-common/usage/tokenizers/#special-tokens).
+    - It is not possible to add new tokens to the tokenizer. Special tokens are handled differently from Transformers. In `mistral-common`, special tokens are never encoded directly. This means that: `tokenizer.encode("<s>")` will not return the ID of the `<s>` token. Instead, it will return a list of IDs corresponding to the tokenization of the string `"<s>"`. For more information, see the [mistral-common documentation](https://mistralai.github.io/mistral-common/usage/tokenizers/#special-tokens).

     If you have suggestions to improve this class, please open an issue on the [mistral-common GitHub repository](https://github.com/mistralai/mistral-common/issues) if it is related to the tokenizer or on the [Transformers GitHub repository](https://github.com/huggingface/transformers/issues) if it is related to the Hugging Face interface.
     """
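The rewritten docstring above keeps the caveat that mistral-common never encodes special tokens from raw text. A hedged illustration of what that means in practice; the checkpoint name is a placeholder and the example assumes it ships a mistral-common tokenizer file that this backend can load:

```python
from transformers.tokenization_mistral_common import MistralCommonBackend

# Placeholder repo id: any checkpoint distributing a mistral-common tokenizer file.
tok = MistralCommonBackend.from_pretrained("org/some-mistral-checkpoint")

ids = tok.encode("<s>", add_special_tokens=False)
# "<s>" is tokenized as the literal three-character string, so `ids` is a list of
# ordinary token IDs rather than [tok.bos_token_id].
print(ids, tok.bos_token_id)
```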
@@ -200,6 +212,12 @@ class MistralCommonBackend(PushToHubMixin):
     model_input_names: list[str] = ["input_ids", "attention_mask"]
     padding_side: str = "left"
     truncation_side: str = "right"
+    SPECIAL_TOKENS_ATTRIBUTES = [
+        "bos_token",
+        "eos_token",
+        "unk_token",
+        "pad_token",
+    ]

     def __init__(
         self,
@@ -226,7 +244,7 @@ class MistralCommonBackend(PushToHubMixin):
                 Path to the tokenizer file to load the `MistralTokenizer`.
             mode (`Union[str, ValidationMode]`, *optional*, defaults to `ValidationMode.test`):
                 The mode to use for the tokenizer. This will be passed to the `MistralTokenizer` constructor. Possible values are:
-                - `"finetuning"` or `ValidationMode.finetuning`: The
+                - `"finetuning"` or `ValidationMode.finetuning`: The fine-tuning mode.
                 - `"test"` or `ValidationMode.test`: The test mode.
                 It changes how the tokenizer validates the input and prepares the request to the model.
             model_max_length (`int`, *optional*):
@@ -240,60 +258,40 @@ class MistralCommonBackend(PushToHubMixin):
             truncation_side (`str`, *optional*):
                 The side on which the model should have truncation applied. Should be selected between ['right', 'left'].
                 Default value is picked from the class attribute of the same name.
-            model_input_names (`List[
+            model_input_names (`List[str]`, *optional*):
                 The list of inputs accepted by the forward pass of the model (like `"token_type_ids"` or
                 `"attention_mask"`). Default value is picked from the class attribute of the same name.
             clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
-                Whether or not the model should
+                Whether or not the model should clean up the spaces that were added when splitting the input text during the
                 tokenization process.
         """
-        if kwargs:
+        if kwargs and not set(kwargs.keys()).issubset(_VALID_INIT_KWARGS):
             raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported to init `MistralCommonBackend`.")

         self._tokenizer_path = Path(tokenizer_path)
         self._mode = self._get_validation_mode(mode)
+
         self.tokenizer: MistralTokenizer = MistralTokenizer.from_file(str(self._tokenizer_path), mode=self._mode)
         self._tokenizer_type = (
             MistralTokenizerType.tekken
             if isinstance(self.tokenizer.instruct_tokenizer.tokenizer, Tekkenizer)
             else MistralTokenizerType.spm
         )
-        self.truncation_side = truncation_side
-        self.padding_side = padding_side
-        self.model_max_length = model_max_length
-        self.cleanup_tokenization_spaces = clean_up_tokenization_spaces
-        self.deprecation_warnings = {} # Use to store when we have already noticed a deprecation warning (avoid overlogging).
-        self._all_special_tokens_ids = self._get_all_special_ids()
-
-        if model_input_names is not None:
-            if (
-                not isinstance(model_input_names, (list, tuple))
-                and len(model_input_names) == 0
-                and not all(isinstance(i, str) for i in model_input_names)
-            ):
-                raise ValueError(
-                    "`model_input_names` should be a non-empty list or tuple of str but got an empty value."
-                )
-            self.model_input_names = model_input_names
-
         self._cache_get_vocab: dict[str, int] | None = None

-
-
-
-
-
-
-
-
-
-
-
-
-
-            .replace(" 's", "'s")
-            .replace(" 've", "'ve")
-            .replace(" 're", "'re")
+        self._all_special_ids = self._get_all_special_ids()
+        self._all_special_tokens = self.convert_ids_to_tokens(self.all_special_ids)
+
+        super().__init__(
+            truncation_side=truncation_side,
+            padding_side=padding_side,
+            model_max_length=model_max_length,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            extra_special_tokens=None, # Not used by this backend.
+            model_specific_special_tokens=None, # Not used by this backend.
+            model_input_names=model_input_names or self.model_input_names,
+            **_MAP_SPECIAL_TOKENS,
+            **kwargs,
         )

     @property
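The constructor in the hunk above now rejects unknown keyword arguments unless they are in the small internal whitelist before delegating everything else to `PreTrainedTokenizerBase.__init__`. A self-contained sketch of that gate; `validate_init_kwargs` is a made-up name for illustration, while the set and the check are copied from the diff:

```python
_VALID_INIT_KWARGS = {"_from_auto", "backend", "files_loaded"}  # copied from the hunk above


def validate_init_kwargs(**kwargs) -> None:
    # Same subset check as MistralCommonBackend.__init__: internal loading flags
    # pass through, anything else is rejected early.
    if kwargs and not set(kwargs.keys()).issubset(_VALID_INIT_KWARGS):
        raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported to init `MistralCommonBackend`.")


validate_init_kwargs(_from_auto=True)  # accepted: internal loading flag
try:
    validate_init_kwargs(add_prefix_space=True)  # not whitelisted
except ValueError as err:
    print(err)
```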
@@ -306,75 +304,19 @@ class MistralCommonBackend(PushToHubMixin):
         """
         return self._mode

-    @property
-    def bos_token_id(self) -> int:
-        """
-        Id of the beginning of sentence token in the vocabulary.
-        """
-        return self.tokenizer.instruct_tokenizer.tokenizer.bos_id
-
-    @property
-    def eos_token_id(self) -> int:
-        """
-        Id of the end of sentence token in the vocabulary.
-        """
-        return self.tokenizer.instruct_tokenizer.tokenizer.eos_id
-
-    @property
-    def unk_token_id(self) -> int:
-        """
-        Id of the unknown token in the vocabulary.
-        """
-        return self.tokenizer.instruct_tokenizer.tokenizer.unk_id
-
-    @property
-    def pad_token_id(self) -> int:
-        """
-        Id of the padding token in the vocabulary.
-        """
-        return self.tokenizer.instruct_tokenizer.tokenizer.pad_id
-
-    @property
-    def bos_token(self) -> str:
-        """
-        String associated to the beginning of sentence token in the vocabulary.
-        """
-        return self.convert_ids_to_tokens(self.bos_token_id)
-
-    @property
-    def eos_token(self) -> str:
-        """
-        String associated to the end of sentence token in the vocabulary.
-        """
-        return self.convert_ids_to_tokens(self.eos_token_id)
-
-    @property
-    def unk_token(self) -> str:
-        """
-        String associated to the unknown token in the vocabulary.
-        """
-        return self.convert_ids_to_tokens(self.unk_token_id)
-
-    @property
-    def pad_token(self) -> str:
-        """
-        String associated to the padding token in the vocabulary.
-        """
-        return self.convert_ids_to_tokens(self.pad_token_id)
-
     @property
     def all_special_ids(self) -> list[int]:
         """
         `list[int]`: List the ids of the special tokens(`'<unk>'`, `'<cls>'`, etc.).
         """
-        return sorted(self.
+        return sorted(self._all_special_ids)

     @property
     def all_special_tokens(self) -> list[str]:
         """
         `list[str]`: A list of all unique special tokens.
         """
-        return self.
+        return self._all_special_tokens

     @property
     def vocab_size(self) -> int:
@@ -435,6 +377,8 @@ class MistralCommonBackend(PushToHubMixin):
         padding_side: str | None = None,
         return_tensors: str | TensorType | None = None,
         verbose: bool = True,
+        return_offsets_mapping: Literal[False] = False,
+        split_special_tokens: Literal[False] = False,
         **kwargs,
     ) -> list[int]:
         """
@@ -446,37 +390,81 @@ class MistralCommonBackend(PushToHubMixin):
             text_pair (`None`, *optional*):
                 Not supported by `MistralCommonBackend.encode`. Kept to match `PreTrainedTokenizerBase.encode` signature.
         """
+        if return_offsets_mapping or split_special_tokens:
+            raise ValueError(
+                "`MistralCommonBackend` does not support `return_offsets_mapping` and `split_special_tokens`."
+            )
+
+        if truncation in [TruncationStrategy.ONLY_FIRST, TruncationStrategy.ONLY_SECOND, "only_first", "only_second"]:
+            raise ValueError(
+                "Truncation strategy `only_first` and `only_second` are not supported by `MistralCommonBackend`."
+            )
+
         if kwargs:
             raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.encode`.")
+
         if text_pair:
             raise ValueError("`MistralCommonBackend.encode` does not support `text_pair`.")

-
+        return super().encode(
+            text=text,
+            text_pair=text_pair,
+            add_special_tokens=add_special_tokens,
             padding=padding,
             truncation=truncation,
             max_length=max_length,
-            pad_to_multiple_of=pad_to_multiple_of,
-            verbose=verbose,
-        )
-
-        encoded_inputs = self._encode_plus(
-            text,
-            add_special_tokens=add_special_tokens,
-            padding_strategy=padding_strategy,
-            truncation_strategy=truncation_strategy,
-            max_length=max_length,
             stride=stride,
+            return_tensors=return_tensors,
             pad_to_multiple_of=pad_to_multiple_of,
             padding_side=padding_side,
-            return_tensors=return_tensors,
-            return_attention_mask=False,
-            return_overflowing_tokens=False,
-            return_special_tokens_mask=False,
-            return_length=False,
             verbose=verbose,
         )

-
+    def _decode(
+        self,
+        token_ids: int | list[int],
+        skip_special_tokens: bool = False,
+        clean_up_tokenization_spaces: bool | None = None,
+        **kwargs,
+    ) -> str:
+        if kwargs:
+            raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.decode`.")
+
+        token_ids = to_py_obj(token_ids)
+
+        if isinstance(token_ids, int):
+            token_ids = [token_ids]
+
+        special_token_policy = SpecialTokenPolicy.IGNORE if skip_special_tokens else SpecialTokenPolicy.KEEP
+
+        text = self.tokenizer.decode(token_ids, special_token_policy=special_token_policy)
+
+        # Apply tokenizer-specific cleanup if available and requested
+        clean_up_tokenization_spaces = (
+            clean_up_tokenization_spaces
+            if clean_up_tokenization_spaces is not None
+            else self.clean_up_tokenization_spaces
+        )
+        if clean_up_tokenization_spaces:
+            # Call custom cleanup method if it exists (e.g., for CLVP's [SPACE] token replacement)
+            if hasattr(self, "clean_up_tokenization") and callable(self.clean_up_tokenization):
+                text = self.clean_up_tokenization(text)
+            else:
+                # Otherwise apply standard cleanup
+                text = (
+                    text.replace(" .", ".")
+                    .replace(" ?", "?")
+                    .replace(" !", "!")
+                    .replace(" ,", ",")
+                    .replace(" ' ", "'")
+                    .replace(" n't", "n't")
+                    .replace(" 'm", "'m")
+                    .replace(" 's", "'s")
+                    .replace(" 've", "'ve")
+                    .replace(" 're", "'re")
+                )
+
+        return _maybe_remove_lang(text=text, skip_special_tokens=skip_special_tokens)

     def decode(
         self,
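As a quick orientation for the rewritten decode path in the hunk above, here is a minimal usage sketch. It is illustrative only and not part of the diff: `tok` stands for an already-constructed `MistralCommonBackend` instance, which this diff does not show how to build.

    # skip_special_tokens=True maps to SpecialTokenPolicy.IGNORE inside _decode,
    # skip_special_tokens=False maps to SpecialTokenPolicy.KEEP.
    ids = tok.encode("Hello world", add_special_tokens=True)
    without_special = tok.decode(ids, skip_special_tokens=True)
    with_special = tok.decode(ids, skip_special_tokens=False)
    # Batch decoding now routes through the base-class batch_decode as well.
    texts = tok.batch_decode([ids, ids], skip_special_tokens=True)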
@@ -484,7 +472,7 @@ class MistralCommonBackend(PushToHubMixin):
         skip_special_tokens: bool = False,
         clean_up_tokenization_spaces: bool | None = None,
         **kwargs,
-    ) ->
+    ) -> str | list[str]:
         """
         Converts a sequence of ids in a string, using the tokenizer and vocabulary with options to remove special
         tokens and clean up tokenization spaces.
@@ -509,16 +497,7 @@ class MistralCommonBackend(PushToHubMixin):
         if kwargs:
             raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.decode`.")

-
-
-        if isinstance(token_ids, (list, tuple)) and len(token_ids) > 0 and isinstance(token_ids[0], (list, tuple)):
-            return self._batch_decode(
-                sequences=token_ids,
-                skip_special_tokens=skip_special_tokens,
-                clean_up_tokenization_spaces=clean_up_tokenization_spaces,
-            )
-
-        return self._decode(
+        return super().decode(
             token_ids=token_ids,
             skip_special_tokens=skip_special_tokens,
             clean_up_tokenization_spaces=clean_up_tokenization_spaces,
@@ -555,63 +534,12 @@ class MistralCommonBackend(PushToHubMixin):
         if kwargs:
             raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.batch_decode`.")

-        return
+        return super().batch_decode(
             sequences=sequences,
             skip_special_tokens=skip_special_tokens,
             clean_up_tokenization_spaces=clean_up_tokenization_spaces,
         )

-    def _decode(
-        self,
-        token_ids: Union[int, list[int], list[list[int]], np.ndarray, "torch.Tensor"],
-        skip_special_tokens: bool = False,
-        clean_up_tokenization_spaces: bool | None = None,
-    ) -> str:
-        clean_up_tokenization_spaces = clean_up_tokenization_spaces or self.cleanup_tokenization_spaces
-
-        # Convert inputs to python lists
-        if isinstance(token_ids, int):
-            token_ids = [token_ids]
-
-        token_ids = to_py_obj(token_ids)
-
-        special_token_policy = SpecialTokenPolicy.IGNORE if skip_special_tokens else SpecialTokenPolicy.KEEP
-
-        decoded_string = self.tokenizer.decode(token_ids, special_token_policy=special_token_policy)
-        if clean_up_tokenization_spaces:
-            decoded_string = self.clean_up_tokenization(decoded_string)
-
-        # in the specific case of Voxtral, the added f"lang:xx" (always a two char language code since it follows ISO 639-1 alpha-2 format)
-        # is not considered as a special token by mistral-common and is encoded/ decoded as normal text.
-        # Nevertheless we should remove it to ease users life.
-        if skip_special_tokens:
-            decoded_string = re.sub(r"^lang:[a-z]{2}", "", decoded_string)
-
-        return decoded_string
-
-    def _batch_decode(
-        self,
-        sequences: Union[list[int], list[list[int]], np.ndarray, "torch.Tensor"],
-        skip_special_tokens: bool = False,
-        clean_up_tokenization_spaces: bool | None = None,
-    ) -> list[str]:
-        return [
-            self._decode(
-                seq,
-                skip_special_tokens=skip_special_tokens,
-                clean_up_tokenization_spaces=clean_up_tokenization_spaces,
-            )
-            for seq in sequences
-        ]
-
-    def _is_control_token(self, token_id: int) -> bool:
-        if self._tokenizer_type == MistralTokenizerType.spm:
-            return token_id in self.tokenizer.instruct_tokenizer.tokenizer._control_tokens
-        elif self._tokenizer_type == MistralTokenizerType.tekken:
-            return token_id < self.tokenizer.instruct_tokenizer.tokenizer.num_special_tokens
-        else:
-            raise ValueError(f"Unknown tokenizer type: {self._tokenizer_type}")
-
     @overload
     def convert_ids_to_tokens(self, ids: int, skip_special_tokens: bool = False) -> str: ...
     @overload
@@ -632,22 +560,22 @@ class MistralCommonBackend(PushToHubMixin):
         """

         if isinstance(ids, int):
-
+            return_int = True
             ids = [ids]
         else:
-
+            return_int = False

         tokens: list[str] = []
         for token_id in ids:
-            if self.
+            if self.tokenizer.instruct_tokenizer.tokenizer.is_special(token_id) and skip_special_tokens:
                 continue
             tokens.append(self.tokenizer.instruct_tokenizer.tokenizer.id_to_piece(token_id))

-        if
-
-
-
+        if return_int and tokens == []:
+            raise ValueError(f"Invalid token id {ids[0]}.")
+        elif return_int:
             return tokens[0]
+
         return tokens

     def _tekken_piece_to_id(self, piece: str, warn: bool) -> int:
@@ -708,7 +636,13 @@ class MistralCommonBackend(PushToHubMixin):
         tokens_ids = self.tokenizer.instruct_tokenizer.tokenizer.encode(text, bos=add_special_tokens, eos=add_eos)
         return tokens_ids

-    def tokenize(
+    def tokenize(
+        self,
+        text: TextInput,
+        return_offsets_mapping: Literal[False] = False,
+        split_special_tokens: Literal[False] = False,
+        **kwargs,
+    ) -> list[str]:
         """
         Converts a string into a sequence of tokens, using the tokenizer.

@@ -717,6 +651,8 @@ class MistralCommonBackend(PushToHubMixin):
         Args:
             text (`str`):
                 The sequence to be encoded.
+            return_offsets_mapping (`Literal[False]`, *optional*): False, kept to match Transformers' signature.
+            split_special_tokens (`Literal[False]`, *optional*): False, kept to match Transformers' signature.
             **kwargs (additional keyword arguments):
                 Not supported by `MistralCommonBackend.tokenize`.
                 Will raise an error if used.
@@ -724,40 +660,164 @@ class MistralCommonBackend(PushToHubMixin):
         Returns:
             `list[str]`: The list of tokens.
         """
+        if return_offsets_mapping or split_special_tokens:
+            raise ValueError(
+                "`MistralCommonBackend` does not support `return_offsets_mapping` and `split_special_tokens`."
+            )
+
         if kwargs:
             raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.tokenize`.")

         return self.convert_ids_to_tokens(self._text_to_ids(text, add_special_tokens=False), skip_special_tokens=False)

-    def
+    def _get_all_special_ids(self) -> set[int]:
+        if self._tokenizer_type == MistralTokenizerType.tekken:
+            return self.tokenizer.instruct_tokenizer.tokenizer._special_token_ids
+        elif self._tokenizer_type == MistralTokenizerType.spm:
+            return {
+                token_id
+                for token_id in range(self.tokenizer.instruct_tokenizer.tokenizer.n_words)
+                if self.tokenizer.instruct_tokenizer.tokenizer.is_special(token_id)
+            }
+        else:
+            raise ValueError(f"Unknown tokenizer type: {self._tokenizer_type}")
+
+    def get_special_tokens_mask(
+        self, token_ids_0: list[int], token_ids_1: None = None, already_has_special_tokens: bool = False
+    ) -> list[int]:
+        """
+        Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
+        special tokens using the tokenizer `prepare_for_model` or `encode_plus` methods.
+
+        Args:
+            token_ids_0 (`list[int]`): List of ids of the sequence.
+            token_ids_1 (`None`, *optional*): None, kept to match Transformers' implementation.
+            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not the token list is already formatted with special tokens for the model.
+
+        Returns:
+            A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+        """
+        if token_ids_1 is not None:
+            raise ValueError(
+                "`token_ids_1` is not supported by `MistralCommonBackend` and should be `None`, kept for compatibility."
+            )
+
+        if already_has_special_tokens:
+            return [1 if int(token_id) in self._all_special_ids else 0 for token_id in token_ids_0]
+
+        if self.mode == ValidationMode.test:
+            # [BOS] seq0
+            return [1] + ([0] * len(token_ids_0))
+        else:
+            # [BOS] seq0 [EOS]
+            return [1] + ([0] * len(token_ids_0)) + [1]
+
+    def _encode_plus( # type: ignore[override]
         self,
-        text: TextInput | EncodedInput,
+        text: TextInput | PreTokenizedInput | EncodedInput,
+        text_pair: None = None,
         add_special_tokens: bool = True,
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
         max_length: int | None = None,
         stride: int = 0,
+        is_split_into_words: bool = False,
         pad_to_multiple_of: int | None = None,
         padding_side: str | None = None,
         return_tensors: str | TensorType | None = None,
+        return_token_type_ids: bool | None = None,
         return_attention_mask: bool | None = None,
         return_overflowing_tokens: bool = False,
         return_special_tokens_mask: bool = False,
         return_length: bool = False,
         verbose: bool = True,
+        return_offsets_mapping: Literal[False] = False,
+        split_special_tokens: Literal[False] = False,
+        **kwargs,
     ) -> BatchEncoding:
+        # Detect batched inputs (list of sequences)
+        if text_pair is not None:
+            raise ValueError("`MistralCommonBackend` does not support `text_pair != None` for `_encode_plus`.")
+
+        if return_offsets_mapping or split_special_tokens:
+            raise ValueError(
+                "`MistralCommonBackend` does not support `return_offsets_mapping` and `split_special_tokens`."
+            )
+
+        if kwargs:
+            raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend._encode_plus`.")
+
+        is_batched = isinstance(text, (list, tuple)) and (
+            (not text and not is_split_into_words)
+            or (text and is_split_into_words and isinstance(text[0], (list, tuple)))
+            or (text and not is_split_into_words and isinstance(text[0], (str, list, tuple)))
+        )
+
+        if is_batched:
+            batch_outputs = {}
+            one_overflowed = False
+            for current_text in text:
+                current_output = self._encode_plus(
+                    text=current_text,
+                    text_pair=None,
+                    add_special_tokens=add_special_tokens,
+                    padding_strategy=PaddingStrategy.DO_NOT_PAD, # we pad in batch afterward
+                    truncation_strategy=truncation_strategy,
+                    max_length=max_length,
+                    stride=stride,
+                    is_split_into_words=is_split_into_words,
+                    pad_to_multiple_of=None, # we pad in batch afterward
+                    padding_side=None, # we pad in batch afterward
+                    return_tensors=None, # We convert the whole batch to tensors at the end
+                    return_token_type_ids=return_token_type_ids,
+                    return_attention_mask=False, # we pad in batch afterward
+                    return_overflowing_tokens=return_overflowing_tokens,
+                    return_special_tokens_mask=return_special_tokens_mask,
+                    return_length=return_length,
+                    verbose=verbose,
+                )
+                for key, value in current_output.items():
+                    batch_outputs.setdefault(key, []).append(value)
+
+                # To ensure the list is built for each sample, we need to add this.
+                if return_overflowing_tokens and not return_tensors:
+                    if "overflowing_tokens" not in current_output:
+                        batch_outputs.setdefault("overflowing_tokens", []).append([0])
+                        batch_outputs.setdefault("num_truncated_tokens", []).append([0])
+                    else:
+                        one_overflowed = True
+
+            # Remove overflow-related keys before tensor conversion if return_tensors is set
+            # Slow tokenizers don't support returning these as tensors
+            if return_overflowing_tokens and (return_tensors or not one_overflowed):
+                batch_outputs.pop("overflowing_tokens", None)
+                batch_outputs.pop("num_truncated_tokens", None)
+
+            batch_outputs = self.pad(
+                batch_outputs,
+                padding=padding_strategy.value,
+                max_length=max_length,
+                pad_to_multiple_of=pad_to_multiple_of,
+                padding_side=padding_side,
+                return_attention_mask=return_attention_mask,
+            )
+
+            return BatchEncoding(batch_outputs, tensor_type=return_tensors)
+
         def get_input_ids(text):
             if isinstance(text, str):
-                return self._text_to_ids(text,
+                return self._text_to_ids(text, False)
             elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], int):
                 return text
             else:
                 raise ValueError(f"Input {text} is not valid. Should be a string, or a list/tuple of integers.")

-
+        first_ids = get_input_ids(text)

         return self.prepare_for_model(
-
+            first_ids,
+            pair_ids=None,
             add_special_tokens=add_special_tokens,
             padding=padding_strategy.value,
             truncation=truncation_strategy.value,
@@ -768,202 +828,62 @@ class MistralCommonBackend(PushToHubMixin):
             return_tensors=return_tensors,
             prepend_batch_axis=True,
             return_attention_mask=return_attention_mask,
+            return_token_type_ids=return_token_type_ids,
             return_overflowing_tokens=return_overflowing_tokens,
             return_special_tokens_mask=return_special_tokens_mask,
             return_length=return_length,
             verbose=verbose,
         )

-
+    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
+    def prepare_for_model(
         self,
-
+        ids: list[int],
+        pair_ids: None = None,
         add_special_tokens: bool = True,
-
-
+        padding: bool | str | PaddingStrategy = False,
+        truncation: bool | str | TruncationStrategy | None = None,
         max_length: int | None = None,
         stride: int = 0,
         pad_to_multiple_of: int | None = None,
         padding_side: str | None = None,
         return_tensors: str | TensorType | None = None,
+        return_token_type_ids: bool | None = None,
         return_attention_mask: bool | None = None,
         return_overflowing_tokens: bool = False,
         return_special_tokens_mask: bool = False,
         return_length: bool = False,
         verbose: bool = True,
+        prepend_batch_axis: bool = False,
+        return_offsets_mapping: Literal[False] = False,
+        split_special_tokens: Literal[False] = False,
+        **kwargs,
     ) -> BatchEncoding:
-        def get_input_ids(text):
-            if isinstance(text, str):
-                return self._text_to_ids(text, add_special_tokens)
-            elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], int):
-                return text
-            else:
-                raise ValueError("Input is not valid. Should be a string or a list/tuple of integers.")
-
-        input_ids = []
-        for ids in batch_text:
-            input_ids.append(get_input_ids(ids))
-
-        batch_outputs = self._batch_prepare_for_model(
-            input_ids,
-            add_special_tokens=add_special_tokens,
-            padding_strategy=padding_strategy,
-            truncation_strategy=truncation_strategy,
-            max_length=max_length,
-            stride=stride,
-            pad_to_multiple_of=pad_to_multiple_of,
-            padding_side=padding_side,
-            return_attention_mask=return_attention_mask,
-            return_overflowing_tokens=return_overflowing_tokens,
-            return_special_tokens_mask=return_special_tokens_mask,
-            return_length=return_length,
-            return_tensors=return_tensors,
-            verbose=verbose,
-        )
-
-        return BatchEncoding(batch_outputs)
-
-    def _get_all_special_ids(self) -> set[int]:
-        if self._tokenizer_type == MistralTokenizerType.tekken:
-            return {t["rank"] for t in self.tokenizer.instruct_tokenizer.tokenizer._all_special_tokens}
-        elif self._tokenizer_type == MistralTokenizerType.spm:
-            return self.tokenizer.instruct_tokenizer.tokenizer._control_tokens
-        else:
-            raise ValueError(f"Unknown tokenizer type: {self._tokenizer_type}")
-
-    def get_special_tokens_mask(
-        self, token_ids_0: list, token_ids_1: None = None, already_has_special_tokens: bool = False
-    ) -> list[int]:
         """
-
-        special tokens
+        Prepares a sequence of input id so that it can be used by the model. It
+        adds special tokens, truncates sequences if overflowing while taking into account the special tokens and
+        manages a moving window (with user defined stride) for overflowing tokens.

         Args:
-
-
-
+            ids (`list[int]`):
+                Tokenized input ids of the first sequence.
+            pair_ids (`None`, *optional*):
                 Not supported by `MistralCommonBackend`. Kept to match the interface of `PreTrainedTokenizerBase`.
-            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
-                Whether or not the token list is already formatted with special tokens for the model.
-
-        Returns:
-            A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
         """
-        if
+        if return_offsets_mapping or split_special_tokens:
            raise ValueError(
-                "`
+                "`MistralCommonBackend` does not support `return_offsets_mapping` and `split_special_tokens`."
            )
-
+
+        if pair_ids is not None:
            raise ValueError(
-                "`
+                "`pair_ids` is not supported by `MistralCommonBackend` and should be `None`, kept for compatibility."
            )

-
-
-
-
-        self,
-        batch_ids: list[PreTokenizedInput | list[int]],
-        add_special_tokens: bool = True,
-        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
-        truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
-        max_length: int | None = None,
-        stride: int = 0,
-        pad_to_multiple_of: int | None = None,
-        padding_side: str | None = None,
-        return_tensors: str | None = None,
-        return_attention_mask: bool | None = None,
-        return_overflowing_tokens: bool = False,
-        return_special_tokens_mask: bool = False,
-        return_length: bool = False,
-        verbose: bool = True,
-    ) -> BatchEncoding:
-        """
-        Prepares a sequence of input id so that it can be used by the model. It
-        adds special tokens, truncates sequences if overflowing while taking into account the special tokens and
-        manages a moving window (with user defined stride) for overflowing tokens.
-
-        Args:
-            batch_ids: list of tokenized input ids
-        """
-
-        batch_outputs = {}
-        for ids in batch_ids:
-            outputs = self.prepare_for_model(
-                ids,
-                add_special_tokens=add_special_tokens,
-                padding=PaddingStrategy.DO_NOT_PAD.value, # we pad in batch afterward
-                truncation=truncation_strategy.value,
-                max_length=max_length,
-                stride=stride,
-                pad_to_multiple_of=None, # we pad in batch afterward
-                padding_side=None, # we pad in batch afterward
-                return_attention_mask=False, # we pad in batch afterward
-                return_overflowing_tokens=return_overflowing_tokens,
-                return_special_tokens_mask=return_special_tokens_mask,
-                return_length=return_length,
-                return_tensors=None, # We convert the whole batch to tensors at the end
-                prepend_batch_axis=False,
-                verbose=verbose,
-            )
-
-            for key, value in outputs.items():
-                if key not in batch_outputs:
-                    batch_outputs[key] = []
-                batch_outputs[key].append(value)
-
-        batch_outputs = self.pad(
-            batch_outputs,
-            padding=padding_strategy.value,
-            max_length=max_length,
-            pad_to_multiple_of=pad_to_multiple_of,
-            padding_side=padding_side,
-            return_attention_mask=return_attention_mask,
-        )
-
-        batch_outputs = BatchEncoding(batch_outputs, tensor_type=return_tensors)
-
-        return batch_outputs
-
-    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
-    def prepare_for_model(
-        self,
-        ids: list[int],
-        pair_ids: None = None,
-        add_special_tokens: bool = True,
-        padding: bool | str | PaddingStrategy = False,
-        truncation: bool | str | TruncationStrategy | None = None,
-        max_length: int | None = None,
-        stride: int = 0,
-        pad_to_multiple_of: int | None = None,
-        padding_side: str | None = None,
-        return_tensors: str | TensorType | None = None,
-        return_attention_mask: bool | None = None,
-        return_overflowing_tokens: bool = False,
-        return_special_tokens_mask: bool = False,
-        return_length: bool = False,
-        verbose: bool = True,
-        prepend_batch_axis: bool = False,
-        **kwargs,
-    ) -> BatchEncoding:
-        """
-        Prepares a sequence of input id so that it can be used by the model. It
-        adds special tokens, truncates sequences if overflowing while taking into account the special tokens and
-        manages a moving window (with user defined stride) for overflowing tokens.
-
-        Args:
-            ids (`list[int]`):
-                Tokenized input ids of the first sequence.
-            pair_ids (`None`, *optional*):
-                Not supported by `MistralCommonBackend`. Kept to match the interface of `PreTrainedTokenizerBase`.
-        """
-        if pair_ids is not None:
-            raise ValueError(
-                "`pair_ids` is not supported by `MistralCommonBackend` and should be `None`, kept for compatibility."
-            )
-        if kwargs:
-            raise ValueError(
-                f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.prepare_for_model`."
-            )
+        if kwargs:
+            raise ValueError(
+                f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.prepare_for_model`."
+            )

         padding_strategy, truncation_strategy, max_length, _ = self._get_padding_truncation_strategies(
             padding=padding,
@@ -971,39 +891,65 @@ class MistralCommonBackend(PushToHubMixin):
             max_length=max_length,
             pad_to_multiple_of=pad_to_multiple_of,
             verbose=verbose,
+            **kwargs,
         )

-
+        # Validation
+        if (
+            return_overflowing_tokens
+            and truncation_strategy == TruncationStrategy.LONGEST_FIRST
+            and pair_ids is not None
+        ):
+            raise ValueError(
+                "Not possible to return overflowing tokens for pair of sequences with the "
+                "`longest_first`. Please select another truncation strategy than `longest_first`, "
+                "for instance `only_second` or `only_first`."
+            )

-        #
+        # Defaults
+        if return_token_type_ids is None:
+            return_token_type_ids = "token_type_ids" in self.model_input_names
         if return_attention_mask is None:
             return_attention_mask = "attention_mask" in self.model_input_names

-
+        # Truncation
+        num_special = self.num_special_tokens_to_add(pair=False) if add_special_tokens else 0
+        total_len = len(ids) + len(pair_ids or []) + num_special

-        # Truncation: Handle max sequence length
         overflowing_tokens = []
-        if truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE and max_length and
+        if truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE and max_length and total_len > max_length:
             ids, _, overflowing_tokens = self.truncate_sequences(
                 ids,
-
+                pair_ids=None,
+                num_tokens_to_remove=total_len - max_length,
                 truncation_strategy=truncation_strategy,
                 stride=stride,
             )

-
-
-
+        # Add special tokens
+        if add_special_tokens:
+            sequence = self.build_inputs_with_special_tokens(ids, None)
+            token_type_ids = self.create_token_type_ids_from_sequences(ids, None)
+        else:
+            sequence = ids
+            token_type_ids = [0] * len(sequence)

-        # Build output
-        encoded_inputs
+        # Build output
+        encoded_inputs = {"input_ids": sequence}
+        if return_token_type_ids:
+            encoded_inputs["token_type_ids"] = token_type_ids
         if return_special_tokens_mask:
-
-
-
-
+            encoded_inputs["special_tokens_mask"] = (
+                self.get_special_tokens_mask(ids, None) if add_special_tokens else [0] * len(sequence)
+            )
+        if return_overflowing_tokens and not return_tensors and overflowing_tokens:
+            encoded_inputs["overflowing_tokens"] = overflowing_tokens
+            encoded_inputs["num_truncated_tokens"] = total_len - max_length if max_length else 0
+
+        # Check sequence length and warn if needed
+        self._eventual_warn_about_too_long_sequence(encoded_inputs["input_ids"], max_length, verbose)

-        #
+        # Pad
         if padding_strategy != PaddingStrategy.DO_NOT_PAD or return_attention_mask:
             encoded_inputs = self.pad(
                 encoded_inputs,
@@ -1017,362 +963,9 @@ class MistralCommonBackend(PushToHubMixin):
         if return_length:
             encoded_inputs["length"] = len(encoded_inputs["input_ids"])

-
-            encoded_inputs, tensor_type=return_tensors, prepend_batch_axis=prepend_batch_axis
-        )
-
-        return batch_outputs
-
-    def _get_padding_truncation_strategies(
-        self,
-        padding: str | PaddingStrategy | bool = False,
-        truncation: str | TruncationStrategy | bool | None = None,
-        max_length: int | None = None,
-        pad_to_multiple_of: int | None = None,
-        verbose: bool = True,
-        **kwargs,
-    ):
-        """
-        Find the correct padding/truncation strategy.
-        """
-
-        # Backward compatibility for previous behavior, maybe we should deprecate it:
-        # If you only set max_length, it activates truncation for max_length
-        if max_length is not None and padding is False and truncation is None:
-            if verbose:
-                if not self.deprecation_warnings.get("Truncation-not-explicitly-activated", False):
-                    logger.warning(
-                        "Truncation was not explicitly activated but `max_length` is provided a specific value, please"
-                        " use `truncation=True` to explicitly truncate examples to max length. Defaulting to"
-                        " 'longest_first' truncation strategy."
-                    )
-                self.deprecation_warnings["Truncation-not-explicitly-activated"] = True
-            truncation = "longest_first"
-
-        # Get padding strategy
-        if padding is not False:
-            if padding is True:
-                if verbose:
-                    if max_length is not None and (
-                        truncation is None or truncation is False or truncation == "do_not_truncate"
-                    ):
-                        warnings.warn(
-                            "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
-                            "To pad to max length, use `padding='max_length'`."
-                        )
-                padding_strategy = PaddingStrategy.LONGEST  # Default to pad to the longest sequence in the batch
-            elif not isinstance(padding, PaddingStrategy):
-                padding_strategy = PaddingStrategy(padding)
-            elif isinstance(padding, PaddingStrategy):
-                padding_strategy = padding
-        else:
-            padding_strategy = PaddingStrategy.DO_NOT_PAD
-
-        # Get truncation strategy
-        if truncation is not False and truncation is not None:
-            if truncation is True:
-                truncation_strategy = (
-                    TruncationStrategy.LONGEST_FIRST
-                )  # Default to truncate the longest sequences in pairs of inputs
-            elif not isinstance(truncation, TruncationStrategy):
-                truncation_strategy = TruncationStrategy(truncation)
-            elif isinstance(truncation, TruncationStrategy):
-                truncation_strategy = truncation
-            if truncation in [TruncationStrategy.ONLY_FIRST, TruncationStrategy.ONLY_SECOND]:
-                raise ValueError(
-                    "Truncation strategy `only_first` and `only_second` are not supported by `MistralCommonBackend`."
-                )
-        else:
-            truncation_strategy = TruncationStrategy.DO_NOT_TRUNCATE
-
-        # Set max length if needed
-        if max_length is None:
-            if padding_strategy == PaddingStrategy.MAX_LENGTH:
-                if self.model_max_length > LARGE_INTEGER:
-                    if verbose:
-                        if not self.deprecation_warnings.get("Asking-to-pad-to-max_length", False):
-                            logger.warning(
-                                "Asking to pad to max_length but no maximum length is provided and the model has no"
-                                " predefined maximum length. Default to no padding."
-                            )
-                        self.deprecation_warnings["Asking-to-pad-to-max_length"] = True
-                    padding_strategy = PaddingStrategy.DO_NOT_PAD
-                else:
-                    max_length = self.model_max_length
-
-            if truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE:
-                if self.model_max_length > LARGE_INTEGER:
-                    if verbose:
-                        if not self.deprecation_warnings.get("Asking-to-truncate-to-max_length", False):
-                            logger.warning(
-                                "Asking to truncate to max_length but no maximum length is provided and the model has"
-                                " no predefined maximum length. Default to no truncation."
-                            )
-                        self.deprecation_warnings["Asking-to-truncate-to-max_length"] = True
-                    truncation_strategy = TruncationStrategy.DO_NOT_TRUNCATE
-                else:
-                    max_length = self.model_max_length
-
-        # Test if we have a padding token
-        if padding_strategy != PaddingStrategy.DO_NOT_PAD and (self.pad_token_id is None or self.pad_token_id < 0):
-            raise ValueError(
-                "Asking to pad but the tokenizer does not have a padding token. "
-                "Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` "
-                "or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`."
-            )
-
-        # Check that we will truncate to a multiple of pad_to_multiple_of if both are provided
-        if (
-            truncation_strategy != TruncationStrategy.DO_NOT_TRUNCATE
-            and padding_strategy != PaddingStrategy.DO_NOT_PAD
-            and pad_to_multiple_of is not None
-            and max_length is not None
-            and (max_length % pad_to_multiple_of != 0)
-        ):
-            raise ValueError(
-                "Truncation and padding are both activated but "
-                f"truncation length ({max_length}) is not a multiple of pad_to_multiple_of ({pad_to_multiple_of})."
-            )
-
-        return padding_strategy, truncation_strategy, max_length, kwargs
-
-    def _pad(
-        self,
-        encoded_inputs: dict[str, EncodedInput] | BatchEncoding,
-        max_length: int | None = None,
-        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
-        pad_to_multiple_of: int | None = None,
-        padding_side: str | None = None,
-        return_attention_mask: bool | None = None,
-    ) -> dict:
-        """
-        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
-
-        Args:
-            encoded_inputs:
-                Dictionary of tokenized inputs (`list[int]`) or batch of tokenized inputs (`list[list[int]]`).
-            max_length: maximum length of the returned list and optionally padding length (see below).
-                Will truncate by taking into account the special tokens.
-            padding_strategy: PaddingStrategy to use for padding.
-
-                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
-                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
-                - PaddingStrategy.DO_NOT_PAD: Do not pad
-                The tokenizer padding sides are defined in `padding_side` argument:
-
-                - 'left': pads on the left of the sequences
-                - 'right': pads on the right of the sequences
-            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
-                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
-                `>= 7.5` (Volta).
-            padding_side:
-                The side on which the model should have padding applied. Should be selected between ['right', 'left'].
-                Default value is picked from the class attribute of the same name.
-            return_attention_mask:
-                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
-        """
-        # Load from model defaults
-        if return_attention_mask is None:
-            return_attention_mask = "attention_mask" in self.model_input_names
-
-        required_input = encoded_inputs[self.model_input_names[0]]
-
-        if padding_strategy == PaddingStrategy.LONGEST:
-            max_length = len(required_input)
-
-        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
-            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
-
-        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
-
-        # Initialize attention mask if not present.
-        if return_attention_mask and "attention_mask" not in encoded_inputs:
-            encoded_inputs["attention_mask"] = [1] * len(required_input)
-
-        if needs_to_be_padded:
-            difference = max_length - len(required_input)
-            padding_side = padding_side if padding_side is not None else self.padding_side
-
-            if padding_side == "right":
-                if return_attention_mask:
-                    encoded_inputs["attention_mask"] = encoded_inputs["attention_mask"] + [0] * difference
-                if "special_tokens_mask" in encoded_inputs:
-                    encoded_inputs["special_tokens_mask"] = encoded_inputs["special_tokens_mask"] + [1] * difference
-                encoded_inputs[self.model_input_names[0]] = required_input + [self.pad_token_id] * difference
-            elif padding_side == "left":
-                if return_attention_mask:
-                    encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
-                if "special_tokens_mask" in encoded_inputs:
-                    encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
-                encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
-            else:
-                raise ValueError(f"Invalid padding strategy:{padding_side}")
-
-        return encoded_inputs
+        return BatchEncoding(encoded_inputs, tensor_type=return_tensors, prepend_batch_axis=prepend_batch_axis)

-    def
-        self,
-        encoded_inputs: BatchEncoding
-        | list[BatchEncoding]
-        | dict[str, EncodedInput]
-        | dict[str, list[EncodedInput]]
-        | list[dict[str, EncodedInput]],
-        padding: bool | str | PaddingStrategy = True,
-        max_length: int | None = None,
-        pad_to_multiple_of: int | None = None,
-        padding_side: str | None = None,
-        return_attention_mask: bool | None = None,
-        return_tensors: str | TensorType | None = None,
-        verbose: bool = True,
-    ) -> BatchEncoding:
-        """
-        Pad a single encoded input or a batch of encoded inputs up to predefined length or to the max sequence length
-        in the batch.
-
-        Padding side (left/right) padding token ids are defined at the tokenizer level (with `self.padding_side`,
-        `self.pad_token_id`).
-        <Tip>
-
-        If the `encoded_inputs` passed are dictionary of numpy arrays, PyTorch tensors, the
-        result will use the same type unless you provide a different tensor type with `return_tensors`. In the case of
-        PyTorch tensors, you will lose the specific device of your tensors however.
-
-        </Tip>
-
-        Args:
-            encoded_inputs ([`BatchEncoding`], list of [`BatchEncoding`], `Dict[str, list[int]]`, `Dict[str, list[list[int]]` or `List[Dict[str, list[int]]]`):
-                Tokenized inputs. Can represent one input ([`BatchEncoding`] or `Dict[str, list[int]]`) or a batch of
-                tokenized inputs (list of [`BatchEncoding`], *Dict[str, list[list[int]]]* or *List[Dict[str,
-                list[int]]]*) so you can use this method during preprocessing as well as in a PyTorch Dataloader
-                collate function.
-
-                Instead of `list[int]` you can have tensors (numpy arrays, PyTorch tensors), see
-                the note above for the return type.
-            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
-                Select a strategy to pad the returned sequences (according to the model's padding side and padding
-                index) among:
-
-                - `True` or `'longest'` (default): Pad to the longest sequence in the batch (or no padding if only a single
-                  sequence if provided).
-                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
-                  acceptable input length for the model if that argument is not provided.
-                - `False` or `'do_not_pad'`: No padding (i.e., can output a batch with sequences of different
-                  lengths).
-            max_length (`int`, *optional*):
-                Maximum length of the returned list and optionally padding length (see above).
-            pad_to_multiple_of (`int`, *optional*):
-                If set will pad the sequence to a multiple of the provided value.
-
-                This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
-                `>= 7.5` (Volta).
-            padding_side (`str`, *optional*):
-                The side on which the model should have padding applied. Should be selected between ['right', 'left'].
-                Default value is picked from the class attribute of the same name.
-            return_attention_mask (`bool`, *optional*):
-                Whether to return the attention mask. If left to the default, will return the attention mask according
-                to the specific tokenizer's default, defined by the `return_outputs` attribute.
-
-                [What are attention masks?](../glossary#attention-mask)
-            return_tensors (`str` or [`~utils.TensorType`], *optional*):
-                If set, will return tensors instead of list of python integers. Acceptable values are:
-
-                - `'pt'`: Return PyTorch `torch.Tensor` objects.
-                - `'np'`: Return Numpy `np.ndarray` objects.
-            verbose (`bool`, *optional*, defaults to `True`):
-                Whether or not to print more information and warnings.
-        """
-        # If we have a list of dicts, let's convert it in a dict of lists
-        # We do this to allow using this method as a collate_fn function in PyTorch Dataloader
-        if isinstance(encoded_inputs, (list, tuple)) and isinstance(encoded_inputs[0], Mapping):
-            # Call .keys() explicitly for compatibility with TensorDict and other Mapping subclasses
-            encoded_inputs = {key: [example[key] for example in encoded_inputs] for key in encoded_inputs[0].keys()}
-
-        # The model's main input name, usually `input_ids`, has been passed for padding
-        if self.model_input_names[0] not in encoded_inputs:
-            raise ValueError(
-                "You should supply an encoding or a list of encodings to this method "
-                f"that includes {self.model_input_names[0]}, but you provided {list(encoded_inputs.keys())}"
-            )
-
-        required_input = encoded_inputs[self.model_input_names[0]]
-
-        if required_input is None or (isinstance(required_input, Sized) and len(required_input) == 0):
-            if return_attention_mask:
-                encoded_inputs["attention_mask"] = []
-            return encoded_inputs
-
-        # If we have PyTorch/NumPy tensors/arrays as inputs, we cast them as python objects
-        # and rebuild them afterwards if no return_tensors is specified
-        # Note that we lose the specific device the tensor may be on for PyTorch
-
-        first_element = required_input[0]
-        if isinstance(first_element, (list, tuple)):
-            # first_element might be an empty list/tuple in some edge cases so we grab the first non empty element.
-            for item in required_input:
-                if len(item) != 0:
-                    first_element = item[0]
-                    break
-        # At this state, if `first_element` is still a list/tuple, it's an empty one so there is nothing to do.
-        if not isinstance(first_element, (int, list, tuple)):
-            if is_torch_tensor(first_element):
-                return_tensors = "pt" if return_tensors is None else return_tensors
-            elif isinstance(first_element, np.ndarray):
-                return_tensors = "np" if return_tensors is None else return_tensors
-            else:
-                raise ValueError(
-                    f"type of {first_element} unknown: {type(first_element)}. "
-                    "Should be one of a python, numpy, or pytorch object."
-                )
-
-            for key, value in encoded_inputs.items():
-                encoded_inputs[key] = to_py_obj(value)
-
-        # Convert padding_strategy in PaddingStrategy
-        padding_strategy, _, max_length, _ = self._get_padding_truncation_strategies(
-            padding=padding, max_length=max_length, verbose=verbose
-        )
-
-        required_input = encoded_inputs[self.model_input_names[0]]
-        if required_input and not isinstance(required_input[0], (list, tuple)):
-            encoded_inputs = self._pad(
-                encoded_inputs,
-                max_length=max_length,
-                padding_strategy=padding_strategy,
-                pad_to_multiple_of=pad_to_multiple_of,
-                padding_side=padding_side,
-                return_attention_mask=return_attention_mask,
-            )
-            return BatchEncoding(encoded_inputs, tensor_type=return_tensors)
-
-        batch_size = len(required_input)
-        assert all(len(v) == batch_size for v in encoded_inputs.values()), (
-            "Some items in the output dictionary have a different batch size than others."
-        )
-
-        if padding_strategy == PaddingStrategy.LONGEST:
-            max_length = max(len(inputs) for inputs in required_input)
-            padding_strategy = PaddingStrategy.MAX_LENGTH
-
-        batch_outputs = {}
-        for i in range(batch_size):
-            inputs = {k: v[i] for k, v in encoded_inputs.items()}
-            outputs = self._pad(
-                inputs,
-                max_length=max_length,
-                padding_strategy=padding_strategy,
-                pad_to_multiple_of=pad_to_multiple_of,
-                padding_side=padding_side,
-                return_attention_mask=return_attention_mask,
-            )
-
-            for key, value in outputs.items():
-                if key not in batch_outputs:
-                    batch_outputs[key] = []
-                batch_outputs[key].append(value)
-
-        return BatchEncoding(batch_outputs, tensor_type=return_tensors)
-
-    def truncate_sequences(
+    def truncate_sequences( # type: ignore[override]
         self,
         ids: list[int],
         pair_ids: None = None,
@@ -1407,47 +1000,36 @@ class MistralCommonBackend(PushToHubMixin):
            `Tuple[list[int], None, list[int]]`: The truncated `ids` and the list of
                overflowing tokens. `None` is returned to match Transformers signature.
         """
-
-            raise ValueError(
-                f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.truncate_sequences`."
-            )
+
         if pair_ids:
             raise ValueError("`pair_ids` is not supported by `MistralCommonBackend.truncate_sequences`.")

-        if num_tokens_to_remove <= 0:
-            return (ids, None, [])
-
         if not isinstance(truncation_strategy, TruncationStrategy):
             truncation_strategy = TruncationStrategy(truncation_strategy)

-        if truncation_strategy in [
-
-
-
+        if truncation_strategy in [
+            TruncationStrategy.ONLY_FIRST,
+            TruncationStrategy.ONLY_SECOND,
+        ]:
+            raise ValueError(f"{truncation_strategy=} is not supported by `MistralCommonBackend`.")
+
+        if num_tokens_to_remove <= 0:
+            return ids, None, []

         overflowing_tokens = []
-        if truncation_strategy == TruncationStrategy.LONGEST_FIRST:
-            if len(ids) > num_tokens_to_remove:
-                window_len = min(len(ids), stride + num_tokens_to_remove)
-                if self.truncation_side == "left":
-                    overflowing_tokens = ids[:window_len]
-                    ids = ids[num_tokens_to_remove:]
-                elif self.truncation_side == "right":
-                    overflowing_tokens = ids[-window_len:]
-                    ids = ids[:-num_tokens_to_remove]
-                else:
-                    raise ValueError(f"invalid truncation strategy: {self.truncation_side}, use 'left' or 'right'.")

+        if truncation_strategy == TruncationStrategy.LONGEST_FIRST:
+            window_len = min(len(ids), stride + num_tokens_to_remove)
+            if self.truncation_side == "left":
+                overflowing_tokens = ids[:window_len]
+                ids = ids[num_tokens_to_remove:]
            else:
-
-
-                    f"but the first sequence has a length {len(ids)}. "
-                )
-                logger.error(error_msg)
+                overflowing_tokens = ids[-window_len:]
+                ids = ids[:-num_tokens_to_remove]

-        return
+        return ids, None, overflowing_tokens

-    def apply_chat_template(
+    def apply_chat_template( # type: ignore[override]
         self,
         conversation: list[dict[str, str]] | list[list[dict[str, str]]],
         tools: list[dict | Callable] | None = None,
@@ -1475,8 +1057,8 @@ class MistralCommonBackend(PushToHubMixin):
                [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
                for more information.
            add_generation_prompt (`bool`, *optional*):
-                This argument is a no-op for `MistralCommonBackend`. However it cannot be used at the same time as `continue_final_message` to keep the API consistent
-
+                This argument is a no-op for `MistralCommonBackend`. However, it cannot be used at the same time as `continue_final_message` to keep the API consistent.
+                If any conversation ends with an assistant message, it will raise an error. In such cases, use `continue_final_message` instead.
            continue_final_message (bool, *optional*):
                If this is set, the chat will be formatted so that the final
                message in the chat is open-ended, without any EOS tokens. The model will continue this message
@@ -1511,8 +1093,7 @@ class MistralCommonBackend(PushToHubMixin):
                Will raise an error if used.

        Returns:
-            `Union[str, list[int], list[str], list[list[int]], BatchEncoding]`:
-                tokens. This output is ready to pass to the model, either directly or via methods like `generate()`.
+            `Union[str, list[int], list[str], list[list[int]], BatchEncoding]`: The tokenized chat so far, including control tokens. This output is ready to pass to the model, either directly or via methods like `generate()`.
        """
        if kwargs:
            raise ValueError(
@@ -1659,6 +1240,83 @@ class MistralCommonBackend(PushToHubMixin):
         )
         return outputs
 
+    def build_inputs_with_special_tokens(self, token_ids_0: list[int], token_ids_1: None = None) -> list[int]:
+        """
+        Build model inputs from a sequence by adding special tokens.
+
+        This method dynamically builds inputs based on the tokenizer's `mode`:
+        - `"test"`: [BOS] seq0
+        - `"finetuning"`: [BOS] seq0 [EOS]
+
+        Args:
+            token_ids_0 (`list[int]`):
+                List of IDs to which the special tokens will be added.
+            token_ids_1 (`None`, *optional*): None, kept to match Transformers' signature.
+
+        Returns:
+            `list[int]`: List of input IDs with the appropriate special tokens.
+        """
+        if token_ids_1 is not None:
+            raise ValueError(
+                "`MistralCommonBackend` does not implement `token_ids_1 != None` for `build_inputs_with_special_tokens`."
+            )
+
+        if self.mode == ValidationMode.test:
+            # [BOS] seq0
+            return [self.bos_token_id] + token_ids_0
+
+        else:
+            # [BOS] seq0 [EOS]
+            return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
+
+    def create_token_type_ids_from_sequences(self, token_ids_0: list[int], token_ids_1: None = None) -> list[int]:
+        """
+        Create a mask of zeroes from the token ids with special tokens added.
+
+        Kept to match Transformers' implementation.
+
+        Args:
+            token_ids_0 (`list[int]`):
+                List of IDs.
+            token_ids_1 (`None`, *optional*): None, kept to match Transformers' signature.
+
+
+        Returns:
+            `list[int]`: Token type IDs according to the configured pattern.
+        """
+        if token_ids_1 is not None:
+            raise ValueError(
+                "`MistralCommonBackend` does not implement `token_ids_1 != None` for `create_token_type_ids_from_sequences`."
+            )
+
+        sequence = self.build_inputs_with_special_tokens(token_ids_0)
+
+        return [0] * len(sequence)
+
+    def num_special_tokens_to_add(self, pair: Literal[False] = False) -> int:
+        """
+        Returns the number of added tokens when encoding a sequence with special tokens.
+
+        <Tip>
+
+        This encodes a dummy input and checks the number of added tokens, and is therefore not efficient. Do not put
+        this inside your training loop.
+
+        </Tip>
+
+        Args:
+            pair (`Literal[False]`, *optional*): False, kept to match Transformers' signature.
+
+        Returns:
+            `int`: Number of special tokens added to sequences.
+        """
+        if pair:
+            raise ValueError(
+                "`MistralCommonBackend` does not implement `pair = True` for `num_special_tokens_to_add`."
+            )
+
+        return len(self.build_inputs_with_special_tokens([], None))
+
     @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
     def __call__(
         self,
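The three helpers added in this hunk implement a mode-dependent special-token pattern. The self-contained sketch below mimics that pattern with placeholder ids to illustrate the `[BOS]`/`[EOS]` placement, the all-zeros token-type mask, and the empty-sequence counting trick; it is not the real class:

```python
# Hypothetical, self-contained illustration of the special-token placement shown above.
BOS_ID, EOS_ID = 1, 2  # placeholder ids, not the real Mistral vocabulary


def build_inputs(token_ids: list[int], mode: str = "test") -> list[int]:
    if mode == "test":
        # [BOS] seq0
        return [BOS_ID] + token_ids
    # [BOS] seq0 [EOS]  (finetuning mode)
    return [BOS_ID] + token_ids + [EOS_ID]


def token_type_ids(token_ids: list[int], mode: str = "test") -> list[int]:
    # Single-sequence backend: the mask is all zeros over the built sequence.
    return [0] * len(build_inputs(token_ids, mode))


def num_special_tokens(mode: str = "test") -> int:
    # Same trick as `num_special_tokens_to_add`: encode an empty sequence and count.
    return len(build_inputs([], mode))


assert build_inputs([7, 8, 9], "test") == [1, 7, 8, 9]
assert build_inputs([7, 8, 9], "finetuning") == [1, 7, 8, 9, 2]
assert token_type_ids([7, 8, 9]) == [0, 0, 0, 0]
assert num_special_tokens("finetuning") == 2
```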
@@ -1679,6 +1337,8 @@ class MistralCommonBackend(PushToHubMixin):
         return_special_tokens_mask: bool = False,
         return_length: bool = False,
         verbose: bool = True,
+        return_offsets_mapping: Literal[False] = False,
+        split_special_tokens: Literal[False] = False,
         **kwargs,
     ) -> BatchEncoding:
         """
@@ -1696,92 +1356,49 @@ class MistralCommonBackend(PushToHubMixin):
             text_pair_target (`None`, *optional*):
                 Not supported by `MistralCommonBackend`. Kept to match the signature of `PreTrainedTokenizerBase.__call__`.
         """
-        if
-            raise ValueError(
+        if return_offsets_mapping or split_special_tokens:
+            raise ValueError(
+                "`MistralCommonBackend` does not support `return_offsets_mapping` and `split_special_tokens`."
+            )
 
-        if
+        if truncation in [TruncationStrategy.ONLY_FIRST, TruncationStrategy.ONLY_SECOND, "only_first", "only_second"]:
             raise ValueError(
-                "
+                "Truncation strategy `only_first` and `only_second` are not supported by `MistralCommonBackend`."
             )
 
-
-
-                # Strings are fine
-                return True
-            elif isinstance(t, (list, tuple)):
-                # List are fine as long as they are...
-                if len(t) == 0:
-                    # ... empty
-                    return True
-                elif isinstance(t[0], (str, int)):
-                    # ... list of strings or int
-                    return True
-                elif isinstance(t[0], (list, tuple)):
-                    # ... list with an empty list or with a list of strings or with a list of ints
-                    return len(t[0]) == 0 or isinstance(t[0][0], (str, int))
-                else:
-                    return False
-            else:
-                return False
+        if kwargs:
+            raise ValueError(f"Kwargs {list(kwargs.keys())} are not supported by `MistralCommonBackend.__call__`.")
 
-        if
+        if text_pair or text_target or text_pair_target:
             raise ValueError(
-                "
-                "or `list[list[int]]` (batch of encoded examples)."
+                "`text_pair`, `text_target` and `text_pair_target` are not supported by `MistralCommonBackend`."
             )
 
-
-
-
+        return super().__call__(
+            text=text,
+            text_pair=text_pair,
+            text_target=text_target,
+            add_special_tokens=add_special_tokens,
             padding=padding,
             truncation=truncation,
             max_length=max_length,
+            stride=stride,
             pad_to_multiple_of=pad_to_multiple_of,
+            padding_side=padding_side,
+            return_tensors=return_tensors,
+            return_attention_mask=return_attention_mask,
+            return_overflowing_tokens=return_overflowing_tokens,
+            return_special_tokens_mask=return_special_tokens_mask,
+            return_length=return_length,
             verbose=verbose,
-            **kwargs,
         )
 
-        if is_batched:
-            return self._batch_encode_plus(
-                batch_text=text,
-                add_special_tokens=add_special_tokens,
-                padding_strategy=padding_strategy,
-                truncation_strategy=truncation_strategy,
-                max_length=max_length,
-                stride=stride,
-                pad_to_multiple_of=pad_to_multiple_of,
-                padding_side=padding_side,
-                return_tensors=return_tensors,
-                return_attention_mask=return_attention_mask,
-                return_overflowing_tokens=return_overflowing_tokens,
-                return_special_tokens_mask=return_special_tokens_mask,
-                return_length=return_length,
-                verbose=verbose,
-            )
-        else:
-            return self._encode_plus(
-                text=text,
-                add_special_tokens=add_special_tokens,
-                padding_strategy=padding_strategy,
-                truncation_strategy=truncation_strategy,
-                max_length=max_length,
-                stride=stride,
-                pad_to_multiple_of=pad_to_multiple_of,
-                padding_side=padding_side,
-                return_tensors=return_tensors,
-                return_attention_mask=return_attention_mask,
-                return_overflowing_tokens=return_overflowing_tokens,
-                return_special_tokens_mask=return_special_tokens_mask,
-                return_length=return_length,
-                verbose=verbose,
-            )
-
     @classmethod
     def from_pretrained(
         cls,
         pretrained_model_name_or_path: str | os.PathLike,
         *init_inputs,
-        mode:
+        mode: str | ValidationMode = ValidationMode.test,
         cache_dir: str | os.PathLike | None = None,
         force_download: bool = False,
         local_files_only: bool = False,
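The rewritten `__call__` follows a reject-then-delegate pattern: options the backend cannot honor raise immediately, and everything else is forwarded to `super().__call__`. A toy sketch of that pattern, with illustrative class names rather than the actual backend:

```python
# Minimal sketch of the "reject unsupported options, then delegate" pattern above.
class _Base:
    def __call__(self, text, **options):
        # Stand-in for the shared encoding path; real tokenization is out of scope here.
        return {"input_ids": [ord(c) for c in text], **options}


class _StrictBackend(_Base):
    _UNSUPPORTED = ("return_offsets_mapping", "split_special_tokens")

    def __call__(self, text, **options):
        # Reject options this backend cannot honor before doing any work.
        for name in self._UNSUPPORTED:
            if options.pop(name, False):
                raise ValueError(f"`{name}` is not supported by this backend.")
        if options.get("truncation") in ("only_first", "only_second"):
            raise ValueError("Truncation strategy `only_first`/`only_second` is not supported.")
        # Everything else is forwarded unchanged to the shared implementation.
        return super().__call__(text, **options)


enc = _StrictBackend()("hi", truncation="longest_first")
assert enc["input_ids"] == [104, 105]
```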
@@ -1808,9 +1425,9 @@ class MistralCommonBackend(PushToHubMixin):
                 `./my_model_directory/`.
             mode (`Union[str, ValidationMode]`, *optional*, defaults to `ValidationMode.test`):
                 Validation mode for the `MistralTokenizer` tokenizer. Possible values are:
-                - `"finetuning"` or `ValidationMode.finetuning`: The
+                - `"finetuning"` or `ValidationMode.finetuning`: The fine-tuning mode.
                 - `"test"` or `ValidationMode.test`: The test mode.
-                It changes how the tokenizer validates the input and
+                It changes how the tokenizer validates the input and prepares the request to the model.
             cache_dir (`str` or `os.PathLike`, *optional*):
                 Path to a directory in which a downloaded predefined tokenizer vocabulary files should be cached if the
                 standard cache should not be used.
@@ -1837,11 +1454,11 @@ class MistralCommonBackend(PushToHubMixin):
                 Default value is picked from the class attribute of the same name.
             truncation_side (`str`, *optional*, defaults to `"right"`):
                 The side on which the model should have truncation applied. Should be selected between ['right', 'left'].
-            model_input_names (`List[
+            model_input_names (`List[str]`, *optional*):
                 The list of inputs accepted by the forward pass of the model (like `"token_type_ids"` or
                 `"attention_mask"`). Default value is picked from the class attribute of the same name.
             clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
-                Whether or not the model should
+                Whether or not the model should clean up the spaces that were added when splitting the input text during the
                 tokenization process.
             kwargs (additional keyword arguments, *optional*):
                 Not supported by `MistralCommonBackend.from_pretrained`.
@@ -1851,11 +1468,13 @@ class MistralCommonBackend(PushToHubMixin):
             raise ValueError("`init_inputs` are not supported by `MistralCommonBackend.from_pretrained`.")
 
         # Handle kwargs and AutoTokenizer/AutoProcessor case
-
-
-
-        ):
-            raise ValueError(
+        valid_kwargs = _VALID_INIT_KWARGS.union(
+            {"trust_remote_code", "_from_pipeline", "_commit_hash", "dtype", "subfolder"}
+        )
+        if kwargs and not set(kwargs.keys()).issubset(valid_kwargs):
+            raise ValueError(
+                f"Some kwargs in {list(kwargs.keys())} are not supported by `MistralCommonBackend.from_pretrained`."
+            )
 
         mode = cls._get_validation_mode(mode)
 
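The new kwargs check is a plain allow-list test with `set.issubset`. A minimal illustration with placeholder names (the real allow-list is `_VALID_INIT_KWARGS`, whose contents are not shown in this hunk):

```python
# Allow-list kwargs validation in the style of the check added above; the names in
# `valid_kwargs` are placeholders, not the real `_VALID_INIT_KWARGS` constant.
valid_kwargs = {"padding_side", "truncation_side"} | {"trust_remote_code", "subfolder"}


def check_kwargs(kwargs: dict) -> None:
    if kwargs and not set(kwargs).issubset(valid_kwargs):
        unexpected = sorted(set(kwargs) - valid_kwargs)
        raise ValueError(f"Some kwargs {unexpected} are not supported.")


check_kwargs({"subfolder": "tokenizer"})   # passes: on the allow-list
try:
    check_kwargs({"use_fast": True})       # raises: unknown keyword
except ValueError as err:
    print(err)
```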
@@ -1869,35 +1488,8 @@ class MistralCommonBackend(PushToHubMixin):
                 local_files_only=local_files_only,
             )
         else:
-
-
-
-            instruct_versions = list(TokenizerVersion.__members__)
-            mm_versions = list(MultiModalVersion.__members__) + [""]  # allow no mm version
-            sentencepiece_suffixes = [f".model.{v}{m}" for v in instruct_versions for m in mm_versions] + [".model"]
-
-            for path in os.listdir(pretrained_model_name_or_path):
-                pathlib_repo_file = Path(path)
-                file_name = pathlib_repo_file.name
-                suffix = "".join(pathlib_repo_file.suffixes)
-                if file_name == "tekken.json" or suffix in sentencepiece_suffixes:
-                    valid_tokenizer_files.append(file_name)
-
-            if len(valid_tokenizer_files) == 0:
-                raise ValueError(f"No tokenizer file found in directory: {pretrained_model_name_or_path}")
-            # If there are multiple tokenizer files, we use tekken.json if it exists, otherwise the versioned one.
-            if len(valid_tokenizer_files) > 1:
-                if "tekken.json" in valid_tokenizer_files:
-                    tokenizer_file = "tekken.json"
-                else:
-                    tokenizer_file = max(valid_tokenizer_files)
-                logger.warning(
-                    f"Multiple tokenizer files found in directory: {pretrained_model_name_or_path}. Using {tokenizer_file}."
-                )
-            else:
-                tokenizer_file = valid_tokenizer_files[0]
-
-            tokenizer_path = os.path.join(pretrained_model_name_or_path, tokenizer_file)
+            candidate_files = os.listdir(pretrained_model_name_or_path)
+            tokenizer_path = os.path.join(pretrained_model_name_or_path, get_one_valid_tokenizer_file(candidate_files))
 
         return cls(
             tokenizer_path=tokenizer_path,
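The file-selection logic that used to live inline is now delegated to `get_one_valid_tokenizer_file`, whose body is not part of this hunk. The sketch below repackages the removed inline rules as a plain function, assuming the helper behaves equivalently (suffix matching is simplified to `str.endswith`):

```python
# Assumption: a helper equivalent to the removed inline selection rules, not the actual
# `get_one_valid_tokenizer_file` implementation.
def pick_tokenizer_file(
    file_names: list[str], sentencepiece_suffixes: tuple[str, ...] = (".model",)
) -> str:
    valid = [
        name
        for name in file_names
        if name == "tekken.json" or any(name.endswith(s) for s in sentencepiece_suffixes)
    ]
    if not valid:
        raise ValueError("No tokenizer file found.")
    if len(valid) == 1:
        return valid[0]
    # Prefer tekken.json, otherwise fall back to the highest versioned file.
    return "tekken.json" if "tekken.json" in valid else max(valid)


assert pick_tokenizer_file(["config.json", "tekken.json", "tokenizer.model"]) == "tekken.json"
assert pick_tokenizer_file(
    ["tokenizer.model.v3", "tokenizer.model.v7"], (".model.v3", ".model.v7")
) == "tokenizer.model.v7"
```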
@@ -1909,7 +1501,7 @@ class MistralCommonBackend(PushToHubMixin):
             clean_up_tokenization_spaces=clean_up_tokenization_spaces,
         )
 
-    def save_pretrained(
+    def save_pretrained(  # type: ignore[override]
         self,
         save_directory: str | os.PathLike | Path,
         push_to_hub: bool = False,
@@ -1971,7 +1563,7 @@ class MistralCommonBackend(PushToHubMixin):
         return (str(save_directory / self._tokenizer_path.name),)
 
     @staticmethod
-    def _get_validation_mode(mode:
+    def _get_validation_mode(mode: str | ValidationMode) -> ValidationMode:
         """Get the validation mode from a string or a ValidationMode."""
         _invalid_mode_msg = (
             f"Invalid `mistral-common` tokenizer mode: {mode}. Possible values are 'finetuning' or 'test'."
@@ -1988,6 +1580,65 @@ class MistralCommonBackend(PushToHubMixin):
             raise ValueError(_invalid_mode_msg)
         return mode
 
+    def add_special_tokens(
+        self,
+        special_tokens_dict: dict[str, str | AddedToken | Sequence[str | AddedToken]],
+        replace_extra_special_tokens: bool = True,
+    ):
+        r"""`MistralCommonBackend` does not implement `add_special_tokens` by design.
+
+        If you would like this behaviour to be implemented, please open an issue in the `Transformers` or `mistral-common` repositories to request it.
+        """
+
+        raise NotImplementedError("`MistralCommonBackend` does not implement `add_special_tokens`.")
+
+    def add_tokens(  # type: ignore[override]
+        self,
+        special_tokens_dict: dict[str, str | AddedToken | Sequence[str | AddedToken]],
+        replace_extra_special_tokens: bool = True,
+    ):
+        """
+        `MistralCommonBackend` does not implement `add_tokens` by design.
+
+        If you would like this behaviour to be implemented, please open an issue in the `Transformers` or `mistral-common` repositories to request it.
+        """
+
+        raise NotImplementedError("`MistralCommonBackend` does not implement `add_tokens`.")
+
+    def convert_added_tokens(cls, obj: AddedToken | Any, save: bool = False, add_type_field: bool = True):  # type: ignore[override]
+        """
+        `MistralCommonBackend` does not implement `convert_added_tokens` by design.
+
+        If you would like this behaviour to be implemented, please open an issue in the `Transformers` or `mistral-common` repositories to request it.
+        """
+
+        raise NotImplementedError("`MistralCommonBackend` does not implement `convert_added_tokens`.")
+
+    def get_chat_template(self, chat_template: str | None = None, tools: list[dict] | None = None) -> str:
+        """`MistralCommonBackend` does not implement `get_chat_template` by design as `mistral-common` does not use chat templates."""
+
+        raise NotImplementedError("`MistralCommonBackend` does not implement `get_chat_template`.")
+
+    def save_chat_templates(
+        self,
+        save_directory: str | os.PathLike,
+        tokenizer_config: dict,
+        filename_prefix: str | None,
+        save_jinja_files: bool,
+    ):
+        """`MistralCommonBackend` does not implement `save_chat_templates` by design as `mistral-common` does not use chat templates."""
+
+        raise NotImplementedError("`MistralCommonBackend` does not implement `save_chat_templates`.")
+
+    def save_vocabulary(self, save_directory: str, filename_prefix: str | None = None) -> tuple[str, ...]:
+        """
+        `MistralCommonBackend` does not implement `save_vocabulary` by design.
+
+        This is because `mistral-common` is configured by one tokenizer file. If you'd like to save the vocabulary, please consider using the `save_pretrained` method instead.
+        """
+
+        raise NotImplementedError("`MistralCommonBackend` does not implement `save_vocabulary`.")
+
 
 # Backward compatibility alias for codebases still importing the legacy name.
 MistralCommonTokenizer = MistralCommonBackend