transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
2
|
+
# This file was automatically generated from src/transformers/models/lasr/modular_lasr.py.
|
|
3
|
+
# Do NOT edit this file manually as any edits will be overwritten by the generation of
|
|
4
|
+
# the file from the modular. If any change should be done, please apply the change to the
|
|
5
|
+
# modular_lasr.py file directly. One of our CI enforces this.
|
|
6
|
+
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
7
|
+
# coding=utf-8
|
|
8
|
+
# Copyright 2025 The HuggingFace Inc. team and Google LLC. All rights reserved.
|
|
9
|
+
#
|
|
10
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
11
|
+
# you may not use this file except in compliance with the License.
|
|
12
|
+
# You may obtain a copy of the License at
|
|
13
|
+
#
|
|
14
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
15
|
+
#
|
|
16
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
# See the License for the specific language governing permissions and
|
|
20
|
+
# limitations under the License.
|
|
21
|
+
|
|
22
|
+
import itertools
|
|
23
|
+
import re
|
|
24
|
+
from typing import Optional, Union
|
|
25
|
+
|
|
26
|
+
from tokenizers import Tokenizer, decoders, pre_tokenizers, processors
|
|
27
|
+
from tokenizers.models import Unigram
|
|
28
|
+
|
|
29
|
+
from ...tokenization_utils_tokenizers import TokenizersBackend
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# File names under which a pretrained LASR tokenizer stores its vocabulary:
# the original SentencePiece model and the serialized `tokenizers` JSON.
VOCAB_FILES_NAMES = {
    "vocab_file": "spiece.model",
    "tokenizer_file": "tokenizer.json",
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class LasrTokenizer(TokenizersBackend):
    """
    Construct a LASR tokenizer (backed by HuggingFace's *tokenizers* library). Based on
    [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models).

    This tokenizer inherits from [`TokenizersBackend`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        vocab_file (`str`, *optional*):
            [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
            contains the vocabulary necessary to instantiate a tokenizer.
        eos_token (`str`, *optional*, defaults to `"</s>"`):
            The end of sequence token.

            <Tip>

            When building a sequence using special tokens, this is not the token that is used for the end of sequence.
            The token used is the `sep_token`.

            </Tip>

        unk_token (`str`, *optional*, defaults to `"<unk>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        pad_token (`str`, *optional*, defaults to `"<pad>"`):
            The token used for padding, for example when batching sequences of different lengths.
        extra_ids (`int`, *optional*, defaults to 100):
            Add a number of extra ids added to the vocabulary for use as sentinels. These tokens are accessible as
            "<extra_id_{%d}>" where "{%d}" is a number between 0 and extra_ids-1. These tokens can be retrieved by
            calling get_sentinel_tokens method and token ids can be by calling get_sentinel_token_ids method
        additional_special_tokens (`list[str]`, *optional*):
            Additional special tokens used by the tokenizer.
        vocab (`str`, `dict` or `list`, *optional*):
            Custom vocabulary dict. If not provided, a minimal vocabulary is created using the special tokens.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    model_input_names = ["input_ids", "attention_mask"]
    model = Unigram

    def __init__(
        self,
        eos_token="</s>",
        unk_token="<unk>",
        pad_token="<pad>",
        extra_ids=100,
        additional_special_tokens=None,
        vocab=None,
        vocab_file=None,
        **kwargs,
    ):
        self._extra_ids = extra_ids

        # Reconcile `extra_ids` with `additional_special_tokens`: either the caller
        # supplies the sentinel tokens themselves (and their count must match), or
        # they are generated here.
        if additional_special_tokens is not None:
            extra_tokens = [x for x in additional_special_tokens if "<extra_id_" in str(x)]
            if len(extra_tokens) < 1:
                additional_special_tokens += [f"<extra_id_{i}>" for i in range(extra_ids)]
            elif extra_ids > 0 and extra_ids != len(extra_tokens):
                raise ValueError(
                    f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are"
                    " provided to LasrTokenizer. In this case the additional_special_tokens must include the extra_ids"
                    " tokens"
                )
        else:
            extra_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
            additional_special_tokens = extra_tokens

        # LASR vocab structure: <pad>=0, </s>=1, <unk>=2, then regular vocab, then extra_ids in reverse
        if vocab is not None:
            self._vocab_scores = vocab
        else:
            self._vocab_scores = [
                (str(pad_token), 0.0),
                (str(eos_token), 0.0),
                (str(unk_token), 0.0),
                ("▁", -2.0),  # Space token
            ]
            # Sentinels are appended in descending order (<extra_id_99> ... <extra_id_0>).
            for i in range(extra_ids - 1, -1, -1):
                self._vocab_scores.append((f"<extra_id_{i}>", 0.0))
        # NOTE(review): with the default minimal vocab above, index 3 is the space
        # token "▁" while <unk> sits at index 2, so `unk_id=3` looks inconsistent
        # with the documented layout — confirm against the released checkpoint
        # before changing.
        self._tokenizer = Tokenizer(
            Unigram(
                self._vocab_scores,
                unk_id=3,
                byte_fallback=False,
            )
        )

        self._tokenizer.normalizer = None

        self._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
            [
                pre_tokenizers.WhitespaceSplit(),
                pre_tokenizers.Metaspace(replacement="▁", prepend_scheme="always", split=True),
            ]
        )

        self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme="always", split=True)

        super().__init__(
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            extra_ids=extra_ids,
            additional_special_tokens=additional_special_tokens,
            **kwargs,
        )

        # Must run after super().__init__ so `self.eos_token_id` is resolvable.
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=["$A", "</s>"],
            pair=["$A", "</s>", "$B", "</s>"],
            special_tokens=[
                ("</s>", self.eos_token_id),
            ],
        )

    def get_sentinel_tokens(self):
        """Get the list of sentinel tokens (extra_id tokens) from additional_special_tokens."""
        # Bug fix: the previous predicate `bool(re.search(...)) is not None` was
        # always True (a bool is never None), so the filter returned *every*
        # additional special token instead of only the <extra_id_N> sentinels.
        return list(
            set(filter(lambda x: re.search(r"<extra_id_\d+>", x) is not None, self.additional_special_tokens))
        )

    def get_sentinel_token_ids(self):
        """Get the token IDs for sentinel tokens."""
        return [self.convert_tokens_to_ids(token) for token in self.get_sentinel_tokens()]

    def _decode(
        self,
        token_ids: Union[int, list[int]],
        skip_special_tokens: bool = False,
        clean_up_tokenization_spaces: Optional[bool] = None,
        group_tokens: bool = True,
        **kwargs,
    ) -> str:
        """
        CTC-aware decoding: collapse consecutive repeated ids (when `group_tokens`
        is True) and drop the pad token, which LASR uses as the CTC blank, before
        delegating to the standard backend decoder.
        """
        if isinstance(token_ids, int):
            token_ids = [token_ids]
        if group_tokens:
            # Collapse runs of identical ids, keeping one representative per run.
            token_ids = [token_group[0] for token_group in itertools.groupby(token_ids)]

        # for CTC we filter out the blank token, which is the pad token
        token_ids = [token for token in token_ids if token != self.pad_token_id]

        return super()._decode(
            token_ids=token_ids,
            skip_special_tokens=skip_special_tokens,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )


__all__ = ["LasrTokenizer"]
|
|
@@ -337,9 +337,9 @@ class LayoutLMEncoder(nn.Module):
|
|
|
337
337
|
all_hidden_states = all_hidden_states + (hidden_states,)
|
|
338
338
|
|
|
339
339
|
layer_outputs = layer_module(
|
|
340
|
-
hidden_states
|
|
341
|
-
attention_mask
|
|
342
|
-
output_attentions
|
|
340
|
+
hidden_states,
|
|
341
|
+
attention_mask,
|
|
342
|
+
output_attentions,
|
|
343
343
|
**kwargs,
|
|
344
344
|
)
|
|
345
345
|
|
|
@@ -431,6 +431,8 @@ class LayoutLMPreTrainedModel(PreTrainedModel):
|
|
|
431
431
|
super()._init_weights(module)
|
|
432
432
|
if isinstance(module, LayoutLMLMPredictionHead):
|
|
433
433
|
init.zeros_(module.bias)
|
|
434
|
+
elif isinstance(module, LayoutLMEmbeddings):
|
|
435
|
+
init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
|
|
434
436
|
|
|
435
437
|
|
|
436
438
|
@auto_docstring
|
|
@@ -465,6 +467,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
|
|
|
465
467
|
output_attentions: Optional[bool] = None,
|
|
466
468
|
output_hidden_states: Optional[bool] = None,
|
|
467
469
|
return_dict: Optional[bool] = None,
|
|
470
|
+
**kwargs,
|
|
468
471
|
) -> Union[tuple, BaseModelOutputWithPooling]:
|
|
469
472
|
r"""
|
|
470
473
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -600,6 +603,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
|
|
|
600
603
|
output_attentions: Optional[bool] = None,
|
|
601
604
|
output_hidden_states: Optional[bool] = None,
|
|
602
605
|
return_dict: Optional[bool] = None,
|
|
606
|
+
**kwargs,
|
|
603
607
|
) -> Union[tuple, MaskedLMOutput]:
|
|
604
608
|
r"""
|
|
605
609
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -716,6 +720,7 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel):
|
|
|
716
720
|
output_attentions: Optional[bool] = None,
|
|
717
721
|
output_hidden_states: Optional[bool] = None,
|
|
718
722
|
return_dict: Optional[bool] = None,
|
|
723
|
+
**kwargs,
|
|
719
724
|
) -> Union[tuple, SequenceClassifierOutput]:
|
|
720
725
|
r"""
|
|
721
726
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -850,6 +855,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
|
|
|
850
855
|
output_attentions: Optional[bool] = None,
|
|
851
856
|
output_hidden_states: Optional[bool] = None,
|
|
852
857
|
return_dict: Optional[bool] = None,
|
|
858
|
+
**kwargs,
|
|
853
859
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
854
860
|
r"""
|
|
855
861
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -963,6 +969,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
|
|
|
963
969
|
output_attentions: Optional[bool] = None,
|
|
964
970
|
output_hidden_states: Optional[bool] = None,
|
|
965
971
|
return_dict: Optional[bool] = None,
|
|
972
|
+
**kwargs,
|
|
966
973
|
) -> Union[tuple, QuestionAnsweringModelOutput]:
|
|
967
974
|
r"""
|
|
968
975
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -101,7 +101,6 @@ class LayoutLMv2ImageProcessorFast(BaseImageProcessorFast):
|
|
|
101
101
|
processed_images_grouped[shape] = stacked_images
|
|
102
102
|
|
|
103
103
|
processed_images = reorder_images(processed_images_grouped, grouped_images_index)
|
|
104
|
-
processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
|
|
105
104
|
|
|
106
105
|
data = BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
|
|
107
106
|
|
|
@@ -467,9 +467,21 @@ class LayoutLMv2PreTrainedModel(PreTrainedModel):
|
|
|
467
467
|
if self.config.fast_qkv:
|
|
468
468
|
init.zeros_(module.q_bias)
|
|
469
469
|
init.zeros_(module.v_bias)
|
|
470
|
+
elif isinstance(module, LayoutLMv2Embeddings):
|
|
471
|
+
init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
|
|
472
|
+
elif isinstance(module, LayoutLMv2VisualBackbone):
|
|
473
|
+
num_channels = len(module.cfg.MODEL.PIXEL_MEAN)
|
|
474
|
+
init.copy_(module.pixel_mean, torch.Tensor(module.cfg.MODEL.PIXEL_MEAN).view(num_channels, 1, 1))
|
|
475
|
+
init.copy_(module.pixel_std, torch.Tensor(module.cfg.MODEL.PIXEL_STD).view(num_channels, 1, 1))
|
|
470
476
|
elif isinstance(module, LayoutLMv2Model):
|
|
471
477
|
if hasattr(module, "visual_segment_embedding"):
|
|
472
478
|
init.normal_(module.visual_segment_embedding, mean=0.0, std=self.config.initializer_range)
|
|
479
|
+
# We check the existence of each one since detectron2 seems to do weird things
|
|
480
|
+
elif isinstance(module, detectron2.layers.FrozenBatchNorm2d):
|
|
481
|
+
init.ones_(module.weight)
|
|
482
|
+
init.zeros_(module.bias)
|
|
483
|
+
init.zeros_(module.running_mean)
|
|
484
|
+
init.constant_(module.running_var, 1.0 - module.eps)
|
|
473
485
|
|
|
474
486
|
|
|
475
487
|
def my_convert_sync_batchnorm(module, process_group=None):
|
|
@@ -701,6 +713,7 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
|
|
|
701
713
|
output_attentions: Optional[bool] = None,
|
|
702
714
|
output_hidden_states: Optional[bool] = None,
|
|
703
715
|
return_dict: Optional[bool] = None,
|
|
716
|
+
**kwargs,
|
|
704
717
|
) -> Union[tuple, BaseModelOutputWithPooling]:
|
|
705
718
|
r"""
|
|
706
719
|
bbox (`torch.LongTensor` of shape `((batch_size, sequence_length), 4)`, *optional*):
|
|
@@ -858,6 +871,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
|
|
|
858
871
|
output_attentions: Optional[bool] = None,
|
|
859
872
|
output_hidden_states: Optional[bool] = None,
|
|
860
873
|
return_dict: Optional[bool] = None,
|
|
874
|
+
**kwargs,
|
|
861
875
|
) -> Union[tuple, SequenceClassifierOutput]:
|
|
862
876
|
r"""
|
|
863
877
|
input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
|
|
@@ -1061,6 +1075,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
|
|
|
1061
1075
|
output_attentions: Optional[bool] = None,
|
|
1062
1076
|
output_hidden_states: Optional[bool] = None,
|
|
1063
1077
|
return_dict: Optional[bool] = None,
|
|
1078
|
+
**kwargs,
|
|
1064
1079
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
1065
1080
|
r"""
|
|
1066
1081
|
input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
|
|
@@ -1212,6 +1227,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
|
|
|
1212
1227
|
output_attentions: Optional[bool] = None,
|
|
1213
1228
|
output_hidden_states: Optional[bool] = None,
|
|
1214
1229
|
return_dict: Optional[bool] = None,
|
|
1230
|
+
**kwargs,
|
|
1215
1231
|
) -> Union[tuple, QuestionAnsweringModelOutput]:
|
|
1216
1232
|
r"""
|
|
1217
1233
|
input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
|
|
@@ -159,22 +159,12 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
|
|
|
159
159
|
"""
|
|
160
160
|
|
|
161
161
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
@staticmethod
|
|
165
|
-
def _load_vocab_from_file(vocab_file):
|
|
166
|
-
"""Load vocab from a BERT-style vocab file (one token per line)."""
|
|
167
|
-
vocab = {}
|
|
168
|
-
with open(vocab_file, "r", encoding="utf-8") as reader:
|
|
169
|
-
for index, line in enumerate(reader):
|
|
170
|
-
token = line.rstrip("\n")
|
|
171
|
-
vocab[token] = index
|
|
172
|
-
return vocab
|
|
162
|
+
model = models.WordPiece
|
|
163
|
+
model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
|
|
173
164
|
|
|
174
165
|
def __init__(
|
|
175
166
|
self,
|
|
176
|
-
vocab=None,
|
|
177
|
-
vocab_file=None,
|
|
167
|
+
vocab: Optional[Union[str, dict[str, int]]] = None,
|
|
178
168
|
do_lower_case=True,
|
|
179
169
|
unk_token="[UNK]",
|
|
180
170
|
sep_token="[SEP]",
|
|
@@ -190,21 +180,12 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
|
|
|
190
180
|
strip_accents=None,
|
|
191
181
|
**kwargs,
|
|
192
182
|
):
|
|
193
|
-
self.vocab_file = vocab_file
|
|
194
183
|
self.do_lower_case = do_lower_case
|
|
195
184
|
|
|
196
|
-
# Build vocab for WordPiece
|
|
197
185
|
if vocab is not None:
|
|
198
|
-
|
|
199
|
-
_vocab = vocab
|
|
200
|
-
else:
|
|
201
|
-
raise ValueError("vocab must be a dict mapping tokens to ids")
|
|
202
|
-
elif vocab_file is not None:
|
|
203
|
-
# Load vocab from file (BERT format: one token per line)
|
|
204
|
-
_vocab = self._load_vocab_from_file(vocab_file)
|
|
186
|
+
self._vocab = vocab
|
|
205
187
|
else:
|
|
206
|
-
|
|
207
|
-
_vocab = {
|
|
188
|
+
self._vocab = {
|
|
208
189
|
str(pad_token): 0,
|
|
209
190
|
str(unk_token): 1,
|
|
210
191
|
str(cls_token): 2,
|
|
@@ -212,10 +193,7 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
|
|
|
212
193
|
str(mask_token): 4,
|
|
213
194
|
}
|
|
214
195
|
|
|
215
|
-
|
|
216
|
-
self._tokenizer = Tokenizer(models.WordPiece(vocab=_vocab, unk_token=str(unk_token)))
|
|
217
|
-
|
|
218
|
-
# Set normalizer
|
|
196
|
+
self._tokenizer = Tokenizer(models.WordPiece(vocab=self._vocab, unk_token=str(unk_token)))
|
|
219
197
|
self._tokenizer.normalizer = normalizers.BertNormalizer(
|
|
220
198
|
clean_text=True,
|
|
221
199
|
handle_chinese_chars=tokenize_chinese_chars,
|
|
@@ -223,27 +201,9 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
|
|
|
223
201
|
lowercase=do_lower_case,
|
|
224
202
|
)
|
|
225
203
|
|
|
226
|
-
# Set pre_tokenizer
|
|
227
204
|
self._tokenizer.pre_tokenizer = pre_tokenizers.BertPreTokenizer()
|
|
228
|
-
|
|
229
|
-
# Set decoder
|
|
230
205
|
self._tokenizer.decoder = decoders.WordPiece(prefix="##")
|
|
231
|
-
|
|
232
|
-
# Set post_processor (will be set after super().__init__ when we have token IDs)
|
|
233
|
-
# Temporarily set to None, will be configured after parent init
|
|
234
|
-
self._tokenizer.post_processor = None
|
|
235
|
-
|
|
236
|
-
tokenizer_object = self._tokenizer
|
|
237
|
-
|
|
238
|
-
# additional properties
|
|
239
|
-
self.cls_token_box = cls_token_box
|
|
240
|
-
self.sep_token_box = sep_token_box
|
|
241
|
-
self.pad_token_box = pad_token_box
|
|
242
|
-
self.pad_token_label = pad_token_label
|
|
243
|
-
self.only_label_first_subword = only_label_first_subword
|
|
244
|
-
|
|
245
206
|
super().__init__(
|
|
246
|
-
tokenizer_object=tokenizer_object,
|
|
247
207
|
do_lower_case=do_lower_case,
|
|
248
208
|
unk_token=unk_token,
|
|
249
209
|
sep_token=sep_token,
|
|
@@ -260,6 +220,11 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
|
|
|
260
220
|
**kwargs,
|
|
261
221
|
)
|
|
262
222
|
|
|
223
|
+
self.cls_token_box = cls_token_box
|
|
224
|
+
self.sep_token_box = sep_token_box
|
|
225
|
+
self.pad_token_box = pad_token_box
|
|
226
|
+
self.pad_token_label = pad_token_label
|
|
227
|
+
|
|
263
228
|
# Now set post_processor with actual token IDs
|
|
264
229
|
cls = str(self.cls_token)
|
|
265
230
|
sep = str(self.sep_token)
|
|
@@ -275,13 +240,6 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
|
|
|
275
240
|
],
|
|
276
241
|
)
|
|
277
242
|
|
|
278
|
-
# additional properties
|
|
279
|
-
self.cls_token_box = cls_token_box
|
|
280
|
-
self.sep_token_box = sep_token_box
|
|
281
|
-
self.pad_token_box = pad_token_box
|
|
282
|
-
self.pad_token_label = pad_token_label
|
|
283
|
-
self.only_label_first_subword = only_label_first_subword
|
|
284
|
-
|
|
285
243
|
@add_end_docstrings(LAYOUTLMV2_ENCODE_KWARGS_DOCSTRING, LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
|
|
286
244
|
def __call__(
|
|
287
245
|
self,
|
|
@@ -115,7 +115,6 @@ class LayoutLMv3ImageProcessorFast(BaseImageProcessorFast):
|
|
|
115
115
|
processed_images_grouped[shape] = stacked_images
|
|
116
116
|
|
|
117
117
|
processed_images = reorder_images(processed_images_grouped, grouped_images_index)
|
|
118
|
-
processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
|
|
119
118
|
|
|
120
119
|
data = BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
|
|
121
120
|
|
|
@@ -212,6 +212,10 @@ class LayoutLMv3PreTrainedModel(PreTrainedModel):
|
|
|
212
212
|
if self.config.visual_embed:
|
|
213
213
|
init.zeros_(module.cls_token)
|
|
214
214
|
init.zeros_(module.pos_embed)
|
|
215
|
+
if hasattr(module, "visual_bbox"):
|
|
216
|
+
init.copy_(module.visual_bbox, module.create_visual_bbox(image_size=(module.size, module.size)))
|
|
217
|
+
elif isinstance(module, LayoutLMv3TextEmbeddings):
|
|
218
|
+
init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
|
|
215
219
|
|
|
216
220
|
|
|
217
221
|
class LayoutLMv3SelfAttention(nn.Module):
|
|
@@ -576,16 +580,18 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
|
|
|
576
580
|
# when the input_size is larger in fine-tuning, we will interpolate the position embeddings in forward
|
|
577
581
|
self.patch_embed = LayoutLMv3PatchEmbeddings(config)
|
|
578
582
|
|
|
579
|
-
size = int(config.input_size / config.patch_size)
|
|
583
|
+
self.size = int(config.input_size / config.patch_size)
|
|
580
584
|
self.cls_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size))
|
|
581
|
-
self.pos_embed = nn.Parameter(torch.zeros(1, size * size + 1, config.hidden_size))
|
|
585
|
+
self.pos_embed = nn.Parameter(torch.zeros(1, self.size * self.size + 1, config.hidden_size))
|
|
582
586
|
self.pos_drop = nn.Dropout(p=0.0)
|
|
583
587
|
|
|
584
588
|
self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
|
585
589
|
self.dropout = nn.Dropout(config.hidden_dropout_prob)
|
|
586
590
|
|
|
587
591
|
if self.config.has_relative_attention_bias or self.config.has_spatial_attention_bias:
|
|
588
|
-
self.
|
|
592
|
+
self.register_buffer(
|
|
593
|
+
"visual_bbox", self.create_visual_bbox(image_size=(self.size, self.size)), persistent=False
|
|
594
|
+
)
|
|
589
595
|
|
|
590
596
|
self.norm = nn.LayerNorm(config.hidden_size, eps=1e-6)
|
|
591
597
|
|
|
@@ -599,7 +605,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
|
|
|
599
605
|
def set_input_embeddings(self, value):
|
|
600
606
|
self.embeddings.word_embeddings = value
|
|
601
607
|
|
|
602
|
-
def
|
|
608
|
+
def create_visual_bbox(self, image_size=(14, 14), max_len=1000):
|
|
603
609
|
"""
|
|
604
610
|
Create the bounding boxes for the visual (patch) tokens.
|
|
605
611
|
"""
|
|
@@ -620,7 +626,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
|
|
|
620
626
|
).view(-1, 4)
|
|
621
627
|
|
|
622
628
|
cls_token_box = torch.tensor([[0 + 1, 0 + 1, max_len - 1, max_len - 1]])
|
|
623
|
-
|
|
629
|
+
return torch.cat([cls_token_box, visual_bbox], dim=0)
|
|
624
630
|
|
|
625
631
|
def calculate_visual_bbox(self, device, dtype, batch_size):
|
|
626
632
|
visual_bbox = self.visual_bbox.repeat(batch_size, 1, 1)
|
|
@@ -657,6 +663,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
|
|
|
657
663
|
output_attentions: Optional[bool] = None,
|
|
658
664
|
output_hidden_states: Optional[bool] = None,
|
|
659
665
|
return_dict: Optional[bool] = None,
|
|
666
|
+
**kwargs,
|
|
660
667
|
) -> Union[tuple, BaseModelOutput]:
|
|
661
668
|
r"""
|
|
662
669
|
input_ids (`torch.LongTensor` of shape `(batch_size, token_sequence_length)`):
|
|
@@ -883,6 +890,12 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
|
|
|
883
890
|
|
|
884
891
|
self.post_init()
|
|
885
892
|
|
|
893
|
+
def get_input_embeddings(self):
|
|
894
|
+
return self.layoutlmv3.get_input_embeddings()
|
|
895
|
+
|
|
896
|
+
def set_input_embeddings(self, value):
|
|
897
|
+
self.layoutlmv3.set_input_embeddings(value)
|
|
898
|
+
|
|
886
899
|
@auto_docstring
|
|
887
900
|
def forward(
|
|
888
901
|
self,
|
|
@@ -897,6 +910,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
|
|
|
897
910
|
output_hidden_states: Optional[bool] = None,
|
|
898
911
|
return_dict: Optional[bool] = None,
|
|
899
912
|
pixel_values: Optional[torch.LongTensor] = None,
|
|
913
|
+
**kwargs,
|
|
900
914
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
901
915
|
r"""
|
|
902
916
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -982,6 +996,12 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
|
|
|
982
996
|
|
|
983
997
|
self.post_init()
|
|
984
998
|
|
|
999
|
+
def get_input_embeddings(self):
|
|
1000
|
+
return self.layoutlmv3.get_input_embeddings()
|
|
1001
|
+
|
|
1002
|
+
def set_input_embeddings(self, value):
|
|
1003
|
+
self.layoutlmv3.set_input_embeddings(value)
|
|
1004
|
+
|
|
985
1005
|
@auto_docstring
|
|
986
1006
|
def forward(
|
|
987
1007
|
self,
|
|
@@ -997,6 +1017,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
|
|
|
997
1017
|
return_dict: Optional[bool] = None,
|
|
998
1018
|
bbox: Optional[torch.LongTensor] = None,
|
|
999
1019
|
pixel_values: Optional[torch.LongTensor] = None,
|
|
1020
|
+
**kwargs,
|
|
1000
1021
|
) -> Union[tuple, QuestionAnsweringModelOutput]:
|
|
1001
1022
|
r"""
|
|
1002
1023
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -1101,6 +1122,12 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
|
|
|
1101
1122
|
|
|
1102
1123
|
self.post_init()
|
|
1103
1124
|
|
|
1125
|
+
def get_input_embeddings(self):
|
|
1126
|
+
return self.layoutlmv3.get_input_embeddings()
|
|
1127
|
+
|
|
1128
|
+
def set_input_embeddings(self, value):
|
|
1129
|
+
self.layoutlmv3.set_input_embeddings(value)
|
|
1130
|
+
|
|
1104
1131
|
@auto_docstring
|
|
1105
1132
|
def forward(
|
|
1106
1133
|
self,
|
|
@@ -1115,6 +1142,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
|
|
|
1115
1142
|
return_dict: Optional[bool] = None,
|
|
1116
1143
|
bbox: Optional[torch.LongTensor] = None,
|
|
1117
1144
|
pixel_values: Optional[torch.LongTensor] = None,
|
|
1145
|
+
**kwargs,
|
|
1118
1146
|
) -> Union[tuple, SequenceClassifierOutput]:
|
|
1119
1147
|
r"""
|
|
1120
1148
|
bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Tokenization class for LayoutLMv3. Same as LayoutLMv2, but RoBERTa-like BPE tokenization instead of WordPiece."""
|
|
16
16
|
|
|
17
|
-
import json
|
|
18
17
|
from typing import Optional, Union
|
|
19
18
|
|
|
20
19
|
from tokenizers import Tokenizer, decoders, models, pre_tokenizers, processors
|
|
@@ -159,15 +158,16 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
|
|
|
159
158
|
CrossEntropyLoss.
|
|
160
159
|
only_label_first_subword (`bool`, *optional*, defaults to `True`):
|
|
161
160
|
Whether or not to only label the first subword, in case word labels are provided.
|
|
162
|
-
vocab (`dict`, *optional*):
|
|
163
|
-
Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file when using
|
|
164
|
-
|
|
165
|
-
|
|
161
|
+
vocab (`str` or `dict[str, int]`, *optional*):
|
|
162
|
+
Custom vocabulary dictionary. If not provided, vocabulary is loaded from `vocab_file` when using
|
|
163
|
+
`from_pretrained`.
|
|
164
|
+
merges (`str` or `list[str]`, *optional*):
|
|
165
|
+
Custom merges list. If not provided, merges are loaded from `merges_file` when using `from_pretrained`.
|
|
166
166
|
"""
|
|
167
167
|
|
|
168
168
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
169
169
|
model_input_names = ["input_ids", "attention_mask", "bbox"]
|
|
170
|
-
|
|
170
|
+
model = models.BPE
|
|
171
171
|
|
|
172
172
|
def __init__(
|
|
173
173
|
self,
|
|
@@ -185,69 +185,26 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
|
|
|
185
185
|
pad_token_box=[0, 0, 0, 0],
|
|
186
186
|
pad_token_label=-100,
|
|
187
187
|
only_label_first_subword=True,
|
|
188
|
-
vocab: Optional[dict] = None,
|
|
189
|
-
merges: Optional[list] = None,
|
|
190
|
-
vocab_file: Optional[str] = None,
|
|
191
|
-
merges_file: Optional[str] = None,
|
|
188
|
+
vocab: Optional[Union[str, dict[str, int]]] = None,
|
|
189
|
+
merges: Optional[Union[str, list[str]]] = None,
|
|
192
190
|
**kwargs,
|
|
193
191
|
):
|
|
194
192
|
self.add_prefix_space = add_prefix_space
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
# Priority: 1) vocab/merges dicts/lists, 2) vocab_file/merges_file paths, 3) empty
|
|
198
|
-
if vocab is not None:
|
|
199
|
-
_vocab = vocab
|
|
200
|
-
elif vocab_file is not None:
|
|
201
|
-
with open(vocab_file, encoding="utf-8") as f:
|
|
202
|
-
_vocab = json.load(f)
|
|
203
|
-
else:
|
|
204
|
-
_vocab = {}
|
|
205
|
-
|
|
206
|
-
if merges is not None:
|
|
207
|
-
_merges = merges
|
|
208
|
-
elif merges_file is not None:
|
|
209
|
-
_merges = []
|
|
210
|
-
with open(merges_file, encoding="utf-8") as f:
|
|
211
|
-
for line in f:
|
|
212
|
-
line = line.strip()
|
|
213
|
-
if line and not line.startswith("#"):
|
|
214
|
-
_merges.append(tuple(line.split()))
|
|
215
|
-
else:
|
|
216
|
-
_merges = []
|
|
217
|
-
|
|
218
|
-
# Initialize BPE tokenizer
|
|
193
|
+
self._vocab = vocab or {}
|
|
194
|
+
self._merges = merges or []
|
|
219
195
|
self._tokenizer = Tokenizer(
|
|
220
196
|
models.BPE(
|
|
221
|
-
vocab=_vocab,
|
|
222
|
-
merges=_merges,
|
|
197
|
+
vocab=self._vocab,
|
|
198
|
+
merges=self._merges,
|
|
223
199
|
dropout=None,
|
|
224
200
|
continuing_subword_prefix="",
|
|
225
201
|
end_of_word_suffix="",
|
|
226
202
|
fuse_unk=False,
|
|
227
203
|
)
|
|
228
204
|
)
|
|
229
|
-
|
|
230
|
-
# Set pre_tokenizer (ByteLevel)
|
|
231
205
|
self._tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=add_prefix_space)
|
|
232
|
-
|
|
233
|
-
# Set decoder
|
|
234
206
|
self._tokenizer.decoder = decoders.ByteLevel()
|
|
235
|
-
|
|
236
|
-
# Set post_processor (will be set after super().__init__ when we have token IDs)
|
|
237
|
-
# Temporarily set to None, will be configured after parent init
|
|
238
|
-
self._tokenizer.post_processor = None
|
|
239
|
-
|
|
240
|
-
tokenizer_object = self._tokenizer
|
|
241
|
-
|
|
242
|
-
# additional properties
|
|
243
|
-
self.cls_token_box = cls_token_box
|
|
244
|
-
self.sep_token_box = sep_token_box
|
|
245
|
-
self.pad_token_box = pad_token_box
|
|
246
|
-
self.pad_token_label = pad_token_label
|
|
247
|
-
self.only_label_first_subword = only_label_first_subword
|
|
248
|
-
|
|
249
207
|
super().__init__(
|
|
250
|
-
tokenizer_object=tokenizer_object,
|
|
251
208
|
errors=errors,
|
|
252
209
|
bos_token=bos_token,
|
|
253
210
|
eos_token=eos_token,
|
|
@@ -277,18 +234,12 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
|
|
|
277
234
|
add_prefix_space=add_prefix_space,
|
|
278
235
|
trim_offsets=True,
|
|
279
236
|
)
|
|
280
|
-
|
|
281
|
-
# additional properties
|
|
282
237
|
self.cls_token_box = cls_token_box
|
|
283
238
|
self.sep_token_box = sep_token_box
|
|
284
239
|
self.pad_token_box = pad_token_box
|
|
285
240
|
self.pad_token_label = pad_token_label
|
|
286
241
|
self.only_label_first_subword = only_label_first_subword
|
|
287
242
|
|
|
288
|
-
# Call _post_init for tokenizers created directly (not from_pretrained)
|
|
289
|
-
# For from_pretrained, this will be called again after loading the tokenizer from file
|
|
290
|
-
self._post_init()
|
|
291
|
-
|
|
292
243
|
@add_end_docstrings(LAYOUTLMV3_ENCODE_KWARGS_DOCSTRING, LAYOUTLMV3_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
|
|
293
244
|
def __call__(
|
|
294
245
|
self,
|