transformers 5.0.0rc3__py3-none-any.whl → 5.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +4 -11
- transformers/activations.py +2 -2
- transformers/backbone_utils.py +326 -0
- transformers/cache_utils.py +11 -2
- transformers/cli/serve.py +11 -8
- transformers/configuration_utils.py +1 -69
- transformers/conversion_mapping.py +146 -26
- transformers/convert_slow_tokenizer.py +6 -4
- transformers/core_model_loading.py +207 -118
- transformers/dependency_versions_check.py +0 -1
- transformers/dependency_versions_table.py +7 -8
- transformers/file_utils.py +0 -2
- transformers/generation/candidate_generator.py +1 -2
- transformers/generation/continuous_batching/cache.py +40 -38
- transformers/generation/continuous_batching/cache_manager.py +3 -16
- transformers/generation/continuous_batching/continuous_api.py +94 -406
- transformers/generation/continuous_batching/input_ouputs.py +464 -0
- transformers/generation/continuous_batching/requests.py +54 -17
- transformers/generation/continuous_batching/scheduler.py +77 -95
- transformers/generation/logits_process.py +10 -5
- transformers/generation/stopping_criteria.py +1 -2
- transformers/generation/utils.py +75 -95
- transformers/image_processing_utils.py +0 -3
- transformers/image_processing_utils_fast.py +17 -18
- transformers/image_transforms.py +44 -13
- transformers/image_utils.py +0 -5
- transformers/initialization.py +57 -0
- transformers/integrations/__init__.py +10 -24
- transformers/integrations/accelerate.py +47 -11
- transformers/integrations/deepspeed.py +145 -3
- transformers/integrations/executorch.py +2 -6
- transformers/integrations/finegrained_fp8.py +142 -7
- transformers/integrations/flash_attention.py +2 -7
- transformers/integrations/hub_kernels.py +18 -7
- transformers/integrations/moe.py +226 -106
- transformers/integrations/mxfp4.py +47 -34
- transformers/integrations/peft.py +488 -176
- transformers/integrations/tensor_parallel.py +641 -581
- transformers/masking_utils.py +153 -9
- transformers/modeling_flash_attention_utils.py +1 -2
- transformers/modeling_utils.py +359 -358
- transformers/models/__init__.py +6 -0
- transformers/models/afmoe/configuration_afmoe.py +14 -4
- transformers/models/afmoe/modeling_afmoe.py +8 -8
- transformers/models/afmoe/modular_afmoe.py +7 -7
- transformers/models/aimv2/configuration_aimv2.py +2 -7
- transformers/models/aimv2/modeling_aimv2.py +26 -24
- transformers/models/aimv2/modular_aimv2.py +8 -12
- transformers/models/albert/configuration_albert.py +8 -1
- transformers/models/albert/modeling_albert.py +3 -3
- transformers/models/align/configuration_align.py +8 -5
- transformers/models/align/modeling_align.py +22 -24
- transformers/models/altclip/configuration_altclip.py +4 -6
- transformers/models/altclip/modeling_altclip.py +30 -26
- transformers/models/apertus/configuration_apertus.py +5 -7
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/apertus/modular_apertus.py +8 -10
- transformers/models/arcee/configuration_arcee.py +5 -7
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/configuration_aria.py +11 -21
- transformers/models/aria/modeling_aria.py +39 -36
- transformers/models/aria/modular_aria.py +33 -39
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +3 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +39 -30
- transformers/models/audioflamingo3/modular_audioflamingo3.py +41 -27
- transformers/models/auto/auto_factory.py +8 -6
- transformers/models/auto/configuration_auto.py +22 -0
- transformers/models/auto/image_processing_auto.py +17 -13
- transformers/models/auto/modeling_auto.py +15 -0
- transformers/models/auto/processing_auto.py +9 -18
- transformers/models/auto/tokenization_auto.py +17 -15
- transformers/models/autoformer/modeling_autoformer.py +2 -1
- transformers/models/aya_vision/configuration_aya_vision.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +29 -62
- transformers/models/aya_vision/modular_aya_vision.py +20 -45
- transformers/models/bamba/configuration_bamba.py +17 -7
- transformers/models/bamba/modeling_bamba.py +23 -55
- transformers/models/bamba/modular_bamba.py +19 -54
- transformers/models/bark/configuration_bark.py +2 -1
- transformers/models/bark/modeling_bark.py +24 -10
- transformers/models/bart/configuration_bart.py +9 -4
- transformers/models/bart/modeling_bart.py +9 -12
- transformers/models/beit/configuration_beit.py +2 -4
- transformers/models/beit/image_processing_beit_fast.py +3 -3
- transformers/models/beit/modeling_beit.py +14 -9
- transformers/models/bert/configuration_bert.py +12 -1
- transformers/models/bert/modeling_bert.py +6 -30
- transformers/models/bert_generation/configuration_bert_generation.py +17 -1
- transformers/models/bert_generation/modeling_bert_generation.py +6 -6
- transformers/models/big_bird/configuration_big_bird.py +12 -8
- transformers/models/big_bird/modeling_big_bird.py +0 -15
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -8
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +9 -7
- transformers/models/biogpt/configuration_biogpt.py +8 -1
- transformers/models/biogpt/modeling_biogpt.py +4 -8
- transformers/models/biogpt/modular_biogpt.py +1 -5
- transformers/models/bit/configuration_bit.py +2 -4
- transformers/models/bit/modeling_bit.py +6 -5
- transformers/models/bitnet/configuration_bitnet.py +5 -7
- transformers/models/bitnet/modeling_bitnet.py +3 -4
- transformers/models/bitnet/modular_bitnet.py +3 -4
- transformers/models/blenderbot/configuration_blenderbot.py +8 -4
- transformers/models/blenderbot/modeling_blenderbot.py +4 -4
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +8 -4
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +4 -4
- transformers/models/blip/configuration_blip.py +9 -9
- transformers/models/blip/modeling_blip.py +55 -37
- transformers/models/blip_2/configuration_blip_2.py +2 -1
- transformers/models/blip_2/modeling_blip_2.py +81 -56
- transformers/models/bloom/configuration_bloom.py +5 -1
- transformers/models/bloom/modeling_bloom.py +2 -1
- transformers/models/blt/configuration_blt.py +23 -12
- transformers/models/blt/modeling_blt.py +20 -14
- transformers/models/blt/modular_blt.py +70 -10
- transformers/models/bridgetower/configuration_bridgetower.py +7 -1
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +6 -6
- transformers/models/bridgetower/modeling_bridgetower.py +29 -15
- transformers/models/bros/configuration_bros.py +24 -17
- transformers/models/camembert/configuration_camembert.py +8 -1
- transformers/models/camembert/modeling_camembert.py +6 -6
- transformers/models/canine/configuration_canine.py +4 -1
- transformers/models/chameleon/configuration_chameleon.py +5 -7
- transformers/models/chameleon/image_processing_chameleon_fast.py +5 -5
- transformers/models/chameleon/modeling_chameleon.py +82 -36
- transformers/models/chinese_clip/configuration_chinese_clip.py +10 -7
- transformers/models/chinese_clip/modeling_chinese_clip.py +28 -29
- transformers/models/clap/configuration_clap.py +4 -8
- transformers/models/clap/modeling_clap.py +21 -22
- transformers/models/clip/configuration_clip.py +4 -1
- transformers/models/clip/image_processing_clip_fast.py +9 -0
- transformers/models/clip/modeling_clip.py +25 -22
- transformers/models/clipseg/configuration_clipseg.py +4 -1
- transformers/models/clipseg/modeling_clipseg.py +27 -25
- transformers/models/clipseg/processing_clipseg.py +11 -3
- transformers/models/clvp/configuration_clvp.py +14 -2
- transformers/models/clvp/modeling_clvp.py +19 -30
- transformers/models/codegen/configuration_codegen.py +4 -3
- transformers/models/codegen/modeling_codegen.py +2 -1
- transformers/models/cohere/configuration_cohere.py +5 -7
- transformers/models/cohere/modeling_cohere.py +4 -4
- transformers/models/cohere/modular_cohere.py +3 -3
- transformers/models/cohere2/configuration_cohere2.py +6 -8
- transformers/models/cohere2/modeling_cohere2.py +4 -4
- transformers/models/cohere2/modular_cohere2.py +9 -11
- transformers/models/cohere2_vision/configuration_cohere2_vision.py +5 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +3 -3
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +24 -25
- transformers/models/cohere2_vision/modular_cohere2_vision.py +20 -20
- transformers/models/colqwen2/modeling_colqwen2.py +7 -6
- transformers/models/colqwen2/modular_colqwen2.py +7 -6
- transformers/models/conditional_detr/configuration_conditional_detr.py +19 -46
- transformers/models/conditional_detr/image_processing_conditional_detr.py +3 -4
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +28 -14
- transformers/models/conditional_detr/modeling_conditional_detr.py +794 -942
- transformers/models/conditional_detr/modular_conditional_detr.py +901 -3
- transformers/models/convbert/configuration_convbert.py +11 -7
- transformers/models/convnext/configuration_convnext.py +2 -4
- transformers/models/convnext/image_processing_convnext_fast.py +2 -2
- transformers/models/convnext/modeling_convnext.py +7 -6
- transformers/models/convnextv2/configuration_convnextv2.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +7 -6
- transformers/models/cpmant/configuration_cpmant.py +4 -0
- transformers/models/csm/configuration_csm.py +9 -15
- transformers/models/csm/modeling_csm.py +3 -3
- transformers/models/ctrl/configuration_ctrl.py +16 -0
- transformers/models/ctrl/modeling_ctrl.py +13 -25
- transformers/models/cwm/configuration_cwm.py +5 -7
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/configuration_d_fine.py +10 -56
- transformers/models/d_fine/modeling_d_fine.py +728 -868
- transformers/models/d_fine/modular_d_fine.py +335 -412
- transformers/models/dab_detr/configuration_dab_detr.py +22 -48
- transformers/models/dab_detr/modeling_dab_detr.py +11 -7
- transformers/models/dac/modeling_dac.py +1 -1
- transformers/models/data2vec/configuration_data2vec_audio.py +4 -1
- transformers/models/data2vec/configuration_data2vec_text.py +11 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +3 -3
- transformers/models/data2vec/modeling_data2vec_text.py +6 -6
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -2
- transformers/models/dbrx/configuration_dbrx.py +11 -3
- transformers/models/dbrx/modeling_dbrx.py +6 -6
- transformers/models/dbrx/modular_dbrx.py +6 -6
- transformers/models/deberta/configuration_deberta.py +6 -0
- transformers/models/deberta_v2/configuration_deberta_v2.py +6 -0
- transformers/models/decision_transformer/configuration_decision_transformer.py +3 -1
- transformers/models/decision_transformer/modeling_decision_transformer.py +3 -3
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +7 -10
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -8
- transformers/models/deepseek_v2/modular_deepseek_v2.py +8 -10
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +7 -10
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +7 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -5
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +4 -0
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +2 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +5 -5
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +17 -12
- transformers/models/deepseek_vl/modular_deepseek_vl.py +4 -0
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +4 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +2 -2
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +6 -6
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +68 -24
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +70 -19
- transformers/models/deformable_detr/configuration_deformable_detr.py +22 -45
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +25 -11
- transformers/models/deformable_detr/modeling_deformable_detr.py +410 -607
- transformers/models/deformable_detr/modular_deformable_detr.py +1385 -3
- transformers/models/deit/modeling_deit.py +11 -7
- transformers/models/depth_anything/configuration_depth_anything.py +12 -42
- transformers/models/depth_anything/modeling_depth_anything.py +5 -3
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +2 -2
- transformers/models/depth_pro/modeling_depth_pro.py +8 -4
- transformers/models/detr/configuration_detr.py +18 -49
- transformers/models/detr/image_processing_detr_fast.py +11 -11
- transformers/models/detr/modeling_detr.py +695 -734
- transformers/models/dia/configuration_dia.py +4 -7
- transformers/models/dia/generation_dia.py +8 -17
- transformers/models/dia/modeling_dia.py +7 -7
- transformers/models/dia/modular_dia.py +4 -4
- transformers/models/diffllama/configuration_diffllama.py +5 -7
- transformers/models/diffllama/modeling_diffllama.py +3 -8
- transformers/models/diffllama/modular_diffllama.py +2 -7
- transformers/models/dinat/configuration_dinat.py +2 -4
- transformers/models/dinat/modeling_dinat.py +7 -6
- transformers/models/dinov2/configuration_dinov2.py +2 -4
- transformers/models/dinov2/modeling_dinov2.py +9 -8
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +2 -4
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +9 -8
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +6 -7
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +2 -4
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +2 -3
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +2 -4
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +2 -2
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -6
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -6
- transformers/models/distilbert/configuration_distilbert.py +8 -1
- transformers/models/distilbert/modeling_distilbert.py +3 -3
- transformers/models/doge/configuration_doge.py +17 -7
- transformers/models/doge/modeling_doge.py +4 -4
- transformers/models/doge/modular_doge.py +20 -10
- transformers/models/donut/image_processing_donut_fast.py +4 -4
- transformers/models/dots1/configuration_dots1.py +16 -7
- transformers/models/dots1/modeling_dots1.py +4 -4
- transformers/models/dpr/configuration_dpr.py +19 -1
- transformers/models/dpt/configuration_dpt.py +23 -65
- transformers/models/dpt/image_processing_dpt_fast.py +5 -5
- transformers/models/dpt/modeling_dpt.py +19 -15
- transformers/models/dpt/modular_dpt.py +4 -4
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +53 -53
- transformers/models/edgetam/modular_edgetam.py +5 -7
- transformers/models/edgetam_video/modeling_edgetam_video.py +55 -56
- transformers/models/edgetam_video/modular_edgetam_video.py +9 -9
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +4 -3
- transformers/models/efficientloftr/modeling_efficientloftr.py +19 -9
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +2 -2
- transformers/models/electra/configuration_electra.py +13 -2
- transformers/models/electra/modeling_electra.py +6 -6
- transformers/models/emu3/configuration_emu3.py +12 -10
- transformers/models/emu3/modeling_emu3.py +84 -47
- transformers/models/emu3/modular_emu3.py +77 -39
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +12 -1
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +20 -24
- transformers/models/eomt/configuration_eomt.py +12 -13
- transformers/models/eomt/image_processing_eomt_fast.py +3 -3
- transformers/models/eomt/modeling_eomt.py +3 -3
- transformers/models/eomt/modular_eomt.py +17 -17
- transformers/models/eomt_dinov3/__init__.py +28 -0
- transformers/models/eomt_dinov3/configuration_eomt_dinov3.py +204 -0
- transformers/models/eomt_dinov3/modeling_eomt_dinov3.py +1376 -0
- transformers/models/eomt_dinov3/modular_eomt_dinov3.py +454 -0
- transformers/models/ernie/configuration_ernie.py +24 -2
- transformers/models/ernie/modeling_ernie.py +6 -30
- transformers/models/ernie4_5/configuration_ernie4_5.py +5 -7
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +7 -10
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +4 -4
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +17 -6
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +229 -188
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +79 -55
- transformers/models/esm/configuration_esm.py +9 -11
- transformers/models/esm/modeling_esm.py +3 -3
- transformers/models/esm/modeling_esmfold.py +1 -6
- transformers/models/esm/openfold_utils/protein.py +2 -3
- transformers/models/evolla/configuration_evolla.py +21 -8
- transformers/models/evolla/modeling_evolla.py +11 -7
- transformers/models/evolla/modular_evolla.py +5 -1
- transformers/models/exaone4/configuration_exaone4.py +8 -5
- transformers/models/exaone4/modeling_exaone4.py +4 -4
- transformers/models/exaone4/modular_exaone4.py +11 -8
- transformers/models/exaone_moe/__init__.py +27 -0
- transformers/models/exaone_moe/configuration_exaone_moe.py +235 -0
- transformers/models/exaone_moe/modeling_exaone_moe.py +665 -0
- transformers/models/exaone_moe/modular_exaone_moe.py +373 -0
- transformers/models/falcon/configuration_falcon.py +9 -1
- transformers/models/falcon/modeling_falcon.py +3 -8
- transformers/models/falcon_h1/configuration_falcon_h1.py +17 -8
- transformers/models/falcon_h1/modeling_falcon_h1.py +22 -54
- transformers/models/falcon_h1/modular_falcon_h1.py +21 -52
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +5 -1
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +18 -26
- transformers/models/falcon_mamba/modular_falcon_mamba.py +4 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +10 -1
- transformers/models/fast_vlm/modeling_fast_vlm.py +37 -64
- transformers/models/fast_vlm/modular_fast_vlm.py +146 -35
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +0 -1
- transformers/models/flaubert/configuration_flaubert.py +10 -4
- transformers/models/flaubert/modeling_flaubert.py +1 -1
- transformers/models/flava/configuration_flava.py +4 -3
- transformers/models/flava/image_processing_flava_fast.py +4 -4
- transformers/models/flava/modeling_flava.py +36 -28
- transformers/models/flex_olmo/configuration_flex_olmo.py +11 -14
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -4
- transformers/models/flex_olmo/modular_flex_olmo.py +11 -14
- transformers/models/florence2/configuration_florence2.py +4 -0
- transformers/models/florence2/modeling_florence2.py +57 -32
- transformers/models/florence2/modular_florence2.py +48 -26
- transformers/models/fnet/configuration_fnet.py +6 -1
- transformers/models/focalnet/configuration_focalnet.py +2 -4
- transformers/models/focalnet/modeling_focalnet.py +10 -7
- transformers/models/fsmt/configuration_fsmt.py +12 -16
- transformers/models/funnel/configuration_funnel.py +8 -0
- transformers/models/fuyu/configuration_fuyu.py +5 -8
- transformers/models/fuyu/image_processing_fuyu_fast.py +5 -4
- transformers/models/fuyu/modeling_fuyu.py +24 -23
- transformers/models/gemma/configuration_gemma.py +5 -7
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/modular_gemma.py +5 -7
- transformers/models/gemma2/configuration_gemma2.py +5 -7
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +8 -10
- transformers/models/gemma3/configuration_gemma3.py +28 -22
- transformers/models/gemma3/image_processing_gemma3_fast.py +2 -2
- transformers/models/gemma3/modeling_gemma3.py +37 -33
- transformers/models/gemma3/modular_gemma3.py +46 -42
- transformers/models/gemma3n/configuration_gemma3n.py +35 -22
- transformers/models/gemma3n/modeling_gemma3n.py +86 -58
- transformers/models/gemma3n/modular_gemma3n.py +112 -75
- transformers/models/git/configuration_git.py +5 -7
- transformers/models/git/modeling_git.py +31 -41
- transformers/models/glm/configuration_glm.py +7 -9
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/configuration_glm4.py +7 -9
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm46v/configuration_glm46v.py +4 -0
- transformers/models/glm46v/image_processing_glm46v.py +5 -2
- transformers/models/glm46v/image_processing_glm46v_fast.py +2 -2
- transformers/models/glm46v/modeling_glm46v.py +91 -46
- transformers/models/glm46v/modular_glm46v.py +4 -0
- transformers/models/glm4_moe/configuration_glm4_moe.py +17 -7
- transformers/models/glm4_moe/modeling_glm4_moe.py +4 -4
- transformers/models/glm4_moe/modular_glm4_moe.py +17 -7
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +8 -10
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +7 -7
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +8 -10
- transformers/models/glm4v/configuration_glm4v.py +12 -8
- transformers/models/glm4v/image_processing_glm4v.py +5 -2
- transformers/models/glm4v/image_processing_glm4v_fast.py +2 -2
- transformers/models/glm4v/modeling_glm4v.py +120 -63
- transformers/models/glm4v/modular_glm4v.py +82 -50
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +18 -6
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +115 -63
- transformers/models/glm4v_moe/modular_glm4v_moe.py +23 -12
- transformers/models/glm_image/configuration_glm_image.py +26 -20
- transformers/models/glm_image/image_processing_glm_image.py +1 -1
- transformers/models/glm_image/image_processing_glm_image_fast.py +5 -7
- transformers/models/glm_image/modeling_glm_image.py +337 -236
- transformers/models/glm_image/modular_glm_image.py +415 -255
- transformers/models/glm_image/processing_glm_image.py +65 -17
- transformers/{pipelines/deprecated → models/glm_ocr}/__init__.py +15 -2
- transformers/models/glm_ocr/configuration_glm_ocr.py +312 -0
- transformers/models/glm_ocr/modeling_glm_ocr.py +1633 -0
- transformers/models/glm_ocr/modular_glm_ocr.py +428 -0
- transformers/models/glmasr/modeling_glmasr.py +34 -28
- transformers/models/glmasr/modular_glmasr.py +23 -11
- transformers/models/glpn/image_processing_glpn_fast.py +3 -3
- transformers/models/glpn/modeling_glpn.py +4 -2
- transformers/models/got_ocr2/configuration_got_ocr2.py +6 -6
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +3 -3
- transformers/models/got_ocr2/modeling_got_ocr2.py +31 -37
- transformers/models/got_ocr2/modular_got_ocr2.py +30 -19
- transformers/models/gpt2/configuration_gpt2.py +13 -1
- transformers/models/gpt2/modeling_gpt2.py +5 -5
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +7 -1
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +5 -4
- transformers/models/gpt_neo/configuration_gpt_neo.py +9 -1
- transformers/models/gpt_neo/modeling_gpt_neo.py +3 -7
- transformers/models/gpt_neox/configuration_gpt_neox.py +8 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +4 -4
- transformers/models/gpt_neox/modular_gpt_neox.py +4 -4
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +9 -1
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +2 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +10 -6
- transformers/models/gpt_oss/modeling_gpt_oss.py +46 -79
- transformers/models/gpt_oss/modular_gpt_oss.py +45 -78
- transformers/models/gptj/configuration_gptj.py +4 -4
- transformers/models/gptj/modeling_gptj.py +3 -7
- transformers/models/granite/configuration_granite.py +5 -7
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granite_speech/modeling_granite_speech.py +63 -37
- transformers/models/granitemoe/configuration_granitemoe.py +5 -7
- transformers/models/granitemoe/modeling_granitemoe.py +4 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +17 -7
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +22 -54
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +39 -45
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +6 -7
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -4
- transformers/models/grounding_dino/configuration_grounding_dino.py +10 -45
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +11 -11
- transformers/models/grounding_dino/modeling_grounding_dino.py +68 -86
- transformers/models/groupvit/configuration_groupvit.py +4 -1
- transformers/models/groupvit/modeling_groupvit.py +29 -22
- transformers/models/helium/configuration_helium.py +5 -7
- transformers/models/helium/modeling_helium.py +4 -4
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +2 -4
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -5
- transformers/models/hgnet_v2/modular_hgnet_v2.py +7 -8
- transformers/models/hiera/configuration_hiera.py +2 -4
- transformers/models/hiera/modeling_hiera.py +11 -8
- transformers/models/hubert/configuration_hubert.py +4 -1
- transformers/models/hubert/modeling_hubert.py +7 -4
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +5 -7
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +28 -4
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +28 -6
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +6 -8
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +22 -9
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +22 -8
- transformers/models/ibert/configuration_ibert.py +4 -1
- transformers/models/idefics/configuration_idefics.py +5 -7
- transformers/models/idefics/modeling_idefics.py +3 -4
- transformers/models/idefics/vision.py +5 -4
- transformers/models/idefics2/configuration_idefics2.py +1 -2
- transformers/models/idefics2/image_processing_idefics2_fast.py +1 -0
- transformers/models/idefics2/modeling_idefics2.py +72 -50
- transformers/models/idefics3/configuration_idefics3.py +1 -3
- transformers/models/idefics3/image_processing_idefics3_fast.py +29 -3
- transformers/models/idefics3/modeling_idefics3.py +63 -40
- transformers/models/ijepa/modeling_ijepa.py +3 -3
- transformers/models/imagegpt/configuration_imagegpt.py +9 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +2 -2
- transformers/models/imagegpt/modeling_imagegpt.py +8 -4
- transformers/models/informer/modeling_informer.py +3 -3
- transformers/models/instructblip/configuration_instructblip.py +2 -1
- transformers/models/instructblip/modeling_instructblip.py +65 -39
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +60 -57
- transformers/models/instructblipvideo/modular_instructblipvideo.py +43 -32
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +2 -2
- transformers/models/internvl/configuration_internvl.py +5 -0
- transformers/models/internvl/modeling_internvl.py +35 -55
- transformers/models/internvl/modular_internvl.py +26 -38
- transformers/models/internvl/video_processing_internvl.py +2 -2
- transformers/models/jais2/configuration_jais2.py +5 -7
- transformers/models/jais2/modeling_jais2.py +4 -4
- transformers/models/jamba/configuration_jamba.py +5 -7
- transformers/models/jamba/modeling_jamba.py +4 -4
- transformers/models/jamba/modular_jamba.py +3 -3
- transformers/models/janus/image_processing_janus.py +2 -2
- transformers/models/janus/image_processing_janus_fast.py +8 -8
- transformers/models/janus/modeling_janus.py +63 -146
- transformers/models/janus/modular_janus.py +62 -20
- transformers/models/jetmoe/configuration_jetmoe.py +6 -4
- transformers/models/jetmoe/modeling_jetmoe.py +3 -3
- transformers/models/jetmoe/modular_jetmoe.py +3 -3
- transformers/models/kosmos2/configuration_kosmos2.py +10 -8
- transformers/models/kosmos2/modeling_kosmos2.py +56 -34
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +8 -8
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +54 -63
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +8 -3
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +44 -40
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +1 -1
- transformers/models/lasr/configuration_lasr.py +2 -4
- transformers/models/lasr/modeling_lasr.py +3 -3
- transformers/models/lasr/modular_lasr.py +3 -3
- transformers/models/layoutlm/configuration_layoutlm.py +14 -1
- transformers/models/layoutlm/modeling_layoutlm.py +3 -3
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +14 -16
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +2 -2
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +16 -18
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +2 -2
- transformers/models/layoutxlm/configuration_layoutxlm.py +14 -16
- transformers/models/led/configuration_led.py +7 -8
- transformers/models/levit/image_processing_levit_fast.py +4 -4
- transformers/models/lfm2/configuration_lfm2.py +5 -7
- transformers/models/lfm2/modeling_lfm2.py +4 -4
- transformers/models/lfm2/modular_lfm2.py +3 -3
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +5 -7
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -4
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +9 -15
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +42 -28
- transformers/models/lfm2_vl/modular_lfm2_vl.py +42 -27
- transformers/models/lightglue/image_processing_lightglue_fast.py +4 -3
- transformers/models/lightglue/modeling_lightglue.py +3 -3
- transformers/models/lightglue/modular_lightglue.py +3 -3
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +31 -28
- transformers/models/lighton_ocr/modular_lighton_ocr.py +19 -18
- transformers/models/lilt/configuration_lilt.py +6 -1
- transformers/models/llama/configuration_llama.py +5 -7
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama4/configuration_llama4.py +67 -47
- transformers/models/llama4/image_processing_llama4_fast.py +3 -3
- transformers/models/llama4/modeling_llama4.py +46 -44
- transformers/models/llava/configuration_llava.py +10 -0
- transformers/models/llava/image_processing_llava_fast.py +3 -3
- transformers/models/llava/modeling_llava.py +38 -65
- transformers/models/llava_next/configuration_llava_next.py +2 -1
- transformers/models/llava_next/image_processing_llava_next_fast.py +6 -6
- transformers/models/llava_next/modeling_llava_next.py +61 -60
- transformers/models/llava_next_video/configuration_llava_next_video.py +10 -6
- transformers/models/llava_next_video/modeling_llava_next_video.py +115 -100
- transformers/models/llava_next_video/modular_llava_next_video.py +110 -101
- transformers/models/llava_onevision/configuration_llava_onevision.py +10 -6
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +8 -7
- transformers/models/llava_onevision/modeling_llava_onevision.py +111 -105
- transformers/models/llava_onevision/modular_llava_onevision.py +106 -101
- transformers/models/longcat_flash/configuration_longcat_flash.py +7 -10
- transformers/models/longcat_flash/modeling_longcat_flash.py +7 -7
- transformers/models/longcat_flash/modular_longcat_flash.py +6 -5
- transformers/models/longformer/configuration_longformer.py +4 -1
- transformers/models/longt5/configuration_longt5.py +9 -6
- transformers/models/longt5/modeling_longt5.py +2 -1
- transformers/models/luke/configuration_luke.py +8 -1
- transformers/models/lw_detr/configuration_lw_detr.py +19 -31
- transformers/models/lw_detr/modeling_lw_detr.py +43 -44
- transformers/models/lw_detr/modular_lw_detr.py +36 -38
- transformers/models/lxmert/configuration_lxmert.py +16 -0
- transformers/models/m2m_100/configuration_m2m_100.py +7 -8
- transformers/models/m2m_100/modeling_m2m_100.py +3 -3
- transformers/models/mamba/configuration_mamba.py +5 -2
- transformers/models/mamba/modeling_mamba.py +18 -26
- transformers/models/mamba2/configuration_mamba2.py +5 -7
- transformers/models/mamba2/modeling_mamba2.py +22 -33
- transformers/models/marian/configuration_marian.py +10 -4
- transformers/models/marian/modeling_marian.py +4 -4
- transformers/models/markuplm/configuration_markuplm.py +4 -6
- transformers/models/markuplm/modeling_markuplm.py +3 -3
- transformers/models/mask2former/configuration_mask2former.py +12 -47
- transformers/models/mask2former/image_processing_mask2former_fast.py +8 -8
- transformers/models/mask2former/modeling_mask2former.py +18 -12
- transformers/models/maskformer/configuration_maskformer.py +14 -45
- transformers/models/maskformer/configuration_maskformer_swin.py +2 -4
- transformers/models/maskformer/image_processing_maskformer_fast.py +8 -8
- transformers/models/maskformer/modeling_maskformer.py +15 -9
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -3
- transformers/models/mbart/configuration_mbart.py +9 -4
- transformers/models/mbart/modeling_mbart.py +9 -6
- transformers/models/megatron_bert/configuration_megatron_bert.py +13 -2
- transformers/models/megatron_bert/modeling_megatron_bert.py +0 -15
- transformers/models/metaclip_2/configuration_metaclip_2.py +4 -1
- transformers/models/metaclip_2/modeling_metaclip_2.py +49 -42
- transformers/models/metaclip_2/modular_metaclip_2.py +41 -25
- transformers/models/mgp_str/modeling_mgp_str.py +4 -2
- transformers/models/mimi/configuration_mimi.py +4 -0
- transformers/models/mimi/modeling_mimi.py +40 -36
- transformers/models/minimax/configuration_minimax.py +8 -11
- transformers/models/minimax/modeling_minimax.py +5 -5
- transformers/models/minimax/modular_minimax.py +9 -12
- transformers/models/minimax_m2/configuration_minimax_m2.py +8 -31
- transformers/models/minimax_m2/modeling_minimax_m2.py +4 -4
- transformers/models/minimax_m2/modular_minimax_m2.py +8 -31
- transformers/models/ministral/configuration_ministral.py +5 -7
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral/modular_ministral.py +5 -8
- transformers/models/ministral3/configuration_ministral3.py +4 -4
- transformers/models/ministral3/modeling_ministral3.py +4 -4
- transformers/models/ministral3/modular_ministral3.py +3 -3
- transformers/models/mistral/configuration_mistral.py +5 -7
- transformers/models/mistral/modeling_mistral.py +4 -4
- transformers/models/mistral/modular_mistral.py +3 -3
- transformers/models/mistral3/configuration_mistral3.py +4 -0
- transformers/models/mistral3/modeling_mistral3.py +36 -40
- transformers/models/mistral3/modular_mistral3.py +31 -32
- transformers/models/mixtral/configuration_mixtral.py +8 -11
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mlcd/modeling_mlcd.py +7 -5
- transformers/models/mlcd/modular_mlcd.py +7 -5
- transformers/models/mllama/configuration_mllama.py +5 -7
- transformers/models/mllama/image_processing_mllama_fast.py +6 -5
- transformers/models/mllama/modeling_mllama.py +19 -19
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +10 -45
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +66 -84
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +10 -45
- transformers/models/mobilebert/configuration_mobilebert.py +4 -1
- transformers/models/mobilebert/modeling_mobilebert.py +3 -3
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +4 -4
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +4 -2
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +4 -4
- transformers/models/mobilevit/modeling_mobilevit.py +4 -2
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -2
- transformers/models/modernbert/configuration_modernbert.py +46 -21
- transformers/models/modernbert/modeling_modernbert.py +146 -899
- transformers/models/modernbert/modular_modernbert.py +185 -908
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +21 -13
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -17
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +24 -23
- transformers/models/moonshine/configuration_moonshine.py +12 -7
- transformers/models/moonshine/modeling_moonshine.py +7 -7
- transformers/models/moonshine/modular_moonshine.py +19 -13
- transformers/models/moshi/configuration_moshi.py +28 -2
- transformers/models/moshi/modeling_moshi.py +4 -9
- transformers/models/mpnet/configuration_mpnet.py +6 -1
- transformers/models/mpt/configuration_mpt.py +16 -0
- transformers/models/mra/configuration_mra.py +8 -1
- transformers/models/mt5/configuration_mt5.py +9 -5
- transformers/models/mt5/modeling_mt5.py +5 -8
- transformers/models/musicgen/configuration_musicgen.py +12 -7
- transformers/models/musicgen/modeling_musicgen.py +6 -5
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +15 -7
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -17
- transformers/models/mvp/configuration_mvp.py +8 -4
- transformers/models/mvp/modeling_mvp.py +6 -4
- transformers/models/nanochat/configuration_nanochat.py +5 -7
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nanochat/modular_nanochat.py +4 -4
- transformers/models/nemotron/configuration_nemotron.py +5 -7
- transformers/models/nemotron/modeling_nemotron.py +4 -14
- transformers/models/nllb/tokenization_nllb.py +7 -5
- transformers/models/nllb_moe/configuration_nllb_moe.py +7 -9
- transformers/models/nllb_moe/modeling_nllb_moe.py +3 -3
- transformers/models/nougat/image_processing_nougat_fast.py +8 -8
- transformers/models/nystromformer/configuration_nystromformer.py +8 -1
- transformers/models/olmo/configuration_olmo.py +5 -7
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +3 -3
- transformers/models/olmo2/configuration_olmo2.py +9 -11
- transformers/models/olmo2/modeling_olmo2.py +4 -4
- transformers/models/olmo2/modular_olmo2.py +7 -7
- transformers/models/olmo3/configuration_olmo3.py +10 -11
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmo3/modular_olmo3.py +13 -14
- transformers/models/olmoe/configuration_olmoe.py +5 -7
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/olmoe/modular_olmoe.py +3 -3
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +14 -49
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +22 -18
- transformers/models/oneformer/configuration_oneformer.py +9 -46
- transformers/models/oneformer/image_processing_oneformer_fast.py +8 -8
- transformers/models/oneformer/modeling_oneformer.py +14 -9
- transformers/models/openai/configuration_openai.py +16 -0
- transformers/models/opt/configuration_opt.py +6 -6
- transformers/models/opt/modeling_opt.py +5 -5
- transformers/models/ovis2/configuration_ovis2.py +4 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +3 -3
- transformers/models/ovis2/modeling_ovis2.py +58 -99
- transformers/models/ovis2/modular_ovis2.py +52 -13
- transformers/models/owlv2/configuration_owlv2.py +4 -1
- transformers/models/owlv2/image_processing_owlv2_fast.py +5 -5
- transformers/models/owlv2/modeling_owlv2.py +40 -27
- transformers/models/owlv2/modular_owlv2.py +5 -5
- transformers/models/owlvit/configuration_owlvit.py +4 -1
- transformers/models/owlvit/modeling_owlvit.py +40 -27
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +9 -10
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +88 -87
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +82 -53
- transformers/models/paligemma/configuration_paligemma.py +4 -0
- transformers/models/paligemma/modeling_paligemma.py +30 -26
- transformers/models/parakeet/configuration_parakeet.py +2 -4
- transformers/models/parakeet/modeling_parakeet.py +3 -3
- transformers/models/parakeet/modular_parakeet.py +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +3 -3
- transformers/models/patchtst/modeling_patchtst.py +3 -3
- transformers/models/pe_audio/modeling_pe_audio.py +4 -4
- transformers/models/pe_audio/modular_pe_audio.py +1 -1
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +4 -4
- transformers/models/pe_audio_video/modular_pe_audio_video.py +4 -4
- transformers/models/pe_video/modeling_pe_video.py +36 -24
- transformers/models/pe_video/modular_pe_video.py +36 -23
- transformers/models/pegasus/configuration_pegasus.py +8 -5
- transformers/models/pegasus/modeling_pegasus.py +4 -4
- transformers/models/pegasus_x/configuration_pegasus_x.py +5 -3
- transformers/models/pegasus_x/modeling_pegasus_x.py +3 -3
- transformers/models/perceiver/image_processing_perceiver_fast.py +2 -2
- transformers/models/perceiver/modeling_perceiver.py +17 -9
- transformers/models/perception_lm/modeling_perception_lm.py +26 -27
- transformers/models/perception_lm/modular_perception_lm.py +27 -25
- transformers/models/persimmon/configuration_persimmon.py +5 -7
- transformers/models/persimmon/modeling_persimmon.py +5 -5
- transformers/models/phi/configuration_phi.py +8 -6
- transformers/models/phi/modeling_phi.py +4 -4
- transformers/models/phi/modular_phi.py +3 -3
- transformers/models/phi3/configuration_phi3.py +9 -11
- transformers/models/phi3/modeling_phi3.py +4 -4
- transformers/models/phi3/modular_phi3.py +3 -3
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +11 -13
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +4 -4
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +46 -61
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +44 -30
- transformers/models/phimoe/configuration_phimoe.py +5 -7
- transformers/models/phimoe/modeling_phimoe.py +15 -39
- transformers/models/phimoe/modular_phimoe.py +12 -7
- transformers/models/pix2struct/configuration_pix2struct.py +12 -9
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +5 -5
- transformers/models/pix2struct/modeling_pix2struct.py +14 -7
- transformers/models/pixio/configuration_pixio.py +2 -4
- transformers/models/pixio/modeling_pixio.py +9 -8
- transformers/models/pixio/modular_pixio.py +4 -2
- transformers/models/pixtral/image_processing_pixtral_fast.py +5 -5
- transformers/models/pixtral/modeling_pixtral.py +9 -12
- transformers/models/plbart/configuration_plbart.py +8 -5
- transformers/models/plbart/modeling_plbart.py +9 -7
- transformers/models/plbart/modular_plbart.py +1 -1
- transformers/models/poolformer/image_processing_poolformer_fast.py +7 -7
- transformers/models/pop2piano/configuration_pop2piano.py +7 -6
- transformers/models/pop2piano/modeling_pop2piano.py +2 -1
- transformers/models/pp_doclayout_v3/__init__.py +30 -0
- transformers/models/pp_doclayout_v3/configuration_pp_doclayout_v3.py +277 -0
- transformers/models/pp_doclayout_v3/image_processing_pp_doclayout_v3_fast.py +305 -0
- transformers/models/pp_doclayout_v3/modeling_pp_doclayout_v3.py +2083 -0
- transformers/models/pp_doclayout_v3/modular_pp_doclayout_v3.py +1549 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +12 -46
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +6 -6
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +8 -6
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +12 -10
- transformers/models/prophetnet/configuration_prophetnet.py +11 -10
- transformers/models/prophetnet/modeling_prophetnet.py +12 -23
- transformers/models/pvt/image_processing_pvt.py +7 -7
- transformers/models/pvt/image_processing_pvt_fast.py +1 -1
- transformers/models/pvt_v2/configuration_pvt_v2.py +2 -4
- transformers/models/pvt_v2/modeling_pvt_v2.py +6 -5
- transformers/models/qwen2/configuration_qwen2.py +14 -4
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/modular_qwen2.py +3 -3
- transformers/models/qwen2/tokenization_qwen2.py +0 -4
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +17 -5
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +108 -88
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +115 -87
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +7 -10
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +98 -53
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +18 -6
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +12 -12
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +14 -4
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_moe/modular_qwen2_moe.py +3 -3
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +7 -10
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +4 -6
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +97 -53
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +4 -6
- transformers/models/qwen3/configuration_qwen3.py +15 -5
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3/modular_qwen3.py +3 -3
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +20 -7
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/configuration_qwen3_next.py +16 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +5 -5
- transformers/models/qwen3_next/modular_qwen3_next.py +4 -4
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +55 -19
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +161 -98
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +107 -34
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +7 -6
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +115 -49
- transformers/models/qwen3_vl/modular_qwen3_vl.py +88 -37
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +7 -6
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +173 -99
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +23 -7
- transformers/models/rag/configuration_rag.py +6 -6
- transformers/models/rag/modeling_rag.py +3 -3
- transformers/models/rag/retrieval_rag.py +1 -1
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +8 -6
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +4 -5
- transformers/models/reformer/configuration_reformer.py +7 -7
- transformers/models/rembert/configuration_rembert.py +8 -1
- transformers/models/rembert/modeling_rembert.py +0 -22
- transformers/models/resnet/configuration_resnet.py +2 -4
- transformers/models/resnet/modeling_resnet.py +6 -5
- transformers/models/roberta/configuration_roberta.py +11 -2
- transformers/models/roberta/modeling_roberta.py +6 -6
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +11 -2
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +6 -6
- transformers/models/roc_bert/configuration_roc_bert.py +8 -1
- transformers/models/roc_bert/modeling_roc_bert.py +6 -41
- transformers/models/roformer/configuration_roformer.py +13 -2
- transformers/models/roformer/modeling_roformer.py +0 -14
- transformers/models/rt_detr/configuration_rt_detr.py +8 -49
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +2 -4
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +24 -11
- transformers/models/rt_detr/modeling_rt_detr.py +578 -737
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +2 -3
- transformers/models/rt_detr/modular_rt_detr.py +1508 -6
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +12 -57
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +318 -453
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +25 -66
- transformers/models/rwkv/configuration_rwkv.py +2 -3
- transformers/models/rwkv/modeling_rwkv.py +0 -23
- transformers/models/sam/configuration_sam.py +2 -0
- transformers/models/sam/image_processing_sam_fast.py +4 -4
- transformers/models/sam/modeling_sam.py +13 -8
- transformers/models/sam/processing_sam.py +3 -3
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +56 -52
- transformers/models/sam2/modular_sam2.py +47 -55
- transformers/models/sam2_video/modeling_sam2_video.py +50 -51
- transformers/models/sam2_video/modular_sam2_video.py +12 -10
- transformers/models/sam3/modeling_sam3.py +43 -47
- transformers/models/sam3/processing_sam3.py +8 -4
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +1 -2
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +50 -49
- transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/processing_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +50 -49
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +10 -22
- transformers/models/sam3_video/modeling_sam3_video.py +27 -14
- transformers/models/sam_hq/configuration_sam_hq.py +2 -0
- transformers/models/sam_hq/modeling_sam_hq.py +13 -9
- transformers/models/sam_hq/modular_sam_hq.py +6 -6
- transformers/models/sam_hq/processing_sam_hq.py +7 -6
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +8 -9
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +8 -9
- transformers/models/seed_oss/configuration_seed_oss.py +7 -9
- transformers/models/seed_oss/modeling_seed_oss.py +4 -4
- transformers/models/seed_oss/modular_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +4 -4
- transformers/models/segformer/modeling_segformer.py +4 -2
- transformers/models/segformer/modular_segformer.py +3 -3
- transformers/models/seggpt/modeling_seggpt.py +20 -8
- transformers/models/sew/configuration_sew.py +4 -1
- transformers/models/sew/modeling_sew.py +9 -5
- transformers/models/sew/modular_sew.py +2 -1
- transformers/models/sew_d/configuration_sew_d.py +4 -1
- transformers/models/sew_d/modeling_sew_d.py +4 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +4 -4
- transformers/models/siglip/configuration_siglip.py +4 -1
- transformers/models/siglip/modeling_siglip.py +27 -71
- transformers/models/siglip2/__init__.py +1 -0
- transformers/models/siglip2/configuration_siglip2.py +4 -2
- transformers/models/siglip2/image_processing_siglip2_fast.py +2 -2
- transformers/models/siglip2/modeling_siglip2.py +37 -78
- transformers/models/siglip2/modular_siglip2.py +74 -25
- transformers/models/siglip2/tokenization_siglip2.py +95 -0
- transformers/models/smollm3/configuration_smollm3.py +6 -6
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smollm3/modular_smollm3.py +9 -9
- transformers/models/smolvlm/configuration_smolvlm.py +1 -3
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +29 -3
- transformers/models/smolvlm/modeling_smolvlm.py +75 -46
- transformers/models/smolvlm/modular_smolvlm.py +36 -23
- transformers/models/smolvlm/video_processing_smolvlm.py +9 -9
- transformers/models/solar_open/__init__.py +27 -0
- transformers/models/solar_open/configuration_solar_open.py +184 -0
- transformers/models/solar_open/modeling_solar_open.py +642 -0
- transformers/models/solar_open/modular_solar_open.py +224 -0
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +6 -4
- transformers/models/speech_to_text/configuration_speech_to_text.py +9 -8
- transformers/models/speech_to_text/modeling_speech_to_text.py +3 -3
- transformers/models/speecht5/configuration_speecht5.py +7 -8
- transformers/models/splinter/configuration_splinter.py +6 -6
- transformers/models/splinter/modeling_splinter.py +8 -3
- transformers/models/squeezebert/configuration_squeezebert.py +14 -1
- transformers/models/stablelm/configuration_stablelm.py +8 -6
- transformers/models/stablelm/modeling_stablelm.py +5 -5
- transformers/models/starcoder2/configuration_starcoder2.py +11 -5
- transformers/models/starcoder2/modeling_starcoder2.py +5 -5
- transformers/models/starcoder2/modular_starcoder2.py +4 -4
- transformers/models/superglue/configuration_superglue.py +4 -0
- transformers/models/superglue/image_processing_superglue_fast.py +4 -3
- transformers/models/superglue/modeling_superglue.py +9 -4
- transformers/models/superpoint/image_processing_superpoint_fast.py +3 -4
- transformers/models/superpoint/modeling_superpoint.py +4 -2
- transformers/models/swin/configuration_swin.py +2 -4
- transformers/models/swin/modeling_swin.py +11 -8
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +2 -2
- transformers/models/swin2sr/modeling_swin2sr.py +4 -2
- transformers/models/swinv2/configuration_swinv2.py +2 -4
- transformers/models/swinv2/modeling_swinv2.py +10 -7
- transformers/models/switch_transformers/configuration_switch_transformers.py +11 -6
- transformers/models/switch_transformers/modeling_switch_transformers.py +3 -3
- transformers/models/switch_transformers/modular_switch_transformers.py +3 -3
- transformers/models/t5/configuration_t5.py +9 -8
- transformers/models/t5/modeling_t5.py +5 -8
- transformers/models/t5gemma/configuration_t5gemma.py +10 -25
- transformers/models/t5gemma/modeling_t5gemma.py +9 -9
- transformers/models/t5gemma/modular_t5gemma.py +11 -24
- transformers/models/t5gemma2/configuration_t5gemma2.py +35 -48
- transformers/models/t5gemma2/modeling_t5gemma2.py +143 -100
- transformers/models/t5gemma2/modular_t5gemma2.py +152 -136
- transformers/models/table_transformer/configuration_table_transformer.py +18 -49
- transformers/models/table_transformer/modeling_table_transformer.py +27 -53
- transformers/models/tapas/configuration_tapas.py +12 -1
- transformers/models/tapas/modeling_tapas.py +1 -1
- transformers/models/tapas/tokenization_tapas.py +1 -0
- transformers/models/textnet/configuration_textnet.py +4 -6
- transformers/models/textnet/image_processing_textnet_fast.py +3 -3
- transformers/models/textnet/modeling_textnet.py +15 -14
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +3 -3
- transformers/models/timesfm/modeling_timesfm.py +5 -6
- transformers/models/timesfm/modular_timesfm.py +5 -6
- transformers/models/timm_backbone/configuration_timm_backbone.py +33 -7
- transformers/models/timm_backbone/modeling_timm_backbone.py +21 -24
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +9 -4
- transformers/models/trocr/configuration_trocr.py +11 -7
- transformers/models/trocr/modeling_trocr.py +4 -2
- transformers/models/tvp/configuration_tvp.py +10 -35
- transformers/models/tvp/image_processing_tvp_fast.py +6 -5
- transformers/models/tvp/modeling_tvp.py +1 -1
- transformers/models/udop/configuration_udop.py +16 -7
- transformers/models/udop/modeling_udop.py +10 -6
- transformers/models/umt5/configuration_umt5.py +8 -6
- transformers/models/umt5/modeling_umt5.py +7 -3
- transformers/models/unispeech/configuration_unispeech.py +4 -1
- transformers/models/unispeech/modeling_unispeech.py +7 -4
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +4 -1
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +7 -4
- transformers/models/upernet/configuration_upernet.py +8 -35
- transformers/models/upernet/modeling_upernet.py +1 -1
- transformers/models/vaultgemma/configuration_vaultgemma.py +5 -7
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/video_llama_3/configuration_video_llama_3.py +4 -0
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +4 -6
- transformers/models/video_llama_3/modeling_video_llama_3.py +85 -48
- transformers/models/video_llama_3/modular_video_llama_3.py +56 -43
- transformers/models/video_llama_3/video_processing_video_llama_3.py +29 -8
- transformers/models/video_llava/configuration_video_llava.py +4 -0
- transformers/models/video_llava/modeling_video_llava.py +87 -89
- transformers/models/videomae/modeling_videomae.py +4 -5
- transformers/models/vilt/configuration_vilt.py +4 -1
- transformers/models/vilt/image_processing_vilt_fast.py +6 -6
- transformers/models/vilt/modeling_vilt.py +27 -12
- transformers/models/vipllava/configuration_vipllava.py +4 -0
- transformers/models/vipllava/modeling_vipllava.py +57 -31
- transformers/models/vipllava/modular_vipllava.py +50 -24
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +10 -6
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +27 -20
- transformers/models/visual_bert/configuration_visual_bert.py +6 -1
- transformers/models/vit/configuration_vit.py +2 -2
- transformers/models/vit/modeling_vit.py +7 -5
- transformers/models/vit_mae/modeling_vit_mae.py +11 -7
- transformers/models/vit_msn/modeling_vit_msn.py +11 -7
- transformers/models/vitdet/configuration_vitdet.py +2 -4
- transformers/models/vitdet/modeling_vitdet.py +2 -3
- transformers/models/vitmatte/configuration_vitmatte.py +6 -35
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +2 -2
- transformers/models/vitmatte/modeling_vitmatte.py +1 -1
- transformers/models/vitpose/configuration_vitpose.py +6 -43
- transformers/models/vitpose/modeling_vitpose.py +5 -3
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +2 -4
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +5 -6
- transformers/models/vits/configuration_vits.py +4 -0
- transformers/models/vits/modeling_vits.py +9 -7
- transformers/models/vivit/modeling_vivit.py +4 -4
- transformers/models/vjepa2/modeling_vjepa2.py +9 -9
- transformers/models/voxtral/configuration_voxtral.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +25 -24
- transformers/models/voxtral/modular_voxtral.py +26 -20
- transformers/models/wav2vec2/configuration_wav2vec2.py +4 -1
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -4
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +4 -1
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +4 -1
- transformers/models/wavlm/configuration_wavlm.py +4 -1
- transformers/models/wavlm/modeling_wavlm.py +4 -1
- transformers/models/whisper/configuration_whisper.py +6 -4
- transformers/models/whisper/generation_whisper.py +0 -1
- transformers/models/whisper/modeling_whisper.py +3 -3
- transformers/models/x_clip/configuration_x_clip.py +4 -1
- transformers/models/x_clip/modeling_x_clip.py +26 -27
- transformers/models/xglm/configuration_xglm.py +9 -7
- transformers/models/xlm/configuration_xlm.py +10 -7
- transformers/models/xlm/modeling_xlm.py +1 -1
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +11 -2
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +6 -6
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +10 -1
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +6 -6
- transformers/models/xlnet/configuration_xlnet.py +3 -1
- transformers/models/xlstm/configuration_xlstm.py +5 -7
- transformers/models/xlstm/modeling_xlstm.py +0 -32
- transformers/models/xmod/configuration_xmod.py +11 -2
- transformers/models/xmod/modeling_xmod.py +13 -16
- transformers/models/yolos/image_processing_yolos_fast.py +25 -28
- transformers/models/yolos/modeling_yolos.py +7 -7
- transformers/models/yolos/modular_yolos.py +16 -16
- transformers/models/yoso/configuration_yoso.py +8 -1
- transformers/models/youtu/__init__.py +27 -0
- transformers/models/youtu/configuration_youtu.py +194 -0
- transformers/models/youtu/modeling_youtu.py +619 -0
- transformers/models/youtu/modular_youtu.py +254 -0
- transformers/models/zamba/configuration_zamba.py +5 -7
- transformers/models/zamba/modeling_zamba.py +25 -56
- transformers/models/zamba2/configuration_zamba2.py +8 -13
- transformers/models/zamba2/modeling_zamba2.py +53 -78
- transformers/models/zamba2/modular_zamba2.py +36 -29
- transformers/models/zoedepth/configuration_zoedepth.py +17 -40
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +9 -9
- transformers/models/zoedepth/modeling_zoedepth.py +5 -3
- transformers/pipelines/__init__.py +1 -61
- transformers/pipelines/any_to_any.py +1 -1
- transformers/pipelines/automatic_speech_recognition.py +0 -2
- transformers/pipelines/base.py +1 -1
- transformers/pipelines/image_text_to_text.py +1 -1
- transformers/pipelines/text_to_audio.py +5 -1
- transformers/processing_utils.py +35 -44
- transformers/pytorch_utils.py +2 -26
- transformers/quantizers/quantizer_compressed_tensors.py +7 -5
- transformers/quantizers/quantizer_fbgemm_fp8.py +20 -23
- transformers/quantizers/quantizer_finegrained_fp8.py +14 -20
- transformers/quantizers/quantizer_mxfp4.py +1 -1
- transformers/quantizers/quantizer_torchao.py +0 -16
- transformers/safetensors_conversion.py +11 -4
- transformers/testing_utils.py +3 -28
- transformers/tokenization_mistral_common.py +9 -0
- transformers/tokenization_python.py +6 -4
- transformers/tokenization_utils_base.py +119 -219
- transformers/tokenization_utils_tokenizers.py +31 -2
- transformers/trainer.py +25 -33
- transformers/trainer_seq2seq.py +1 -1
- transformers/training_args.py +411 -417
- transformers/utils/__init__.py +1 -4
- transformers/utils/auto_docstring.py +15 -18
- transformers/utils/backbone_utils.py +13 -373
- transformers/utils/doc.py +4 -36
- transformers/utils/generic.py +69 -33
- transformers/utils/import_utils.py +72 -75
- transformers/utils/loading_report.py +133 -105
- transformers/utils/quantization_config.py +0 -21
- transformers/video_processing_utils.py +5 -5
- transformers/video_utils.py +3 -1
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/METADATA +118 -237
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/RECORD +1019 -994
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/WHEEL +1 -1
- transformers/pipelines/deprecated/text2text_generation.py +0 -408
- transformers/pipelines/image_to_text.py +0 -189
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/top_level.txt +0 -0
transformers/utils/__init__.py
CHANGED
|
@@ -31,7 +31,6 @@ from .auto_docstring import (
|
|
|
31
31
|
parse_docstring,
|
|
32
32
|
set_min_indent,
|
|
33
33
|
)
|
|
34
|
-
from .backbone_utils import BackboneConfigMixin, BackboneMixin
|
|
35
34
|
from .chat_template_utils import DocstringParsingException, TypeHintParsingException, get_json_schema
|
|
36
35
|
from .constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD
|
|
37
36
|
from .doc import (
|
|
@@ -118,7 +117,6 @@ from .import_utils import (
|
|
|
118
117
|
is_av_available,
|
|
119
118
|
is_bitsandbytes_available,
|
|
120
119
|
is_bs4_available,
|
|
121
|
-
is_ccl_available,
|
|
122
120
|
is_coloredlogs_available,
|
|
123
121
|
is_compressed_tensors_available,
|
|
124
122
|
is_cuda_platform,
|
|
@@ -139,7 +137,6 @@ from .import_utils import (
|
|
|
139
137
|
is_flute_available,
|
|
140
138
|
is_fp_quant_available,
|
|
141
139
|
is_fsdp_available,
|
|
142
|
-
is_ftfy_available,
|
|
143
140
|
is_g2p_en_available,
|
|
144
141
|
is_galore_torch_available,
|
|
145
142
|
is_gguf_available,
|
|
@@ -151,7 +148,6 @@ from .import_utils import (
|
|
|
151
148
|
is_hqq_available,
|
|
152
149
|
is_huggingface_hub_greater_or_equal,
|
|
153
150
|
is_in_notebook,
|
|
154
|
-
is_ipex_available,
|
|
155
151
|
is_jinja_available,
|
|
156
152
|
is_jmespath_available,
|
|
157
153
|
is_jumanpp_available,
|
|
@@ -248,6 +244,7 @@ from .import_utils import (
|
|
|
248
244
|
is_xlstm_available,
|
|
249
245
|
is_yt_dlp_available,
|
|
250
246
|
requires_backends,
|
|
247
|
+
torch_compilable_check,
|
|
251
248
|
torch_only_method,
|
|
252
249
|
)
|
|
253
250
|
from .kernel_config import KernelConfig
|
|
@@ -11,10 +11,12 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
from __future__ import annotations
|
|
14
15
|
|
|
15
16
|
import inspect
|
|
16
17
|
import os
|
|
17
18
|
import textwrap
|
|
19
|
+
from collections.abc import Mapping
|
|
18
20
|
from pathlib import Path
|
|
19
21
|
from types import UnionType
|
|
20
22
|
from typing import Union, get_args, get_origin
|
|
@@ -1456,7 +1458,7 @@ def generate_processor_intro(cls) -> str:
|
|
|
1456
1458
|
return intro
|
|
1457
1459
|
|
|
1458
1460
|
|
|
1459
|
-
def get_placeholders_dict(placeholders: list, model_name: str) -> dict:
|
|
1461
|
+
def get_placeholders_dict(placeholders: set[str], model_name: str) -> Mapping[str, str | None]:
|
|
1460
1462
|
"""
|
|
1461
1463
|
Get the dictionary of placeholders for the given model name.
|
|
1462
1464
|
"""
|
|
@@ -1487,7 +1489,7 @@ def get_placeholders_dict(placeholders: list, model_name: str) -> dict:
|
|
|
1487
1489
|
return placeholders_dict
|
|
1488
1490
|
|
|
1489
1491
|
|
|
1490
|
-
def format_args_docstring(docstring, model_name):
|
|
1492
|
+
def format_args_docstring(docstring: str, model_name: str) -> str:
|
|
1491
1493
|
"""
|
|
1492
1494
|
Replaces placeholders such as {image_processor_class} in the docstring with the actual values,
|
|
1493
1495
|
deducted from the model name and the auto modules.
|
|
@@ -1502,10 +1504,7 @@ def format_args_docstring(docstring, model_name):
|
|
|
1502
1504
|
# replace the placeholders in the docstring with the values from the placeholders_dict
|
|
1503
1505
|
for placeholder, value in placeholders_dict.items():
|
|
1504
1506
|
if placeholder is not None:
|
|
1505
|
-
|
|
1506
|
-
docstring = docstring.replace(f"{{{placeholder}}}", value)
|
|
1507
|
-
except Exception:
|
|
1508
|
-
pass
|
|
1507
|
+
docstring = docstring.replace(f"{{{placeholder}}}", value)
|
|
1509
1508
|
return docstring
|
|
1510
1509
|
|
|
1511
1510
|
|
|
@@ -1825,18 +1824,16 @@ def _is_processor_class(func, parent_class):
|
|
|
1825
1824
|
# Single-modality processors are in "image_processing_*.py", "video_processing_*.py", etc.
|
|
1826
1825
|
try:
|
|
1827
1826
|
source_file = inspect.getsourcefile(func)
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
# Default to False (conservative approach)
|
|
1839
|
-
return False
|
|
1827
|
+
except TypeError:
|
|
1828
|
+
return False
|
|
1829
|
+
if not source_file:
|
|
1830
|
+
return False
|
|
1831
|
+
|
|
1832
|
+
filename = os.path.basename(source_file)
|
|
1833
|
+
|
|
1834
|
+
# Multimodal processors are implemented in processing_*.py modules
|
|
1835
|
+
# (single-modality processors use image_processing_*, video_processing_*, etc.)self.
|
|
1836
|
+
return filename.startswith("processing_") and filename.endswith(".py")
|
|
1840
1837
|
|
|
1841
1838
|
|
|
1842
1839
|
def _process_kwargs_parameters(sig, func, parent_class, documented_kwargs, indent_level, undocumented_parameters):
|
|
@@ -1,379 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
1
|
+
import warnings
|
|
14
2
|
|
|
15
|
-
|
|
3
|
+
from ..backbone_utils import BackboneConfigMixin, BackboneMixin
|
|
16
4
|
|
|
17
|
-
import enum
|
|
18
|
-
import inspect
|
|
19
|
-
from collections.abc import Iterable
|
|
20
|
-
from typing import TYPE_CHECKING, Union
|
|
21
5
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class BackboneType(enum.Enum):
|
|
28
|
-
TIMM = "timm"
|
|
29
|
-
TRANSFORMERS = "transformers"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def verify_out_features_out_indices(
|
|
33
|
-
out_features: Iterable[str] | None, out_indices: Iterable[int] | None, stage_names: Iterable[str] | None
|
|
34
|
-
):
|
|
35
|
-
"""
|
|
36
|
-
Verify that out_indices and out_features are valid for the given stage_names.
|
|
37
|
-
"""
|
|
38
|
-
if stage_names is None:
|
|
39
|
-
raise ValueError("Stage_names must be set for transformers backbones")
|
|
40
|
-
|
|
41
|
-
if out_features is not None:
|
|
42
|
-
if not isinstance(out_features, (list,)):
|
|
43
|
-
raise ValueError(f"out_features must be a list got {type(out_features)}")
|
|
44
|
-
if any(feat not in stage_names for feat in out_features):
|
|
45
|
-
raise ValueError(f"out_features must be a subset of stage_names: {stage_names} got {out_features}")
|
|
46
|
-
if len(out_features) != len(set(out_features)):
|
|
47
|
-
raise ValueError(f"out_features must not contain any duplicates, got {out_features}")
|
|
48
|
-
if out_features != (sorted_feats := [feat for feat in stage_names if feat in out_features]):
|
|
49
|
-
raise ValueError(
|
|
50
|
-
f"out_features must be in the same order as stage_names, expected {sorted_feats} got {out_features}"
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
if out_indices is not None:
|
|
54
|
-
if not isinstance(out_indices, list):
|
|
55
|
-
raise ValueError(f"out_indices must be a list, got {type(out_indices)}")
|
|
56
|
-
# Convert negative indices to their positive equivalent: [-1,] -> [len(stage_names) - 1,]
|
|
57
|
-
positive_indices = tuple(idx % len(stage_names) if idx < 0 else idx for idx in out_indices)
|
|
58
|
-
if any(idx for idx in positive_indices if idx not in range(len(stage_names))):
|
|
59
|
-
raise ValueError(f"out_indices must be valid indices for stage_names {stage_names}, got {out_indices}")
|
|
60
|
-
if len(positive_indices) != len(set(positive_indices)):
|
|
61
|
-
msg = f"out_indices must not contain any duplicates, got {out_indices}"
|
|
62
|
-
msg += f"(equivalent to {positive_indices}))" if positive_indices != out_indices else ""
|
|
63
|
-
raise ValueError(msg)
|
|
64
|
-
if positive_indices != tuple(sorted(positive_indices)):
|
|
65
|
-
sorted_negative = [idx for _, idx in sorted(zip(positive_indices, out_indices), key=lambda x: x[0])]
|
|
66
|
-
raise ValueError(
|
|
67
|
-
f"out_indices must be in the same order as stage_names, expected {sorted_negative} got {out_indices}"
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
if out_features is not None and out_indices is not None:
|
|
71
|
-
if len(out_features) != len(out_indices):
|
|
72
|
-
raise ValueError("out_features and out_indices should have the same length if both are set")
|
|
73
|
-
if out_features != [stage_names[idx] for idx in out_indices]:
|
|
74
|
-
raise ValueError("out_features and out_indices should correspond to the same stages if both are set")
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def _align_output_features_output_indices(
|
|
78
|
-
out_features: list[str] | None,
|
|
79
|
-
out_indices: list[int] | tuple[int, ...] | None,
|
|
80
|
-
stage_names: list[str],
|
|
81
|
-
):
|
|
82
|
-
"""
|
|
83
|
-
Finds the corresponding `out_features` and `out_indices` for the given `stage_names`.
|
|
84
|
-
|
|
85
|
-
The logic is as follows:
|
|
86
|
-
- `out_features` not set, `out_indices` set: `out_features` is set to the `out_features` corresponding to the
|
|
87
|
-
`out_indices`.
|
|
88
|
-
- `out_indices` not set, `out_features` set: `out_indices` is set to the `out_indices` corresponding to the
|
|
89
|
-
`out_features`.
|
|
90
|
-
- `out_indices` and `out_features` not set: `out_indices` and `out_features` are set to the last stage.
|
|
91
|
-
- `out_indices` and `out_features` set: input `out_indices` and `out_features` are returned.
|
|
92
|
-
|
|
93
|
-
Args:
|
|
94
|
-
out_features (`list[str]`): The names of the features for the backbone to output.
|
|
95
|
-
out_indices (`list[int]` or `tuple[int]`): The indices of the features for the backbone to output.
|
|
96
|
-
stage_names (`list[str]`): The names of the stages of the backbone.
|
|
97
|
-
"""
|
|
98
|
-
if out_indices is None and out_features is None:
|
|
99
|
-
out_indices = [len(stage_names) - 1]
|
|
100
|
-
out_features = [stage_names[-1]]
|
|
101
|
-
elif out_indices is None and out_features is not None:
|
|
102
|
-
out_indices = [stage_names.index(layer) for layer in out_features]
|
|
103
|
-
elif out_features is None and out_indices is not None:
|
|
104
|
-
out_features = [stage_names[idx] for idx in out_indices]
|
|
105
|
-
return out_features, out_indices
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def get_aligned_output_features_output_indices(
|
|
109
|
-
out_features: list[str] | None,
|
|
110
|
-
out_indices: list[int] | tuple[int] | None,
|
|
111
|
-
stage_names: list[str],
|
|
112
|
-
) -> tuple[list[str], list[int]]:
|
|
113
|
-
"""
|
|
114
|
-
Get the `out_features` and `out_indices` so that they are aligned.
|
|
115
|
-
|
|
116
|
-
The logic is as follows:
|
|
117
|
-
- `out_features` not set, `out_indices` set: `out_features` is set to the `out_features` corresponding to the
|
|
118
|
-
`out_indices`.
|
|
119
|
-
- `out_indices` not set, `out_features` set: `out_indices` is set to the `out_indices` corresponding to the
|
|
120
|
-
`out_features`.
|
|
121
|
-
- `out_indices` and `out_features` not set: `out_indices` and `out_features` are set to the last stage.
|
|
122
|
-
- `out_indices` and `out_features` set: they are verified to be aligned.
|
|
123
|
-
|
|
124
|
-
Args:
|
|
125
|
-
out_features (`list[str]`): The names of the features for the backbone to output.
|
|
126
|
-
out_indices (`list[int]` or `tuple[int]`): The indices of the features for the backbone to output.
|
|
127
|
-
stage_names (`list[str]`): The names of the stages of the backbone.
|
|
128
|
-
"""
|
|
129
|
-
out_indices = list(out_indices) if out_indices is not None else None
|
|
130
|
-
# First verify that the out_features and out_indices are valid
|
|
131
|
-
verify_out_features_out_indices(out_features=out_features, out_indices=out_indices, stage_names=stage_names)
|
|
132
|
-
output_features, output_indices = _align_output_features_output_indices(
|
|
133
|
-
out_features=out_features, out_indices=out_indices, stage_names=stage_names
|
|
6
|
+
class BackboneConfigMixin(BackboneConfigMixin):
|
|
7
|
+
warnings.warn(
|
|
8
|
+
"Importing `BackboneConfigMixin` from `utils/backbone_utils.py` is deprecated and will be removed in "
|
|
9
|
+
"Transformers v5.10. Import as `from transformers.backbone_utils import BackboneConfigMixin` instead.",
|
|
10
|
+
FutureWarning,
|
|
134
11
|
)
|
|
135
|
-
# Verify that the aligned out_features and out_indices are valid
|
|
136
|
-
verify_out_features_out_indices(out_features=output_features, out_indices=output_indices, stage_names=stage_names)
|
|
137
|
-
return output_features, output_indices
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
class BackboneMixin:
|
|
141
|
-
backbone_type: BackboneType | None = None
|
|
142
|
-
|
|
143
|
-
# Attribute to indicate if the backbone has attention and can return attention outputs.
|
|
144
|
-
# Should be set to `False` for conv-based models to be able to run `forward_with_filtered_kwargs`
|
|
145
|
-
has_attentions: bool = True
|
|
146
|
-
|
|
147
|
-
def _init_timm_backbone(self, config) -> None:
|
|
148
|
-
"""
|
|
149
|
-
Initialize the backbone model from timm The backbone must already be loaded to self._backbone
|
|
150
|
-
"""
|
|
151
|
-
if getattr(self, "_backbone", None) is None:
|
|
152
|
-
raise ValueError("self._backbone must be set before calling _init_timm_backbone")
|
|
153
|
-
|
|
154
|
-
# These will diagree with the defaults for the transformers models e.g. for resnet50
|
|
155
|
-
# the transformer model has out_features = ['stem', 'stage1', 'stage2', 'stage3', 'stage4']
|
|
156
|
-
# the timm model has out_features = ['act', 'layer1', 'layer2', 'layer3', 'layer4']
|
|
157
|
-
self.stage_names = [stage["module"] for stage in self._backbone.feature_info.info]
|
|
158
|
-
self.num_features = [stage["num_chs"] for stage in self._backbone.feature_info.info]
|
|
159
|
-
|
|
160
|
-
# In some timm versions, out_indices reflects the input type of out_indices on the `create_model` call,
|
|
161
|
-
# in later versions >= 1, it is always a tuple
|
|
162
|
-
out_indices = list(self._backbone.feature_info.out_indices)
|
|
163
|
-
out_features = self._backbone.feature_info.module_name()
|
|
164
|
-
|
|
165
|
-
# We verify the out indices and out features are valid
|
|
166
|
-
verify_out_features_out_indices(
|
|
167
|
-
out_features=out_features, out_indices=out_indices, stage_names=self.stage_names
|
|
168
|
-
)
|
|
169
|
-
self._out_features, self._out_indices = out_features, out_indices
|
|
170
|
-
|
|
171
|
-
def _init_transformers_backbone(self, config) -> None:
|
|
172
|
-
stage_names = getattr(config, "stage_names")
|
|
173
|
-
out_features = getattr(config, "out_features", None)
|
|
174
|
-
out_indices = getattr(config, "out_indices", None)
|
|
175
|
-
|
|
176
|
-
self.stage_names = stage_names
|
|
177
|
-
self._out_features, self._out_indices = get_aligned_output_features_output_indices(
|
|
178
|
-
out_features=out_features, out_indices=out_indices, stage_names=stage_names
|
|
179
|
-
)
|
|
180
|
-
# Number of channels for each stage. This is set in the transformer backbone model init
|
|
181
|
-
self.num_features = None
|
|
182
|
-
|
|
183
|
-
def _init_backbone(self, config) -> None:
|
|
184
|
-
"""
|
|
185
|
-
Method to initialize the backbone. This method is called by the constructor of the base class after the
|
|
186
|
-
pretrained model weights have been loaded.
|
|
187
|
-
"""
|
|
188
|
-
self.config = config
|
|
189
|
-
|
|
190
|
-
self.use_timm_backbone = getattr(config, "use_timm_backbone", False)
|
|
191
|
-
self.backbone_type = BackboneType.TIMM if self.use_timm_backbone else BackboneType.TRANSFORMERS
|
|
192
|
-
|
|
193
|
-
if self.backbone_type == BackboneType.TIMM:
|
|
194
|
-
self._init_timm_backbone(config)
|
|
195
|
-
elif self.backbone_type == BackboneType.TRANSFORMERS:
|
|
196
|
-
self._init_transformers_backbone(config)
|
|
197
|
-
else:
|
|
198
|
-
raise ValueError(f"backbone_type {self.backbone_type} not supported.")
|
|
199
12
|
|
|
200
|
-
@property
|
|
201
|
-
def out_features(self):
|
|
202
|
-
return self._out_features
|
|
203
13
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
""
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
out_features=out_features, out_indices=None, stage_names=self.stage_names
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
@property
|
|
214
|
-
def out_indices(self):
|
|
215
|
-
return self._out_indices
|
|
216
|
-
|
|
217
|
-
@out_indices.setter
|
|
218
|
-
def out_indices(self, out_indices: tuple[int] | list[int]):
|
|
219
|
-
"""
|
|
220
|
-
Set the out_indices attribute. This will also update the out_features attribute to match the new out_indices.
|
|
221
|
-
"""
|
|
222
|
-
self._out_features, self._out_indices = get_aligned_output_features_output_indices(
|
|
223
|
-
out_features=None, out_indices=out_indices, stage_names=self.stage_names
|
|
224
|
-
)
|
|
225
|
-
|
|
226
|
-
@property
|
|
227
|
-
def out_feature_channels(self):
|
|
228
|
-
# the current backbones will output the number of channels for each stage
|
|
229
|
-
# even if that stage is not in the out_features list.
|
|
230
|
-
return {stage: self.num_features[i] for i, stage in enumerate(self.stage_names)}
|
|
231
|
-
|
|
232
|
-
@property
|
|
233
|
-
def channels(self):
|
|
234
|
-
return [self.out_feature_channels[name] for name in self.out_features]
|
|
235
|
-
|
|
236
|
-
def forward_with_filtered_kwargs(self, *args, **kwargs):
|
|
237
|
-
if not self.has_attentions:
|
|
238
|
-
kwargs.pop("output_attentions", None)
|
|
239
|
-
if self.backbone_type == BackboneType.TIMM:
|
|
240
|
-
signature = dict(inspect.signature(self.forward).parameters)
|
|
241
|
-
kwargs = {k: v for k, v in kwargs.items() if k in signature}
|
|
242
|
-
return self(*args, **kwargs)
|
|
243
|
-
|
|
244
|
-
def forward(
|
|
245
|
-
self,
|
|
246
|
-
pixel_values,
|
|
247
|
-
output_hidden_states: bool | None = None,
|
|
248
|
-
output_attentions: bool | None = None,
|
|
249
|
-
return_dict: bool | None = None,
|
|
250
|
-
):
|
|
251
|
-
raise NotImplementedError("This method should be implemented by the derived class.")
|
|
252
|
-
|
|
253
|
-
def to_dict(self):
|
|
254
|
-
"""
|
|
255
|
-
Serializes this instance to a Python dictionary. Override the default `to_dict()` from `PreTrainedConfig` to
|
|
256
|
-
include the `out_features` and `out_indices` attributes.
|
|
257
|
-
"""
|
|
258
|
-
output = super().to_dict()
|
|
259
|
-
output["out_features"] = output.pop("_out_features")
|
|
260
|
-
output["out_indices"] = output.pop("_out_indices")
|
|
261
|
-
return output
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
class BackboneConfigMixin:
|
|
265
|
-
"""
|
|
266
|
-
A Mixin to support handling the `out_features` and `out_indices` attributes for the backbone configurations.
|
|
267
|
-
"""
|
|
268
|
-
|
|
269
|
-
@property
|
|
270
|
-
def out_features(self):
|
|
271
|
-
return self._out_features
|
|
272
|
-
|
|
273
|
-
@out_features.setter
|
|
274
|
-
def out_features(self, out_features: list[str]):
|
|
275
|
-
"""
|
|
276
|
-
Set the out_features attribute. This will also update the out_indices attribute to match the new out_features.
|
|
277
|
-
"""
|
|
278
|
-
self._out_features, self._out_indices = get_aligned_output_features_output_indices(
|
|
279
|
-
out_features=out_features, out_indices=None, stage_names=self.stage_names
|
|
280
|
-
)
|
|
281
|
-
|
|
282
|
-
@property
|
|
283
|
-
def out_indices(self):
|
|
284
|
-
return self._out_indices
|
|
285
|
-
|
|
286
|
-
@out_indices.setter
|
|
287
|
-
def out_indices(self, out_indices: tuple[int, ...] | list[int]):
|
|
288
|
-
"""
|
|
289
|
-
Set the out_indices attribute. This will also update the out_features attribute to match the new out_indices.
|
|
290
|
-
"""
|
|
291
|
-
self._out_features, self._out_indices = get_aligned_output_features_output_indices(
|
|
292
|
-
out_features=None, out_indices=out_indices, stage_names=self.stage_names
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
def to_dict(self):
|
|
296
|
-
"""
|
|
297
|
-
Serializes this instance to a Python dictionary. Override the default `to_dict()` from `PreTrainedConfig` to
|
|
298
|
-
include the `out_features` and `out_indices` attributes.
|
|
299
|
-
"""
|
|
300
|
-
output = super().to_dict()
|
|
301
|
-
output["out_features"] = output.pop("_out_features")
|
|
302
|
-
output["out_indices"] = output.pop("_out_indices")
|
|
303
|
-
return output
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
def load_backbone(config):
|
|
307
|
-
"""
|
|
308
|
-
Loads the backbone model from a config object.
|
|
309
|
-
|
|
310
|
-
If the config is from the backbone model itself, then we return a backbone model with randomly initialized
|
|
311
|
-
weights.
|
|
312
|
-
|
|
313
|
-
If the config is from the parent model of the backbone model itself, then we load the pretrained backbone weights
|
|
314
|
-
if specified.
|
|
315
|
-
"""
|
|
316
|
-
from transformers import AutoBackbone, AutoConfig
|
|
317
|
-
|
|
318
|
-
backbone_config = getattr(config, "backbone_config", None)
|
|
319
|
-
use_timm_backbone = getattr(config, "use_timm_backbone", None)
|
|
320
|
-
use_pretrained_backbone = getattr(config, "use_pretrained_backbone", None)
|
|
321
|
-
backbone_checkpoint = getattr(config, "backbone", None)
|
|
322
|
-
backbone_kwargs = getattr(config, "backbone_kwargs", None)
|
|
323
|
-
backbone_kwargs = {} if backbone_kwargs is None else backbone_kwargs
|
|
324
|
-
|
|
325
|
-
if backbone_kwargs and backbone_config is not None:
|
|
326
|
-
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")
|
|
327
|
-
|
|
328
|
-
# If there is a backbone_config and a backbone checkpoint, and use_pretrained_backbone=False then the desired
|
|
329
|
-
# behaviour is ill-defined: do you want to load from the checkpoint's config or the backbone_config?
|
|
330
|
-
if backbone_config is not None and backbone_checkpoint is not None and use_pretrained_backbone is not None:
|
|
331
|
-
raise ValueError("Cannot specify both config.backbone_config and config.backbone")
|
|
332
|
-
|
|
333
|
-
# If any of the following are set, then the config passed in is from a model which contains a backbone.
|
|
334
|
-
if backbone_config is None and use_timm_backbone is None and backbone_checkpoint is None:
|
|
335
|
-
return AutoBackbone.from_config(config=config, **backbone_kwargs)
|
|
336
|
-
|
|
337
|
-
# config from the parent model that has a backbone
|
|
338
|
-
if use_timm_backbone:
|
|
339
|
-
if backbone_checkpoint is None:
|
|
340
|
-
raise ValueError("config.backbone must be set if use_timm_backbone is True")
|
|
341
|
-
# Because of how timm backbones were originally added to models, we need to pass in use_pretrained_backbone
|
|
342
|
-
# to determine whether to load the pretrained weights.
|
|
343
|
-
backbone = AutoBackbone.from_pretrained(
|
|
344
|
-
backbone_checkpoint,
|
|
345
|
-
use_timm_backbone=use_timm_backbone,
|
|
346
|
-
use_pretrained_backbone=use_pretrained_backbone,
|
|
347
|
-
**backbone_kwargs,
|
|
348
|
-
)
|
|
349
|
-
elif use_pretrained_backbone:
|
|
350
|
-
if backbone_checkpoint is None:
|
|
351
|
-
raise ValueError("config.backbone must be set if use_pretrained_backbone is True")
|
|
352
|
-
backbone = AutoBackbone.from_pretrained(backbone_checkpoint, **backbone_kwargs)
|
|
353
|
-
else:
|
|
354
|
-
if backbone_config is None and backbone_checkpoint is None:
|
|
355
|
-
raise ValueError("Either config.backbone_config or config.backbone must be set")
|
|
356
|
-
if backbone_config is None:
|
|
357
|
-
backbone_config = AutoConfig.from_pretrained(backbone_checkpoint, **backbone_kwargs)
|
|
358
|
-
backbone = AutoBackbone.from_config(config=backbone_config)
|
|
359
|
-
return backbone
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
def verify_backbone_config_arguments(
|
|
363
|
-
use_timm_backbone: bool,
|
|
364
|
-
use_pretrained_backbone: bool,
|
|
365
|
-
backbone: str | None,
|
|
366
|
-
backbone_config: Union[dict, "PreTrainedConfig"] | None,
|
|
367
|
-
backbone_kwargs: dict | None,
|
|
368
|
-
):
|
|
369
|
-
"""
|
|
370
|
-
Verify that the config arguments to be passed to load_backbone are valid
|
|
371
|
-
"""
|
|
372
|
-
if backbone_config is not None and backbone is not None:
|
|
373
|
-
raise ValueError("You can't specify both `backbone` and `backbone_config`.")
|
|
374
|
-
|
|
375
|
-
if backbone_config is not None and use_timm_backbone:
|
|
376
|
-
raise ValueError("You can't specify both `backbone_config` and `use_timm_backbone`.")
|
|
377
|
-
|
|
378
|
-
if backbone_kwargs is not None and backbone_kwargs and backbone_config is not None:
|
|
379
|
-
raise ValueError("You can't specify both `backbone_kwargs` and `backbone_config`.")
|
|
14
|
+
class BackboneMixin(BackboneMixin):
|
|
15
|
+
warnings.warn(
|
|
16
|
+
"Importing `BackboneMixin` from `utils/backbone_utils.py` is deprecated and will be removed in "
|
|
17
|
+
"Transformers v5.10. Import as `from transformers.backbone_utils import BackboneMixin` instead.",
|
|
18
|
+
FutureWarning,
|
|
19
|
+
)
|
transformers/utils/doc.py
CHANGED
|
@@ -677,27 +677,6 @@ AUDIO_FRAME_CLASSIFICATION_SAMPLE = PT_SPEECH_FRAME_CLASS_SAMPLE
|
|
|
677
677
|
AUDIO_XVECTOR_SAMPLE = PT_SPEECH_XVECTOR_SAMPLE
|
|
678
678
|
|
|
679
679
|
|
|
680
|
-
IMAGE_TO_TEXT_SAMPLE = r"""
|
|
681
|
-
Example:
|
|
682
|
-
|
|
683
|
-
```python
|
|
684
|
-
>>> from PIL import Image
|
|
685
|
-
>>> import requests
|
|
686
|
-
>>> from transformers import AutoProcessor, {model_class}
|
|
687
|
-
|
|
688
|
-
>>> processor = AutoProcessor.from_pretrained("{checkpoint}")
|
|
689
|
-
>>> model = {model_class}.from_pretrained("{checkpoint}")
|
|
690
|
-
|
|
691
|
-
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
|
692
|
-
>>> image = Image.open(requests.get(url, stream=True).raw)
|
|
693
|
-
|
|
694
|
-
>>> inputs = processor(images=image, return_tensors="pt")
|
|
695
|
-
|
|
696
|
-
>>> outputs = model(**inputs)
|
|
697
|
-
```
|
|
698
|
-
"""
|
|
699
|
-
|
|
700
|
-
|
|
701
680
|
DEPTH_ESTIMATION_SAMPLE = r"""
|
|
702
681
|
Example:
|
|
703
682
|
|
|
@@ -705,10 +684,12 @@ DEPTH_ESTIMATION_SAMPLE = r"""
|
|
|
705
684
|
>>> from transformers import AutoImageProcessor, {model_class}
|
|
706
685
|
>>> import torch
|
|
707
686
|
>>> from PIL import Image
|
|
708
|
-
>>> import requests
|
|
687
|
+
>>> import httpx
|
|
688
|
+
>>> from io import BytesIO
|
|
709
689
|
|
|
710
690
|
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
|
711
|
-
>>>
|
|
691
|
+
>>> with httpx.stream("GET", url) as response:
|
|
692
|
+
... image = Image.open(BytesIO(response.read())).convert("RGB")
|
|
712
693
|
|
|
713
694
|
>>> processor = AutoImageProcessor.from_pretrained("{checkpoint}")
|
|
714
695
|
>>> model = {model_class}.from_pretrained("{checkpoint}")
|
|
@@ -842,14 +823,6 @@ OBJECT_DETECTION_SAMPLE = r"""
|
|
|
842
823
|
QUESTION_ANSWERING_SAMPLE = PT_QUESTION_ANSWERING_SAMPLE
|
|
843
824
|
|
|
844
825
|
|
|
845
|
-
TEXT2TEXT_GENERATION_SAMPLE = r"""
|
|
846
|
-
Example:
|
|
847
|
-
|
|
848
|
-
```python
|
|
849
|
-
```
|
|
850
|
-
"""
|
|
851
|
-
|
|
852
|
-
|
|
853
826
|
TEXT_CLASSIFICATION_SAMPLE = PT_SEQUENCE_CLASSIFICATION_SAMPLE
|
|
854
827
|
|
|
855
828
|
|
|
@@ -883,7 +856,6 @@ IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLE = r"""
|
|
|
883
856
|
|
|
884
857
|
```python
|
|
885
858
|
>>> from PIL import Image
|
|
886
|
-
>>> import requests
|
|
887
859
|
>>> from transformers import AutoProcessor, {model_class}
|
|
888
860
|
|
|
889
861
|
>>> model = {model_class}.from_pretrained("{checkpoint}")
|
|
@@ -921,7 +893,6 @@ PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGS = OrderedDict(
|
|
|
921
893
|
("audio-classification", AUDIO_CLASSIFICATION_SAMPLE),
|
|
922
894
|
("audio-xvector", AUDIO_XVECTOR_SAMPLE),
|
|
923
895
|
("image-text-to-text", IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLE),
|
|
924
|
-
("image-to-text", IMAGE_TO_TEXT_SAMPLE),
|
|
925
896
|
("visual-question-answering", VISUAL_QUESTION_ANSWERING_SAMPLE),
|
|
926
897
|
("depth-estimation", DEPTH_ESTIMATION_SAMPLE),
|
|
927
898
|
("video-classification", VIDEO_CLASSIFICATION_SAMPLE),
|
|
@@ -936,7 +907,6 @@ PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGS = OrderedDict(
|
|
|
936
907
|
("table-question-answering", TABLE_QUESTION_ANSWERING_SAMPLE),
|
|
937
908
|
("document-question-answering", DOCUMENT_QUESTION_ANSWERING_SAMPLE),
|
|
938
909
|
("question-answering", QUESTION_ANSWERING_SAMPLE),
|
|
939
|
-
("text2text-generation", TEXT2TEXT_GENERATION_SAMPLE),
|
|
940
910
|
("next-sentence-prediction", NEXT_SENTENCE_PREDICTION_SAMPLE),
|
|
941
911
|
("multiple-choice", MULTIPLE_CHOICE_SAMPLE),
|
|
942
912
|
("text-classification", TEXT_CLASSIFICATION_SAMPLE),
|
|
@@ -961,7 +931,6 @@ MODELS_TO_PIPELINE = OrderedDict(
|
|
|
961
931
|
("MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES", "audio-xvector"),
|
|
962
932
|
# Vision
|
|
963
933
|
("MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES", "image-text-to-text"),
|
|
964
|
-
("MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES", "image-to-text"),
|
|
965
934
|
("MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES", "visual-question-answering"),
|
|
966
935
|
("MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES", "depth-estimation"),
|
|
967
936
|
("MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES", "video-classification"),
|
|
@@ -977,7 +946,6 @@ MODELS_TO_PIPELINE = OrderedDict(
|
|
|
977
946
|
("MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES", "table-question-answering"),
|
|
978
947
|
("MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES", "document-question-answering"),
|
|
979
948
|
("MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES", "question-answering"),
|
|
980
|
-
("MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES", "text2text-generation"),
|
|
981
949
|
("MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES", "next-sentence-prediction"),
|
|
982
950
|
("MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES", "multiple-choice"),
|
|
983
951
|
("MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES", "text-classification"),
|