transformers-5.0.0rc3-py3-none-any.whl → transformers-5.1.0-py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry, and is provided for informational purposes only. Each entry below lists a file path followed by the number of lines added (+) and removed (-) in that file.
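
Since wheels are plain zip archives, a per-file summary like the one below can be reproduced with nothing beyond the Python standard library. A minimal sketch, assuming both wheels have already been downloaded to the working directory under their canonical filenames (adjust the paths as needed):

```python
# Sketch: recompute "+added -removed" line counts per .py file between two wheels.
import difflib
import zipfile

OLD_WHEEL = "transformers-5.0.0rc3-py3-none-any.whl"  # assumed local path
NEW_WHEEL = "transformers-5.1.0-py3-none-any.whl"     # assumed local path


def read_members(wheel_path: str) -> dict[str, str]:
    """Map each .py member name to its text content (a wheel is a zip archive)."""
    with zipfile.ZipFile(wheel_path) as zf:
        return {
            name: zf.read(name).decode("utf-8", errors="replace")
            for name in zf.namelist()
            if name.endswith(".py")
        }


old, new = read_members(OLD_WHEEL), read_members(NEW_WHEEL)
for name in sorted(old.keys() | new.keys()):
    diff = difflib.unified_diff(
        old.get(name, "").splitlines(),
        new.get(name, "").splitlines(),
        lineterm="",
    )
    added = removed = 0
    for line in diff:
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"- {name} +{added} -{removed}")
```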
- transformers/__init__.py +4 -11
- transformers/activations.py +2 -2
- transformers/backbone_utils.py +326 -0
- transformers/cache_utils.py +11 -2
- transformers/cli/serve.py +11 -8
- transformers/configuration_utils.py +1 -69
- transformers/conversion_mapping.py +146 -26
- transformers/convert_slow_tokenizer.py +6 -4
- transformers/core_model_loading.py +207 -118
- transformers/dependency_versions_check.py +0 -1
- transformers/dependency_versions_table.py +7 -8
- transformers/file_utils.py +0 -2
- transformers/generation/candidate_generator.py +1 -2
- transformers/generation/continuous_batching/cache.py +40 -38
- transformers/generation/continuous_batching/cache_manager.py +3 -16
- transformers/generation/continuous_batching/continuous_api.py +94 -406
- transformers/generation/continuous_batching/input_ouputs.py +464 -0
- transformers/generation/continuous_batching/requests.py +54 -17
- transformers/generation/continuous_batching/scheduler.py +77 -95
- transformers/generation/logits_process.py +10 -5
- transformers/generation/stopping_criteria.py +1 -2
- transformers/generation/utils.py +75 -95
- transformers/image_processing_utils.py +0 -3
- transformers/image_processing_utils_fast.py +17 -18
- transformers/image_transforms.py +44 -13
- transformers/image_utils.py +0 -5
- transformers/initialization.py +57 -0
- transformers/integrations/__init__.py +10 -24
- transformers/integrations/accelerate.py +47 -11
- transformers/integrations/deepspeed.py +145 -3
- transformers/integrations/executorch.py +2 -6
- transformers/integrations/finegrained_fp8.py +142 -7
- transformers/integrations/flash_attention.py +2 -7
- transformers/integrations/hub_kernels.py +18 -7
- transformers/integrations/moe.py +226 -106
- transformers/integrations/mxfp4.py +47 -34
- transformers/integrations/peft.py +488 -176
- transformers/integrations/tensor_parallel.py +641 -581
- transformers/masking_utils.py +153 -9
- transformers/modeling_flash_attention_utils.py +1 -2
- transformers/modeling_utils.py +359 -358
- transformers/models/__init__.py +6 -0
- transformers/models/afmoe/configuration_afmoe.py +14 -4
- transformers/models/afmoe/modeling_afmoe.py +8 -8
- transformers/models/afmoe/modular_afmoe.py +7 -7
- transformers/models/aimv2/configuration_aimv2.py +2 -7
- transformers/models/aimv2/modeling_aimv2.py +26 -24
- transformers/models/aimv2/modular_aimv2.py +8 -12
- transformers/models/albert/configuration_albert.py +8 -1
- transformers/models/albert/modeling_albert.py +3 -3
- transformers/models/align/configuration_align.py +8 -5
- transformers/models/align/modeling_align.py +22 -24
- transformers/models/altclip/configuration_altclip.py +4 -6
- transformers/models/altclip/modeling_altclip.py +30 -26
- transformers/models/apertus/configuration_apertus.py +5 -7
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/apertus/modular_apertus.py +8 -10
- transformers/models/arcee/configuration_arcee.py +5 -7
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/configuration_aria.py +11 -21
- transformers/models/aria/modeling_aria.py +39 -36
- transformers/models/aria/modular_aria.py +33 -39
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +3 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +39 -30
- transformers/models/audioflamingo3/modular_audioflamingo3.py +41 -27
- transformers/models/auto/auto_factory.py +8 -6
- transformers/models/auto/configuration_auto.py +22 -0
- transformers/models/auto/image_processing_auto.py +17 -13
- transformers/models/auto/modeling_auto.py +15 -0
- transformers/models/auto/processing_auto.py +9 -18
- transformers/models/auto/tokenization_auto.py +17 -15
- transformers/models/autoformer/modeling_autoformer.py +2 -1
- transformers/models/aya_vision/configuration_aya_vision.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +29 -62
- transformers/models/aya_vision/modular_aya_vision.py +20 -45
- transformers/models/bamba/configuration_bamba.py +17 -7
- transformers/models/bamba/modeling_bamba.py +23 -55
- transformers/models/bamba/modular_bamba.py +19 -54
- transformers/models/bark/configuration_bark.py +2 -1
- transformers/models/bark/modeling_bark.py +24 -10
- transformers/models/bart/configuration_bart.py +9 -4
- transformers/models/bart/modeling_bart.py +9 -12
- transformers/models/beit/configuration_beit.py +2 -4
- transformers/models/beit/image_processing_beit_fast.py +3 -3
- transformers/models/beit/modeling_beit.py +14 -9
- transformers/models/bert/configuration_bert.py +12 -1
- transformers/models/bert/modeling_bert.py +6 -30
- transformers/models/bert_generation/configuration_bert_generation.py +17 -1
- transformers/models/bert_generation/modeling_bert_generation.py +6 -6
- transformers/models/big_bird/configuration_big_bird.py +12 -8
- transformers/models/big_bird/modeling_big_bird.py +0 -15
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -8
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +9 -7
- transformers/models/biogpt/configuration_biogpt.py +8 -1
- transformers/models/biogpt/modeling_biogpt.py +4 -8
- transformers/models/biogpt/modular_biogpt.py +1 -5
- transformers/models/bit/configuration_bit.py +2 -4
- transformers/models/bit/modeling_bit.py +6 -5
- transformers/models/bitnet/configuration_bitnet.py +5 -7
- transformers/models/bitnet/modeling_bitnet.py +3 -4
- transformers/models/bitnet/modular_bitnet.py +3 -4
- transformers/models/blenderbot/configuration_blenderbot.py +8 -4
- transformers/models/blenderbot/modeling_blenderbot.py +4 -4
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +8 -4
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +4 -4
- transformers/models/blip/configuration_blip.py +9 -9
- transformers/models/blip/modeling_blip.py +55 -37
- transformers/models/blip_2/configuration_blip_2.py +2 -1
- transformers/models/blip_2/modeling_blip_2.py +81 -56
- transformers/models/bloom/configuration_bloom.py +5 -1
- transformers/models/bloom/modeling_bloom.py +2 -1
- transformers/models/blt/configuration_blt.py +23 -12
- transformers/models/blt/modeling_blt.py +20 -14
- transformers/models/blt/modular_blt.py +70 -10
- transformers/models/bridgetower/configuration_bridgetower.py +7 -1
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +6 -6
- transformers/models/bridgetower/modeling_bridgetower.py +29 -15
- transformers/models/bros/configuration_bros.py +24 -17
- transformers/models/camembert/configuration_camembert.py +8 -1
- transformers/models/camembert/modeling_camembert.py +6 -6
- transformers/models/canine/configuration_canine.py +4 -1
- transformers/models/chameleon/configuration_chameleon.py +5 -7
- transformers/models/chameleon/image_processing_chameleon_fast.py +5 -5
- transformers/models/chameleon/modeling_chameleon.py +82 -36
- transformers/models/chinese_clip/configuration_chinese_clip.py +10 -7
- transformers/models/chinese_clip/modeling_chinese_clip.py +28 -29
- transformers/models/clap/configuration_clap.py +4 -8
- transformers/models/clap/modeling_clap.py +21 -22
- transformers/models/clip/configuration_clip.py +4 -1
- transformers/models/clip/image_processing_clip_fast.py +9 -0
- transformers/models/clip/modeling_clip.py +25 -22
- transformers/models/clipseg/configuration_clipseg.py +4 -1
- transformers/models/clipseg/modeling_clipseg.py +27 -25
- transformers/models/clipseg/processing_clipseg.py +11 -3
- transformers/models/clvp/configuration_clvp.py +14 -2
- transformers/models/clvp/modeling_clvp.py +19 -30
- transformers/models/codegen/configuration_codegen.py +4 -3
- transformers/models/codegen/modeling_codegen.py +2 -1
- transformers/models/cohere/configuration_cohere.py +5 -7
- transformers/models/cohere/modeling_cohere.py +4 -4
- transformers/models/cohere/modular_cohere.py +3 -3
- transformers/models/cohere2/configuration_cohere2.py +6 -8
- transformers/models/cohere2/modeling_cohere2.py +4 -4
- transformers/models/cohere2/modular_cohere2.py +9 -11
- transformers/models/cohere2_vision/configuration_cohere2_vision.py +5 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +3 -3
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +24 -25
- transformers/models/cohere2_vision/modular_cohere2_vision.py +20 -20
- transformers/models/colqwen2/modeling_colqwen2.py +7 -6
- transformers/models/colqwen2/modular_colqwen2.py +7 -6
- transformers/models/conditional_detr/configuration_conditional_detr.py +19 -46
- transformers/models/conditional_detr/image_processing_conditional_detr.py +3 -4
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +28 -14
- transformers/models/conditional_detr/modeling_conditional_detr.py +794 -942
- transformers/models/conditional_detr/modular_conditional_detr.py +901 -3
- transformers/models/convbert/configuration_convbert.py +11 -7
- transformers/models/convnext/configuration_convnext.py +2 -4
- transformers/models/convnext/image_processing_convnext_fast.py +2 -2
- transformers/models/convnext/modeling_convnext.py +7 -6
- transformers/models/convnextv2/configuration_convnextv2.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +7 -6
- transformers/models/cpmant/configuration_cpmant.py +4 -0
- transformers/models/csm/configuration_csm.py +9 -15
- transformers/models/csm/modeling_csm.py +3 -3
- transformers/models/ctrl/configuration_ctrl.py +16 -0
- transformers/models/ctrl/modeling_ctrl.py +13 -25
- transformers/models/cwm/configuration_cwm.py +5 -7
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/configuration_d_fine.py +10 -56
- transformers/models/d_fine/modeling_d_fine.py +728 -868
- transformers/models/d_fine/modular_d_fine.py +335 -412
- transformers/models/dab_detr/configuration_dab_detr.py +22 -48
- transformers/models/dab_detr/modeling_dab_detr.py +11 -7
- transformers/models/dac/modeling_dac.py +1 -1
- transformers/models/data2vec/configuration_data2vec_audio.py +4 -1
- transformers/models/data2vec/configuration_data2vec_text.py +11 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +3 -3
- transformers/models/data2vec/modeling_data2vec_text.py +6 -6
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -2
- transformers/models/dbrx/configuration_dbrx.py +11 -3
- transformers/models/dbrx/modeling_dbrx.py +6 -6
- transformers/models/dbrx/modular_dbrx.py +6 -6
- transformers/models/deberta/configuration_deberta.py +6 -0
- transformers/models/deberta_v2/configuration_deberta_v2.py +6 -0
- transformers/models/decision_transformer/configuration_decision_transformer.py +3 -1
- transformers/models/decision_transformer/modeling_decision_transformer.py +3 -3
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +7 -10
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -8
- transformers/models/deepseek_v2/modular_deepseek_v2.py +8 -10
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +7 -10
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +7 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -5
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +4 -0
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +2 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +5 -5
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +17 -12
- transformers/models/deepseek_vl/modular_deepseek_vl.py +4 -0
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +4 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +2 -2
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +6 -6
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +68 -24
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +70 -19
- transformers/models/deformable_detr/configuration_deformable_detr.py +22 -45
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +25 -11
- transformers/models/deformable_detr/modeling_deformable_detr.py +410 -607
- transformers/models/deformable_detr/modular_deformable_detr.py +1385 -3
- transformers/models/deit/modeling_deit.py +11 -7
- transformers/models/depth_anything/configuration_depth_anything.py +12 -42
- transformers/models/depth_anything/modeling_depth_anything.py +5 -3
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +2 -2
- transformers/models/depth_pro/modeling_depth_pro.py +8 -4
- transformers/models/detr/configuration_detr.py +18 -49
- transformers/models/detr/image_processing_detr_fast.py +11 -11
- transformers/models/detr/modeling_detr.py +695 -734
- transformers/models/dia/configuration_dia.py +4 -7
- transformers/models/dia/generation_dia.py +8 -17
- transformers/models/dia/modeling_dia.py +7 -7
- transformers/models/dia/modular_dia.py +4 -4
- transformers/models/diffllama/configuration_diffllama.py +5 -7
- transformers/models/diffllama/modeling_diffllama.py +3 -8
- transformers/models/diffllama/modular_diffllama.py +2 -7
- transformers/models/dinat/configuration_dinat.py +2 -4
- transformers/models/dinat/modeling_dinat.py +7 -6
- transformers/models/dinov2/configuration_dinov2.py +2 -4
- transformers/models/dinov2/modeling_dinov2.py +9 -8
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +2 -4
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +9 -8
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +6 -7
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +2 -4
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +2 -3
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +2 -4
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +2 -2
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -6
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -6
- transformers/models/distilbert/configuration_distilbert.py +8 -1
- transformers/models/distilbert/modeling_distilbert.py +3 -3
- transformers/models/doge/configuration_doge.py +17 -7
- transformers/models/doge/modeling_doge.py +4 -4
- transformers/models/doge/modular_doge.py +20 -10
- transformers/models/donut/image_processing_donut_fast.py +4 -4
- transformers/models/dots1/configuration_dots1.py +16 -7
- transformers/models/dots1/modeling_dots1.py +4 -4
- transformers/models/dpr/configuration_dpr.py +19 -1
- transformers/models/dpt/configuration_dpt.py +23 -65
- transformers/models/dpt/image_processing_dpt_fast.py +5 -5
- transformers/models/dpt/modeling_dpt.py +19 -15
- transformers/models/dpt/modular_dpt.py +4 -4
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +53 -53
- transformers/models/edgetam/modular_edgetam.py +5 -7
- transformers/models/edgetam_video/modeling_edgetam_video.py +55 -56
- transformers/models/edgetam_video/modular_edgetam_video.py +9 -9
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +4 -3
- transformers/models/efficientloftr/modeling_efficientloftr.py +19 -9
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +2 -2
- transformers/models/electra/configuration_electra.py +13 -2
- transformers/models/electra/modeling_electra.py +6 -6
- transformers/models/emu3/configuration_emu3.py +12 -10
- transformers/models/emu3/modeling_emu3.py +84 -47
- transformers/models/emu3/modular_emu3.py +77 -39
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +12 -1
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +20 -24
- transformers/models/eomt/configuration_eomt.py +12 -13
- transformers/models/eomt/image_processing_eomt_fast.py +3 -3
- transformers/models/eomt/modeling_eomt.py +3 -3
- transformers/models/eomt/modular_eomt.py +17 -17
- transformers/models/eomt_dinov3/__init__.py +28 -0
- transformers/models/eomt_dinov3/configuration_eomt_dinov3.py +204 -0
- transformers/models/eomt_dinov3/modeling_eomt_dinov3.py +1376 -0
- transformers/models/eomt_dinov3/modular_eomt_dinov3.py +454 -0
- transformers/models/ernie/configuration_ernie.py +24 -2
- transformers/models/ernie/modeling_ernie.py +6 -30
- transformers/models/ernie4_5/configuration_ernie4_5.py +5 -7
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +7 -10
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +4 -4
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +17 -6
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +229 -188
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +79 -55
- transformers/models/esm/configuration_esm.py +9 -11
- transformers/models/esm/modeling_esm.py +3 -3
- transformers/models/esm/modeling_esmfold.py +1 -6
- transformers/models/esm/openfold_utils/protein.py +2 -3
- transformers/models/evolla/configuration_evolla.py +21 -8
- transformers/models/evolla/modeling_evolla.py +11 -7
- transformers/models/evolla/modular_evolla.py +5 -1
- transformers/models/exaone4/configuration_exaone4.py +8 -5
- transformers/models/exaone4/modeling_exaone4.py +4 -4
- transformers/models/exaone4/modular_exaone4.py +11 -8
- transformers/models/exaone_moe/__init__.py +27 -0
- transformers/models/exaone_moe/configuration_exaone_moe.py +235 -0
- transformers/models/exaone_moe/modeling_exaone_moe.py +665 -0
- transformers/models/exaone_moe/modular_exaone_moe.py +373 -0
- transformers/models/falcon/configuration_falcon.py +9 -1
- transformers/models/falcon/modeling_falcon.py +3 -8
- transformers/models/falcon_h1/configuration_falcon_h1.py +17 -8
- transformers/models/falcon_h1/modeling_falcon_h1.py +22 -54
- transformers/models/falcon_h1/modular_falcon_h1.py +21 -52
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +5 -1
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +18 -26
- transformers/models/falcon_mamba/modular_falcon_mamba.py +4 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +10 -1
- transformers/models/fast_vlm/modeling_fast_vlm.py +37 -64
- transformers/models/fast_vlm/modular_fast_vlm.py +146 -35
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +0 -1
- transformers/models/flaubert/configuration_flaubert.py +10 -4
- transformers/models/flaubert/modeling_flaubert.py +1 -1
- transformers/models/flava/configuration_flava.py +4 -3
- transformers/models/flava/image_processing_flava_fast.py +4 -4
- transformers/models/flava/modeling_flava.py +36 -28
- transformers/models/flex_olmo/configuration_flex_olmo.py +11 -14
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -4
- transformers/models/flex_olmo/modular_flex_olmo.py +11 -14
- transformers/models/florence2/configuration_florence2.py +4 -0
- transformers/models/florence2/modeling_florence2.py +57 -32
- transformers/models/florence2/modular_florence2.py +48 -26
- transformers/models/fnet/configuration_fnet.py +6 -1
- transformers/models/focalnet/configuration_focalnet.py +2 -4
- transformers/models/focalnet/modeling_focalnet.py +10 -7
- transformers/models/fsmt/configuration_fsmt.py +12 -16
- transformers/models/funnel/configuration_funnel.py +8 -0
- transformers/models/fuyu/configuration_fuyu.py +5 -8
- transformers/models/fuyu/image_processing_fuyu_fast.py +5 -4
- transformers/models/fuyu/modeling_fuyu.py +24 -23
- transformers/models/gemma/configuration_gemma.py +5 -7
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/modular_gemma.py +5 -7
- transformers/models/gemma2/configuration_gemma2.py +5 -7
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +8 -10
- transformers/models/gemma3/configuration_gemma3.py +28 -22
- transformers/models/gemma3/image_processing_gemma3_fast.py +2 -2
- transformers/models/gemma3/modeling_gemma3.py +37 -33
- transformers/models/gemma3/modular_gemma3.py +46 -42
- transformers/models/gemma3n/configuration_gemma3n.py +35 -22
- transformers/models/gemma3n/modeling_gemma3n.py +86 -58
- transformers/models/gemma3n/modular_gemma3n.py +112 -75
- transformers/models/git/configuration_git.py +5 -7
- transformers/models/git/modeling_git.py +31 -41
- transformers/models/glm/configuration_glm.py +7 -9
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/configuration_glm4.py +7 -9
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm46v/configuration_glm46v.py +4 -0
- transformers/models/glm46v/image_processing_glm46v.py +5 -2
- transformers/models/glm46v/image_processing_glm46v_fast.py +2 -2
- transformers/models/glm46v/modeling_glm46v.py +91 -46
- transformers/models/glm46v/modular_glm46v.py +4 -0
- transformers/models/glm4_moe/configuration_glm4_moe.py +17 -7
- transformers/models/glm4_moe/modeling_glm4_moe.py +4 -4
- transformers/models/glm4_moe/modular_glm4_moe.py +17 -7
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +8 -10
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +7 -7
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +8 -10
- transformers/models/glm4v/configuration_glm4v.py +12 -8
- transformers/models/glm4v/image_processing_glm4v.py +5 -2
- transformers/models/glm4v/image_processing_glm4v_fast.py +2 -2
- transformers/models/glm4v/modeling_glm4v.py +120 -63
- transformers/models/glm4v/modular_glm4v.py +82 -50
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +18 -6
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +115 -63
- transformers/models/glm4v_moe/modular_glm4v_moe.py +23 -12
- transformers/models/glm_image/configuration_glm_image.py +26 -20
- transformers/models/glm_image/image_processing_glm_image.py +1 -1
- transformers/models/glm_image/image_processing_glm_image_fast.py +5 -7
- transformers/models/glm_image/modeling_glm_image.py +337 -236
- transformers/models/glm_image/modular_glm_image.py +415 -255
- transformers/models/glm_image/processing_glm_image.py +65 -17
- transformers/{pipelines/deprecated → models/glm_ocr}/__init__.py +15 -2
- transformers/models/glm_ocr/configuration_glm_ocr.py +312 -0
- transformers/models/glm_ocr/modeling_glm_ocr.py +1633 -0
- transformers/models/glm_ocr/modular_glm_ocr.py +428 -0
- transformers/models/glmasr/modeling_glmasr.py +34 -28
- transformers/models/glmasr/modular_glmasr.py +23 -11
- transformers/models/glpn/image_processing_glpn_fast.py +3 -3
- transformers/models/glpn/modeling_glpn.py +4 -2
- transformers/models/got_ocr2/configuration_got_ocr2.py +6 -6
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +3 -3
- transformers/models/got_ocr2/modeling_got_ocr2.py +31 -37
- transformers/models/got_ocr2/modular_got_ocr2.py +30 -19
- transformers/models/gpt2/configuration_gpt2.py +13 -1
- transformers/models/gpt2/modeling_gpt2.py +5 -5
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +7 -1
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +5 -4
- transformers/models/gpt_neo/configuration_gpt_neo.py +9 -1
- transformers/models/gpt_neo/modeling_gpt_neo.py +3 -7
- transformers/models/gpt_neox/configuration_gpt_neox.py +8 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +4 -4
- transformers/models/gpt_neox/modular_gpt_neox.py +4 -4
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +9 -1
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +2 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +10 -6
- transformers/models/gpt_oss/modeling_gpt_oss.py +46 -79
- transformers/models/gpt_oss/modular_gpt_oss.py +45 -78
- transformers/models/gptj/configuration_gptj.py +4 -4
- transformers/models/gptj/modeling_gptj.py +3 -7
- transformers/models/granite/configuration_granite.py +5 -7
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granite_speech/modeling_granite_speech.py +63 -37
- transformers/models/granitemoe/configuration_granitemoe.py +5 -7
- transformers/models/granitemoe/modeling_granitemoe.py +4 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +17 -7
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +22 -54
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +39 -45
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +6 -7
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -4
- transformers/models/grounding_dino/configuration_grounding_dino.py +10 -45
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +11 -11
- transformers/models/grounding_dino/modeling_grounding_dino.py +68 -86
- transformers/models/groupvit/configuration_groupvit.py +4 -1
- transformers/models/groupvit/modeling_groupvit.py +29 -22
- transformers/models/helium/configuration_helium.py +5 -7
- transformers/models/helium/modeling_helium.py +4 -4
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +2 -4
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -5
- transformers/models/hgnet_v2/modular_hgnet_v2.py +7 -8
- transformers/models/hiera/configuration_hiera.py +2 -4
- transformers/models/hiera/modeling_hiera.py +11 -8
- transformers/models/hubert/configuration_hubert.py +4 -1
- transformers/models/hubert/modeling_hubert.py +7 -4
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +5 -7
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +28 -4
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +28 -6
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +6 -8
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +22 -9
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +22 -8
- transformers/models/ibert/configuration_ibert.py +4 -1
- transformers/models/idefics/configuration_idefics.py +5 -7
- transformers/models/idefics/modeling_idefics.py +3 -4
- transformers/models/idefics/vision.py +5 -4
- transformers/models/idefics2/configuration_idefics2.py +1 -2
- transformers/models/idefics2/image_processing_idefics2_fast.py +1 -0
- transformers/models/idefics2/modeling_idefics2.py +72 -50
- transformers/models/idefics3/configuration_idefics3.py +1 -3
- transformers/models/idefics3/image_processing_idefics3_fast.py +29 -3
- transformers/models/idefics3/modeling_idefics3.py +63 -40
- transformers/models/ijepa/modeling_ijepa.py +3 -3
- transformers/models/imagegpt/configuration_imagegpt.py +9 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +2 -2
- transformers/models/imagegpt/modeling_imagegpt.py +8 -4
- transformers/models/informer/modeling_informer.py +3 -3
- transformers/models/instructblip/configuration_instructblip.py +2 -1
- transformers/models/instructblip/modeling_instructblip.py +65 -39
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +60 -57
- transformers/models/instructblipvideo/modular_instructblipvideo.py +43 -32
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +2 -2
- transformers/models/internvl/configuration_internvl.py +5 -0
- transformers/models/internvl/modeling_internvl.py +35 -55
- transformers/models/internvl/modular_internvl.py +26 -38
- transformers/models/internvl/video_processing_internvl.py +2 -2
- transformers/models/jais2/configuration_jais2.py +5 -7
- transformers/models/jais2/modeling_jais2.py +4 -4
- transformers/models/jamba/configuration_jamba.py +5 -7
- transformers/models/jamba/modeling_jamba.py +4 -4
- transformers/models/jamba/modular_jamba.py +3 -3
- transformers/models/janus/image_processing_janus.py +2 -2
- transformers/models/janus/image_processing_janus_fast.py +8 -8
- transformers/models/janus/modeling_janus.py +63 -146
- transformers/models/janus/modular_janus.py +62 -20
- transformers/models/jetmoe/configuration_jetmoe.py +6 -4
- transformers/models/jetmoe/modeling_jetmoe.py +3 -3
- transformers/models/jetmoe/modular_jetmoe.py +3 -3
- transformers/models/kosmos2/configuration_kosmos2.py +10 -8
- transformers/models/kosmos2/modeling_kosmos2.py +56 -34
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +8 -8
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +54 -63
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +8 -3
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +44 -40
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +1 -1
- transformers/models/lasr/configuration_lasr.py +2 -4
- transformers/models/lasr/modeling_lasr.py +3 -3
- transformers/models/lasr/modular_lasr.py +3 -3
- transformers/models/layoutlm/configuration_layoutlm.py +14 -1
- transformers/models/layoutlm/modeling_layoutlm.py +3 -3
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +14 -16
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +2 -2
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +16 -18
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +2 -2
- transformers/models/layoutxlm/configuration_layoutxlm.py +14 -16
- transformers/models/led/configuration_led.py +7 -8
- transformers/models/levit/image_processing_levit_fast.py +4 -4
- transformers/models/lfm2/configuration_lfm2.py +5 -7
- transformers/models/lfm2/modeling_lfm2.py +4 -4
- transformers/models/lfm2/modular_lfm2.py +3 -3
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +5 -7
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -4
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +9 -15
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +42 -28
- transformers/models/lfm2_vl/modular_lfm2_vl.py +42 -27
- transformers/models/lightglue/image_processing_lightglue_fast.py +4 -3
- transformers/models/lightglue/modeling_lightglue.py +3 -3
- transformers/models/lightglue/modular_lightglue.py +3 -3
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +31 -28
- transformers/models/lighton_ocr/modular_lighton_ocr.py +19 -18
- transformers/models/lilt/configuration_lilt.py +6 -1
- transformers/models/llama/configuration_llama.py +5 -7
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama4/configuration_llama4.py +67 -47
- transformers/models/llama4/image_processing_llama4_fast.py +3 -3
- transformers/models/llama4/modeling_llama4.py +46 -44
- transformers/models/llava/configuration_llava.py +10 -0
- transformers/models/llava/image_processing_llava_fast.py +3 -3
- transformers/models/llava/modeling_llava.py +38 -65
- transformers/models/llava_next/configuration_llava_next.py +2 -1
- transformers/models/llava_next/image_processing_llava_next_fast.py +6 -6
- transformers/models/llava_next/modeling_llava_next.py +61 -60
- transformers/models/llava_next_video/configuration_llava_next_video.py +10 -6
- transformers/models/llava_next_video/modeling_llava_next_video.py +115 -100
- transformers/models/llava_next_video/modular_llava_next_video.py +110 -101
- transformers/models/llava_onevision/configuration_llava_onevision.py +10 -6
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +8 -7
- transformers/models/llava_onevision/modeling_llava_onevision.py +111 -105
- transformers/models/llava_onevision/modular_llava_onevision.py +106 -101
- transformers/models/longcat_flash/configuration_longcat_flash.py +7 -10
- transformers/models/longcat_flash/modeling_longcat_flash.py +7 -7
- transformers/models/longcat_flash/modular_longcat_flash.py +6 -5
- transformers/models/longformer/configuration_longformer.py +4 -1
- transformers/models/longt5/configuration_longt5.py +9 -6
- transformers/models/longt5/modeling_longt5.py +2 -1
- transformers/models/luke/configuration_luke.py +8 -1
- transformers/models/lw_detr/configuration_lw_detr.py +19 -31
- transformers/models/lw_detr/modeling_lw_detr.py +43 -44
- transformers/models/lw_detr/modular_lw_detr.py +36 -38
- transformers/models/lxmert/configuration_lxmert.py +16 -0
- transformers/models/m2m_100/configuration_m2m_100.py +7 -8
- transformers/models/m2m_100/modeling_m2m_100.py +3 -3
- transformers/models/mamba/configuration_mamba.py +5 -2
- transformers/models/mamba/modeling_mamba.py +18 -26
- transformers/models/mamba2/configuration_mamba2.py +5 -7
- transformers/models/mamba2/modeling_mamba2.py +22 -33
- transformers/models/marian/configuration_marian.py +10 -4
- transformers/models/marian/modeling_marian.py +4 -4
- transformers/models/markuplm/configuration_markuplm.py +4 -6
- transformers/models/markuplm/modeling_markuplm.py +3 -3
- transformers/models/mask2former/configuration_mask2former.py +12 -47
- transformers/models/mask2former/image_processing_mask2former_fast.py +8 -8
- transformers/models/mask2former/modeling_mask2former.py +18 -12
- transformers/models/maskformer/configuration_maskformer.py +14 -45
- transformers/models/maskformer/configuration_maskformer_swin.py +2 -4
- transformers/models/maskformer/image_processing_maskformer_fast.py +8 -8
- transformers/models/maskformer/modeling_maskformer.py +15 -9
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -3
- transformers/models/mbart/configuration_mbart.py +9 -4
- transformers/models/mbart/modeling_mbart.py +9 -6
- transformers/models/megatron_bert/configuration_megatron_bert.py +13 -2
- transformers/models/megatron_bert/modeling_megatron_bert.py +0 -15
- transformers/models/metaclip_2/configuration_metaclip_2.py +4 -1
- transformers/models/metaclip_2/modeling_metaclip_2.py +49 -42
- transformers/models/metaclip_2/modular_metaclip_2.py +41 -25
- transformers/models/mgp_str/modeling_mgp_str.py +4 -2
- transformers/models/mimi/configuration_mimi.py +4 -0
- transformers/models/mimi/modeling_mimi.py +40 -36
- transformers/models/minimax/configuration_minimax.py +8 -11
- transformers/models/minimax/modeling_minimax.py +5 -5
- transformers/models/minimax/modular_minimax.py +9 -12
- transformers/models/minimax_m2/configuration_minimax_m2.py +8 -31
- transformers/models/minimax_m2/modeling_minimax_m2.py +4 -4
- transformers/models/minimax_m2/modular_minimax_m2.py +8 -31
- transformers/models/ministral/configuration_ministral.py +5 -7
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral/modular_ministral.py +5 -8
- transformers/models/ministral3/configuration_ministral3.py +4 -4
- transformers/models/ministral3/modeling_ministral3.py +4 -4
- transformers/models/ministral3/modular_ministral3.py +3 -3
- transformers/models/mistral/configuration_mistral.py +5 -7
- transformers/models/mistral/modeling_mistral.py +4 -4
- transformers/models/mistral/modular_mistral.py +3 -3
- transformers/models/mistral3/configuration_mistral3.py +4 -0
- transformers/models/mistral3/modeling_mistral3.py +36 -40
- transformers/models/mistral3/modular_mistral3.py +31 -32
- transformers/models/mixtral/configuration_mixtral.py +8 -11
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mlcd/modeling_mlcd.py +7 -5
- transformers/models/mlcd/modular_mlcd.py +7 -5
- transformers/models/mllama/configuration_mllama.py +5 -7
- transformers/models/mllama/image_processing_mllama_fast.py +6 -5
- transformers/models/mllama/modeling_mllama.py +19 -19
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +10 -45
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +66 -84
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +10 -45
- transformers/models/mobilebert/configuration_mobilebert.py +4 -1
- transformers/models/mobilebert/modeling_mobilebert.py +3 -3
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +4 -4
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +4 -2
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +4 -4
- transformers/models/mobilevit/modeling_mobilevit.py +4 -2
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -2
- transformers/models/modernbert/configuration_modernbert.py +46 -21
- transformers/models/modernbert/modeling_modernbert.py +146 -899
- transformers/models/modernbert/modular_modernbert.py +185 -908
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +21 -13
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -17
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +24 -23
- transformers/models/moonshine/configuration_moonshine.py +12 -7
- transformers/models/moonshine/modeling_moonshine.py +7 -7
- transformers/models/moonshine/modular_moonshine.py +19 -13
- transformers/models/moshi/configuration_moshi.py +28 -2
- transformers/models/moshi/modeling_moshi.py +4 -9
- transformers/models/mpnet/configuration_mpnet.py +6 -1
- transformers/models/mpt/configuration_mpt.py +16 -0
- transformers/models/mra/configuration_mra.py +8 -1
- transformers/models/mt5/configuration_mt5.py +9 -5
- transformers/models/mt5/modeling_mt5.py +5 -8
- transformers/models/musicgen/configuration_musicgen.py +12 -7
- transformers/models/musicgen/modeling_musicgen.py +6 -5
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +15 -7
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -17
- transformers/models/mvp/configuration_mvp.py +8 -4
- transformers/models/mvp/modeling_mvp.py +6 -4
- transformers/models/nanochat/configuration_nanochat.py +5 -7
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nanochat/modular_nanochat.py +4 -4
- transformers/models/nemotron/configuration_nemotron.py +5 -7
- transformers/models/nemotron/modeling_nemotron.py +4 -14
- transformers/models/nllb/tokenization_nllb.py +7 -5
- transformers/models/nllb_moe/configuration_nllb_moe.py +7 -9
- transformers/models/nllb_moe/modeling_nllb_moe.py +3 -3
- transformers/models/nougat/image_processing_nougat_fast.py +8 -8
- transformers/models/nystromformer/configuration_nystromformer.py +8 -1
- transformers/models/olmo/configuration_olmo.py +5 -7
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +3 -3
- transformers/models/olmo2/configuration_olmo2.py +9 -11
- transformers/models/olmo2/modeling_olmo2.py +4 -4
- transformers/models/olmo2/modular_olmo2.py +7 -7
- transformers/models/olmo3/configuration_olmo3.py +10 -11
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmo3/modular_olmo3.py +13 -14
- transformers/models/olmoe/configuration_olmoe.py +5 -7
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/olmoe/modular_olmoe.py +3 -3
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +14 -49
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +22 -18
- transformers/models/oneformer/configuration_oneformer.py +9 -46
- transformers/models/oneformer/image_processing_oneformer_fast.py +8 -8
- transformers/models/oneformer/modeling_oneformer.py +14 -9
- transformers/models/openai/configuration_openai.py +16 -0
- transformers/models/opt/configuration_opt.py +6 -6
- transformers/models/opt/modeling_opt.py +5 -5
- transformers/models/ovis2/configuration_ovis2.py +4 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +3 -3
- transformers/models/ovis2/modeling_ovis2.py +58 -99
- transformers/models/ovis2/modular_ovis2.py +52 -13
- transformers/models/owlv2/configuration_owlv2.py +4 -1
- transformers/models/owlv2/image_processing_owlv2_fast.py +5 -5
- transformers/models/owlv2/modeling_owlv2.py +40 -27
- transformers/models/owlv2/modular_owlv2.py +5 -5
- transformers/models/owlvit/configuration_owlvit.py +4 -1
- transformers/models/owlvit/modeling_owlvit.py +40 -27
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +9 -10
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +88 -87
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +82 -53
- transformers/models/paligemma/configuration_paligemma.py +4 -0
- transformers/models/paligemma/modeling_paligemma.py +30 -26
- transformers/models/parakeet/configuration_parakeet.py +2 -4
- transformers/models/parakeet/modeling_parakeet.py +3 -3
- transformers/models/parakeet/modular_parakeet.py +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +3 -3
- transformers/models/patchtst/modeling_patchtst.py +3 -3
- transformers/models/pe_audio/modeling_pe_audio.py +4 -4
- transformers/models/pe_audio/modular_pe_audio.py +1 -1
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +4 -4
- transformers/models/pe_audio_video/modular_pe_audio_video.py +4 -4
- transformers/models/pe_video/modeling_pe_video.py +36 -24
- transformers/models/pe_video/modular_pe_video.py +36 -23
- transformers/models/pegasus/configuration_pegasus.py +8 -5
- transformers/models/pegasus/modeling_pegasus.py +4 -4
- transformers/models/pegasus_x/configuration_pegasus_x.py +5 -3
- transformers/models/pegasus_x/modeling_pegasus_x.py +3 -3
- transformers/models/perceiver/image_processing_perceiver_fast.py +2 -2
- transformers/models/perceiver/modeling_perceiver.py +17 -9
- transformers/models/perception_lm/modeling_perception_lm.py +26 -27
- transformers/models/perception_lm/modular_perception_lm.py +27 -25
- transformers/models/persimmon/configuration_persimmon.py +5 -7
- transformers/models/persimmon/modeling_persimmon.py +5 -5
- transformers/models/phi/configuration_phi.py +8 -6
- transformers/models/phi/modeling_phi.py +4 -4
- transformers/models/phi/modular_phi.py +3 -3
- transformers/models/phi3/configuration_phi3.py +9 -11
- transformers/models/phi3/modeling_phi3.py +4 -4
- transformers/models/phi3/modular_phi3.py +3 -3
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +11 -13
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +4 -4
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +46 -61
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +44 -30
- transformers/models/phimoe/configuration_phimoe.py +5 -7
- transformers/models/phimoe/modeling_phimoe.py +15 -39
- transformers/models/phimoe/modular_phimoe.py +12 -7
- transformers/models/pix2struct/configuration_pix2struct.py +12 -9
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +5 -5
- transformers/models/pix2struct/modeling_pix2struct.py +14 -7
- transformers/models/pixio/configuration_pixio.py +2 -4
- transformers/models/pixio/modeling_pixio.py +9 -8
- transformers/models/pixio/modular_pixio.py +4 -2
- transformers/models/pixtral/image_processing_pixtral_fast.py +5 -5
- transformers/models/pixtral/modeling_pixtral.py +9 -12
- transformers/models/plbart/configuration_plbart.py +8 -5
- transformers/models/plbart/modeling_plbart.py +9 -7
- transformers/models/plbart/modular_plbart.py +1 -1
- transformers/models/poolformer/image_processing_poolformer_fast.py +7 -7
- transformers/models/pop2piano/configuration_pop2piano.py +7 -6
- transformers/models/pop2piano/modeling_pop2piano.py +2 -1
- transformers/models/pp_doclayout_v3/__init__.py +30 -0
- transformers/models/pp_doclayout_v3/configuration_pp_doclayout_v3.py +277 -0
- transformers/models/pp_doclayout_v3/image_processing_pp_doclayout_v3_fast.py +305 -0
- transformers/models/pp_doclayout_v3/modeling_pp_doclayout_v3.py +2083 -0
- transformers/models/pp_doclayout_v3/modular_pp_doclayout_v3.py +1549 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +12 -46
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +6 -6
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +8 -6
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +12 -10
- transformers/models/prophetnet/configuration_prophetnet.py +11 -10
- transformers/models/prophetnet/modeling_prophetnet.py +12 -23
- transformers/models/pvt/image_processing_pvt.py +7 -7
- transformers/models/pvt/image_processing_pvt_fast.py +1 -1
- transformers/models/pvt_v2/configuration_pvt_v2.py +2 -4
- transformers/models/pvt_v2/modeling_pvt_v2.py +6 -5
- transformers/models/qwen2/configuration_qwen2.py +14 -4
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/modular_qwen2.py +3 -3
- transformers/models/qwen2/tokenization_qwen2.py +0 -4
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +17 -5
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +108 -88
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +115 -87
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +7 -10
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +98 -53
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +18 -6
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +12 -12
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +14 -4
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_moe/modular_qwen2_moe.py +3 -3
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +7 -10
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +4 -6
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +97 -53
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +4 -6
- transformers/models/qwen3/configuration_qwen3.py +15 -5
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3/modular_qwen3.py +3 -3
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +20 -7
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/configuration_qwen3_next.py +16 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +5 -5
- transformers/models/qwen3_next/modular_qwen3_next.py +4 -4
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +55 -19
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +161 -98
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +107 -34
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +7 -6
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +115 -49
- transformers/models/qwen3_vl/modular_qwen3_vl.py +88 -37
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +7 -6
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +173 -99
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +23 -7
- transformers/models/rag/configuration_rag.py +6 -6
- transformers/models/rag/modeling_rag.py +3 -3
- transformers/models/rag/retrieval_rag.py +1 -1
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +8 -6
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +4 -5
- transformers/models/reformer/configuration_reformer.py +7 -7
- transformers/models/rembert/configuration_rembert.py +8 -1
- transformers/models/rembert/modeling_rembert.py +0 -22
- transformers/models/resnet/configuration_resnet.py +2 -4
- transformers/models/resnet/modeling_resnet.py +6 -5
- transformers/models/roberta/configuration_roberta.py +11 -2
- transformers/models/roberta/modeling_roberta.py +6 -6
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +11 -2
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +6 -6
- transformers/models/roc_bert/configuration_roc_bert.py +8 -1
- transformers/models/roc_bert/modeling_roc_bert.py +6 -41
- transformers/models/roformer/configuration_roformer.py +13 -2
- transformers/models/roformer/modeling_roformer.py +0 -14
- transformers/models/rt_detr/configuration_rt_detr.py +8 -49
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +2 -4
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +24 -11
- transformers/models/rt_detr/modeling_rt_detr.py +578 -737
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +2 -3
- transformers/models/rt_detr/modular_rt_detr.py +1508 -6
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +12 -57
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +318 -453
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +25 -66
- transformers/models/rwkv/configuration_rwkv.py +2 -3
- transformers/models/rwkv/modeling_rwkv.py +0 -23
- transformers/models/sam/configuration_sam.py +2 -0
- transformers/models/sam/image_processing_sam_fast.py +4 -4
- transformers/models/sam/modeling_sam.py +13 -8
- transformers/models/sam/processing_sam.py +3 -3
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +56 -52
- transformers/models/sam2/modular_sam2.py +47 -55
- transformers/models/sam2_video/modeling_sam2_video.py +50 -51
- transformers/models/sam2_video/modular_sam2_video.py +12 -10
- transformers/models/sam3/modeling_sam3.py +43 -47
- transformers/models/sam3/processing_sam3.py +8 -4
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +1 -2
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +50 -49
- transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/processing_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +50 -49
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +10 -22
- transformers/models/sam3_video/modeling_sam3_video.py +27 -14
- transformers/models/sam_hq/configuration_sam_hq.py +2 -0
- transformers/models/sam_hq/modeling_sam_hq.py +13 -9
- transformers/models/sam_hq/modular_sam_hq.py +6 -6
- transformers/models/sam_hq/processing_sam_hq.py +7 -6
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +8 -9
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +8 -9
- transformers/models/seed_oss/configuration_seed_oss.py +7 -9
- transformers/models/seed_oss/modeling_seed_oss.py +4 -4
- transformers/models/seed_oss/modular_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +4 -4
- transformers/models/segformer/modeling_segformer.py +4 -2
- transformers/models/segformer/modular_segformer.py +3 -3
- transformers/models/seggpt/modeling_seggpt.py +20 -8
- transformers/models/sew/configuration_sew.py +4 -1
- transformers/models/sew/modeling_sew.py +9 -5
- transformers/models/sew/modular_sew.py +2 -1
- transformers/models/sew_d/configuration_sew_d.py +4 -1
- transformers/models/sew_d/modeling_sew_d.py +4 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +4 -4
- transformers/models/siglip/configuration_siglip.py +4 -1
- transformers/models/siglip/modeling_siglip.py +27 -71
- transformers/models/siglip2/__init__.py +1 -0
- transformers/models/siglip2/configuration_siglip2.py +4 -2
- transformers/models/siglip2/image_processing_siglip2_fast.py +2 -2
- transformers/models/siglip2/modeling_siglip2.py +37 -78
- transformers/models/siglip2/modular_siglip2.py +74 -25
- transformers/models/siglip2/tokenization_siglip2.py +95 -0
- transformers/models/smollm3/configuration_smollm3.py +6 -6
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smollm3/modular_smollm3.py +9 -9
- transformers/models/smolvlm/configuration_smolvlm.py +1 -3
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +29 -3
- transformers/models/smolvlm/modeling_smolvlm.py +75 -46
- transformers/models/smolvlm/modular_smolvlm.py +36 -23
- transformers/models/smolvlm/video_processing_smolvlm.py +9 -9
- transformers/models/solar_open/__init__.py +27 -0
- transformers/models/solar_open/configuration_solar_open.py +184 -0
- transformers/models/solar_open/modeling_solar_open.py +642 -0
- transformers/models/solar_open/modular_solar_open.py +224 -0
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +6 -4
- transformers/models/speech_to_text/configuration_speech_to_text.py +9 -8
- transformers/models/speech_to_text/modeling_speech_to_text.py +3 -3
- transformers/models/speecht5/configuration_speecht5.py +7 -8
- transformers/models/splinter/configuration_splinter.py +6 -6
- transformers/models/splinter/modeling_splinter.py +8 -3
- transformers/models/squeezebert/configuration_squeezebert.py +14 -1
- transformers/models/stablelm/configuration_stablelm.py +8 -6
- transformers/models/stablelm/modeling_stablelm.py +5 -5
- transformers/models/starcoder2/configuration_starcoder2.py +11 -5
- transformers/models/starcoder2/modeling_starcoder2.py +5 -5
- transformers/models/starcoder2/modular_starcoder2.py +4 -4
- transformers/models/superglue/configuration_superglue.py +4 -0
- transformers/models/superglue/image_processing_superglue_fast.py +4 -3
- transformers/models/superglue/modeling_superglue.py +9 -4
- transformers/models/superpoint/image_processing_superpoint_fast.py +3 -4
- transformers/models/superpoint/modeling_superpoint.py +4 -2
- transformers/models/swin/configuration_swin.py +2 -4
- transformers/models/swin/modeling_swin.py +11 -8
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +2 -2
- transformers/models/swin2sr/modeling_swin2sr.py +4 -2
- transformers/models/swinv2/configuration_swinv2.py +2 -4
- transformers/models/swinv2/modeling_swinv2.py +10 -7
- transformers/models/switch_transformers/configuration_switch_transformers.py +11 -6
- transformers/models/switch_transformers/modeling_switch_transformers.py +3 -3
- transformers/models/switch_transformers/modular_switch_transformers.py +3 -3
- transformers/models/t5/configuration_t5.py +9 -8
- transformers/models/t5/modeling_t5.py +5 -8
- transformers/models/t5gemma/configuration_t5gemma.py +10 -25
- transformers/models/t5gemma/modeling_t5gemma.py +9 -9
- transformers/models/t5gemma/modular_t5gemma.py +11 -24
- transformers/models/t5gemma2/configuration_t5gemma2.py +35 -48
- transformers/models/t5gemma2/modeling_t5gemma2.py +143 -100
- transformers/models/t5gemma2/modular_t5gemma2.py +152 -136
- transformers/models/table_transformer/configuration_table_transformer.py +18 -49
- transformers/models/table_transformer/modeling_table_transformer.py +27 -53
- transformers/models/tapas/configuration_tapas.py +12 -1
- transformers/models/tapas/modeling_tapas.py +1 -1
- transformers/models/tapas/tokenization_tapas.py +1 -0
- transformers/models/textnet/configuration_textnet.py +4 -6
- transformers/models/textnet/image_processing_textnet_fast.py +3 -3
- transformers/models/textnet/modeling_textnet.py +15 -14
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +3 -3
- transformers/models/timesfm/modeling_timesfm.py +5 -6
- transformers/models/timesfm/modular_timesfm.py +5 -6
- transformers/models/timm_backbone/configuration_timm_backbone.py +33 -7
- transformers/models/timm_backbone/modeling_timm_backbone.py +21 -24
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +9 -4
- transformers/models/trocr/configuration_trocr.py +11 -7
- transformers/models/trocr/modeling_trocr.py +4 -2
- transformers/models/tvp/configuration_tvp.py +10 -35
- transformers/models/tvp/image_processing_tvp_fast.py +6 -5
- transformers/models/tvp/modeling_tvp.py +1 -1
- transformers/models/udop/configuration_udop.py +16 -7
- transformers/models/udop/modeling_udop.py +10 -6
- transformers/models/umt5/configuration_umt5.py +8 -6
- transformers/models/umt5/modeling_umt5.py +7 -3
- transformers/models/unispeech/configuration_unispeech.py +4 -1
- transformers/models/unispeech/modeling_unispeech.py +7 -4
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +4 -1
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +7 -4
- transformers/models/upernet/configuration_upernet.py +8 -35
- transformers/models/upernet/modeling_upernet.py +1 -1
- transformers/models/vaultgemma/configuration_vaultgemma.py +5 -7
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/video_llama_3/configuration_video_llama_3.py +4 -0
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +4 -6
- transformers/models/video_llama_3/modeling_video_llama_3.py +85 -48
- transformers/models/video_llama_3/modular_video_llama_3.py +56 -43
- transformers/models/video_llama_3/video_processing_video_llama_3.py +29 -8
- transformers/models/video_llava/configuration_video_llava.py +4 -0
- transformers/models/video_llava/modeling_video_llava.py +87 -89
- transformers/models/videomae/modeling_videomae.py +4 -5
- transformers/models/vilt/configuration_vilt.py +4 -1
- transformers/models/vilt/image_processing_vilt_fast.py +6 -6
- transformers/models/vilt/modeling_vilt.py +27 -12
- transformers/models/vipllava/configuration_vipllava.py +4 -0
- transformers/models/vipllava/modeling_vipllava.py +57 -31
- transformers/models/vipllava/modular_vipllava.py +50 -24
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +10 -6
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +27 -20
- transformers/models/visual_bert/configuration_visual_bert.py +6 -1
- transformers/models/vit/configuration_vit.py +2 -2
- transformers/models/vit/modeling_vit.py +7 -5
- transformers/models/vit_mae/modeling_vit_mae.py +11 -7
- transformers/models/vit_msn/modeling_vit_msn.py +11 -7
- transformers/models/vitdet/configuration_vitdet.py +2 -4
- transformers/models/vitdet/modeling_vitdet.py +2 -3
- transformers/models/vitmatte/configuration_vitmatte.py +6 -35
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +2 -2
- transformers/models/vitmatte/modeling_vitmatte.py +1 -1
- transformers/models/vitpose/configuration_vitpose.py +6 -43
- transformers/models/vitpose/modeling_vitpose.py +5 -3
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +2 -4
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +5 -6
- transformers/models/vits/configuration_vits.py +4 -0
- transformers/models/vits/modeling_vits.py +9 -7
- transformers/models/vivit/modeling_vivit.py +4 -4
- transformers/models/vjepa2/modeling_vjepa2.py +9 -9
- transformers/models/voxtral/configuration_voxtral.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +25 -24
- transformers/models/voxtral/modular_voxtral.py +26 -20
- transformers/models/wav2vec2/configuration_wav2vec2.py +4 -1
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -4
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +4 -1
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +4 -1
- transformers/models/wavlm/configuration_wavlm.py +4 -1
- transformers/models/wavlm/modeling_wavlm.py +4 -1
- transformers/models/whisper/configuration_whisper.py +6 -4
- transformers/models/whisper/generation_whisper.py +0 -1
- transformers/models/whisper/modeling_whisper.py +3 -3
- transformers/models/x_clip/configuration_x_clip.py +4 -1
- transformers/models/x_clip/modeling_x_clip.py +26 -27
- transformers/models/xglm/configuration_xglm.py +9 -7
- transformers/models/xlm/configuration_xlm.py +10 -7
- transformers/models/xlm/modeling_xlm.py +1 -1
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +11 -2
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +6 -6
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +10 -1
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +6 -6
- transformers/models/xlnet/configuration_xlnet.py +3 -1
- transformers/models/xlstm/configuration_xlstm.py +5 -7
- transformers/models/xlstm/modeling_xlstm.py +0 -32
- transformers/models/xmod/configuration_xmod.py +11 -2
- transformers/models/xmod/modeling_xmod.py +13 -16
- transformers/models/yolos/image_processing_yolos_fast.py +25 -28
- transformers/models/yolos/modeling_yolos.py +7 -7
- transformers/models/yolos/modular_yolos.py +16 -16
- transformers/models/yoso/configuration_yoso.py +8 -1
- transformers/models/youtu/__init__.py +27 -0
- transformers/models/youtu/configuration_youtu.py +194 -0
- transformers/models/youtu/modeling_youtu.py +619 -0
- transformers/models/youtu/modular_youtu.py +254 -0
- transformers/models/zamba/configuration_zamba.py +5 -7
- transformers/models/zamba/modeling_zamba.py +25 -56
- transformers/models/zamba2/configuration_zamba2.py +8 -13
- transformers/models/zamba2/modeling_zamba2.py +53 -78
- transformers/models/zamba2/modular_zamba2.py +36 -29
- transformers/models/zoedepth/configuration_zoedepth.py +17 -40
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +9 -9
- transformers/models/zoedepth/modeling_zoedepth.py +5 -3
- transformers/pipelines/__init__.py +1 -61
- transformers/pipelines/any_to_any.py +1 -1
- transformers/pipelines/automatic_speech_recognition.py +0 -2
- transformers/pipelines/base.py +1 -1
- transformers/pipelines/image_text_to_text.py +1 -1
- transformers/pipelines/text_to_audio.py +5 -1
- transformers/processing_utils.py +35 -44
- transformers/pytorch_utils.py +2 -26
- transformers/quantizers/quantizer_compressed_tensors.py +7 -5
- transformers/quantizers/quantizer_fbgemm_fp8.py +20 -23
- transformers/quantizers/quantizer_finegrained_fp8.py +14 -20
- transformers/quantizers/quantizer_mxfp4.py +1 -1
- transformers/quantizers/quantizer_torchao.py +0 -16
- transformers/safetensors_conversion.py +11 -4
- transformers/testing_utils.py +3 -28
- transformers/tokenization_mistral_common.py +9 -0
- transformers/tokenization_python.py +6 -4
- transformers/tokenization_utils_base.py +119 -219
- transformers/tokenization_utils_tokenizers.py +31 -2
- transformers/trainer.py +25 -33
- transformers/trainer_seq2seq.py +1 -1
- transformers/training_args.py +411 -417
- transformers/utils/__init__.py +1 -4
- transformers/utils/auto_docstring.py +15 -18
- transformers/utils/backbone_utils.py +13 -373
- transformers/utils/doc.py +4 -36
- transformers/utils/generic.py +69 -33
- transformers/utils/import_utils.py +72 -75
- transformers/utils/loading_report.py +133 -105
- transformers/utils/quantization_config.py +0 -21
- transformers/video_processing_utils.py +5 -5
- transformers/video_utils.py +3 -1
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/METADATA +118 -237
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/RECORD +1019 -994
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/WHEEL +1 -1
- transformers/pipelines/deprecated/text2text_generation.py +0 -408
- transformers/pipelines/image_to_text.py +0 -189
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc3.dist-info → transformers-5.1.0.dist-info}/top_level.txt +0 -0
--- a/transformers/models/table_transformer/modeling_table_transformer.py
+++ b/transformers/models/table_transformer/modeling_table_transformer.py
@@ -21,6 +21,7 @@ from torch import Tensor, nn
 
 from ... import initialization as init
 from ...activations import ACT2FN
+from ...backbone_utils import load_backbone
 from ...modeling_attn_mask_utils import _prepare_4d_attention_mask
 from ...modeling_layers import GradientCheckpointingLayer
 from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithCrossAttentions, Seq2SeqModelOutput
@@ -28,18 +29,11 @@ from ...modeling_utils import PreTrainedModel
 from ...utils import (
     ModelOutput,
     auto_docstring,
-    is_timm_available,
     logging,
-    requires_backends,
 )
-from ...utils.backbone_utils import load_backbone
 from .configuration_table_transformer import TableTransformerConfig
 
 
-if is_timm_available():
-    from timm import create_model
-
-
 logger = logging.get_logger(__name__)
 
 
@@ -196,7 +190,7 @@ def replace_batch_norm(model):
             replace_batch_norm(module)
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrConvEncoder with Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrConvEncoder with Detr->TableTransformer
 class TableTransformerConvEncoder(nn.Module):
     """
     Convolutional backbone, using either the AutoBackbone API or one from the timm library.
@@ -210,47 +204,25 @@ class TableTransformerConvEncoder(nn.Module):
 
         self.config = config
 
-        # For backwards compatibility we have to use the timm library directly instead of the AutoBackbone API
-        if config.use_timm_backbone:
-            # We default to values which were previously hard-coded. This enables configurability from the config
-            # using backbone arguments, while keeping the default behavior the same.
-            requires_backends(self, ["timm"])
-            kwargs = getattr(config, "backbone_kwargs", {})
-            kwargs = {} if kwargs is None else kwargs.copy()
-            out_indices = kwargs.pop("out_indices", (1, 2, 3, 4))
-            num_channels = kwargs.pop("in_chans", config.num_channels)
-            if config.dilation:
-                kwargs["output_stride"] = kwargs.get("output_stride", 16)
-            backbone = create_model(
-                config.backbone,
-                pretrained=config.use_pretrained_backbone,
-                features_only=True,
-                out_indices=out_indices,
-                in_chans=num_channels,
-                **kwargs,
-            )
-        else:
-            backbone = load_backbone(config)
+        backbone = load_backbone(config)
+        self.intermediate_channel_sizes = backbone.channels
 
         # replace batch norm by frozen batch norm
         with torch.no_grad():
             replace_batch_norm(backbone)
-        self.model = backbone
-        self.intermediate_channel_sizes = (
-            self.model.feature_info.channels() if config.use_timm_backbone else self.model.channels
-        )
 
-        backbone_model_type = None
-        if config.backbone is not None:
-            backbone_model_type = config.backbone
-        elif config.backbone_config is not None:
-            backbone_model_type = config.backbone_config.model_type
-        else:
-            raise ValueError("Either `backbone` or `backbone_config` should be provided in the config")
+        # We used to load with timm library directly instead of the AutoBackbone API
+        # so we need to unwrap the `backbone._backbone` module to load weights without mismatch
+        is_timm_model = False
+        if hasattr(backbone, "_backbone"):
+            backbone = backbone._backbone
+            is_timm_model = True
+        self.model = backbone
 
+        backbone_model_type = config.backbone_config.model_type
         if "resnet" in backbone_model_type:
            for name, parameter in self.model.named_parameters():
-                if config.use_timm_backbone:
+                if is_timm_model:
                     if "layer2" not in name and "layer3" not in name and "layer4" not in name:
                         parameter.requires_grad_(False)
                 else:
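A minimal sketch of the unified loading path shown above, assuming only what this hunk and the file listing give us (the new `transformers.backbone_utils` module, `load_backbone`, `backbone.channels`, and the `_backbone` attribute on timm-backed backbones); the config instance is illustrative:

```python
from transformers import TableTransformerConfig
from transformers.backbone_utils import load_backbone

# Both timm and native backbones now go through the same AutoBackbone entry point.
backbone = load_backbone(TableTransformerConfig())
print(backbone.channels)  # per-stage feature sizes, stored as `intermediate_channel_sizes`

# timm-backed backbones keep the raw timm module on `_backbone`; the encoder unwraps
# it so checkpoint weight names keep matching the pre-AutoBackbone layout.
raw_backbone = getattr(backbone, "_backbone", backbone)
```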
@@ -259,7 +231,9 @@ class TableTransformerConvEncoder(nn.Module):
 
     def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor):
         # send pixel_values through the model to get list of feature maps
-        features = self.model(pixel_values)
+        features = self.model(pixel_values)
+        if isinstance(features, dict):
+            features = features.feature_maps
 
         out = []
         for feature_map in features:
@@ -269,7 +243,7 @@ class TableTransformerConvEncoder(nn.Module):
         return out
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrConvModel with Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrConvModel with Detr->TableTransformer
 class TableTransformerConvModel(nn.Module):
     """
     This module adds 2D position embeddings to all intermediate feature maps of the convolutional encoder.
@@ -291,7 +265,7 @@ class TableTransformerConvModel(nn.Module):
         return out, pos
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrSinePositionEmbedding with Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrSinePositionEmbedding with Detr->TableTransformer
 class TableTransformerSinePositionEmbedding(nn.Module):
     """
     This is a more standard version of the position embedding, very similar to the one used by the Attention is all you
@@ -329,7 +303,7 @@ class TableTransformerSinePositionEmbedding(nn.Module):
         return pos
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrLearnedPositionEmbedding with Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrLearnedPositionEmbedding with Detr->TableTransformer
 class TableTransformerLearnedPositionEmbedding(nn.Module):
     """
     This module learns positional embeddings up to a fixed maximum size.
@@ -353,7 +327,7 @@ class TableTransformerLearnedPositionEmbedding(nn.Module):
         return pos
 
 
-# Copied from transformers.models.detr.modeling_detr.build_position_encoding with Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.build_position_encoding with Detr->TableTransformer
 def build_position_encoding(config):
     n_steps = config.d_model // 2
     if config.position_embedding_type == "sine":
@@ -367,7 +341,7 @@ def build_position_encoding(config):
     return position_embedding
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrAttention with DETR->TABLE_TRANSFORMER,Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrAttention with DETR->TABLE_TRANSFORMER,Detr->TableTransformer
 class TableTransformerAttention(nn.Module):
     """
     Multi-headed attention from 'Attention Is All You Need' paper.
@@ -502,7 +476,7 @@ class TableTransformerAttention(nn.Module):
 
 
 class TableTransformerEncoderLayer(nn.Module):
-    # Copied from transformers.models.detr.modeling_detr.DetrEncoderLayer.__init__ with Detr->TableTransformer
+    # TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrEncoderLayer.__init__ with Detr->TableTransformer
     def __init__(self, config: TableTransformerConfig):
         super().__init__()
         self.embed_dim = config.d_model
@@ -575,7 +549,7 @@ class TableTransformerEncoderLayer(nn.Module):
 
 
 class TableTransformerDecoderLayer(GradientCheckpointingLayer):
-    # Copied from transformers.models.detr.modeling_detr.DetrDecoderLayer.__init__ with Detr->TableTransformer
+    # TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrDecoderLayer.__init__ with Detr->TableTransformer
     def __init__(self, config: TableTransformerConfig):
         super().__init__()
         self.embed_dim = config.d_model
@@ -828,7 +802,7 @@ class TableTransformerEncoder(TableTransformerPreTrainedModel):
     )
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrDecoder with DETR->TABLE_TRANSFORMER,Detr->TableTransformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrDecoder with DETR->TABLE_TRANSFORMER,Detr->TableTransformer
 class TableTransformerDecoder(TableTransformerPreTrainedModel):
     """
     Transformer decoder consisting of *config.decoder_layers* layers. Each layer is a [`TableTransformerDecoderLayer`].
@@ -1003,7 +977,7 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
     """
 )
 class TableTransformerModel(TableTransformerPreTrainedModel):
-    # Copied from transformers.models.detr.modeling_detr.DetrModel.__init__ with Detr->TableTransformer
+    # TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrModel.__init__ with Detr->TableTransformer
     def __init__(self, config: TableTransformerConfig):
         super().__init__(config)
 
@@ -1172,7 +1146,7 @@ class TableTransformerModel(TableTransformerPreTrainedModel):
     """
 )
 class TableTransformerForObjectDetection(TableTransformerPreTrainedModel):
-    # Copied from transformers.models.detr.modeling_detr.DetrForObjectDetection.__init__ with Detr->TableTransformer
+    # TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrForObjectDetection.__init__ with Detr->TableTransformer
     def __init__(self, config: TableTransformerConfig):
         super().__init__(config)
 
@@ -1306,7 +1280,7 @@ class TableTransformerForObjectDetection(TableTransformerPreTrainedModel):
     )
 
 
-# Copied from transformers.models.detr.modeling_detr.DetrMLPPredictionHead with Detr->TableTransformer,detr->table_transformer
+# TODO: use modular - Copied from transformers.models.detr.modeling_detr.DetrMLPPredictionHead with Detr->TableTransformer,detr->table_transformer
 class TableTransformerMLPPredictionHead(nn.Module):
     """
     Very simple multi-layer perceptron (MLP, also called FFN), used to predict the normalized center coordinates,
--- a/transformers/models/tapas/configuration_tapas.py
+++ b/transformers/models/tapas/configuration_tapas.py
@@ -151,6 +151,8 @@ class TapasConfig(PreTrainedConfig):
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         pad_token_id=0,
+        bos_token_id=None,
+        eos_token_id=None,
         positive_label_weight=10.0,
         num_aggregation_labels=0,
         aggregation_loss_weight=1.0,
@@ -175,11 +177,20 @@ class TapasConfig(PreTrainedConfig):
         disable_per_token_loss=False,
         aggregation_labels=None,
         no_aggregation_label_index=None,
+        is_decoder=False,
+        add_cross_attention=False,
+        tie_word_embeddings=True,
         **kwargs,
     ):
-        super().__init__(pad_token_id=pad_token_id, **kwargs)
+        super().__init__(**kwargs)
 
         # BERT hyperparameters (with updated max_position_embeddings and type_vocab_sizes)
+        self.is_decoder = is_decoder
+        self.add_cross_attention = add_cross_attention
+        self.tie_word_embeddings = tie_word_embeddings
+        self.pad_token_id = pad_token_id
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
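In effect, decoder- and token-related fields that the base config constructor used to absorb are now stored explicitly on `TapasConfig`. A minimal sketch (attribute names taken from the hunk; values illustrative):

```python
from transformers import TapasConfig

config = TapasConfig(pad_token_id=0, is_decoder=False, tie_word_embeddings=True)
print(config.pad_token_id, config.bos_token_id, config.is_decoder)  # 0 None False
```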
--- a/transformers/models/tapas/modeling_tapas.py
+++ b/transformers/models/tapas/modeling_tapas.py
@@ -1491,7 +1491,7 @@ def _segment_reduce(values, index, segment_reduce_fn, name):
     new_shape = torch.cat(
         [
             torch.as_tensor(index.batch_shape(), dtype=torch.long, device=device),
-            torch.as_tensor([index.num_segments], dtype=torch.long, device=device),
+            torch.as_tensor(index.num_segments, dtype=torch.long, device=device).unsqueeze(dim=0),
             torch.as_tensor(vector_shape, dtype=torch.long, device=device),
         ],
         dim=0,
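The two forms produce the same one-element vector; the rewrite just avoids wrapping `num_segments` in a Python list, which also works when it is already a tensor. A standalone check:

```python
import torch

num_segments = 7
old = torch.as_tensor([num_segments], dtype=torch.long)
new = torch.as_tensor(num_segments, dtype=torch.long).unsqueeze(dim=0)
assert torch.equal(old, new)  # both are tensor([7])
```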
--- a/transformers/models/textnet/configuration_textnet.py
+++ b/transformers/models/textnet/configuration_textnet.py
@@ -13,9 +13,9 @@
 # limitations under the License.
 """TextNet model configuration"""
 
-from ...configuration_utils import PreTrainedConfig
-from ...utils import logging
-from ...utils.backbone_utils import BackboneConfigMixin, get_aligned_output_features_output_indices
+from ...backbone_utils import BackboneConfigMixin
+from ...configuration_utils import PreTrainedConfig
+from ...utils import logging
 
 
 logger = logging.get_logger(__name__)
@@ -126,9 +126,7 @@ class TextNetConfig(BackboneConfigMixin, PreTrainedConfig):
 
         self.depths = [len(layer) for layer in self.conv_layer_kernel_sizes]
         self.stage_names = ["stem"] + [f"stage{idx}" for idx in range(1, 5)]
-        self._out_features, self._out_indices = get_aligned_output_features_output_indices(
-            out_features=out_features, out_indices=out_indices, stage_names=self.stage_names
-        )
+        self.set_output_features_output_indices(out_indices=out_indices, out_features=out_features)
 
 
 __all__ = ["TextNetConfig"]
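A hedged sketch of what the new mixin method does for a backbone config, assuming only its signature as shown above: given `stage_names` plus either `out_indices` or `out_features`, it derives and stores the matching counterpart:

```python
from transformers import TextNetConfig

config = TextNetConfig(out_indices=[1, 2, 3])
print(config.stage_names)   # ["stem", "stage1", "stage2", "stage3", "stage4"]
print(config.out_features)  # aligned with out_indices, e.g. ["stage1", "stage2", "stage3"]
```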
--- a/transformers/models/textnet/image_processing_textnet_fast.py
+++ b/transformers/models/textnet/image_processing_textnet_fast.py
@@ -16,7 +16,7 @@
 from typing import Optional
 
 import torch
-from torchvision.transforms.v2 import functional as F
+import torchvision.transforms.v2.functional as tvF
 
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import BaseImageProcessorFast
@@ -68,7 +68,7 @@ class TextNetImageProcessorFast(BaseImageProcessorFast):
         self,
         image: "torch.Tensor",
         size: SizeDict,
-        interpolation: Optional["F.InterpolationMode"] = None,
+        interpolation: Optional["tvF.InterpolationMode"] = None,
         antialias: bool = True,
         size_divisor: int = 32,
         **kwargs,
@@ -99,7 +99,7 @@ class TextNetImageProcessorFast(BaseImageProcessorFast):
         do_resize: bool,
         size: SizeDict,
         size_divisor: int,
-        interpolation: Optional["F.InterpolationMode"],
+        interpolation: Optional["tvF.InterpolationMode"],
         do_center_crop: bool,
         crop_size: SizeDict,
         do_rescale: bool,
--- a/transformers/models/textnet/modeling_textnet.py
+++ b/transformers/models/textnet/modeling_textnet.py
@@ -19,19 +19,17 @@ import torch
 import torch.nn as nn
 from torch import Tensor
 
-from ...activations import ACT2CLS
-from ...utils.backbone_utils import BackboneMixin
-from ...modeling_outputs import (
+from ...activations import ACT2CLS
+from ...backbone_utils import BackboneMixin
+from ...modeling_outputs import (
     BackboneOutput,
     BaseModelOutputWithNoAttention,
     BaseModelOutputWithPoolingAndNoAttention,
     ImageClassifierOutputWithNoAttention,
 )
-from ...modeling_utils import PreTrainedModel
-from ...utils import logging
-from .configuration_textnet import TextNetConfig
-
-from ...utils import auto_docstring
+from ...modeling_utils import PreTrainedModel
+from ...utils import auto_docstring, logging
+from .configuration_textnet import TextNetConfig
 
 
 logger = logging.get_logger(__name__)
@@ -302,12 +300,14 @@ class TextNetForImageClassification(TextNetPreTrainedModel):
     Examples:
     ```python
     >>> import torch
-    >>> import requests
+    >>> import httpx
+    >>> from io import BytesIO
     >>> from transformers import TextNetForImageClassification, TextNetImageProcessor
     >>> from PIL import Image
 
     >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-    >>> image = Image.open(requests.get(url, stream=True).raw)
+    >>> with httpx.stream("GET", url) as response:
+    ...     image = Image.open(BytesIO(response.read()))
 
     >>> processor = TextNetImageProcessor.from_pretrained("czczup/textnet-base")
     >>> model = TextNetForImageClassification.from_pretrained("czczup/textnet-base")
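The doctest swap in isolation: both forms yield a PIL image, but the httpx version streams the response and decodes it explicitly. Runnable on its own (URL taken from the doctest above):

```python
from io import BytesIO

import httpx
from PIL import Image

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
with httpx.stream("GET", url) as response:
    image = Image.open(BytesIO(response.read()))
print(image.size)
```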
@@ -342,12 +342,11 @@ class TextNetForImageClassification(TextNetPreTrainedModel):
     TextNet backbone, to be used with frameworks like DETR and MaskFormer.
     """
 )
-class TextNetBackbone(TextNetPreTrainedModel, BackboneMixin):
+class TextNetBackbone(BackboneMixin, TextNetPreTrainedModel):
     has_attentions = False
 
     def __init__(self, config):
         super().__init__(config)
-        super()._init_backbone(config)
 
         self.textnet = TextNetModel(config)
         self.num_features = config.hidden_sizes
@@ -368,12 +367,14 @@ class TextNetBackbone(TextNetPreTrainedModel, BackboneMixin):
 
     ```python
     >>> import torch
-    >>> import requests
+    >>> import httpx
+    >>> from io import BytesIO
     >>> from PIL import Image
     >>> from transformers import AutoImageProcessor, AutoBackbone
 
     >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-    >>> image = Image.open(requests.get(url, stream=True).raw)
+    >>> with httpx.stream("GET", url) as response:
+    ...     image = Image.open(BytesIO(response.read()))
 
     >>> processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
     >>> model = AutoBackbone.from_pretrained("czczup/textnet-base")
--- a/transformers/models/time_series_transformer/modeling_time_series_transformer.py
+++ b/transformers/models/time_series_transformer/modeling_time_series_transformer.py
@@ -402,9 +402,9 @@ class TimeSeriesTransformerAttention(nn.Module):
         if is_cross_attention and isinstance(past_key_values, EncoderDecoderCache):
             past_key_values.is_updated[self.layer_idx] = True
 
-        attention_interface: Callable = eager_attention_forward
-        if self.config._attn_implementation != "eager":
-            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
+        attention_interface: Callable = ALL_ATTENTION_FUNCTIONS.get_interface(
+            self.config._attn_implementation, eager_attention_forward
+        )
 
         attn_output, attn_weights = attention_interface(
             self,
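The same three-line dispatch is collapsed this way across several files below. A hedged sketch of the pattern, assuming only what the hunks show about `get_interface` (a keyed lookup with an explicit eager fallback); the helper name `resolve_attention_fn` is illustrative, not library API:

```python
from typing import Callable

from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS


def resolve_attention_fn(attn_implementation: str, eager_fallback: Callable) -> Callable:
    # Old pattern (removed above):
    #   fn = eager_fallback
    #   if attn_implementation != "eager":
    #       fn = ALL_ATTENTION_FUNCTIONS[attn_implementation]
    # New pattern: one registry call with the eager implementation as the default.
    return ALL_ATTENTION_FUNCTIONS.get_interface(attn_implementation, eager_fallback)
```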
--- a/transformers/models/timesfm/modeling_timesfm.py
+++ b/transformers/models/timesfm/modeling_timesfm.py
@@ -245,9 +245,9 @@ class TimesFmAttention(nn.Module):
         key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
         value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
 
-        attention_interface: Callable = simple_eager_attention_forward
-        if self.config._attn_implementation != "eager":
-            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
+        attention_interface: Callable = ALL_ATTENTION_FUNCTIONS.get_interface(
+            self.config._attn_implementation, simple_eager_attention_forward
+        )
 
         attn_output, attn_weights = attention_interface(
             self,
@@ -620,7 +620,7 @@ class TimesFmModelForPrediction(TimesFmPreTrainedModel):
             - the number of padded examples for SPMD so that each core has the same
               number (a multiple of `batch_size`) of examples.
         """
-        input_ts, input_padding, inp_freq = [], [], []
+        input_ts, input_padding = [], []
 
         for i, ts in enumerate(inputs):
             input_len = ts.shape[0]
@@ -635,12 +635,11 @@ class TimesFmModelForPrediction(TimesFmPreTrainedModel):
 
             input_ts.append(ts)
             input_padding.append(padding)
-            inp_freq.append(freq[i])
 
         return (
             torch.stack(input_ts, dim=0),
             torch.stack(input_padding, dim=0),
-            torch.tensor(inp_freq, dtype=torch.int32).reshape(-1, 1),
+            torch.tensor(freq[: len(inputs)], dtype=torch.int32).reshape(-1, 1),
         )
 
     def _postprocess_output(
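A standalone shape check for the rewritten return value: one `int32` frequency per input series, shaped as a column vector (toy values, not TimesFM data):

```python
import torch

inputs = [torch.zeros(8), torch.zeros(12)]  # two input series
freq = [0, 1, 2]  # may be longer than `inputs`

out = torch.tensor(freq[: len(inputs)], dtype=torch.int32).reshape(-1, 1)
print(out.shape)  # torch.Size([2, 1])
```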
--- a/transformers/models/timesfm/modular_timesfm.py
+++ b/transformers/models/timesfm/modular_timesfm.py
@@ -201,9 +201,9 @@ class TimesFmAttention(nn.Module):
         key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
         value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
 
-        attention_interface: Callable = simple_eager_attention_forward
-        if self.config._attn_implementation != "eager":
-            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
+        attention_interface: Callable = ALL_ATTENTION_FUNCTIONS.get_interface(
+            self.config._attn_implementation, simple_eager_attention_forward
+        )
 
         attn_output, attn_weights = attention_interface(
             self,
@@ -576,7 +576,7 @@ class TimesFmModelForPrediction(TimesFmPreTrainedModel):
             - the number of padded examples for SPMD so that each core has the same
               number (a multiple of `batch_size`) of examples.
         """
-        input_ts, input_padding, inp_freq = [], [], []
+        input_ts, input_padding = [], []
 
         for i, ts in enumerate(inputs):
             input_len = ts.shape[0]
@@ -591,12 +591,11 @@ class TimesFmModelForPrediction(TimesFmPreTrainedModel):
 
             input_ts.append(ts)
             input_padding.append(padding)
-            inp_freq.append(freq[i])
 
         return (
             torch.stack(input_ts, dim=0),
             torch.stack(input_padding, dim=0),
-            torch.tensor(inp_freq, dtype=torch.int32).reshape(-1, 1),
+            torch.tensor(freq[: len(inputs)], dtype=torch.int32).reshape(-1, 1),
         )
 
     def _postprocess_output(
--- a/transformers/models/timm_backbone/configuration_timm_backbone.py
+++ b/transformers/models/timm_backbone/configuration_timm_backbone.py
@@ -14,6 +14,7 @@
 
 """Configuration for Backbone models"""
 
+from ...backbone_utils import BackboneConfigMixin
 from ...configuration_utils import PreTrainedConfig
 from ...utils import logging
 
@@ -21,7 +22,7 @@ from ...utils import logging
 logger = logging.get_logger(__name__)
 
 
-class TimmBackboneConfig(PreTrainedConfig):
+class TimmBackboneConfig(BackboneConfigMixin, PreTrainedConfig):
     r"""
     This is the configuration class to store the configuration for a timm backbone [`TimmBackbone`].
 
@@ -37,8 +38,6 @@ class TimmBackboneConfig(PreTrainedConfig):
             The number of input channels.
         features_only (`bool`, *optional*, defaults to `True`):
             Whether to output only the features or also the logits.
-        use_pretrained_backbone (`bool`, *optional*, defaults to `True`):
-            Whether to use a pretrained backbone.
         out_indices (`list[int]`, *optional*):
             If used as backbone, list of indices of features to output. Can be any of 0, 1, 2, etc. (depending on how
             many stages the model has). Will default to the last stage if unset.
@@ -67,19 +66,46 @@ class TimmBackboneConfig(PreTrainedConfig):
         backbone=None,
         num_channels=3,
         features_only=True,
-        use_pretrained_backbone=True,
         out_indices=None,
         freeze_batch_norm_2d=False,
+        output_stride=None,
         **kwargs,
     ):
-        super().__init__(**kwargs)
         self.backbone = backbone
         self.num_channels = num_channels
         self.features_only = features_only
-        self.use_pretrained_backbone = use_pretrained_backbone
-        self.use_timm_backbone = True
         self.out_indices = out_indices if out_indices is not None else [-1]
+        self.output_stride = output_stride
         self.freeze_batch_norm_2d = freeze_batch_norm_2d
 
+        # self._out_features = kwargs.pop("out_features", None)
+        super().__init__(**kwargs)
+
+    @property
+    def out_indices(self):
+        return self._out_indices
+
+    @out_indices.setter
+    def out_indices(self, out_indices: tuple[int, ...] | list[int]):
+        """
+        Set the out_indices attribute. This will also update the out_features attribute to match the new out_indices.
+        """
+        self._out_indices = list(out_indices) if out_indices is not None else out_indices
+        if getattr(self, "stage_names", None) is not None:
+            self.set_output_features_output_indices(out_features=None, out_indices=out_indices)
+
+    @property
+    def out_features(self):
+        return self._out_features
+
+    @out_features.setter
+    def out_features(self, out_features: list[str]):
+        """
+        Set the out_features attribute. This will also update the out_indices attribute to match the new out_features.
+        """
+        self._out_features = out_features
+        if getattr(self, "stage_names", None) is not None:
+            self.set_output_features_output_indices(out_features=out_features, out_indices=None)
+
 
 __all__ = ["TimmBackboneConfig"]
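A hedged sketch of the new property behavior: `out_indices` is now backed by `_out_indices`, and once `stage_names` is available the setters re-align `out_features`/`out_indices` through `set_output_features_output_indices`. The checkpoint name is illustrative:

```python
from transformers import TimmBackboneConfig

config = TimmBackboneConfig(backbone="resnet18", out_indices=(2, 3))
print(config.out_indices)  # [2, 3] -- the setter normalizes tuples to lists

config.out_indices = [1, 2, 3]  # also re-syncs out_features when stage_names are known
print(config.out_indices)  # [1, 2, 3]
```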
--- a/transformers/models/timm_backbone/modeling_timm_backbone.py
+++ b/transformers/models/timm_backbone/modeling_timm_backbone.py
@@ -17,10 +17,10 @@ import torch
 from torch import Tensor, nn
 
 from ... import initialization as init
+from ...backbone_utils import BackboneMixin
 from ...modeling_outputs import BackboneOutput
 from ...modeling_utils import PreTrainedModel
 from ...utils import is_timm_available, requires_backends
-from ...utils.backbone_utils import BackboneMixin
 from .configuration_timm_backbone import TimmBackboneConfig
 
 
@@ -28,7 +28,7 @@ if is_timm_available():
     import timm
 
 
-class TimmBackbone(PreTrainedModel, BackboneMixin):
+class TimmBackbone(BackboneMixin, PreTrainedModel):
     """
     Wrapper class for timm models to be used as backbones. This enables using the timm models interchangeably with the
     other models in the library keeping the same API.
@@ -41,8 +41,6 @@ class TimmBackbone(PreTrainedModel, BackboneMixin):
 
     def __init__(self, config, **kwargs):
         requires_backends(self, "timm")
-        super().__init__(config)
-        self.config = config
 
         if config.backbone is None:
             raise ValueError("backbone is not set in the config. Please set it to a timm model name.")
@@ -50,25 +48,29 @@ class TimmBackbone(PreTrainedModel, BackboneMixin):
         if hasattr(config, "out_features") and config.out_features is not None:
             raise ValueError("out_features is not supported by TimmBackbone. Please use out_indices instead.")
 
-        pretrained = getattr(config, "use_pretrained_backbone", None)
-        if pretrained is None:
-            raise ValueError("use_pretrained_backbone is not set in the config. Please set it to True or False.")
-
         # We just take the final layer by default. This matches the default for the transformers models.
         out_indices = config.out_indices if getattr(config, "out_indices", None) is not None else (-1,)
+        pretrained = kwargs.pop("pretrained", False)
         in_chans = kwargs.pop("in_chans", config.num_channels)
-        self._backbone = timm.create_model(
+
+        backbone = timm.create_model(
             config.backbone,
             pretrained=pretrained,
             # This is currently not possible for transformer architectures.
             features_only=config.features_only,
             in_chans=in_chans,
             out_indices=out_indices,
+            output_stride=config.output_stride,
             **kwargs,
         )
 
-        # Converts all `BatchNorm2d` and `SyncBatchNorm` or `BatchNormAct2d` and `SyncBatchNormAct2d` layers of provided module into `FrozenBatchNorm2d` or `FrozenBatchNormAct2d` respectively
+        # Needs to be called after creating timm model, because `super()` will try to infer
+        # `stage_names` from model architecture
+        super().__init__(config, timm_backbone=backbone)
+        self._backbone = backbone
+
+        # Converts all `BatchNorm2d` and `SyncBatchNorm` or `BatchNormAct2d` and `SyncBatchNormAct2d` layers of
+        # provided module into `FrozenBatchNorm2d` or `FrozenBatchNormAct2d` respectively
         if getattr(config, "freeze_batch_norm_2d", False):
             self.freeze_batch_norm_2d()
 
@@ -78,7 +80,6 @@ class TimmBackbone(PreTrainedModel, BackboneMixin):
             layer["module"]: str(layer["index"]) for layer in self._backbone.feature_info.get_dicts()
         }
         self._all_layers = {layer["module"]: str(i) for i, layer in enumerate(self._backbone.feature_info.info)}
-        super()._init_backbone(config)
 
         self.post_init()
 
@@ -87,23 +88,16 @@ class TimmBackbone(PreTrainedModel, BackboneMixin):
         requires_backends(cls, ["vision", "timm"])
 
         config = kwargs.pop("config", TimmBackboneConfig())
-
-        use_timm = kwargs.pop("use_timm_backbone", True)
-        if not use_timm:
-            raise ValueError("use_timm_backbone must be True for timm backbones")
-
         num_channels = kwargs.pop("num_channels", config.num_channels)
         features_only = kwargs.pop("features_only", config.features_only)
-        use_pretrained_backbone = kwargs.pop("use_pretrained_backbone", config.use_pretrained_backbone)
         out_indices = kwargs.pop("out_indices", config.out_indices)
         config = TimmBackboneConfig(
             backbone=pretrained_model_name_or_path,
             num_channels=num_channels,
             features_only=features_only,
-            use_pretrained_backbone=use_pretrained_backbone,
             out_indices=out_indices,
         )
-        return super()._from_config(config, **kwargs)
+        return super()._from_config(config, pretrained=True, **kwargs)
 
     def freeze_batch_norm_2d(self):
         timm.utils.model.freeze_batch_norm_2d(self._backbone)
@@ -117,10 +111,13 @@ class TimmBackbone(PreTrainedModel, BackboneMixin):
         assume weights and persistent buffers will be part of checkpoint as we have no way to control timm inits)"""
         if hasattr(module, "init_non_persistent_buffers"):
             module.init_non_persistent_buffers()
-        elif isinstance(module, nn.BatchNorm2d):
-            init.zeros_(module.running_mean)
-            init.ones_(module.running_var)
-            init.zeros_(module.num_batches_tracked)
+        elif isinstance(module, nn.BatchNorm2d):
+            # For non-pretrained models, always initialize buffers (handles both meta device and to_empty() cases)
+            running_mean = getattr(module, "running_mean", None)
+            if running_mean is not None:
+                init.zeros_(module.running_mean)
+                init.ones_(module.running_var)
+                init.zeros_(module.num_batches_tracked)
 
     def forward(
         self,