transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +49 -3
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/cli/serve.py +47 -17
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +83 -7
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +374 -147
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +2 -3
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +55 -24
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +165 -124
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +228 -136
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +3 -14
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +16 -2
- transformers/integrations/accelerate.py +58 -113
- transformers/integrations/aqlm.py +36 -66
- transformers/integrations/awq.py +46 -515
- transformers/integrations/bitnet.py +47 -105
- transformers/integrations/bitsandbytes.py +91 -202
- transformers/integrations/deepspeed.py +18 -2
- transformers/integrations/eetq.py +84 -81
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +241 -208
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +37 -62
- transformers/integrations/hub_kernels.py +65 -8
- transformers/integrations/integration_utils.py +45 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +28 -74
- transformers/integrations/peft.py +12 -29
- transformers/integrations/quanto.py +77 -56
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +42 -90
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +40 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +74 -19
- transformers/modeling_rope_utils.py +107 -86
- transformers/modeling_utils.py +611 -527
- transformers/models/__init__.py +22 -0
- transformers/models/afmoe/modeling_afmoe.py +10 -19
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +14 -6
- transformers/models/altclip/modeling_altclip.py +11 -3
- transformers/models/apertus/modeling_apertus.py +8 -6
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +5 -5
- transformers/models/aria/modeling_aria.py +12 -8
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +38 -0
- transformers/models/auto/feature_extraction_auto.py +9 -3
- transformers/models/auto/image_processing_auto.py +5 -2
- transformers/models/auto/modeling_auto.py +37 -0
- transformers/models/auto/processing_auto.py +22 -10
- transformers/models/auto/tokenization_auto.py +147 -566
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +21 -21
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +11 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +14 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +9 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +16 -3
- transformers/models/bitnet/modeling_bitnet.py +5 -5
- transformers/models/blenderbot/modeling_blenderbot.py +12 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +10 -0
- transformers/models/blip_2/modeling_blip_2.py +4 -1
- transformers/models/bloom/modeling_bloom.py +17 -44
- transformers/models/blt/modeling_blt.py +164 -4
- transformers/models/blt/modular_blt.py +170 -5
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +11 -1
- transformers/models/bros/modeling_bros.py +12 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +11 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +11 -5
- transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +30 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +9 -0
- transformers/models/clvp/modeling_clvp.py +19 -3
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +5 -4
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +8 -7
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
- transformers/models/convbert/modeling_convbert.py +9 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +7 -4
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +15 -2
- transformers/models/cvt/modeling_cvt.py +7 -1
- transformers/models/cwm/modeling_cwm.py +5 -5
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +48 -39
- transformers/models/d_fine/modular_d_fine.py +16 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +5 -1
- transformers/models/dac/modeling_dac.py +6 -6
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +3 -3
- transformers/models/deberta/modeling_deberta.py +7 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
- transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
- transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +13 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +16 -4
- transformers/models/dia/modular_dia.py +11 -1
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +5 -5
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +3 -4
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +18 -12
- transformers/models/dots1/modeling_dots1.py +23 -11
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +6 -3
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +7 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +12 -6
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +60 -16
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +11 -5
- transformers/models/evolla/modeling_evolla.py +13 -5
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +3 -3
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +9 -4
- transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
- transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
- transformers/models/fast_vlm/__init__.py +27 -0
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +21 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +10 -2
- transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
- transformers/models/florence2/modeling_florence2.py +22 -4
- transformers/models/florence2/modular_florence2.py +15 -1
- transformers/models/fnet/modeling_fnet.py +14 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +19 -3
- transformers/models/gemma/modeling_gemma.py +14 -16
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +5 -5
- transformers/models/gemma2/modular_gemma2.py +3 -2
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +42 -91
- transformers/models/gemma3/modular_gemma3.py +38 -87
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +65 -218
- transformers/models/gemma3n/modular_gemma3n.py +68 -68
- transformers/models/git/modeling_git.py +183 -126
- transformers/models/glm/modeling_glm.py +5 -5
- transformers/models/glm4/modeling_glm4.py +5 -5
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +18 -8
- transformers/models/glm4v/modular_glm4v.py +17 -7
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
- transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +13 -6
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
- transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
- transformers/models/gptj/modeling_gptj.py +18 -6
- transformers/models/granite/modeling_granite.py +5 -5
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +6 -9
- transformers/models/granitemoe/modular_granitemoe.py +1 -4
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
- transformers/models/groupvit/modeling_groupvit.py +9 -1
- transformers/models/helium/modeling_helium.py +5 -4
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +7 -0
- transformers/models/hubert/modular_hubert.py +5 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +22 -0
- transformers/models/idefics/modeling_idefics.py +15 -21
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +11 -3
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +13 -12
- transformers/models/internvl/modular_internvl.py +7 -13
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +25 -20
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +16 -7
- transformers/models/janus/modular_janus.py +17 -7
- transformers/models/jetmoe/modeling_jetmoe.py +4 -4
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +15 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +248 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +730 -0
- transformers/models/lasr/modular_lasr.py +576 -0
- transformers/models/lasr/processing_lasr.py +94 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +10 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +12 -0
- transformers/models/levit/modeling_levit.py +21 -0
- transformers/models/lfm2/modeling_lfm2.py +5 -6
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +23 -15
- transformers/models/llama/modeling_llama.py +5 -5
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +11 -6
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
- transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -4
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +14 -0
- transformers/models/mamba/modeling_mamba.py +16 -23
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +8 -0
- transformers/models/markuplm/modeling_markuplm.py +9 -8
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +11 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +11 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +14 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +28 -5
- transformers/models/minimax/modeling_minimax.py +19 -6
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +5 -5
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +5 -4
- transformers/models/mistral/modeling_mistral.py +5 -4
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +15 -7
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +15 -4
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +7 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
- transformers/models/modernbert/modeling_modernbert.py +16 -2
- transformers/models/modernbert/modular_modernbert.py +14 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
- transformers/models/moonshine/modeling_moonshine.py +5 -3
- transformers/models/moshi/modeling_moshi.py +26 -53
- transformers/models/mpnet/modeling_mpnet.py +7 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +10 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +7 -10
- transformers/models/musicgen/modeling_musicgen.py +7 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
- transformers/models/mvp/modeling_mvp.py +14 -0
- transformers/models/nanochat/modeling_nanochat.py +5 -5
- transformers/models/nemotron/modeling_nemotron.py +7 -5
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +15 -68
- transformers/models/nystromformer/modeling_nystromformer.py +13 -0
- transformers/models/olmo/modeling_olmo.py +5 -5
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +5 -6
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +5 -5
- transformers/models/olmoe/modeling_olmoe.py +15 -7
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +11 -39
- transformers/models/openai/modeling_openai.py +15 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +11 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +11 -3
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +14 -6
- transformers/models/parakeet/modular_parakeet.py +7 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
- transformers/models/patchtst/modeling_patchtst.py +25 -6
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +8 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +13 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +3 -2
- transformers/models/phi/modeling_phi.py +5 -6
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +3 -2
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +15 -7
- transformers/models/phimoe/modular_phimoe.py +3 -3
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +3 -2
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +13 -0
- transformers/models/plbart/modular_plbart.py +8 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +13 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +5 -1
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +5 -5
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
- transformers/models/qwen3/modeling_qwen3.py +5 -5
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
- transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
- transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +8 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
- transformers/models/reformer/modeling_reformer.py +13 -1
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +10 -1
- transformers/models/rembert/modeling_rembert.py +13 -1
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +19 -5
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +6 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +2 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +7 -3
- transformers/models/sam2/modular_sam2.py +7 -3
- transformers/models/sam2_video/modeling_sam2_video.py +52 -43
- transformers/models/sam2_video/modular_sam2_video.py +32 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +100 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +4 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
- transformers/models/seed_oss/modeling_seed_oss.py +3 -3
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +6 -3
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +67 -41
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +5 -5
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
- transformers/models/speecht5/modeling_speecht5.py +41 -1
- transformers/models/splinter/modeling_splinter.py +12 -3
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +8 -0
- transformers/models/stablelm/modeling_stablelm.py +4 -2
- transformers/models/starcoder2/modeling_starcoder2.py +5 -4
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +6 -0
- transformers/models/swin/modeling_swin.py +20 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +51 -33
- transformers/models/swinv2/modeling_swinv2.py +45 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +8 -7
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +6 -6
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +5 -1
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +14 -0
- transformers/models/timesfm/modular_timesfm.py +14 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
- transformers/models/trocr/modeling_trocr.py +3 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +6 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +7 -7
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +7 -6
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +13 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +8 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +5 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +11 -3
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +5 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +11 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +18 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +10 -1
- transformers/models/zamba/modeling_zamba.py +4 -1
- transformers/models/zamba2/modeling_zamba2.py +7 -4
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +8 -0
- transformers/pipelines/__init__.py +11 -9
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +2 -10
- transformers/pipelines/document_question_answering.py +4 -2
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +133 -50
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +44 -174
- transformers/quantizers/quantizer_aqlm.py +2 -23
- transformers/quantizers/quantizer_auto_round.py +2 -12
- transformers/quantizers/quantizer_awq.py +20 -89
- transformers/quantizers/quantizer_bitnet.py +4 -14
- transformers/quantizers/quantizer_bnb_4bit.py +18 -155
- transformers/quantizers/quantizer_bnb_8bit.py +24 -110
- transformers/quantizers/quantizer_compressed_tensors.py +2 -9
- transformers/quantizers/quantizer_eetq.py +16 -74
- transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
- transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
- transformers/quantizers/quantizer_fp_quant.py +52 -82
- transformers/quantizers/quantizer_gptq.py +8 -28
- transformers/quantizers/quantizer_higgs.py +42 -60
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +14 -194
- transformers/quantizers/quantizer_quanto.py +35 -79
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +4 -12
- transformers/quantizers/quantizer_torchao.py +50 -325
- transformers/quantizers/quantizer_vptq.py +4 -27
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +324 -47
- transformers/tokenization_mistral_common.py +7 -2
- transformers/tokenization_utils_base.py +116 -224
- transformers/tokenization_utils_tokenizers.py +190 -106
- transformers/trainer.py +51 -32
- transformers/trainer_callback.py +8 -0
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +74 -38
- transformers/utils/__init__.py +7 -4
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +35 -25
- transformers/utils/generic.py +47 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +112 -25
- transformers/utils/kernel_config.py +74 -19
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +78 -245
- transformers/video_processing_utils.py +17 -14
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -24,7 +24,8 @@ from ... import initialization as init
 from ...cache_utils import Cache
 from ...modeling_rope_utils import RopeParameters, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
-from ...utils import logging
+from ...utils import is_grouped_mm_available, logging
+from ...utils.generic import maybe_autocast
 from ..llama.configuration_llama import LlamaConfig
 from ..llama.modeling_llama import (
     LlamaDecoderLayer,
@@ -303,7 +304,7 @@ class DeepseekV2RotaryEmbedding(LlamaRotaryEmbedding):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.to(x.device) @ position_ids_expanded).transpose(1, 2)
             freqs_cis = torch.polar(torch.ones_like(freqs), freqs)  # Convert to complex representation
             freqs_cis = freqs_cis * self.attention_scaling
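Note: this hunk (and the matching one in the DeepseekV3 rotary embedding below) replaces the bare `torch.autocast` context with a `maybe_autocast` helper imported from `...utils.generic`. The sketch below is only an illustration of what such a guarded wrapper could look like, assuming it falls back to a no-op context when autocast is not usable for the given device; the actual transformers implementation may differ.

```python
from contextlib import nullcontext

import torch


def maybe_autocast_sketch(device_type: str, enabled: bool = True, **kwargs):
    """Illustrative only: use torch.autocast when the device supports it, else a no-op context."""
    try:
        return torch.autocast(device_type=device_type, enabled=enabled, **kwargs)
    except RuntimeError:
        # torch.autocast raises for device types it does not recognize
        return nullcontext()


# Usage mirroring the diff: force full precision for the rotary frequency computation
with maybe_autocast_sketch(device_type="cpu", enabled=False):
    pass  # ... compute freqs in float32 here
```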
@@ -368,7 +369,6 @@ class DeepseekV2Attention(nn.Module):
         past_key_values: Optional[Cache] = None,
         cache_position: Optional[torch.LongTensor] = None,
         position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
-        position_ids: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         batch_size, seq_length = hidden_states.shape[:-1]
@@ -437,7 +437,9 @@ class DeepseekV2DecoderLayer(LlamaDecoderLayer):


 class DeepseekV2PreTrainedModel(LlamaPreTrainedModel):
-    _can_compile_fullgraph =
+    _can_compile_fullgraph = (
+        is_grouped_mm_available()
+    )  # https://huggingface.co/docs/transformers/experts_interface#torchcompile

     @torch.no_grad()
     def _init_weights(self, module):
@@ -16,7 +16,7 @@ from ... import initialization as init
 from ...activations import ACT2FN
 from ...cache_utils import Cache, DynamicCache
 from ...generation import GenerationMixin
-from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
+from ...integrations import use_experts_implementation, use_kernel_forward_from_hub, use_kernel_func_from_hub
 from ...masking_utils import create_causal_mask
 from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import (
@@ -28,8 +28,8 @@ from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
-from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import check_model_inputs
+from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_grouped_mm_available
+from ...utils.generic import check_model_inputs, maybe_autocast
 from .configuration_deepseek_v3 import DeepseekV3Config


@@ -71,7 +71,7 @@ class DeepseekV3RotaryEmbedding(nn.Module):
         inv_freq, self.attention_scaling = rope_init_fn(self.config, device)

         self.register_buffer("inv_freq", inv_freq, persistent=False)
-        self.original_inv_freq = self.inv_freq
+        self.register_buffer("original_inv_freq", inv_freq.clone(), persistent=False)

     @staticmethod
     def compute_default_rope_parameters(
@@ -110,7 +110,7 @@ class DeepseekV3RotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -150,6 +150,7 @@ class DeepseekV3TopkRouter(nn.Module):
         return router_logits


+@use_experts_implementation
 class DeepseekV3NaiveMoe(nn.Module):
     """Collection of expert weights stored as 3D tensors."""

@@ -157,7 +158,7 @@ class DeepseekV3NaiveMoe(nn.Module):
         super().__init__()
         self.num_experts = config.num_local_experts
         self.hidden_dim = config.hidden_size
-        self.intermediate_dim = config.intermediate_size
+        self.intermediate_dim = config.moe_intermediate_size
         self.gate_up_proj = nn.Parameter(torch.empty(self.num_experts, 2 * self.intermediate_dim, self.hidden_dim))
         self.down_proj = nn.Parameter(torch.empty(self.num_experts, self.hidden_dim, self.intermediate_dim))
         self.act_fn = ACT2FN[config.hidden_act]
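The one-line fix above matters for parameter shapes: the 3D expert tensors are allocated from `intermediate_dim`, so using the dense-FFN width instead of the per-expert MoE width would produce weights that cannot match the checkpoint. A minimal shape check with made-up sizes (not the real DeepSeek config values):

```python
import torch
from torch import nn

num_experts, hidden_size, moe_intermediate_size = 8, 1024, 256  # made-up sizes

# Per-expert fused gate/up projection and down projection, as in the hunk above
gate_up_proj = nn.Parameter(torch.empty(num_experts, 2 * moe_intermediate_size, hidden_size))
down_proj = nn.Parameter(torch.empty(num_experts, hidden_size, moe_intermediate_size))

print(gate_up_proj.shape)  # torch.Size([8, 512, 1024])
print(down_proj.shape)     # torch.Size([8, 1024, 256])
```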
@@ -542,18 +543,22 @@ class DeepseekV3PreTrainedModel(PreTrainedModel):
     _supports_flash_attn = True
     _supports_sdpa = True
     _supports_flex_attn = True
-    _can_compile_fullgraph =
+    _can_compile_fullgraph = (
+        is_grouped_mm_available()
+    )  # https://huggingface.co/docs/transformers/experts_interface#torchcompile
     _supports_attention_backend = True
     _can_record_outputs = {
         "hidden_states": DeepseekV3DecoderLayer,
         "attentions": DeepseekV3Attention,
     }
+    _keep_in_fp32_modules_strict = ["e_score_correction_bias"]

     @torch.no_grad()
     def _init_weights(self, module):
         super()._init_weights(module)
         if isinstance(module, DeepseekV3TopkRouter):
             init.normal_(module.weight, mean=0.0, std=self.config.initializer_range)
+            init.zeros_(module.e_score_correction_bias)
         elif isinstance(module, DeepseekV3NaiveMoe):
             init.normal_(module.gate_up_proj, mean=0.0, std=self.config.initializer_range)
             init.normal_(module.down_proj, mean=0.0, std=self.config.initializer_range)
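For context on the `_can_compile_fullgraph` change: full-graph `torch.compile` support is now only advertised when the grouped-matmul experts path is available (see the `experts_interface#torchcompile` link in the diff). A rough sketch of that kind of capability gate, with a hypothetical probe for `torch._grouped_mm` (the real `is_grouped_mm_available` helper may check something else, such as the installed PyTorch version):

```python
import torch


def is_grouped_mm_available_sketch() -> bool:
    # Hypothetical probe: newer PyTorch builds expose a grouped GEMM primitive
    return hasattr(torch, "_grouped_mm")


class ExamplePreTrainedModel:
    # Only claim full-graph compilability when the compile-friendly grouped-mm
    # experts implementation can actually be used; otherwise assume the naive
    # experts loop would break the graph.
    _can_compile_fullgraph = is_grouped_mm_available_sketch()
```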
@@ -12,7 +12,7 @@ from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import GenericForSequenceClassification, GenericForTokenClassification
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
-from ...utils import logging
+from ...utils import is_grouped_mm_available, logging
 from ..llama.modeling_llama import (
     LlamaDecoderLayer,
     LlamaForCausalLM,
@@ -107,6 +107,7 @@ class DeepseekV3NaiveMoe(MixtralExperts):
|
|
|
107
107
|
def __init__(self, config):
|
|
108
108
|
super().__init__(config)
|
|
109
109
|
self.num_experts = config.num_local_experts
|
|
110
|
+
self.intermediate_dim = config.moe_intermediate_size
|
|
110
111
|
|
|
111
112
|
|
|
112
113
|
class DeepseekV3MoE(nn.Module):
|
|
@@ -303,13 +304,17 @@ class DeepseekV3DecoderLayer(LlamaDecoderLayer):
|
|
|
303
304
|
|
|
304
305
|
|
|
305
306
|
class DeepseekV3PreTrainedModel(LlamaPreTrainedModel):
|
|
306
|
-
_can_compile_fullgraph =
|
|
307
|
+
_can_compile_fullgraph = (
|
|
308
|
+
is_grouped_mm_available()
|
|
309
|
+
) # https://huggingface.co/docs/transformers/experts_interface#torchcompile
|
|
310
|
+
_keep_in_fp32_modules_strict = ["e_score_correction_bias"]
|
|
307
311
|
|
|
308
312
|
@torch.no_grad()
|
|
309
313
|
def _init_weights(self, module):
|
|
310
314
|
PreTrainedModel._init_weights(self, module)
|
|
311
315
|
if isinstance(module, DeepseekV3TopkRouter):
|
|
312
316
|
init.normal_(module.weight, mean=0.0, std=self.config.initializer_range)
|
|
317
|
+
init.zeros_(module.e_score_correction_bias)
|
|
313
318
|
elif isinstance(module, DeepseekV3NaiveMoe):
|
|
314
319
|
init.normal_(module.gate_up_proj, mean=0.0, std=self.config.initializer_range)
|
|
315
320
|
init.normal_(module.down_proj, mean=0.0, std=self.config.initializer_range)
|
|
@@ -171,7 +171,6 @@ class DeepseekVLImageProcessorFast(BaseImageProcessorFast):
|
|
|
171
171
|
processed_images_grouped[shape] = stacked_images
|
|
172
172
|
|
|
173
173
|
processed_images = reorder_images(processed_images_grouped, grouped_images_index)
|
|
174
|
-
processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
|
|
175
174
|
|
|
176
175
|
return BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
|
|
177
176
|
|
|
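The `torch.stack` removal above leans on `BatchFeature` converting the list of processed tensors itself when `tensor_type` is set (see the `feature_extraction_utils.py` changes listed at the top of this diff). A rough sketch of that reliance, under the assumption that the updated `BatchFeature` stacks lists of equally shaped tensors for `tensor_type="pt"`:

```python
import torch
from transformers.feature_extraction_utils import BatchFeature

images = [torch.zeros(3, 4, 4), torch.zeros(3, 4, 4)]  # a list, as returned by reorder_images
batch = BatchFeature(data={"pixel_values": images}, tensor_type="pt")
print(type(batch["pixel_values"]))  # expected: a single batched torch.Tensor under the assumption above
```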
@@ -196,7 +196,7 @@ class DeepseekVLModel(DeepseekVLPreTrainedModel):
 use_cache: Optional[bool] = None,
 logits_to_keep: Union[int, torch.Tensor] = 0,
 **kwargs,
-):
+) -> DeepseekVLBaseModelOutputWithPast:
 if (input_ids is None) ^ (inputs_embeds is not None):
 raise ValueError(
 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -268,7 +268,7 @@ class DeepseekVLForConditionalGeneration(DeepseekVLPreTrainedModel, GenerationMi
 use_cache: Optional[bool] = None,
 logits_to_keep: Union[int, torch.Tensor] = 0,
 **kwargs: Unpack[TransformersKwargs],
-):
+) -> DeepseekVLCausalLMOutputWithPast:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
@@ -315,6 +315,7 @@ class DeepseekVLForConditionalGeneration(DeepseekVLPreTrainedModel, GenerationMi
 inputs_embeds=None,
 cache_position=None,
 logits_to_keep=None,
+is_first_iteration=False,
 **kwargs,
 ):
 # Overwritten -- extra custom processing
@@ -326,12 +327,15 @@ class DeepseekVLForConditionalGeneration(DeepseekVLPreTrainedModel, GenerationMi
 attention_mask=attention_mask,
 cache_position=cache_position,
 logits_to_keep=logits_to_keep,
+is_first_iteration=is_first_iteration,
 **kwargs,
 )
 
-#
-#
-
+# Pixel values are used only in the first iteration if available
+# In subsquent iterations, they are already merged with text and cached
+# NOTE: first iteration doesn't have to be prefill, it can be the first
+# iteration with a question and cached system prompt (continue generate from cache)
+if is_first_iteration or not kwargs.get("use_cache", True):
 model_inputs["pixel_values"] = pixel_values
 
 return model_inputs
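The same `is_first_iteration` gating is applied to every vision-language model touched below. A stripped-down, hypothetical version of the pattern (illustrative names, not the library's exact internals) looks like this:

```python
def gate_vision_inputs(model_inputs, pixel_values, is_first_iteration=False, **kwargs):
    """Forward vision inputs only on the first generation step, or when caching is off."""
    # After the first step the image features are already merged into the cache,
    # so re-sending pixel_values would be redundant work.
    if is_first_iteration or not kwargs.get("use_cache", True):
        model_inputs["pixel_values"] = pixel_values
    return model_inputs


inputs = gate_vision_inputs({}, pixel_values="...", is_first_iteration=True)
print(inputs)  # {'pixel_values': '...'}
```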
@@ -134,6 +134,9 @@ class DeepseekVLAligner(nn.Module):
 class DeepseekVLPreTrainedModel(JanusPreTrainedModel):
 _no_split_modules = ["LlamaDecoderLayer"]
 
+def _init_weights(self, module):
+raise AttributeError("No need to inherit!")
+
 
 @auto_docstring
 class DeepseekVLModel(JanusModel):
@@ -207,9 +207,6 @@ class DeepseekVLHybridImageProcessorFast(BaseImageProcessorFast):
 )
 high_res_processed_images_grouped[shape] = stacked_high_res_images
 high_res_processed_images = reorder_images(high_res_processed_images_grouped, grouped_high_res_images_index)
-high_res_processed_images = (
-torch.stack(high_res_processed_images, dim=0) if return_tensors else high_res_processed_images
-)
 
 resized_images_grouped = {}
 for shape, stacked_high_res_padded_images in high_res_padded_images.items():
@@ -233,7 +230,6 @@ class DeepseekVLHybridImageProcessorFast(BaseImageProcessorFast):
 )
 processed_images_grouped[shape] = stacked_images
 processed_images = reorder_images(processed_images_grouped, grouped_resized_images_index)
-processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
 
 return BatchFeature(
 data={"pixel_values": processed_images, "high_res_pixel_values": high_res_processed_images},
@@ -314,7 +314,7 @@ class DeepseekVLHybridModel(DeepseekVLHybridPreTrainedModel):
 use_cache: Optional[bool] = None,
 logits_to_keep: Union[int, torch.Tensor] = 0,
 **kwargs,
-):
+) -> DeepseekVLHybridBaseModelOutputWithPast:
 if (input_ids is None) ^ (inputs_embeds is not None):
 raise ValueError(
 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -424,7 +424,7 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLHybridPreTrainedModel,
 use_cache: Optional[bool] = None,
 logits_to_keep: Union[int, torch.Tensor] = 0,
 **kwargs: Unpack[TransformersKwargs],
-):
+) -> DeepseekVLHybridCausalLMOutputWithPast:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
@@ -473,6 +473,7 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLHybridPreTrainedModel,
 attention_mask=None,
 cache_position=None,
 logits_to_keep=None,
+is_first_iteration=False,
 **kwargs,
 ):
 model_inputs = super().prepare_inputs_for_generation(
@@ -482,12 +483,15 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLHybridPreTrainedModel,
 attention_mask=attention_mask,
 cache_position=cache_position,
 logits_to_keep=logits_to_keep,
+is_first_iteration=is_first_iteration,
 **kwargs,
 )
 
-if
-#
-#
+if is_first_iteration or not kwargs.get("use_cache", True):
+# Pixel values are used only in the first iteration if available
+# In subsquent iterations, they are already merged with text and cached
+# NOTE: first iteration doesn't have to be prefill, it can be the first
+# iteration with a question and cached system prompt (continue generate from cache)
 model_inputs["pixel_values"] = pixel_values
 model_inputs["high_res_pixel_values"] = high_res_pixel_values
 
@@ -297,7 +297,7 @@ class DeepseekVLHybridModel(DeepseekVLModel):
 use_cache: Optional[bool] = None,
 logits_to_keep: Union[int, torch.Tensor] = 0,
 **kwargs,
-):
+) -> DeepseekVLHybridBaseModelOutputWithPast:
 if (input_ids is None) ^ (inputs_embeds is not None):
 raise ValueError(
 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -361,7 +361,7 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLForConditionalGeneratio
 use_cache: Optional[bool] = None,
 logits_to_keep: Union[int, torch.Tensor] = 0,
 **kwargs: Unpack[TransformersKwargs],
-):
+) -> DeepseekVLHybridCausalLMOutputWithPast:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
 Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
@@ -410,6 +410,7 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLForConditionalGeneratio
 attention_mask=None,
 cache_position=None,
 logits_to_keep=None,
+is_first_iteration=False,
 **kwargs,
 ):
 model_inputs = super().prepare_inputs_for_generation(
@@ -419,12 +420,15 @@ class DeepseekVLHybridForConditionalGeneration(DeepseekVLForConditionalGeneratio
 attention_mask=attention_mask,
 cache_position=cache_position,
 logits_to_keep=logits_to_keep,
+is_first_iteration=is_first_iteration,
 **kwargs,
 )
 
-if
-#
-#
+if is_first_iteration or not kwargs.get("use_cache", True):
+# Pixel values are used only in the first iteration if available
+# In subsquent iterations, they are already merged with text and cached
+# NOTE: first iteration doesn't have to be prefill, it can be the first
+# iteration with a question and cached system prompt (continue generate from cache)
 model_inputs["pixel_values"] = pixel_values
 model_inputs["high_res_pixel_values"] = high_res_pixel_values
 
@@ -888,9 +892,6 @@ class DeepseekVLHybridImageProcessorFast(DeepseekVLImageProcessorFast):
 )
 high_res_processed_images_grouped[shape] = stacked_high_res_images
 high_res_processed_images = reorder_images(high_res_processed_images_grouped, grouped_high_res_images_index)
-high_res_processed_images = (
-torch.stack(high_res_processed_images, dim=0) if return_tensors else high_res_processed_images
-)
 
 resized_images_grouped = {}
 for shape, stacked_high_res_padded_images in high_res_padded_images.items():
@@ -914,7 +915,6 @@ class DeepseekVLHybridImageProcessorFast(DeepseekVLImageProcessorFast):
 )
 processed_images_grouped[shape] = stacked_images
 processed_images = reorder_images(processed_images_grouped, grouped_resized_images_index)
-processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
 
 return BatchFeature(
 data={"pixel_values": processed_images, "high_res_pixel_values": high_res_processed_images},
@@ -37,7 +37,7 @@ class DeformableDetrConfig(PreTrainedConfig):
 use_timm_backbone (`bool`, *optional*, defaults to `True`):
 Whether or not to use the `timm` library for the backbone. If set to `False`, will use the [`AutoBackbone`]
 API.
-backbone_config (`PreTrainedConfig
+backbone_config (`Union[dict, "PreTrainedConfig"]`, *optional*, defaults to `ResNetConfig()`):
 The configuration of the backbone model. Only used in case `use_timm_backbone` is set to `False` in which
 case it will default to `ResNetConfig()`.
 num_channels (`int`, *optional*, defaults to 3):
@@ -269,8 +269,8 @@ class DeformableDetrConfig(PreTrainedConfig):
 self.eos_coefficient = eos_coefficient
 self.focal_alpha = focal_alpha
 self.disable_custom_kernels = disable_custom_kernels
+
 super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs)
-self.tie_encoder_decoder = True
 
 
 __all__ = ["DeformableDetrConfig"]
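With the hard-coded `self.tie_encoder_decoder = True` gone, the attribute falls back to the base-config default and stays settable through kwargs. A small sketch of what that implies (expected behaviour, not a test shipped with the package):

```python
from transformers import DeformableDetrConfig

config = DeformableDetrConfig()
print(config.tie_encoder_decoder)  # expected: False (base-config default, no longer forced to True)

config_tied = DeformableDetrConfig(tie_encoder_decoder=True)
print(config_tied.tie_encoder_decoder)  # True, now opt-in via kwargs
```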
@@ -956,7 +956,7 @@ class DeformableDetrPreTrainedModel(PreTrainedModel):
 init.constant_(module.value_proj.bias, 0.0)
 init.xavier_uniform_(module.output_proj.weight)
 init.constant_(module.output_proj.bias, 0.0)
-elif isinstance(module, (nn.Linear, nn.Conv2d
+elif isinstance(module, (nn.Linear, nn.Conv2d)):
 init.normal_(module.weight, mean=0.0, std=std)
 if module.bias is not None:
 init.zeros_(module.bias)
@@ -1036,6 +1036,7 @@ class DeformableDetrEncoder(DeformableDetrPreTrainedModel):
 output_attentions=None,
 output_hidden_states=None,
 return_dict=None,
+**kwargs,
 ):
 r"""
 Args:
@@ -1151,6 +1152,7 @@ class DeformableDetrDecoder(DeformableDetrPreTrainedModel):
 output_attentions=None,
 output_hidden_states=None,
 return_dict=None,
+**kwargs,
 ):
 r"""
 Args:
@@ -1468,6 +1470,7 @@ class DeformableDetrModel(DeformableDetrPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.FloatTensor], DeformableDetrModelOutput]:
 r"""
 decoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_queries)`, *optional*):
@@ -1745,6 +1748,7 @@ class DeformableDetrForObjectDetection(DeformableDetrPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.FloatTensor], DeformableDetrObjectDetectionOutput]:
 r"""
 decoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_queries)`, *optional*):
@@ -34,9 +34,8 @@ class DepthAnythingConfig(PreTrainedConfig):
 documentation from [`PreTrainedConfig`] for more information.
 
 Args:
-backbone_config (`Union[dict
-The configuration of the backbone model.
-leverage the [`AutoBackbone`] API.
+backbone_config (`Union[dict, "PreTrainedConfig"]`, *optional*, defaults to `Dinov2Config()`):
+The configuration of the backbone model.
 backbone (`str`, *optional*):
 Name of backbone to use when `backbone_config` is `None`. If `use_pretrained_backbone` is `True`, this
 will load the corresponding pretrained weights from the timm or transformers library. If `use_pretrained_backbone`
@@ -337,6 +337,7 @@ class DepthAnythingForDepthEstimation(DepthAnythingPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.Tensor], DepthEstimatorOutput]:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
@@ -94,7 +94,6 @@ class DepthProImageProcessorFast(BaseImageProcessorFast):
 processed_images_grouped[shape] = stacked_images
 
 processed_images = reorder_images(processed_images_grouped, grouped_images_index)
-processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
 
 return BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
 
@@ -645,6 +645,7 @@ class DepthProModel(DepthProPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple, DepthProOutput]:
 r"""
 Examples:
@@ -1027,6 +1028,7 @@ class DepthProForDepthEstimation(DepthProPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.Tensor], DepthProDepthEstimatorOutput]:
 r"""
 labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
@@ -37,7 +37,7 @@ class DetrConfig(PreTrainedConfig):
 use_timm_backbone (`bool`, *optional*, defaults to `True`):
 Whether or not to use the `timm` library for the backbone. If set to `False`, will use the [`AutoBackbone`]
 API.
-backbone_config (`PreTrainedConfig
+backbone_config (`Union[dict, "PreTrainedConfig"]`, *optional*, defaults to `ResNetConfig()`):
 The configuration of the backbone model. Only used in case `use_timm_backbone` is set to `False` in which
 case it will default to `ResNetConfig()`.
 num_channels (`int`, *optional*, defaults to 3):
@@ -741,7 +741,7 @@ class DetrPreTrainedModel(PreTrainedModel):
 elif isinstance(module, DetrLearnedPositionEmbedding):
 init.uniform_(module.row_embeddings.weight)
 init.uniform_(module.column_embeddings.weight)
-if isinstance(module, (nn.Linear, nn.Conv2d
+if isinstance(module, (nn.Linear, nn.Conv2d)):
 init.normal_(module.weight, mean=0.0, std=std)
 if module.bias is not None:
 init.zeros_(module.bias)
@@ -750,6 +750,9 @@ class DetrPreTrainedModel(PreTrainedModel):
 # Here we need the check explicitly, as we slice the weight in the `zeros_` call, so it looses the flag
 if module.padding_idx is not None and not getattr(module.weight, "_is_hf_initialized", False):
 init.zeros_(module.weight[module.padding_idx])
+elif isinstance(module, (nn.LayerNorm, nn.GroupNorm)):
+init.ones_(module.weight)
+init.zeros_(module.bias)
 
 
 class DetrEncoder(DetrPreTrainedModel):
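The new branch simply resets affine norm parameters to their identity values. A standalone equivalent of those three added lines, outside the `_init_weights` machinery:

```python
import torch.nn as nn
from torch.nn import init

for module in (nn.LayerNorm(256), nn.GroupNorm(num_groups=8, num_channels=256)):
    init.ones_(module.weight)   # scale back to identity
    init.zeros_(module.bias)    # no shift
    print(module.weight.mean().item(), module.bias.abs().sum().item())  # 1.0 0.0
```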
@@ -788,6 +791,7 @@ class DetrEncoder(DetrPreTrainedModel):
 output_attentions=None,
 output_hidden_states=None,
 return_dict=None,
+**kwargs,
 ):
 r"""
 Args:
@@ -905,6 +909,7 @@ class DetrDecoder(DetrPreTrainedModel):
 output_attentions=None,
 output_hidden_states=None,
 return_dict=None,
+**kwargs,
 ):
 r"""
 Args:
@@ -1078,6 +1083,7 @@ class DetrModel(DetrPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.FloatTensor], DetrModelOutput]:
 r"""
 decoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_queries)`, *optional*):
@@ -1258,6 +1264,7 @@ class DetrForObjectDetection(DetrPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.FloatTensor], DetrObjectDetectionOutput]:
 r"""
 decoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_queries)`, *optional*):
@@ -1404,6 +1411,7 @@ class DetrForSegmentation(DetrPreTrainedModel):
 output_attentions: Optional[bool] = None,
 output_hidden_states: Optional[bool] = None,
 return_dict: Optional[bool] = None,
+**kwargs,
 ) -> Union[tuple[torch.FloatTensor], DetrSegmentationOutput]:
 r"""
 decoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_queries)`, *optional*):
@@ -1452,8 +1460,12 @@ class DetrForSegmentation(DetrPreTrainedModel):
 
 >>> # A tensor of shape (height, width) where each value denotes a segment id, filled with -1 if no segment is found
 >>> panoptic_seg = result[0]["segmentation"]
+>>> panoptic_seg.shape
+torch.Size([300, 500])
 >>> # Get prediction score and segment_id to class_id mapping of each segment
 >>> panoptic_segments_info = result[0]["segments_info"]
+>>> len(panoptic_segments_info)
+5
 ```"""
 
 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -110,11 +110,9 @@ class DiaGenerationMixin(GenerationMixin):
 return merged_processors
 
 def _prepare_generation_config(
-self, generation_config: Optional[GenerationConfig],
+self, generation_config: Optional[GenerationConfig], **kwargs: Any
 ) -> tuple[GenerationConfig, dict]:
-generation_config, model_kwargs = super()._prepare_generation_config(
-generation_config, use_model_defaults, **kwargs
-)
+generation_config, model_kwargs = super()._prepare_generation_config(generation_config, **kwargs)
 
 # We allow generation up to max length + max delay pattern
 # (will revert back to max length after generation)
@@ -260,7 +258,6 @@ class DiaGenerationMixin(GenerationMixin):
 streamer: Optional["BaseStreamer"] = None,
 negative_prompt_ids: Optional[torch.Tensor] = None,
 negative_prompt_attention_mask: Optional[torch.Tensor] = None,
-use_model_defaults: Optional[bool] = None,
 custom_generate: Optional[str] = None,
 **kwargs,
 ):
@@ -273,9 +270,7 @@ class DiaGenerationMixin(GenerationMixin):
 assistant_model,
 streamer,
 )
-generation_config, model_kwargs = self._prepare_generation_config(
-generation_config, use_model_defaults, **kwargs
-)
+generation_config, model_kwargs = self._prepare_generation_config(generation_config, **kwargs)
 generation_mode = generation_config.get_generation_mode(assistant_model)
 
 if generation_mode not in (GenerationMode.SAMPLE, GenerationMode.GREEDY_SEARCH):
@@ -425,7 +420,6 @@ class DiaGenerationMixin(GenerationMixin):
 streamer: Optional["BaseStreamer"] = None,
 negative_prompt_ids: Optional[torch.Tensor] = None,
 negative_prompt_attention_mask: Optional[torch.Tensor] = None,
-use_model_defaults: Optional[bool] = None,
 custom_generate: Optional[str] = None,
 **kwargs,
 ) -> Union[GenerateOutput, torch.LongTensor]:
@@ -445,7 +439,6 @@ class DiaGenerationMixin(GenerationMixin):
 streamer=streamer,
 negative_prompt_ids=negative_prompt_ids,
 negative_prompt_attention_mask=negative_prompt_attention_mask,
-use_model_defaults=use_model_defaults,
 custom_generate=custom_generate,
 **kwargs,
 )
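To close out this group of hunks: a hypothetical, trimmed-down override mirroring the new `_prepare_generation_config` shape above, with `use_model_defaults` no longer threaded through explicitly. The class name and body are illustrative only, not the Dia mixin itself.

```python
from typing import Any, Optional

from transformers import GenerationConfig, GenerationMixin


class MyDelayPatternMixin(GenerationMixin):  # illustrative subclass, not the library's Dia class
    def _prepare_generation_config(
        self, generation_config: Optional[GenerationConfig], **kwargs: Any
    ) -> tuple[GenerationConfig, dict]:
        generation_config, model_kwargs = super()._prepare_generation_config(generation_config, **kwargs)
        # Model-specific tweaks (e.g. extending max length by a delay pattern) would go here.
        return generation_config, model_kwargs
```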