transformers 5.0.0rc2__py3-none-any.whl → 5.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +9 -28
- transformers/audio_utils.py +32 -32
- transformers/cache_utils.py +15 -124
- transformers/cli/chat.py +3 -3
- transformers/cli/serve.py +2 -2
- transformers/cli/transformers.py +2 -1
- transformers/configuration_utils.py +31 -33
- transformers/conversion_mapping.py +5 -1
- transformers/convert_slow_tokenizer.py +3 -8
- transformers/core_model_loading.py +14 -15
- transformers/data/processors/glue.py +0 -1
- transformers/data/processors/utils.py +0 -1
- transformers/data/processors/xnli.py +0 -1
- transformers/dependency_versions_table.py +4 -4
- transformers/distributed/configuration_utils.py +1 -2
- transformers/dynamic_module_utils.py +23 -23
- transformers/feature_extraction_sequence_utils.py +19 -23
- transformers/feature_extraction_utils.py +14 -14
- transformers/generation/candidate_generator.py +1 -2
- transformers/generation/configuration_utils.py +54 -39
- transformers/generation/continuous_batching/__init__.py +0 -1
- transformers/generation/continuous_batching/cache.py +34 -6
- transformers/generation/continuous_batching/cache_manager.py +25 -12
- transformers/generation/continuous_batching/continuous_api.py +54 -23
- transformers/generation/continuous_batching/requests.py +25 -4
- transformers/generation/continuous_batching/scheduler.py +117 -49
- transformers/generation/logits_process.py +0 -128
- transformers/generation/streamers.py +0 -1
- transformers/generation/utils.py +16 -26
- transformers/generation/watermarking.py +2 -3
- transformers/hf_argparser.py +9 -13
- transformers/hyperparameter_search.py +1 -2
- transformers/image_processing_base.py +9 -9
- transformers/image_processing_utils.py +11 -12
- transformers/image_processing_utils_fast.py +53 -53
- transformers/image_transforms.py +29 -29
- transformers/image_utils.py +30 -32
- transformers/integrations/awq.py +1 -3
- transformers/integrations/deepspeed.py +1 -1
- transformers/integrations/eetq.py +0 -1
- transformers/integrations/fbgemm_fp8.py +1 -2
- transformers/integrations/finegrained_fp8.py +8 -7
- transformers/integrations/flash_attention.py +1 -1
- transformers/integrations/flex_attention.py +1 -1
- transformers/integrations/fp_quant.py +4 -6
- transformers/integrations/ggml.py +0 -1
- transformers/integrations/integration_utils.py +2 -3
- transformers/integrations/mxfp4.py +5 -6
- transformers/integrations/quark.py +2 -4
- transformers/integrations/torchao.py +4 -6
- transformers/loss/loss_lw_detr.py +356 -0
- transformers/loss/loss_utils.py +2 -0
- transformers/masking_utils.py +47 -51
- transformers/model_debugging_utils.py +4 -5
- transformers/modelcard.py +14 -192
- transformers/modeling_attn_mask_utils.py +19 -19
- transformers/modeling_flash_attention_utils.py +27 -27
- transformers/modeling_gguf_pytorch_utils.py +5 -5
- transformers/modeling_layers.py +21 -22
- transformers/modeling_outputs.py +242 -253
- transformers/modeling_rope_utils.py +32 -32
- transformers/modeling_utils.py +67 -90
- transformers/models/__init__.py +4 -0
- transformers/models/afmoe/configuration_afmoe.py +26 -29
- transformers/models/afmoe/modeling_afmoe.py +30 -33
- transformers/models/afmoe/modular_afmoe.py +16 -18
- transformers/models/aimv2/configuration_aimv2.py +2 -5
- transformers/models/aimv2/modeling_aimv2.py +20 -21
- transformers/models/aimv2/modular_aimv2.py +7 -9
- transformers/models/albert/configuration_albert.py +0 -1
- transformers/models/albert/modeling_albert.py +67 -69
- transformers/models/albert/tokenization_albert.py +1 -4
- transformers/models/align/configuration_align.py +0 -1
- transformers/models/align/modeling_align.py +61 -62
- transformers/models/align/processing_align.py +2 -30
- transformers/models/altclip/configuration_altclip.py +0 -1
- transformers/models/altclip/modeling_altclip.py +76 -77
- transformers/models/altclip/processing_altclip.py +2 -15
- transformers/models/apertus/__init__.py +0 -1
- transformers/models/apertus/configuration_apertus.py +18 -21
- transformers/models/apertus/modeling_apertus.py +31 -34
- transformers/models/apertus/modular_apertus.py +28 -30
- transformers/models/arcee/configuration_arcee.py +20 -23
- transformers/models/arcee/modeling_arcee.py +31 -34
- transformers/models/arcee/modular_arcee.py +20 -23
- transformers/models/aria/configuration_aria.py +20 -23
- transformers/models/aria/image_processing_aria.py +25 -27
- transformers/models/aria/modeling_aria.py +63 -66
- transformers/models/aria/modular_aria.py +78 -85
- transformers/models/aria/processing_aria.py +28 -35
- transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -1
- transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +3 -6
- transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +6 -8
- transformers/models/audioflamingo3/__init__.py +0 -1
- transformers/models/audioflamingo3/configuration_audioflamingo3.py +0 -1
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +22 -23
- transformers/models/audioflamingo3/modular_audioflamingo3.py +12 -17
- transformers/models/audioflamingo3/processing_audioflamingo3.py +6 -8
- transformers/models/auto/auto_factory.py +4 -5
- transformers/models/auto/configuration_auto.py +26 -5
- transformers/models/auto/feature_extraction_auto.py +5 -7
- transformers/models/auto/image_processing_auto.py +13 -26
- transformers/models/auto/modeling_auto.py +18 -199
- transformers/models/auto/processing_auto.py +2 -1
- transformers/models/auto/tokenization_auto.py +21 -22
- transformers/models/auto/video_processing_auto.py +7 -8
- transformers/models/autoformer/configuration_autoformer.py +4 -7
- transformers/models/autoformer/modeling_autoformer.py +98 -100
- transformers/models/aya_vision/configuration_aya_vision.py +0 -1
- transformers/models/aya_vision/modeling_aya_vision.py +35 -37
- transformers/models/aya_vision/modular_aya_vision.py +26 -29
- transformers/models/aya_vision/processing_aya_vision.py +25 -53
- transformers/models/bamba/configuration_bamba.py +29 -32
- transformers/models/bamba/modeling_bamba.py +60 -64
- transformers/models/bamba/modular_bamba.py +51 -55
- transformers/models/bark/configuration_bark.py +4 -7
- transformers/models/bark/generation_configuration_bark.py +3 -5
- transformers/models/bark/modeling_bark.py +40 -55
- transformers/models/bark/processing_bark.py +19 -41
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +115 -117
- transformers/models/barthez/tokenization_barthez.py +1 -4
- transformers/models/bartpho/tokenization_bartpho.py +6 -7
- transformers/models/beit/configuration_beit.py +0 -11
- transformers/models/beit/image_processing_beit.py +53 -56
- transformers/models/beit/image_processing_beit_fast.py +8 -9
- transformers/models/beit/modeling_beit.py +51 -53
- transformers/models/bert/configuration_bert.py +0 -1
- transformers/models/bert/modeling_bert.py +111 -122
- transformers/models/bert/tokenization_bert.py +2 -4
- transformers/models/bert/tokenization_bert_legacy.py +3 -5
- transformers/models/bert_generation/configuration_bert_generation.py +0 -1
- transformers/models/bert_generation/modeling_bert_generation.py +47 -49
- transformers/models/bert_generation/tokenization_bert_generation.py +2 -3
- transformers/models/bert_japanese/tokenization_bert_japanese.py +5 -6
- transformers/models/bertweet/tokenization_bertweet.py +1 -3
- transformers/models/big_bird/configuration_big_bird.py +0 -1
- transformers/models/big_bird/modeling_big_bird.py +107 -109
- transformers/models/big_bird/tokenization_big_bird.py +1 -4
- transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +0 -1
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +109 -111
- transformers/models/biogpt/configuration_biogpt.py +0 -1
- transformers/models/biogpt/modeling_biogpt.py +69 -71
- transformers/models/biogpt/modular_biogpt.py +59 -61
- transformers/models/biogpt/tokenization_biogpt.py +3 -5
- transformers/models/bit/configuration_bit.py +0 -1
- transformers/models/bit/image_processing_bit.py +21 -24
- transformers/models/bit/image_processing_bit_fast.py +0 -1
- transformers/models/bit/modeling_bit.py +9 -11
- transformers/models/bitnet/configuration_bitnet.py +18 -21
- transformers/models/bitnet/modeling_bitnet.py +31 -34
- transformers/models/bitnet/modular_bitnet.py +4 -6
- transformers/models/blenderbot/configuration_blenderbot.py +0 -1
- transformers/models/blenderbot/modeling_blenderbot.py +64 -95
- transformers/models/blenderbot/tokenization_blenderbot.py +0 -1
- transformers/models/blenderbot_small/configuration_blenderbot_small.py +0 -1
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +66 -68
- transformers/models/blenderbot_small/tokenization_blenderbot_small.py +1 -3
- transformers/models/blip/configuration_blip.py +0 -1
- transformers/models/blip/image_processing_blip.py +17 -20
- transformers/models/blip/image_processing_blip_fast.py +0 -1
- transformers/models/blip/modeling_blip.py +60 -71
- transformers/models/blip/modeling_blip_text.py +63 -65
- transformers/models/blip/processing_blip.py +5 -36
- transformers/models/blip_2/configuration_blip_2.py +0 -1
- transformers/models/blip_2/modeling_blip_2.py +70 -71
- transformers/models/blip_2/processing_blip_2.py +8 -38
- transformers/models/bloom/configuration_bloom.py +0 -1
- transformers/models/bloom/modeling_bloom.py +58 -59
- transformers/models/blt/configuration_blt.py +71 -74
- transformers/models/blt/modeling_blt.py +73 -76
- transformers/models/blt/modular_blt.py +57 -59
- transformers/models/bridgetower/configuration_bridgetower.py +0 -1
- transformers/models/bridgetower/image_processing_bridgetower.py +34 -35
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +7 -8
- transformers/models/bridgetower/modeling_bridgetower.py +107 -109
- transformers/models/bridgetower/processing_bridgetower.py +2 -16
- transformers/models/bros/configuration_bros.py +0 -1
- transformers/models/bros/modeling_bros.py +78 -80
- transformers/models/bros/processing_bros.py +2 -12
- transformers/models/byt5/tokenization_byt5.py +4 -6
- transformers/models/camembert/configuration_camembert.py +0 -1
- transformers/models/camembert/modeling_camembert.py +91 -93
- transformers/models/camembert/modular_camembert.py +51 -54
- transformers/models/camembert/tokenization_camembert.py +1 -4
- transformers/models/canine/configuration_canine.py +0 -1
- transformers/models/canine/modeling_canine.py +73 -75
- transformers/models/canine/tokenization_canine.py +0 -1
- transformers/models/chameleon/configuration_chameleon.py +24 -27
- transformers/models/chameleon/image_processing_chameleon.py +21 -24
- transformers/models/chameleon/image_processing_chameleon_fast.py +0 -1
- transformers/models/chameleon/modeling_chameleon.py +53 -56
- transformers/models/chameleon/processing_chameleon.py +16 -41
- transformers/models/chinese_clip/configuration_chinese_clip.py +0 -1
- transformers/models/chinese_clip/image_processing_chinese_clip.py +21 -24
- transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +0 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +65 -66
- transformers/models/chinese_clip/processing_chinese_clip.py +2 -15
- transformers/models/clap/configuration_clap.py +0 -1
- transformers/models/clap/feature_extraction_clap.py +9 -10
- transformers/models/clap/modeling_clap.py +88 -89
- transformers/models/clap/processing_clap.py +2 -15
- transformers/models/clip/configuration_clip.py +0 -1
- transformers/models/clip/image_processing_clip.py +21 -24
- transformers/models/clip/image_processing_clip_fast.py +0 -1
- transformers/models/clip/modeling_clip.py +45 -46
- transformers/models/clip/processing_clip.py +2 -14
- transformers/models/clip/tokenization_clip.py +2 -5
- transformers/models/clipseg/configuration_clipseg.py +0 -1
- transformers/models/clipseg/modeling_clipseg.py +86 -87
- transformers/models/clipseg/processing_clipseg.py +8 -39
- transformers/models/clvp/configuration_clvp.py +1 -3
- transformers/models/clvp/feature_extraction_clvp.py +7 -10
- transformers/models/clvp/modeling_clvp.py +119 -115
- transformers/models/clvp/number_normalizer.py +1 -2
- transformers/models/clvp/processing_clvp.py +3 -20
- transformers/models/clvp/tokenization_clvp.py +0 -1
- transformers/models/code_llama/tokenization_code_llama.py +3 -6
- transformers/models/codegen/configuration_codegen.py +0 -1
- transformers/models/codegen/modeling_codegen.py +48 -48
- transformers/models/codegen/tokenization_codegen.py +5 -6
- transformers/models/cohere/configuration_cohere.py +20 -23
- transformers/models/cohere/modeling_cohere.py +35 -38
- transformers/models/cohere/modular_cohere.py +24 -28
- transformers/models/cohere/tokenization_cohere.py +5 -6
- transformers/models/cohere2/configuration_cohere2.py +21 -24
- transformers/models/cohere2/modeling_cohere2.py +34 -37
- transformers/models/cohere2/modular_cohere2.py +39 -41
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +6 -7
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +28 -30
- transformers/models/cohere2_vision/modular_cohere2_vision.py +21 -23
- transformers/models/cohere2_vision/processing_cohere2_vision.py +6 -36
- transformers/models/colpali/configuration_colpali.py +0 -1
- transformers/models/colpali/modeling_colpali.py +14 -16
- transformers/models/colpali/modular_colpali.py +11 -51
- transformers/models/colpali/processing_colpali.py +14 -52
- transformers/models/colqwen2/modeling_colqwen2.py +20 -22
- transformers/models/colqwen2/modular_colqwen2.py +29 -68
- transformers/models/colqwen2/processing_colqwen2.py +16 -52
- transformers/models/conditional_detr/configuration_conditional_detr.py +0 -1
- transformers/models/conditional_detr/image_processing_conditional_detr.py +64 -66
- transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +22 -22
- transformers/models/conditional_detr/modeling_conditional_detr.py +78 -80
- transformers/models/conditional_detr/modular_conditional_detr.py +1 -3
- transformers/models/convbert/configuration_convbert.py +0 -1
- transformers/models/convbert/modeling_convbert.py +85 -87
- transformers/models/convbert/tokenization_convbert.py +0 -1
- transformers/models/convnext/configuration_convnext.py +0 -1
- transformers/models/convnext/image_processing_convnext.py +18 -21
- transformers/models/convnext/image_processing_convnext_fast.py +5 -6
- transformers/models/convnext/modeling_convnext.py +5 -8
- transformers/models/convnextv2/configuration_convnextv2.py +0 -1
- transformers/models/convnextv2/modeling_convnextv2.py +5 -8
- transformers/models/cpm/tokenization_cpm.py +6 -7
- transformers/models/cpm/tokenization_cpm_fast.py +3 -5
- transformers/models/cpmant/configuration_cpmant.py +0 -1
- transformers/models/cpmant/modeling_cpmant.py +38 -40
- transformers/models/cpmant/tokenization_cpmant.py +1 -3
- transformers/models/csm/configuration_csm.py +49 -51
- transformers/models/csm/generation_csm.py +13 -14
- transformers/models/csm/modeling_csm.py +78 -81
- transformers/models/csm/modular_csm.py +56 -58
- transformers/models/csm/processing_csm.py +25 -68
- transformers/models/ctrl/configuration_ctrl.py +0 -1
- transformers/models/ctrl/modeling_ctrl.py +38 -41
- transformers/models/ctrl/tokenization_ctrl.py +0 -1
- transformers/models/cvt/configuration_cvt.py +0 -1
- transformers/models/cvt/modeling_cvt.py +13 -15
- transformers/models/cwm/__init__.py +0 -1
- transformers/models/cwm/configuration_cwm.py +3 -5
- transformers/models/cwm/modeling_cwm.py +32 -34
- transformers/models/cwm/modular_cwm.py +10 -12
- transformers/models/d_fine/configuration_d_fine.py +0 -1
- transformers/models/d_fine/modeling_d_fine.py +81 -82
- transformers/models/d_fine/modular_d_fine.py +8 -9
- transformers/models/dab_detr/configuration_dab_detr.py +0 -1
- transformers/models/dab_detr/modeling_dab_detr.py +68 -70
- transformers/models/dac/configuration_dac.py +0 -1
- transformers/models/dac/feature_extraction_dac.py +6 -9
- transformers/models/dac/modeling_dac.py +21 -23
- transformers/models/data2vec/configuration_data2vec_audio.py +0 -1
- transformers/models/data2vec/configuration_data2vec_text.py +0 -1
- transformers/models/data2vec/configuration_data2vec_vision.py +0 -1
- transformers/models/data2vec/modeling_data2vec_audio.py +52 -56
- transformers/models/data2vec/modeling_data2vec_text.py +91 -93
- transformers/models/data2vec/modeling_data2vec_vision.py +41 -42
- transformers/models/data2vec/modular_data2vec_audio.py +6 -1
- transformers/models/data2vec/modular_data2vec_text.py +51 -54
- transformers/models/dbrx/configuration_dbrx.py +18 -19
- transformers/models/dbrx/modeling_dbrx.py +39 -42
- transformers/models/dbrx/modular_dbrx.py +31 -33
- transformers/models/deberta/configuration_deberta.py +0 -1
- transformers/models/deberta/modeling_deberta.py +57 -60
- transformers/models/deberta/tokenization_deberta.py +2 -5
- transformers/models/deberta_v2/configuration_deberta_v2.py +0 -1
- transformers/models/deberta_v2/modeling_deberta_v2.py +63 -65
- transformers/models/deberta_v2/tokenization_deberta_v2.py +1 -4
- transformers/models/decision_transformer/configuration_decision_transformer.py +0 -1
- transformers/models/decision_transformer/modeling_decision_transformer.py +48 -50
- transformers/models/deepseek_v2/configuration_deepseek_v2.py +34 -37
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +32 -33
- transformers/models/deepseek_v2/modular_deepseek_v2.py +40 -42
- transformers/models/deepseek_v3/configuration_deepseek_v3.py +35 -38
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +31 -33
- transformers/models/deepseek_v3/modular_deepseek_v3.py +4 -5
- transformers/models/deepseek_vl/configuration_deepseek_vl.py +2 -3
- transformers/models/deepseek_vl/image_processing_deepseek_vl.py +25 -26
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +7 -6
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +31 -31
- transformers/models/deepseek_vl/modular_deepseek_vl.py +11 -43
- transformers/models/deepseek_vl/processing_deepseek_vl.py +10 -41
- transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +3 -5
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +35 -35
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +16 -16
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +33 -33
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +71 -90
- transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +12 -44
- transformers/models/deformable_detr/configuration_deformable_detr.py +0 -1
- transformers/models/deformable_detr/image_processing_deformable_detr.py +59 -61
- transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +17 -17
- transformers/models/deformable_detr/modeling_deformable_detr.py +66 -67
- transformers/models/deformable_detr/modular_deformable_detr.py +1 -3
- transformers/models/deit/configuration_deit.py +0 -1
- transformers/models/deit/image_processing_deit.py +18 -21
- transformers/models/deit/image_processing_deit_fast.py +0 -1
- transformers/models/deit/modeling_deit.py +16 -18
- transformers/models/depth_anything/configuration_depth_anything.py +0 -1
- transformers/models/depth_anything/modeling_depth_anything.py +5 -8
- transformers/models/depth_pro/configuration_depth_pro.py +0 -1
- transformers/models/depth_pro/image_processing_depth_pro.py +22 -23
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +6 -7
- transformers/models/depth_pro/modeling_depth_pro.py +21 -23
- transformers/models/detr/configuration_detr.py +0 -1
- transformers/models/detr/image_processing_detr.py +64 -66
- transformers/models/detr/image_processing_detr_fast.py +22 -23
- transformers/models/detr/modeling_detr.py +70 -72
- transformers/models/dia/configuration_dia.py +5 -8
- transformers/models/dia/feature_extraction_dia.py +6 -9
- transformers/models/dia/generation_dia.py +40 -36
- transformers/models/dia/modeling_dia.py +61 -64
- transformers/models/dia/modular_dia.py +52 -54
- transformers/models/dia/processing_dia.py +39 -29
- transformers/models/dia/tokenization_dia.py +3 -6
- transformers/models/diffllama/configuration_diffllama.py +20 -23
- transformers/models/diffllama/modeling_diffllama.py +42 -45
- transformers/models/diffllama/modular_diffllama.py +16 -18
- transformers/models/dinat/configuration_dinat.py +0 -1
- transformers/models/dinat/modeling_dinat.py +40 -42
- transformers/models/dinov2/configuration_dinov2.py +0 -1
- transformers/models/dinov2/modeling_dinov2.py +11 -13
- transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +1 -1
- transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +12 -13
- transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +5 -7
- transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +4 -7
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +3 -6
- transformers/models/dinov3_vit/configuration_dinov3_vit.py +5 -8
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +5 -6
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +14 -16
- transformers/models/dinov3_vit/modular_dinov3_vit.py +11 -13
- transformers/models/distilbert/configuration_distilbert.py +0 -1
- transformers/models/distilbert/modeling_distilbert.py +44 -46
- transformers/models/distilbert/tokenization_distilbert.py +0 -1
- transformers/models/doge/__init__.py +0 -1
- transformers/models/doge/configuration_doge.py +25 -28
- transformers/models/doge/modeling_doge.py +42 -45
- transformers/models/doge/modular_doge.py +57 -58
- transformers/models/donut/configuration_donut_swin.py +0 -1
- transformers/models/donut/image_processing_donut.py +26 -29
- transformers/models/donut/image_processing_donut_fast.py +5 -10
- transformers/models/donut/modeling_donut_swin.py +44 -46
- transformers/models/donut/processing_donut.py +5 -26
- transformers/models/dots1/configuration_dots1.py +27 -29
- transformers/models/dots1/modeling_dots1.py +31 -34
- transformers/models/dots1/modular_dots1.py +0 -1
- transformers/models/dpr/configuration_dpr.py +0 -1
- transformers/models/dpr/modeling_dpr.py +37 -39
- transformers/models/dpr/tokenization_dpr.py +7 -9
- transformers/models/dpr/tokenization_dpr_fast.py +7 -9
- transformers/models/dpt/configuration_dpt.py +0 -1
- transformers/models/dpt/image_processing_dpt.py +65 -66
- transformers/models/dpt/image_processing_dpt_fast.py +13 -14
- transformers/models/dpt/modeling_dpt.py +19 -21
- transformers/models/dpt/modular_dpt.py +10 -11
- transformers/models/edgetam/configuration_edgetam.py +0 -1
- transformers/models/edgetam/modeling_edgetam.py +39 -41
- transformers/models/edgetam/modular_edgetam.py +2 -6
- transformers/models/edgetam_video/__init__.py +0 -1
- transformers/models/edgetam_video/configuration_edgetam_video.py +0 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +76 -77
- transformers/models/edgetam_video/modular_edgetam_video.py +16 -18
- transformers/models/efficientloftr/configuration_efficientloftr.py +4 -5
- transformers/models/efficientloftr/image_processing_efficientloftr.py +14 -16
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +4 -4
- transformers/models/efficientloftr/modeling_efficientloftr.py +27 -29
- transformers/models/efficientloftr/modular_efficientloftr.py +1 -3
- transformers/models/efficientnet/configuration_efficientnet.py +0 -1
- transformers/models/efficientnet/image_processing_efficientnet.py +23 -26
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +14 -15
- transformers/models/efficientnet/modeling_efficientnet.py +12 -14
- transformers/models/electra/configuration_electra.py +0 -1
- transformers/models/electra/modeling_electra.py +101 -103
- transformers/models/emu3/configuration_emu3.py +5 -7
- transformers/models/emu3/image_processing_emu3.py +44 -39
- transformers/models/emu3/modeling_emu3.py +59 -62
- transformers/models/emu3/modular_emu3.py +32 -34
- transformers/models/emu3/processing_emu3.py +18 -43
- transformers/models/encodec/configuration_encodec.py +2 -4
- transformers/models/encodec/feature_extraction_encodec.py +10 -13
- transformers/models/encodec/modeling_encodec.py +25 -29
- transformers/models/encoder_decoder/configuration_encoder_decoder.py +0 -1
- transformers/models/encoder_decoder/modeling_encoder_decoder.py +17 -19
- transformers/models/eomt/configuration_eomt.py +0 -1
- transformers/models/eomt/image_processing_eomt.py +53 -55
- transformers/models/eomt/image_processing_eomt_fast.py +15 -16
- transformers/models/eomt/modeling_eomt.py +16 -18
- transformers/models/eomt/modular_eomt.py +11 -13
- transformers/models/ernie/configuration_ernie.py +0 -1
- transformers/models/ernie/modeling_ernie.py +121 -132
- transformers/models/ernie/modular_ernie.py +91 -103
- transformers/models/ernie4_5/configuration_ernie4_5.py +18 -20
- transformers/models/ernie4_5/modeling_ernie4_5.py +31 -33
- transformers/models/ernie4_5/modular_ernie4_5.py +1 -3
- transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +27 -29
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +36 -38
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +7 -9
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +0 -1
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +34 -35
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +6 -7
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +84 -87
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +86 -89
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +3 -5
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +17 -18
- transformers/models/esm/configuration_esm.py +2 -4
- transformers/models/esm/modeling_esm.py +32 -34
- transformers/models/esm/modeling_esmfold.py +42 -44
- transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
- transformers/models/esm/openfold_utils/loss.py +1 -2
- transformers/models/esm/openfold_utils/protein.py +13 -13
- transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
- transformers/models/esm/tokenization_esm.py +2 -4
- transformers/models/evolla/configuration_evolla.py +29 -32
- transformers/models/evolla/modeling_evolla.py +58 -61
- transformers/models/evolla/modular_evolla.py +45 -47
- transformers/models/evolla/processing_evolla.py +23 -35
- transformers/models/exaone4/configuration_exaone4.py +19 -22
- transformers/models/exaone4/modeling_exaone4.py +32 -35
- transformers/models/exaone4/modular_exaone4.py +40 -42
- transformers/models/falcon/configuration_falcon.py +22 -25
- transformers/models/falcon/modeling_falcon.py +73 -76
- transformers/models/falcon_h1/configuration_falcon_h1.py +40 -43
- transformers/models/falcon_h1/modeling_falcon_h1.py +52 -55
- transformers/models/falcon_h1/modular_falcon_h1.py +47 -48
- transformers/models/falcon_mamba/configuration_falcon_mamba.py +0 -1
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +46 -47
- transformers/models/falcon_mamba/modular_falcon_mamba.py +10 -13
- transformers/models/fast_vlm/configuration_fast_vlm.py +1 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +36 -36
- transformers/models/fast_vlm/modular_fast_vlm.py +2 -3
- transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +2 -5
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +45 -47
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -3
- transformers/models/flaubert/configuration_flaubert.py +0 -1
- transformers/models/flaubert/modeling_flaubert.py +124 -128
- transformers/models/flaubert/tokenization_flaubert.py +3 -5
- transformers/models/flava/configuration_flava.py +5 -6
- transformers/models/flava/image_processing_flava.py +66 -67
- transformers/models/flava/image_processing_flava_fast.py +42 -43
- transformers/models/flava/modeling_flava.py +108 -107
- transformers/models/flava/processing_flava.py +2 -12
- transformers/models/flex_olmo/__init__.py +0 -1
- transformers/models/flex_olmo/configuration_flex_olmo.py +23 -25
- transformers/models/flex_olmo/modeling_flex_olmo.py +37 -39
- transformers/models/flex_olmo/modular_flex_olmo.py +35 -37
- transformers/models/florence2/configuration_florence2.py +0 -1
- transformers/models/florence2/modeling_florence2.py +39 -40
- transformers/models/florence2/modular_florence2.py +52 -81
- transformers/models/florence2/processing_florence2.py +18 -47
- transformers/models/fnet/configuration_fnet.py +0 -1
- transformers/models/fnet/modeling_fnet.py +69 -80
- transformers/models/fnet/tokenization_fnet.py +0 -1
- transformers/models/focalnet/configuration_focalnet.py +0 -1
- transformers/models/focalnet/modeling_focalnet.py +39 -41
- transformers/models/fsmt/configuration_fsmt.py +0 -1
- transformers/models/fsmt/modeling_fsmt.py +47 -48
- transformers/models/fsmt/tokenization_fsmt.py +3 -5
- transformers/models/funnel/configuration_funnel.py +0 -1
- transformers/models/funnel/modeling_funnel.py +91 -93
- transformers/models/funnel/tokenization_funnel.py +2 -5
- transformers/models/fuyu/configuration_fuyu.py +23 -26
- transformers/models/fuyu/image_processing_fuyu.py +29 -31
- transformers/models/fuyu/image_processing_fuyu_fast.py +12 -13
- transformers/models/fuyu/modeling_fuyu.py +26 -29
- transformers/models/fuyu/processing_fuyu.py +9 -36
- transformers/models/gemma/configuration_gemma.py +20 -23
- transformers/models/gemma/modeling_gemma.py +32 -34
- transformers/models/gemma/modular_gemma.py +28 -29
- transformers/models/gemma/tokenization_gemma.py +3 -6
- transformers/models/gemma2/configuration_gemma2.py +25 -28
- transformers/models/gemma2/modeling_gemma2.py +34 -37
- transformers/models/gemma2/modular_gemma2.py +55 -57
- transformers/models/gemma3/configuration_gemma3.py +28 -29
- transformers/models/gemma3/image_processing_gemma3.py +29 -31
- transformers/models/gemma3/image_processing_gemma3_fast.py +9 -10
- transformers/models/gemma3/modeling_gemma3.py +86 -89
- transformers/models/gemma3/modular_gemma3.py +85 -86
- transformers/models/gemma3/processing_gemma3.py +5 -5
- transformers/models/gemma3n/configuration_gemma3n.py +9 -10
- transformers/models/gemma3n/feature_extraction_gemma3n.py +9 -11
- transformers/models/gemma3n/modeling_gemma3n.py +80 -89
- transformers/models/gemma3n/modular_gemma3n.py +66 -75
- transformers/models/gemma3n/processing_gemma3n.py +12 -26
- transformers/models/git/configuration_git.py +0 -1
- transformers/models/git/modeling_git.py +84 -86
- transformers/models/git/processing_git.py +2 -14
- transformers/models/glm/configuration_glm.py +19 -21
- transformers/models/glm/modeling_glm.py +32 -35
- transformers/models/glm/modular_glm.py +4 -7
- transformers/models/glm4/configuration_glm4.py +19 -21
- transformers/models/glm4/modeling_glm4.py +35 -37
- transformers/models/glm4/modular_glm4.py +8 -10
- transformers/models/glm46v/configuration_glm46v.py +0 -1
- transformers/models/glm46v/image_processing_glm46v.py +35 -36
- transformers/models/glm46v/image_processing_glm46v_fast.py +7 -7
- transformers/models/glm46v/modeling_glm46v.py +51 -51
- transformers/models/glm46v/modular_glm46v.py +1 -3
- transformers/models/glm46v/processing_glm46v.py +7 -41
- transformers/models/glm46v/video_processing_glm46v.py +9 -11
- transformers/models/glm4_moe/configuration_glm4_moe.py +25 -28
- transformers/models/glm4_moe/modeling_glm4_moe.py +32 -35
- transformers/models/glm4_moe/modular_glm4_moe.py +26 -29
- transformers/models/glm4_moe_lite/__init__.py +28 -0
- transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +235 -0
- transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +740 -0
- transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +304 -0
- transformers/models/glm4v/configuration_glm4v.py +14 -17
- transformers/models/glm4v/image_processing_glm4v.py +34 -36
- transformers/models/glm4v/image_processing_glm4v_fast.py +6 -7
- transformers/models/glm4v/modeling_glm4v.py +133 -151
- transformers/models/glm4v/modular_glm4v.py +131 -182
- transformers/models/glm4v/processing_glm4v.py +7 -41
- transformers/models/glm4v/video_processing_glm4v.py +9 -11
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +119 -122
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +237 -297
- transformers/models/glm4v_moe/modular_glm4v_moe.py +54 -163
- transformers/models/glm_image/__init__.py +31 -0
- transformers/models/glm_image/configuration_glm_image.py +352 -0
- transformers/models/glm_image/image_processing_glm_image.py +503 -0
- transformers/models/glm_image/image_processing_glm_image_fast.py +296 -0
- transformers/models/glm_image/modeling_glm_image.py +1590 -0
- transformers/models/glm_image/modular_glm_image.py +1480 -0
- transformers/models/glm_image/processing_glm_image.py +217 -0
- transformers/models/glmasr/__init__.py +0 -1
- transformers/models/glmasr/configuration_glmasr.py +0 -1
- transformers/models/glmasr/modeling_glmasr.py +17 -18
- transformers/models/glmasr/modular_glmasr.py +16 -18
- transformers/models/glmasr/processing_glmasr.py +7 -8
- transformers/models/glpn/configuration_glpn.py +0 -1
- transformers/models/glpn/image_processing_glpn.py +11 -12
- transformers/models/glpn/image_processing_glpn_fast.py +8 -9
- transformers/models/glpn/modeling_glpn.py +10 -12
- transformers/models/got_ocr2/configuration_got_ocr2.py +5 -8
- transformers/models/got_ocr2/image_processing_got_ocr2.py +22 -24
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +6 -7
- transformers/models/got_ocr2/modeling_got_ocr2.py +40 -42
- transformers/models/got_ocr2/modular_got_ocr2.py +31 -34
- transformers/models/got_ocr2/processing_got_ocr2.py +42 -63
- transformers/models/gpt2/configuration_gpt2.py +0 -1
- transformers/models/gpt2/modeling_gpt2.py +106 -108
- transformers/models/gpt2/tokenization_gpt2.py +6 -9
- transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +0 -1
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +73 -80
- transformers/models/gpt_neo/configuration_gpt_neo.py +0 -1
- transformers/models/gpt_neo/modeling_gpt_neo.py +63 -64
- transformers/models/gpt_neox/configuration_gpt_neox.py +19 -22
- transformers/models/gpt_neox/modeling_gpt_neox.py +70 -72
- transformers/models/gpt_neox/modular_gpt_neox.py +64 -66
- transformers/models/gpt_neox/tokenization_gpt_neox.py +2 -5
- transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +15 -18
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +41 -44
- transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +1 -3
- transformers/models/gpt_oss/configuration_gpt_oss.py +21 -24
- transformers/models/gpt_oss/modeling_gpt_oss.py +34 -35
- transformers/models/gpt_oss/modular_gpt_oss.py +17 -19
- transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
- transformers/models/gptj/configuration_gptj.py +0 -1
- transformers/models/gptj/modeling_gptj.py +82 -81
- transformers/models/granite/configuration_granite.py +23 -26
- transformers/models/granite/modeling_granite.py +39 -41
- transformers/models/granite/modular_granite.py +29 -31
- transformers/models/granite_speech/configuration_granite_speech.py +0 -1
- transformers/models/granite_speech/feature_extraction_granite_speech.py +1 -3
- transformers/models/granite_speech/modeling_granite_speech.py +21 -23
- transformers/models/granite_speech/processing_granite_speech.py +11 -4
- transformers/models/granitemoe/configuration_granitemoe.py +26 -29
- transformers/models/granitemoe/modeling_granitemoe.py +35 -37
- transformers/models/granitemoe/modular_granitemoe.py +21 -23
- transformers/models/granitemoehybrid/__init__.py +0 -1
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +38 -41
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +60 -64
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +18 -20
- transformers/models/granitemoeshared/configuration_granitemoeshared.py +27 -30
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +48 -52
- transformers/models/granitemoeshared/modular_granitemoeshared.py +19 -21
- transformers/models/grounding_dino/configuration_grounding_dino.py +0 -1
- transformers/models/grounding_dino/image_processing_grounding_dino.py +60 -62
- transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +17 -18
- transformers/models/grounding_dino/modeling_grounding_dino.py +94 -96
- transformers/models/grounding_dino/modular_grounding_dino.py +2 -3
- transformers/models/grounding_dino/processing_grounding_dino.py +10 -38
- transformers/models/groupvit/configuration_groupvit.py +0 -1
- transformers/models/groupvit/modeling_groupvit.py +69 -70
- transformers/models/helium/configuration_helium.py +20 -22
- transformers/models/helium/modeling_helium.py +33 -36
- transformers/models/helium/modular_helium.py +3 -7
- transformers/models/herbert/tokenization_herbert.py +4 -6
- transformers/models/hgnet_v2/configuration_hgnet_v2.py +0 -1
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -9
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -9
- transformers/models/hiera/configuration_hiera.py +0 -1
- transformers/models/hiera/modeling_hiera.py +60 -62
- transformers/models/hubert/configuration_hubert.py +0 -1
- transformers/models/hubert/modeling_hubert.py +35 -37
- transformers/models/hubert/modular_hubert.py +8 -11
- transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +21 -24
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +30 -33
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +3 -5
- transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +25 -28
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +32 -35
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +5 -7
- transformers/models/ibert/configuration_ibert.py +0 -1
- transformers/models/ibert/modeling_ibert.py +60 -62
- transformers/models/ibert/quant_modules.py +0 -1
- transformers/models/idefics/configuration_idefics.py +0 -1
- transformers/models/idefics/image_processing_idefics.py +13 -15
- transformers/models/idefics/modeling_idefics.py +60 -61
- transformers/models/idefics/perceiver.py +1 -3
- transformers/models/idefics/processing_idefics.py +32 -48
- transformers/models/idefics/vision.py +22 -24
- transformers/models/idefics2/configuration_idefics2.py +0 -1
- transformers/models/idefics2/image_processing_idefics2.py +31 -32
- transformers/models/idefics2/image_processing_idefics2_fast.py +7 -8
- transformers/models/idefics2/modeling_idefics2.py +56 -58
- transformers/models/idefics2/processing_idefics2.py +10 -68
- transformers/models/idefics3/configuration_idefics3.py +0 -1
- transformers/models/idefics3/image_processing_idefics3.py +42 -43
- transformers/models/idefics3/image_processing_idefics3_fast.py +11 -12
- transformers/models/idefics3/modeling_idefics3.py +52 -54
- transformers/models/idefics3/processing_idefics3.py +15 -69
- transformers/models/ijepa/configuration_ijepa.py +0 -1
- transformers/models/ijepa/modeling_ijepa.py +10 -11
- transformers/models/ijepa/modular_ijepa.py +5 -7
- transformers/models/imagegpt/configuration_imagegpt.py +0 -1
- transformers/models/imagegpt/image_processing_imagegpt.py +17 -18
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +8 -9
- transformers/models/imagegpt/modeling_imagegpt.py +57 -58
- transformers/models/informer/configuration_informer.py +6 -9
- transformers/models/informer/modeling_informer.py +84 -86
- transformers/models/informer/modular_informer.py +13 -16
- transformers/models/instructblip/configuration_instructblip.py +0 -1
- transformers/models/instructblip/modeling_instructblip.py +43 -44
- transformers/models/instructblip/processing_instructblip.py +10 -36
- transformers/models/instructblipvideo/configuration_instructblipvideo.py +0 -1
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +55 -55
- transformers/models/instructblipvideo/modular_instructblipvideo.py +34 -36
- transformers/models/instructblipvideo/processing_instructblipvideo.py +14 -33
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +4 -5
- transformers/models/internvl/configuration_internvl.py +0 -1
- transformers/models/internvl/modeling_internvl.py +41 -43
- transformers/models/internvl/modular_internvl.py +19 -21
- transformers/models/internvl/processing_internvl.py +12 -45
- transformers/models/internvl/video_processing_internvl.py +8 -9
- transformers/models/jais2/configuration_jais2.py +20 -22
- transformers/models/jais2/modeling_jais2.py +32 -34
- transformers/models/jais2/modular_jais2.py +20 -22
- transformers/models/jamba/configuration_jamba.py +0 -1
- transformers/models/jamba/modeling_jamba.py +43 -46
- transformers/models/jamba/modular_jamba.py +37 -38
- transformers/models/janus/configuration_janus.py +0 -1
- transformers/models/janus/image_processing_janus.py +35 -37
- transformers/models/janus/image_processing_janus_fast.py +12 -13
- transformers/models/janus/modeling_janus.py +41 -43
- transformers/models/janus/modular_janus.py +60 -63
- transformers/models/janus/processing_janus.py +17 -43
- transformers/models/jetmoe/configuration_jetmoe.py +20 -23
- transformers/models/jetmoe/modeling_jetmoe.py +39 -42
- transformers/models/jetmoe/modular_jetmoe.py +30 -33
- transformers/models/kosmos2/configuration_kosmos2.py +0 -1
- transformers/models/kosmos2/modeling_kosmos2.py +145 -146
- transformers/models/kosmos2/processing_kosmos2.py +40 -55
- transformers/models/kosmos2_5/__init__.py +0 -1
- transformers/models/kosmos2_5/configuration_kosmos2_5.py +0 -1
- transformers/models/kosmos2_5/image_processing_kosmos2_5.py +10 -12
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -11
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +108 -109
- transformers/models/kosmos2_5/processing_kosmos2_5.py +8 -29
- transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +23 -25
- transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +12 -14
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +59 -66
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +19 -21
- transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +2 -8
- transformers/models/lasr/configuration_lasr.py +1 -3
- transformers/models/lasr/feature_extraction_lasr.py +10 -12
- transformers/models/lasr/modeling_lasr.py +18 -21
- transformers/models/lasr/modular_lasr.py +8 -10
- transformers/models/lasr/processing_lasr.py +12 -6
- transformers/models/lasr/tokenization_lasr.py +2 -4
- transformers/models/layoutlm/configuration_layoutlm.py +0 -1
- transformers/models/layoutlm/modeling_layoutlm.py +67 -69
- transformers/models/layoutlmv2/configuration_layoutlmv2.py +0 -1
- transformers/models/layoutlmv2/image_processing_layoutlmv2.py +18 -21
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +5 -6
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +48 -50
- transformers/models/layoutlmv2/processing_layoutlmv2.py +14 -44
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +63 -74
- transformers/models/layoutlmv3/configuration_layoutlmv3.py +0 -1
- transformers/models/layoutlmv3/image_processing_layoutlmv3.py +24 -26
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +7 -8
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +49 -51
- transformers/models/layoutlmv3/processing_layoutlmv3.py +14 -46
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +64 -75
- transformers/models/layoutxlm/configuration_layoutxlm.py +0 -1
- transformers/models/layoutxlm/modular_layoutxlm.py +0 -1
- transformers/models/layoutxlm/processing_layoutxlm.py +14 -44
- transformers/models/layoutxlm/tokenization_layoutxlm.py +65 -76
- transformers/models/led/configuration_led.py +1 -4
- transformers/models/led/modeling_led.py +113 -267
- transformers/models/levit/configuration_levit.py +0 -1
- transformers/models/levit/image_processing_levit.py +19 -21
- transformers/models/levit/image_processing_levit_fast.py +0 -1
- transformers/models/levit/modeling_levit.py +17 -19
- transformers/models/lfm2/configuration_lfm2.py +22 -23
- transformers/models/lfm2/modeling_lfm2.py +42 -44
- transformers/models/lfm2/modular_lfm2.py +29 -29
- transformers/models/lfm2_moe/__init__.py +0 -1
- transformers/models/lfm2_moe/configuration_lfm2_moe.py +1 -2
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +44 -45
- transformers/models/lfm2_moe/modular_lfm2_moe.py +8 -9
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +0 -1
- transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +34 -5
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +31 -33
- transformers/models/lfm2_vl/modular_lfm2_vl.py +24 -27
- transformers/models/lfm2_vl/processing_lfm2_vl.py +14 -34
- transformers/models/lightglue/image_processing_lightglue.py +16 -15
- transformers/models/lightglue/image_processing_lightglue_fast.py +4 -4
- transformers/models/lightglue/modeling_lightglue.py +28 -30
- transformers/models/lightglue/modular_lightglue.py +28 -28
- transformers/models/lighton_ocr/__init__.py +28 -0
- transformers/models/lighton_ocr/configuration_lighton_ocr.py +128 -0
- transformers/models/lighton_ocr/modeling_lighton_ocr.py +460 -0
- transformers/models/lighton_ocr/modular_lighton_ocr.py +403 -0
- transformers/models/lighton_ocr/processing_lighton_ocr.py +229 -0
- transformers/models/lilt/configuration_lilt.py +0 -1
- transformers/models/lilt/modeling_lilt.py +53 -55
- transformers/models/llama/configuration_llama.py +21 -24
- transformers/models/llama/modeling_llama.py +31 -34
- transformers/models/llama/tokenization_llama.py +2 -4
- transformers/models/llama4/configuration_llama4.py +20 -22
- transformers/models/llama4/image_processing_llama4_fast.py +8 -9
- transformers/models/llama4/modeling_llama4.py +70 -71
- transformers/models/llama4/processing_llama4.py +33 -57
- transformers/models/llava/configuration_llava.py +0 -1
- transformers/models/llava/image_processing_llava.py +25 -28
- transformers/models/llava/image_processing_llava_fast.py +6 -7
- transformers/models/llava/modeling_llava.py +35 -37
- transformers/models/llava/processing_llava.py +18 -51
- transformers/models/llava_next/configuration_llava_next.py +0 -1
- transformers/models/llava_next/image_processing_llava_next.py +43 -45
- transformers/models/llava_next/image_processing_llava_next_fast.py +5 -6
- transformers/models/llava_next/modeling_llava_next.py +42 -44
- transformers/models/llava_next/processing_llava_next.py +18 -47
- transformers/models/llava_next_video/configuration_llava_next_video.py +0 -1
- transformers/models/llava_next_video/modeling_llava_next_video.py +53 -55
- transformers/models/llava_next_video/modular_llava_next_video.py +44 -46
- transformers/models/llava_next_video/processing_llava_next_video.py +21 -63
- transformers/models/llava_next_video/video_processing_llava_next_video.py +0 -1
- transformers/models/llava_onevision/configuration_llava_onevision.py +0 -1
- transformers/models/llava_onevision/image_processing_llava_onevision.py +40 -42
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +6 -7
- transformers/models/llava_onevision/modeling_llava_onevision.py +60 -62
- transformers/models/llava_onevision/modular_llava_onevision.py +51 -52
- transformers/models/llava_onevision/processing_llava_onevision.py +21 -53
- transformers/models/llava_onevision/video_processing_llava_onevision.py +0 -1
- transformers/models/longcat_flash/__init__.py +0 -1
- transformers/models/longcat_flash/configuration_longcat_flash.py +32 -35
- transformers/models/longcat_flash/modeling_longcat_flash.py +30 -31
- transformers/models/longcat_flash/modular_longcat_flash.py +17 -19
- transformers/models/longformer/configuration_longformer.py +1 -4
- transformers/models/longformer/modeling_longformer.py +99 -101
- transformers/models/longt5/configuration_longt5.py +0 -1
- transformers/models/longt5/modeling_longt5.py +43 -44
- transformers/models/luke/configuration_luke.py +0 -1
- transformers/models/luke/modeling_luke.py +179 -181
- transformers/models/luke/tokenization_luke.py +99 -105
- transformers/models/lw_detr/__init__.py +27 -0
- transformers/models/lw_detr/configuration_lw_detr.py +374 -0
- transformers/models/lw_detr/modeling_lw_detr.py +1698 -0
- transformers/models/lw_detr/modular_lw_detr.py +1611 -0
- transformers/models/lxmert/configuration_lxmert.py +0 -1
- transformers/models/lxmert/modeling_lxmert.py +63 -74
- transformers/models/m2m_100/configuration_m2m_100.py +0 -1
- transformers/models/m2m_100/modeling_m2m_100.py +69 -71
- transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
- transformers/models/mamba/configuration_mamba.py +0 -1
- transformers/models/mamba/modeling_mamba.py +43 -44
- transformers/models/mamba2/configuration_mamba2.py +0 -1
- transformers/models/mamba2/modeling_mamba2.py +44 -46
- transformers/models/marian/configuration_marian.py +0 -1
- transformers/models/marian/modeling_marian.py +84 -86
- transformers/models/marian/tokenization_marian.py +6 -6
- transformers/models/markuplm/configuration_markuplm.py +0 -1
- transformers/models/markuplm/feature_extraction_markuplm.py +1 -2
- transformers/models/markuplm/modeling_markuplm.py +60 -62
- transformers/models/markuplm/processing_markuplm.py +31 -38
- transformers/models/markuplm/tokenization_markuplm.py +67 -77
- transformers/models/mask2former/configuration_mask2former.py +4 -7
- transformers/models/mask2former/image_processing_mask2former.py +84 -85
- transformers/models/mask2former/image_processing_mask2former_fast.py +29 -29
- transformers/models/mask2former/modeling_mask2former.py +90 -92
- transformers/models/mask2former/modular_mask2former.py +6 -8
- transformers/models/maskformer/configuration_maskformer.py +5 -8
- transformers/models/maskformer/configuration_maskformer_swin.py +0 -1
- transformers/models/maskformer/image_processing_maskformer.py +84 -85
- transformers/models/maskformer/image_processing_maskformer_fast.py +28 -29
- transformers/models/maskformer/modeling_maskformer.py +56 -58
- transformers/models/maskformer/modeling_maskformer_swin.py +18 -20
- transformers/models/mbart/configuration_mbart.py +0 -1
- transformers/models/mbart/modeling_mbart.py +111 -113
- transformers/models/mbart/tokenization_mbart.py +2 -4
- transformers/models/mbart50/tokenization_mbart50.py +3 -5
- transformers/models/megatron_bert/configuration_megatron_bert.py +0 -1
- transformers/models/megatron_bert/modeling_megatron_bert.py +139 -150
- transformers/models/metaclip_2/modeling_metaclip_2.py +46 -46
- transformers/models/metaclip_2/modular_metaclip_2.py +19 -21
- transformers/models/mgp_str/configuration_mgp_str.py +0 -1
- transformers/models/mgp_str/modeling_mgp_str.py +14 -16
- transformers/models/mgp_str/processing_mgp_str.py +3 -20
- transformers/models/mgp_str/tokenization_mgp_str.py +1 -3
- transformers/models/mimi/configuration_mimi.py +38 -40
- transformers/models/mimi/modeling_mimi.py +76 -79
- transformers/models/minimax/__init__.py +0 -1
- transformers/models/minimax/configuration_minimax.py +32 -36
- transformers/models/minimax/modeling_minimax.py +41 -44
- transformers/models/minimax/modular_minimax.py +50 -53
- transformers/models/minimax_m2/__init__.py +28 -0
- transformers/models/minimax_m2/configuration_minimax_m2.py +211 -0
- transformers/models/minimax_m2/modeling_minimax_m2.py +704 -0
- transformers/models/minimax_m2/modular_minimax_m2.py +369 -0
- transformers/models/ministral/configuration_ministral.py +20 -22
- transformers/models/ministral/modeling_ministral.py +31 -33
- transformers/models/ministral/modular_ministral.py +27 -29
- transformers/models/ministral3/configuration_ministral3.py +19 -22
- transformers/models/ministral3/modeling_ministral3.py +31 -33
- transformers/models/ministral3/modular_ministral3.py +4 -5
- transformers/models/mistral/configuration_mistral.py +19 -22
- transformers/models/mistral/modeling_mistral.py +31 -33
- transformers/models/mistral/modular_mistral.py +11 -12
- transformers/models/mistral3/configuration_mistral3.py +0 -1
- transformers/models/mistral3/modeling_mistral3.py +43 -42
- transformers/models/mistral3/modular_mistral3.py +35 -35
- transformers/models/mixtral/configuration_mixtral.py +24 -27
- transformers/models/mixtral/modeling_mixtral.py +35 -38
- transformers/models/mixtral/modular_mixtral.py +26 -29
- transformers/models/mlcd/configuration_mlcd.py +0 -1
- transformers/models/mlcd/modeling_mlcd.py +10 -12
- transformers/models/mlcd/modular_mlcd.py +9 -11
- transformers/models/mllama/configuration_mllama.py +5 -8
- transformers/models/mllama/image_processing_mllama.py +23 -25
- transformers/models/mllama/image_processing_mllama_fast.py +5 -6
- transformers/models/mllama/modeling_mllama.py +81 -84
- transformers/models/mllama/processing_mllama.py +6 -55
- transformers/models/mluke/tokenization_mluke.py +97 -103
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +0 -1
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +94 -96
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +0 -1
- transformers/models/mobilebert/configuration_mobilebert.py +0 -1
- transformers/models/mobilebert/modeling_mobilebert.py +75 -85
- transformers/models/mobilebert/tokenization_mobilebert.py +0 -1
- transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +0 -1
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +20 -23
- transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +0 -1
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +13 -16
- transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +0 -1
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +48 -51
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +10 -11
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +17 -20
- transformers/models/mobilevit/configuration_mobilevit.py +0 -1
- transformers/models/mobilevit/image_processing_mobilevit.py +41 -44
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +8 -9
- transformers/models/mobilevit/modeling_mobilevit.py +17 -19
- transformers/models/mobilevitv2/configuration_mobilevitv2.py +0 -1
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +17 -20
- transformers/models/modernbert/configuration_modernbert.py +34 -34
- transformers/models/modernbert/modeling_modernbert.py +123 -125
- transformers/models/modernbert/modular_modernbert.py +155 -155
- transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +30 -32
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +45 -47
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +69 -70
- transformers/models/moonshine/configuration_moonshine.py +22 -24
- transformers/models/moonshine/modeling_moonshine.py +63 -65
- transformers/models/moonshine/modular_moonshine.py +72 -73
- transformers/models/moshi/configuration_moshi.py +18 -21
- transformers/models/moshi/modeling_moshi.py +130 -133
- transformers/models/mpnet/configuration_mpnet.py +0 -1
- transformers/models/mpnet/modeling_mpnet.py +55 -57
- transformers/models/mpnet/tokenization_mpnet.py +1 -4
- transformers/models/mpt/configuration_mpt.py +1 -9
- transformers/models/mpt/modeling_mpt.py +58 -60
- transformers/models/mra/configuration_mra.py +0 -1
- transformers/models/mra/modeling_mra.py +54 -56
- transformers/models/mt5/configuration_mt5.py +0 -1
- transformers/models/mt5/modeling_mt5.py +75 -77
- transformers/models/musicgen/configuration_musicgen.py +0 -1
- transformers/models/musicgen/modeling_musicgen.py +108 -111
- transformers/models/musicgen/processing_musicgen.py +3 -21
- transformers/models/musicgen_melody/configuration_musicgen_melody.py +0 -1
- transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +8 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +106 -109
- transformers/models/musicgen_melody/processing_musicgen_melody.py +3 -22
- transformers/models/mvp/configuration_mvp.py +0 -1
- transformers/models/mvp/modeling_mvp.py +115 -119
- transformers/models/myt5/tokenization_myt5.py +8 -10
- transformers/models/nanochat/configuration_nanochat.py +0 -1
- transformers/models/nanochat/modeling_nanochat.py +32 -35
- transformers/models/nanochat/modular_nanochat.py +12 -14
- transformers/models/nemotron/configuration_nemotron.py +20 -23
- transformers/models/nemotron/modeling_nemotron.py +49 -52
- transformers/models/nllb/tokenization_nllb.py +7 -9
- transformers/models/nllb_moe/configuration_nllb_moe.py +0 -1
- transformers/models/nllb_moe/modeling_nllb_moe.py +67 -69
- transformers/models/nougat/image_processing_nougat.py +29 -32
- transformers/models/nougat/image_processing_nougat_fast.py +4 -5
- transformers/models/nougat/processing_nougat.py +37 -39
- transformers/models/nougat/tokenization_nougat.py +5 -7
- transformers/models/nystromformer/configuration_nystromformer.py +0 -1
- transformers/models/nystromformer/modeling_nystromformer.py +61 -63
- transformers/models/olmo/configuration_olmo.py +18 -21
- transformers/models/olmo/modeling_olmo.py +31 -34
- transformers/models/olmo/modular_olmo.py +5 -9
- transformers/models/olmo2/configuration_olmo2.py +18 -21
- transformers/models/olmo2/modeling_olmo2.py +32 -35
- transformers/models/olmo2/modular_olmo2.py +29 -31
- transformers/models/olmo3/__init__.py +0 -1
- transformers/models/olmo3/configuration_olmo3.py +20 -23
- transformers/models/olmo3/modeling_olmo3.py +31 -34
- transformers/models/olmo3/modular_olmo3.py +31 -33
- transformers/models/olmoe/configuration_olmoe.py +24 -26
- transformers/models/olmoe/modeling_olmoe.py +37 -39
- transformers/models/olmoe/modular_olmoe.py +12 -13
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +0 -1
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +38 -40
- transformers/models/omdet_turbo/processing_omdet_turbo.py +19 -67
- transformers/models/oneformer/configuration_oneformer.py +4 -7
- transformers/models/oneformer/image_processing_oneformer.py +83 -84
- transformers/models/oneformer/image_processing_oneformer_fast.py +33 -34
- transformers/models/oneformer/modeling_oneformer.py +123 -124
- transformers/models/oneformer/processing_oneformer.py +28 -43
- transformers/models/openai/configuration_openai.py +0 -1
- transformers/models/openai/modeling_openai.py +50 -51
- transformers/models/openai/tokenization_openai.py +2 -5
- transformers/models/opt/configuration_opt.py +0 -1
- transformers/models/opt/modeling_opt.py +74 -75
- transformers/models/ovis2/__init__.py +0 -1
- transformers/models/ovis2/configuration_ovis2.py +0 -1
- transformers/models/ovis2/image_processing_ovis2.py +22 -24
- transformers/models/ovis2/image_processing_ovis2_fast.py +6 -7
- transformers/models/ovis2/modeling_ovis2.py +43 -45
- transformers/models/ovis2/modular_ovis2.py +30 -32
- transformers/models/ovis2/processing_ovis2.py +12 -40
- transformers/models/owlv2/configuration_owlv2.py +0 -1
- transformers/models/owlv2/image_processing_owlv2.py +20 -21
- transformers/models/owlv2/image_processing_owlv2_fast.py +7 -8
- transformers/models/owlv2/modeling_owlv2.py +82 -87
- transformers/models/owlv2/modular_owlv2.py +6 -7
- transformers/models/owlv2/processing_owlv2.py +20 -49
- transformers/models/owlvit/configuration_owlvit.py +0 -1
- transformers/models/owlvit/image_processing_owlvit.py +21 -22
- transformers/models/owlvit/image_processing_owlvit_fast.py +2 -3
- transformers/models/owlvit/modeling_owlvit.py +81 -86
- transformers/models/owlvit/processing_owlvit.py +20 -48
- transformers/models/paddleocr_vl/__init__.py +0 -1
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +19 -19
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +34 -35
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +12 -12
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +76 -76
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +68 -68
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +1 -3
- transformers/models/paligemma/configuration_paligemma.py +0 -1
- transformers/models/paligemma/modeling_paligemma.py +51 -53
- transformers/models/paligemma/processing_paligemma.py +13 -66
- transformers/models/parakeet/configuration_parakeet.py +1 -4
- transformers/models/parakeet/feature_extraction_parakeet.py +10 -12
- transformers/models/parakeet/modeling_parakeet.py +18 -22
- transformers/models/parakeet/modular_parakeet.py +16 -18
- transformers/models/parakeet/processing_parakeet.py +12 -5
- transformers/models/parakeet/tokenization_parakeet.py +2 -4
- transformers/models/patchtsmixer/configuration_patchtsmixer.py +5 -8
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +60 -62
- transformers/models/patchtst/configuration_patchtst.py +6 -9
- transformers/models/patchtst/modeling_patchtst.py +72 -74
- transformers/models/pe_audio/__init__.py +0 -1
- transformers/models/pe_audio/configuration_pe_audio.py +14 -16
- transformers/models/pe_audio/feature_extraction_pe_audio.py +6 -8
- transformers/models/pe_audio/modeling_pe_audio.py +26 -27
- transformers/models/pe_audio/modular_pe_audio.py +16 -17
- transformers/models/pe_audio/processing_pe_audio.py +0 -1
- transformers/models/pe_audio_video/__init__.py +0 -1
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +15 -17
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +60 -61
- transformers/models/pe_audio_video/modular_pe_audio_video.py +52 -53
- transformers/models/pe_audio_video/processing_pe_audio_video.py +0 -1
- transformers/models/pe_video/__init__.py +0 -1
- transformers/models/pe_video/configuration_pe_video.py +14 -16
- transformers/models/pe_video/modeling_pe_video.py +21 -22
- transformers/models/pe_video/modular_pe_video.py +11 -12
- transformers/models/pe_video/video_processing_pe_video.py +2 -4
- transformers/models/pegasus/configuration_pegasus.py +0 -1
- transformers/models/pegasus/modeling_pegasus.py +63 -65
- transformers/models/pegasus/tokenization_pegasus.py +1 -4
- transformers/models/pegasus_x/configuration_pegasus_x.py +0 -1
- transformers/models/pegasus_x/modeling_pegasus_x.py +50 -52
- transformers/models/perceiver/configuration_perceiver.py +0 -1
- transformers/models/perceiver/image_processing_perceiver.py +22 -25
- transformers/models/perceiver/image_processing_perceiver_fast.py +5 -6
- transformers/models/perceiver/modeling_perceiver.py +135 -136
- transformers/models/perceiver/tokenization_perceiver.py +3 -6
- transformers/models/perception_lm/configuration_perception_lm.py +0 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +8 -9
- transformers/models/perception_lm/modeling_perception_lm.py +38 -40
- transformers/models/perception_lm/modular_perception_lm.py +31 -33
- transformers/models/perception_lm/processing_perception_lm.py +13 -47
- transformers/models/perception_lm/video_processing_perception_lm.py +0 -1
- transformers/models/persimmon/configuration_persimmon.py +18 -21
- transformers/models/persimmon/modeling_persimmon.py +39 -42
- transformers/models/phi/configuration_phi.py +19 -22
- transformers/models/phi/modeling_phi.py +35 -37
- transformers/models/phi/modular_phi.py +23 -23
- transformers/models/phi3/configuration_phi3.py +23 -26
- transformers/models/phi3/modeling_phi3.py +33 -36
- transformers/models/phi3/modular_phi3.py +13 -17
- transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +25 -26
- transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +7 -9
- transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +7 -7
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +54 -56
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +59 -60
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +7 -42
- transformers/models/phimoe/configuration_phimoe.py +26 -29
- transformers/models/phimoe/modeling_phimoe.py +35 -38
- transformers/models/phimoe/modular_phimoe.py +0 -1
- transformers/models/phobert/tokenization_phobert.py +4 -6
- transformers/models/pix2struct/configuration_pix2struct.py +0 -1
- transformers/models/pix2struct/image_processing_pix2struct.py +15 -19
- transformers/models/pix2struct/image_processing_pix2struct_fast.py +7 -10
- transformers/models/pix2struct/modeling_pix2struct.py +42 -45
- transformers/models/pix2struct/processing_pix2struct.py +5 -26
- transformers/models/pixio/__init__.py +0 -1
- transformers/models/pixio/configuration_pixio.py +0 -1
- transformers/models/pixio/modeling_pixio.py +7 -9
- transformers/models/pixio/modular_pixio.py +3 -6
- transformers/models/pixtral/configuration_pixtral.py +11 -14
- transformers/models/pixtral/image_processing_pixtral.py +26 -28
- transformers/models/pixtral/image_processing_pixtral_fast.py +5 -6
- transformers/models/pixtral/modeling_pixtral.py +22 -25
- transformers/models/pixtral/processing_pixtral.py +18 -52
- transformers/models/plbart/configuration_plbart.py +0 -1
- transformers/models/plbart/modeling_plbart.py +100 -102
- transformers/models/plbart/modular_plbart.py +30 -32
- transformers/models/plbart/tokenization_plbart.py +4 -5
- transformers/models/poolformer/configuration_poolformer.py +0 -1
- transformers/models/poolformer/image_processing_poolformer.py +21 -24
- transformers/models/poolformer/image_processing_poolformer_fast.py +6 -7
- transformers/models/poolformer/modeling_poolformer.py +10 -12
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/pop2piano/feature_extraction_pop2piano.py +6 -9
- transformers/models/pop2piano/modeling_pop2piano.py +22 -23
- transformers/models/pop2piano/processing_pop2piano.py +25 -33
- transformers/models/pop2piano/tokenization_pop2piano.py +15 -23
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
- transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +14 -15
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +9 -10
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +9 -10
- transformers/models/prophetnet/configuration_prophetnet.py +26 -28
- transformers/models/prophetnet/modeling_prophetnet.py +109 -130
- transformers/models/prophetnet/tokenization_prophetnet.py +14 -16
- transformers/models/pvt/configuration_pvt.py +0 -1
- transformers/models/pvt/image_processing_pvt.py +17 -20
- transformers/models/pvt/image_processing_pvt_fast.py +0 -1
- transformers/models/pvt/modeling_pvt.py +19 -21
- transformers/models/pvt_v2/configuration_pvt_v2.py +2 -4
- transformers/models/pvt_v2/modeling_pvt_v2.py +21 -23
- transformers/models/qwen2/configuration_qwen2.py +18 -21
- transformers/models/qwen2/modeling_qwen2.py +31 -33
- transformers/models/qwen2/modular_qwen2.py +11 -12
- transformers/models/qwen2/tokenization_qwen2.py +2 -5
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +20 -23
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +135 -128
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +116 -109
- transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +41 -49
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +22 -25
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +94 -96
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +46 -85
- transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +7 -43
- transformers/models/qwen2_audio/configuration_qwen2_audio.py +0 -1
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +27 -29
- transformers/models/qwen2_audio/processing_qwen2_audio.py +13 -42
- transformers/models/qwen2_moe/configuration_qwen2_moe.py +28 -31
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +36 -39
- transformers/models/qwen2_moe/modular_qwen2_moe.py +7 -10
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +22 -24
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +38 -40
- transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +8 -9
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +91 -92
- transformers/models/qwen2_vl/processing_qwen2_vl.py +7 -44
- transformers/models/qwen2_vl/video_processing_qwen2_vl.py +35 -13
- transformers/models/qwen3/configuration_qwen3.py +20 -23
- transformers/models/qwen3/modeling_qwen3.py +31 -34
- transformers/models/qwen3/modular_qwen3.py +4 -6
- transformers/models/qwen3_moe/configuration_qwen3_moe.py +25 -28
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +36 -39
- transformers/models/qwen3_moe/modular_qwen3_moe.py +10 -13
- transformers/models/qwen3_next/configuration_qwen3_next.py +31 -34
- transformers/models/qwen3_next/modeling_qwen3_next.py +39 -42
- transformers/models/qwen3_next/modular_qwen3_next.py +33 -34
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +85 -88
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +107 -110
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +122 -148
- transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +40 -48
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +16 -19
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +74 -77
- transformers/models/qwen3_vl/modular_qwen3_vl.py +68 -105
- transformers/models/qwen3_vl/processing_qwen3_vl.py +6 -42
- transformers/models/qwen3_vl/video_processing_qwen3_vl.py +10 -12
- transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +21 -25
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +80 -83
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +33 -36
- transformers/models/rag/configuration_rag.py +0 -1
- transformers/models/rag/modeling_rag.py +116 -118
- transformers/models/rag/retrieval_rag.py +2 -4
- transformers/models/rag/tokenization_rag.py +0 -50
- transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +21 -24
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +31 -34
- transformers/models/reformer/configuration_reformer.py +0 -1
- transformers/models/reformer/modeling_reformer.py +67 -68
- transformers/models/reformer/tokenization_reformer.py +3 -6
- transformers/models/regnet/configuration_regnet.py +0 -1
- transformers/models/regnet/modeling_regnet.py +7 -9
- transformers/models/rembert/configuration_rembert.py +0 -1
- transformers/models/rembert/modeling_rembert.py +108 -110
- transformers/models/rembert/tokenization_rembert.py +1 -4
- transformers/models/resnet/configuration_resnet.py +0 -1
- transformers/models/resnet/modeling_resnet.py +8 -10
- transformers/models/roberta/configuration_roberta.py +0 -1
- transformers/models/roberta/modeling_roberta.py +91 -93
- transformers/models/roberta/modular_roberta.py +55 -58
- transformers/models/roberta/tokenization_roberta.py +2 -5
- transformers/models/roberta/tokenization_roberta_old.py +2 -4
- transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +0 -1
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +91 -93
- transformers/models/roc_bert/configuration_roc_bert.py +0 -1
- transformers/models/roc_bert/modeling_roc_bert.py +119 -121
- transformers/models/roc_bert/tokenization_roc_bert.py +88 -94
- transformers/models/roformer/configuration_roformer.py +0 -1
- transformers/models/roformer/modeling_roformer.py +79 -81
- transformers/models/roformer/tokenization_roformer.py +3 -6
- transformers/models/roformer/tokenization_utils.py +0 -1
- transformers/models/rt_detr/configuration_rt_detr.py +0 -1
- transformers/models/rt_detr/configuration_rt_detr_resnet.py +0 -1
- transformers/models/rt_detr/image_processing_rt_detr.py +54 -55
- transformers/models/rt_detr/image_processing_rt_detr_fast.py +15 -15
- transformers/models/rt_detr/modeling_rt_detr.py +80 -82
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +2 -4
- transformers/models/rt_detr/modular_rt_detr.py +14 -14
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +0 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +79 -81
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +2 -4
- transformers/models/rwkv/configuration_rwkv.py +0 -1
- transformers/models/rwkv/modeling_rwkv.py +29 -31
- transformers/models/sam/configuration_sam.py +0 -1
- transformers/models/sam/image_processing_sam.py +59 -60
- transformers/models/sam/image_processing_sam_fast.py +21 -22
- transformers/models/sam/modeling_sam.py +33 -35
- transformers/models/sam/processing_sam.py +39 -27
- transformers/models/sam2/configuration_sam2.py +0 -1
- transformers/models/sam2/image_processing_sam2_fast.py +14 -15
- transformers/models/sam2/modeling_sam2.py +45 -47
- transformers/models/sam2/modular_sam2.py +43 -44
- transformers/models/sam2/processing_sam2.py +31 -47
- transformers/models/sam2_video/configuration_sam2_video.py +0 -1
- transformers/models/sam2_video/modeling_sam2_video.py +69 -70
- transformers/models/sam2_video/modular_sam2_video.py +60 -79
- transformers/models/sam2_video/processing_sam2_video.py +49 -66
- transformers/models/sam2_video/video_processing_sam2_video.py +1 -4
- transformers/models/sam3/configuration_sam3.py +0 -1
- transformers/models/sam3/image_processing_sam3_fast.py +17 -20
- transformers/models/sam3/modeling_sam3.py +54 -56
- transformers/models/sam3/modular_sam3.py +3 -8
- transformers/models/sam3/processing_sam3.py +29 -48
- transformers/models/sam3_tracker/__init__.py +0 -1
- transformers/models/sam3_tracker/configuration_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +34 -36
- transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -1
- transformers/models/sam3_tracker/processing_sam3_tracker.py +31 -47
- transformers/models/sam3_tracker_video/__init__.py +0 -1
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +0 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +70 -70
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +2 -4
- transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +50 -66
- transformers/models/sam3_video/configuration_sam3_video.py +0 -1
- transformers/models/sam3_video/modeling_sam3_video.py +29 -31
- transformers/models/sam3_video/processing_sam3_video.py +25 -45
- transformers/models/sam_hq/__init__.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +0 -1
- transformers/models/sam_hq/modeling_sam_hq.py +39 -41
- transformers/models/sam_hq/modular_sam_hq.py +17 -19
- transformers/models/sam_hq/{processing_samhq.py → processing_sam_hq.py} +39 -28
- transformers/models/seamless_m4t/configuration_seamless_m4t.py +0 -1
- transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +8 -11
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +180 -182
- transformers/models/seamless_m4t/processing_seamless_m4t.py +18 -39
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +15 -20
- transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +0 -1
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +193 -195
- transformers/models/seed_oss/configuration_seed_oss.py +23 -25
- transformers/models/seed_oss/modeling_seed_oss.py +30 -32
- transformers/models/seed_oss/modular_seed_oss.py +3 -4
- transformers/models/segformer/configuration_segformer.py +0 -10
- transformers/models/segformer/image_processing_segformer.py +39 -42
- transformers/models/segformer/image_processing_segformer_fast.py +7 -8
- transformers/models/segformer/modeling_segformer.py +24 -26
- transformers/models/segformer/modular_segformer.py +5 -6
- transformers/models/seggpt/configuration_seggpt.py +0 -1
- transformers/models/seggpt/image_processing_seggpt.py +38 -41
- transformers/models/seggpt/modeling_seggpt.py +28 -30
- transformers/models/sew/configuration_sew.py +0 -1
- transformers/models/sew/modeling_sew.py +33 -35
- transformers/models/sew/modular_sew.py +10 -12
- transformers/models/sew_d/configuration_sew_d.py +0 -1
- transformers/models/sew_d/modeling_sew_d.py +28 -30
- transformers/models/shieldgemma2/configuration_shieldgemma2.py +0 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +15 -17
- transformers/models/shieldgemma2/processing_shieldgemma2.py +3 -5
- transformers/models/siglip/configuration_siglip.py +0 -1
- transformers/models/siglip/image_processing_siglip.py +17 -20
- transformers/models/siglip/image_processing_siglip_fast.py +0 -1
- transformers/models/siglip/modeling_siglip.py +38 -39
- transformers/models/siglip/processing_siglip.py +2 -14
- transformers/models/siglip/tokenization_siglip.py +6 -7
- transformers/models/siglip2/configuration_siglip2.py +1 -1
- transformers/models/siglip2/image_processing_siglip2.py +15 -16
- transformers/models/siglip2/image_processing_siglip2_fast.py +4 -5
- transformers/models/siglip2/modeling_siglip2.py +54 -54
- transformers/models/siglip2/modular_siglip2.py +23 -25
- transformers/models/siglip2/processing_siglip2.py +2 -14
- transformers/models/smollm3/configuration_smollm3.py +23 -26
- transformers/models/smollm3/modeling_smollm3.py +31 -34
- transformers/models/smollm3/modular_smollm3.py +27 -29
- transformers/models/smolvlm/configuration_smolvlm.py +1 -1
- transformers/models/smolvlm/image_processing_smolvlm.py +42 -43
- transformers/models/smolvlm/image_processing_smolvlm_fast.py +12 -12
- transformers/models/smolvlm/modeling_smolvlm.py +51 -52
- transformers/models/smolvlm/modular_smolvlm.py +15 -17
- transformers/models/smolvlm/processing_smolvlm.py +15 -76
- transformers/models/smolvlm/video_processing_smolvlm.py +7 -8
- transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +0 -1
- transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +20 -23
- transformers/models/speech_to_text/configuration_speech_to_text.py +0 -1
- transformers/models/speech_to_text/feature_extraction_speech_to_text.py +10 -13
- transformers/models/speech_to_text/modeling_speech_to_text.py +52 -54
- transformers/models/speech_to_text/processing_speech_to_text.py +4 -30
- transformers/models/speech_to_text/tokenization_speech_to_text.py +5 -6
- transformers/models/speecht5/configuration_speecht5.py +0 -1
- transformers/models/speecht5/feature_extraction_speecht5.py +16 -37
- transformers/models/speecht5/modeling_speecht5.py +172 -174
- transformers/models/speecht5/number_normalizer.py +0 -1
- transformers/models/speecht5/processing_speecht5.py +3 -37
- transformers/models/speecht5/tokenization_speecht5.py +4 -5
- transformers/models/splinter/configuration_splinter.py +0 -1
- transformers/models/splinter/modeling_splinter.py +54 -56
- transformers/models/splinter/tokenization_splinter.py +2 -4
- transformers/models/squeezebert/configuration_squeezebert.py +0 -1
- transformers/models/squeezebert/modeling_squeezebert.py +60 -62
- transformers/models/squeezebert/tokenization_squeezebert.py +0 -1
- transformers/models/stablelm/configuration_stablelm.py +20 -23
- transformers/models/stablelm/modeling_stablelm.py +39 -42
- transformers/models/starcoder2/configuration_starcoder2.py +19 -22
- transformers/models/starcoder2/modeling_starcoder2.py +33 -36
- transformers/models/starcoder2/modular_starcoder2.py +13 -15
- transformers/models/superglue/configuration_superglue.py +3 -3
- transformers/models/superglue/image_processing_superglue.py +15 -15
- transformers/models/superglue/image_processing_superglue_fast.py +4 -5
- transformers/models/superglue/modeling_superglue.py +32 -33
- transformers/models/superpoint/image_processing_superpoint.py +15 -15
- transformers/models/superpoint/image_processing_superpoint_fast.py +4 -5
- transformers/models/superpoint/modeling_superpoint.py +13 -14
- transformers/models/swiftformer/configuration_swiftformer.py +0 -1
- transformers/models/swiftformer/modeling_swiftformer.py +12 -14
- transformers/models/swin/configuration_swin.py +0 -1
- transformers/models/swin/modeling_swin.py +58 -70
- transformers/models/swin2sr/configuration_swin2sr.py +0 -1
- transformers/models/swin2sr/image_processing_swin2sr.py +10 -13
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +2 -5
- transformers/models/swin2sr/modeling_swin2sr.py +26 -28
- transformers/models/swinv2/configuration_swinv2.py +0 -1
- transformers/models/swinv2/modeling_swinv2.py +55 -67
- transformers/models/switch_transformers/configuration_switch_transformers.py +0 -1
- transformers/models/switch_transformers/modeling_switch_transformers.py +32 -33
- transformers/models/switch_transformers/modular_switch_transformers.py +29 -30
- transformers/models/t5/configuration_t5.py +0 -1
- transformers/models/t5/modeling_t5.py +75 -77
- transformers/models/t5/tokenization_t5.py +1 -3
- transformers/models/t5gemma/configuration_t5gemma.py +33 -34
- transformers/models/t5gemma/modeling_t5gemma.py +96 -99
- transformers/models/t5gemma/modular_t5gemma.py +117 -118
- transformers/models/t5gemma2/configuration_t5gemma2.py +53 -54
- transformers/models/t5gemma2/modeling_t5gemma2.py +96 -99
- transformers/models/t5gemma2/modular_t5gemma2.py +134 -135
- transformers/models/table_transformer/configuration_table_transformer.py +0 -1
- transformers/models/table_transformer/modeling_table_transformer.py +46 -48
- transformers/models/tapas/configuration_tapas.py +0 -1
- transformers/models/tapas/modeling_tapas.py +64 -66
- transformers/models/tapas/tokenization_tapas.py +115 -153
- transformers/models/textnet/configuration_textnet.py +0 -1
- transformers/models/textnet/image_processing_textnet.py +22 -25
- transformers/models/textnet/image_processing_textnet_fast.py +5 -6
- transformers/models/textnet/modeling_textnet.py +13 -14
- transformers/models/time_series_transformer/configuration_time_series_transformer.py +5 -8
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +79 -81
- transformers/models/timesfm/configuration_timesfm.py +0 -1
- transformers/models/timesfm/modeling_timesfm.py +17 -19
- transformers/models/timesfm/modular_timesfm.py +16 -18
- transformers/models/timesformer/configuration_timesformer.py +0 -1
- transformers/models/timesformer/modeling_timesformer.py +13 -16
- transformers/models/timm_backbone/configuration_timm_backbone.py +0 -1
- transformers/models/timm_backbone/modeling_timm_backbone.py +4 -6
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +2 -3
- transformers/models/timm_wrapper/image_processing_timm_wrapper.py +4 -5
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +13 -15
- transformers/models/trocr/configuration_trocr.py +0 -1
- transformers/models/trocr/modeling_trocr.py +38 -40
- transformers/models/trocr/processing_trocr.py +5 -25
- transformers/models/tvp/configuration_tvp.py +0 -1
- transformers/models/tvp/image_processing_tvp.py +50 -52
- transformers/models/tvp/image_processing_tvp_fast.py +9 -10
- transformers/models/tvp/modeling_tvp.py +25 -27
- transformers/models/tvp/processing_tvp.py +2 -14
- transformers/models/udop/configuration_udop.py +0 -1
- transformers/models/udop/modeling_udop.py +63 -66
- transformers/models/udop/processing_udop.py +7 -26
- transformers/models/udop/tokenization_udop.py +80 -93
- transformers/models/umt5/configuration_umt5.py +0 -1
- transformers/models/umt5/modeling_umt5.py +80 -81
- transformers/models/unispeech/configuration_unispeech.py +0 -1
- transformers/models/unispeech/modeling_unispeech.py +47 -49
- transformers/models/unispeech/modular_unispeech.py +20 -22
- transformers/models/unispeech_sat/configuration_unispeech_sat.py +0 -1
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +63 -65
- transformers/models/unispeech_sat/modular_unispeech_sat.py +21 -23
- transformers/models/univnet/feature_extraction_univnet.py +14 -14
- transformers/models/univnet/modeling_univnet.py +7 -8
- transformers/models/upernet/configuration_upernet.py +0 -1
- transformers/models/upernet/modeling_upernet.py +10 -13
- transformers/models/vaultgemma/__init__.py +0 -1
- transformers/models/vaultgemma/configuration_vaultgemma.py +24 -26
- transformers/models/vaultgemma/modeling_vaultgemma.py +34 -36
- transformers/models/vaultgemma/modular_vaultgemma.py +29 -31
- transformers/models/video_llama_3/image_processing_video_llama_3.py +40 -40
- transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +8 -8
- transformers/models/video_llama_3/modeling_video_llama_3.py +66 -66
- transformers/models/video_llama_3/modular_video_llama_3.py +101 -112
- transformers/models/video_llama_3/processing_video_llama_3.py +5 -39
- transformers/models/video_llama_3/video_processing_video_llama_3.py +18 -18
- transformers/models/video_llava/configuration_video_llava.py +0 -1
- transformers/models/video_llava/image_processing_video_llava.py +35 -38
- transformers/models/video_llava/modeling_video_llava.py +52 -54
- transformers/models/video_llava/processing_video_llava.py +38 -78
- transformers/models/video_llava/video_processing_video_llava.py +0 -1
- transformers/models/videomae/configuration_videomae.py +0 -1
- transformers/models/videomae/image_processing_videomae.py +31 -34
- transformers/models/videomae/modeling_videomae.py +13 -15
- transformers/models/videomae/video_processing_videomae.py +0 -1
- transformers/models/vilt/configuration_vilt.py +0 -1
- transformers/models/vilt/image_processing_vilt.py +29 -30
- transformers/models/vilt/image_processing_vilt_fast.py +9 -10
- transformers/models/vilt/modeling_vilt.py +76 -78
- transformers/models/vilt/processing_vilt.py +2 -14
- transformers/models/vipllava/configuration_vipllava.py +0 -1
- transformers/models/vipllava/modeling_vipllava.py +38 -39
- transformers/models/vipllava/modular_vipllava.py +30 -32
- transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +0 -1
- transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +18 -21
- transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +0 -1
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +18 -21
- transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +2 -16
- transformers/models/visual_bert/configuration_visual_bert.py +0 -1
- transformers/models/visual_bert/modeling_visual_bert.py +90 -92
- transformers/models/vit/configuration_vit.py +0 -1
- transformers/models/vit/image_processing_vit.py +19 -22
- transformers/models/vit/image_processing_vit_fast.py +0 -1
- transformers/models/vit/modeling_vit.py +13 -15
- transformers/models/vit_mae/configuration_vit_mae.py +0 -1
- transformers/models/vit_mae/modeling_vit_mae.py +21 -23
- transformers/models/vit_msn/configuration_vit_msn.py +0 -1
- transformers/models/vit_msn/modeling_vit_msn.py +10 -12
- transformers/models/vitdet/configuration_vitdet.py +0 -1
- transformers/models/vitdet/modeling_vitdet.py +12 -14
- transformers/models/vitmatte/configuration_vitmatte.py +1 -4
- transformers/models/vitmatte/image_processing_vitmatte.py +15 -18
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +14 -15
- transformers/models/vitmatte/modeling_vitmatte.py +9 -11
- transformers/models/vitpose/configuration_vitpose.py +3 -6
- transformers/models/vitpose/image_processing_vitpose.py +24 -25
- transformers/models/vitpose/image_processing_vitpose_fast.py +9 -10
- transformers/models/vitpose/modeling_vitpose.py +10 -12
- transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +0 -1
- transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +8 -10
- transformers/models/vits/configuration_vits.py +0 -1
- transformers/models/vits/modeling_vits.py +34 -35
- transformers/models/vits/tokenization_vits.py +3 -4
- transformers/models/vivit/configuration_vivit.py +0 -1
- transformers/models/vivit/image_processing_vivit.py +36 -39
- transformers/models/vivit/modeling_vivit.py +5 -7
- transformers/models/vjepa2/__init__.py +0 -1
- transformers/models/vjepa2/configuration_vjepa2.py +0 -1
- transformers/models/vjepa2/modeling_vjepa2.py +30 -32
- transformers/models/vjepa2/video_processing_vjepa2.py +0 -1
- transformers/models/voxtral/__init__.py +0 -1
- transformers/models/voxtral/configuration_voxtral.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +17 -25
- transformers/models/voxtral/modular_voxtral.py +10 -19
- transformers/models/voxtral/processing_voxtral.py +25 -48
- transformers/models/wav2vec2/configuration_wav2vec2.py +0 -1
- transformers/models/wav2vec2/feature_extraction_wav2vec2.py +7 -10
- transformers/models/wav2vec2/modeling_wav2vec2.py +67 -122
- transformers/models/wav2vec2/processing_wav2vec2.py +6 -35
- transformers/models/wav2vec2/tokenization_wav2vec2.py +20 -332
- transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +0 -1
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +49 -52
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +45 -48
- transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +6 -35
- transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +0 -1
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +62 -65
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +15 -18
- transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +16 -17
- transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +36 -55
- transformers/models/wavlm/configuration_wavlm.py +0 -1
- transformers/models/wavlm/modeling_wavlm.py +45 -48
- transformers/models/wavlm/modular_wavlm.py +4 -5
- transformers/models/whisper/configuration_whisper.py +0 -1
- transformers/models/whisper/english_normalizer.py +3 -4
- transformers/models/whisper/feature_extraction_whisper.py +9 -24
- transformers/models/whisper/generation_whisper.py +26 -48
- transformers/models/whisper/modeling_whisper.py +68 -70
- transformers/models/whisper/processing_whisper.py +3 -20
- transformers/models/whisper/tokenization_whisper.py +9 -30
- transformers/models/x_clip/configuration_x_clip.py +0 -1
- transformers/models/x_clip/modeling_x_clip.py +68 -69
- transformers/models/x_clip/processing_x_clip.py +2 -14
- transformers/models/xcodec/configuration_xcodec.py +4 -6
- transformers/models/xcodec/modeling_xcodec.py +15 -17
- transformers/models/xglm/configuration_xglm.py +0 -1
- transformers/models/xglm/modeling_xglm.py +49 -55
- transformers/models/xglm/tokenization_xglm.py +1 -4
- transformers/models/xlm/configuration_xlm.py +0 -1
- transformers/models/xlm/modeling_xlm.py +126 -130
- transformers/models/xlm/tokenization_xlm.py +3 -5
- transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -1
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +90 -92
- transformers/models/xlm_roberta/modular_xlm_roberta.py +50 -53
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +1 -4
- transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +0 -1
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +91 -93
- transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +67 -70
- transformers/models/xlnet/configuration_xlnet.py +0 -11
- transformers/models/xlnet/modeling_xlnet.py +149 -162
- transformers/models/xlnet/tokenization_xlnet.py +1 -4
- transformers/models/xlstm/configuration_xlstm.py +3 -5
- transformers/models/xlstm/modeling_xlstm.py +62 -65
- transformers/models/xmod/configuration_xmod.py +0 -1
- transformers/models/xmod/modeling_xmod.py +98 -100
- transformers/models/yolos/configuration_yolos.py +0 -1
- transformers/models/yolos/image_processing_yolos.py +60 -62
- transformers/models/yolos/image_processing_yolos_fast.py +18 -18
- transformers/models/yolos/modeling_yolos.py +12 -14
- transformers/models/yolos/modular_yolos.py +2 -4
- transformers/models/yoso/configuration_yoso.py +0 -1
- transformers/models/yoso/modeling_yoso.py +60 -62
- transformers/models/zamba/configuration_zamba.py +0 -1
- transformers/models/zamba/modeling_zamba.py +68 -69
- transformers/models/zamba2/configuration_zamba2.py +36 -37
- transformers/models/zamba2/modeling_zamba2.py +84 -87
- transformers/models/zamba2/modular_zamba2.py +43 -45
- transformers/models/zoedepth/configuration_zoedepth.py +0 -1
- transformers/models/zoedepth/image_processing_zoedepth.py +28 -29
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +11 -12
- transformers/models/zoedepth/modeling_zoedepth.py +14 -16
- transformers/pipelines/__init__.py +50 -49
- transformers/pipelines/any_to_any.py +14 -22
- transformers/pipelines/audio_utils.py +1 -2
- transformers/pipelines/base.py +12 -16
- transformers/pipelines/deprecated/__init__.py +0 -1
- transformers/pipelines/image_text_to_text.py +0 -1
- transformers/pipelines/image_to_text.py +4 -44
- transformers/pipelines/question_answering.py +4 -43
- transformers/pipelines/text_classification.py +1 -14
- transformers/pipelines/token_classification.py +1 -22
- transformers/pipelines/video_classification.py +1 -9
- transformers/pipelines/zero_shot_audio_classification.py +0 -1
- transformers/pipelines/zero_shot_classification.py +0 -6
- transformers/pipelines/zero_shot_image_classification.py +0 -7
- transformers/processing_utils.py +95 -95
- transformers/quantizers/base.py +10 -0
- transformers/quantizers/quantizer_quark.py +0 -1
- transformers/quantizers/quantizer_torchao.py +3 -3
- transformers/testing_utils.py +3 -37
- transformers/tokenization_mistral_common.py +554 -903
- transformers/tokenization_utils_base.py +109 -122
- transformers/tokenization_utils_sentencepiece.py +5 -6
- transformers/tokenization_utils_tokenizers.py +5 -5
- transformers/trainer.py +6 -9
- transformers/trainer_jit_checkpoint.py +1 -2
- transformers/training_args.py +3 -3
- transformers/utils/attention_visualizer.py +1 -1
- transformers/utils/auto_docstring.py +564 -12
- transformers/utils/doc.py +1 -1
- transformers/utils/dummy_pt_objects.py +0 -42
- transformers/utils/generic.py +1 -1
- transformers/utils/loading_report.py +3 -3
- transformers/utils/quantization_config.py +8 -10
- transformers/video_processing_utils.py +19 -20
- transformers/video_utils.py +18 -22
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/METADATA +19 -19
- transformers-5.0.0rc3.dist-info/RECORD +2067 -0
- transformers-5.0.0rc2.dist-info/RECORD +0 -2042
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/WHEEL +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc2.dist-info → transformers-5.0.0rc3.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
1
|
# Copyright 2022 The HuggingFace Inc. team.
|
|
3
2
|
#
|
|
4
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -101,6 +100,10 @@ class GenerationConfig(PushToHubMixin):
|
|
|
101
100
|
|
|
102
101
|
</Tip>
|
|
103
102
|
|
|
103
|
+
Note: the configuration fields that are still `None` will be overridden by `GenerationConfig._get_default_generation_params()`
|
|
104
|
+
during the generation loop. If you want to use different values for these fields, make sure to explicitly set them in the
|
|
105
|
+
generation config.
|
|
106
|
+
|
|
104
107
|
Arg:
|
|
105
108
|
> Parameters that control the length of the output
|
|
106
109
|
|
|
@@ -129,14 +132,14 @@ class GenerationConfig(PushToHubMixin):
|
|
|
129
132
|
|
|
130
133
|
> Parameters that control the generation strategy used
|
|
131
134
|
|
|
132
|
-
do_sample (`bool
|
|
135
|
+
do_sample (`bool`):
|
|
133
136
|
Whether or not to use sampling ; use greedy decoding otherwise.
|
|
134
137
|
num_beams (`int`, *optional*):
|
|
135
138
|
Number of beams for beam search. 1 means no beam search.
|
|
136
139
|
|
|
137
140
|
> Parameters that control the cache
|
|
138
141
|
|
|
139
|
-
use_cache (`bool
|
|
142
|
+
use_cache (`bool`):
|
|
140
143
|
Whether or not the model should use the past last key/values attentions (if applicable to the model) to
|
|
141
144
|
speed up decoding.
|
|
142
145
|
cache_implementation (`str`, *optional*):
|
|
@@ -206,7 +209,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
206
209
|
bad_words_ids (`list[list[int]]`, *optional*):
|
|
207
210
|
List of list of token ids that are not allowed to be generated. Check
|
|
208
211
|
[`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
|
|
209
|
-
renormalize_logits (`bool
|
|
212
|
+
renormalize_logits (`bool`):
|
|
210
213
|
Whether to renormalize the logits after applying all the logits processors (including the custom
|
|
211
214
|
ones). It's highly recommended to set this flag to `True` as the search algorithms suppose the score logits
|
|
212
215
|
are normalized but some logit processors break the normalization.
|
|
@@ -217,7 +220,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
217
220
|
forced_eos_token_id (`int` or `list[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
|
|
218
221
|
The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
|
|
219
222
|
list to set multiple *end-of-sequence* tokens.
|
|
220
|
-
remove_invalid_values (`bool
|
|
223
|
+
remove_invalid_values (`bool`):
|
|
221
224
|
Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash.
|
|
222
225
|
Note that using `remove_invalid_values` can slow down generation.
|
|
223
226
|
exponential_decay_length_penalty (`tuple(int, float)`, *optional*):
|
|
@@ -234,7 +237,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
234
237
|
Dictionary that maps a sequence of tokens to its bias term. Positive biases increase the odds of the
|
|
235
238
|
sequence being selected, while negative biases do the opposite. Check
|
|
236
239
|
[`~generation.SequenceBiasLogitsProcessor`] for further documentation and examples.
|
|
237
|
-
token_healing (`bool
|
|
240
|
+
token_healing (`bool`):
|
|
238
241
|
Heal tail tokens of prompts by replacing them with their appropriate extensions.
|
|
239
242
|
This enhances the quality of completions for prompts affected by greedy tokenization bias.
|
|
240
243
|
guidance_scale (`float`, *optional*):
|
|
@@ -248,20 +251,20 @@ class GenerationConfig(PushToHubMixin):
|
|
|
248
251
|
|
|
249
252
|
> Parameters that define the output variables of generate
|
|
250
253
|
|
|
251
|
-
num_return_sequences (`int`, *optional
|
|
254
|
+
num_return_sequences (`int`, *optional*):
|
|
252
255
|
The number of independently computed returned sequences for each element in the batch.
|
|
253
|
-
output_attentions (`bool
|
|
256
|
+
output_attentions (`bool`):
|
|
254
257
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
|
|
255
258
|
tensors for more details.
|
|
256
|
-
output_hidden_states (`bool
|
|
259
|
+
output_hidden_states (`bool`):
|
|
257
260
|
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
|
258
261
|
more details.
|
|
259
|
-
output_scores (`bool
|
|
262
|
+
output_scores (`bool`):
|
|
260
263
|
Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
|
|
261
|
-
output_logits (`bool
|
|
264
|
+
output_logits (`bool`):
|
|
262
265
|
Whether or not to return the unprocessed prediction logit scores. See `logits` under returned tensors for
|
|
263
266
|
more details.
|
|
264
|
-
return_dict_in_generate (`bool
|
|
267
|
+
return_dict_in_generate (`bool`):
|
|
265
268
|
Whether or not to return a [`~utils.ModelOutput`], as opposed to returning exclusively the generated
|
|
266
269
|
sequence. This flag must be set to `True` to return the generation cache (when `use_cache` is `True`)
|
|
267
270
|
or optional outputs (see flags starting with `output_`)
|
|
@@ -286,7 +289,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
286
289
|
(e.g. multilingual models with different target languages in one batch)
|
|
287
290
|
|
|
288
291
|
> Generation parameters exclusive to assistant generation
|
|
289
|
-
is_assistant (`bool
|
|
292
|
+
is_assistant (`bool`):
|
|
290
293
|
Whether the model is an assistant (draft) model.
|
|
291
294
|
num_assistant_tokens (`int`, *optional*):
|
|
292
295
|
Defines the number of _speculative tokens_ that shall be generated by the assistant model before being
|
|
@@ -327,7 +330,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
327
330
|
compile_config (CompileConfig, *optional*):
|
|
328
331
|
If using a compilable cache, this controls how `generate` will `compile` the forward pass for faster
|
|
329
332
|
inference.
|
|
330
|
-
disable_compile (`bool
|
|
333
|
+
disable_compile (`bool`):
|
|
331
334
|
Whether to disable the automatic compilation of the forward pass. Automatic compilation happens when
|
|
332
335
|
specific criteria are met, including using a compilable cache. Please open an issue if you find the
|
|
333
336
|
need to use this flag.
|
|
@@ -346,11 +349,11 @@ class GenerationConfig(PushToHubMixin):
|
|
|
346
349
|
self.stop_strings = kwargs.pop("stop_strings", None)
|
|
347
350
|
|
|
348
351
|
# Parameters that control the generation strategy used
|
|
349
|
-
self.do_sample = kwargs.pop("do_sample",
|
|
352
|
+
self.do_sample = kwargs.pop("do_sample", None)
|
|
350
353
|
self.num_beams = kwargs.pop("num_beams", None)
|
|
351
354
|
|
|
352
355
|
# Parameters that control the cache
|
|
353
|
-
self.use_cache = kwargs.pop("use_cache",
|
|
356
|
+
self.use_cache = kwargs.pop("use_cache", None)
|
|
354
357
|
self.cache_implementation = kwargs.pop("cache_implementation", None)
|
|
355
358
|
self.cache_config = kwargs.pop("cache_config", None)
|
|
356
359
|
|
|
@@ -368,15 +371,15 @@ class GenerationConfig(PushToHubMixin):
|
|
|
368
371
|
self.length_penalty = kwargs.pop("length_penalty", None)
|
|
369
372
|
self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", None)
|
|
370
373
|
self.bad_words_ids = kwargs.pop("bad_words_ids", None)
|
|
371
|
-
self.renormalize_logits = kwargs.pop("renormalize_logits",
|
|
374
|
+
self.renormalize_logits = kwargs.pop("renormalize_logits", None)
|
|
372
375
|
self.forced_bos_token_id = kwargs.pop("forced_bos_token_id", None)
|
|
373
376
|
self.forced_eos_token_id = kwargs.pop("forced_eos_token_id", None)
|
|
374
|
-
self.remove_invalid_values = kwargs.pop("remove_invalid_values",
|
|
377
|
+
self.remove_invalid_values = kwargs.pop("remove_invalid_values", None)
|
|
375
378
|
self.exponential_decay_length_penalty = kwargs.pop("exponential_decay_length_penalty", None)
|
|
376
379
|
self.suppress_tokens = kwargs.pop("suppress_tokens", None)
|
|
377
380
|
self.begin_suppress_tokens = kwargs.pop("begin_suppress_tokens", None)
|
|
378
381
|
self.sequence_bias = kwargs.pop("sequence_bias", None)
|
|
379
|
-
self.token_healing = kwargs.pop("token_healing",
|
|
382
|
+
self.token_healing = kwargs.pop("token_healing", None)
|
|
380
383
|
self.guidance_scale = kwargs.pop("guidance_scale", None)
|
|
381
384
|
|
|
382
385
|
self.watermarking_config = kwargs.pop("watermarking_config", None)
|
|
@@ -384,12 +387,12 @@ class GenerationConfig(PushToHubMixin):
|
|
|
384
387
|
self.watermarking_config = WatermarkingConfig.from_dict(self.watermarking_config)
|
|
385
388
|
|
|
386
389
|
# Parameters that define the output variables of `generate`
|
|
387
|
-
self.num_return_sequences = kwargs.pop("num_return_sequences",
|
|
388
|
-
self.output_attentions = kwargs.pop("output_attentions",
|
|
389
|
-
self.output_hidden_states = kwargs.pop("output_hidden_states",
|
|
390
|
-
self.output_scores = kwargs.pop("output_scores",
|
|
391
|
-
self.output_logits = kwargs.pop("output_logits",
|
|
392
|
-
self.return_dict_in_generate = kwargs.pop("return_dict_in_generate",
|
|
390
|
+
self.num_return_sequences = kwargs.pop("num_return_sequences", None)
|
|
391
|
+
self.output_attentions = kwargs.pop("output_attentions", None)
|
|
392
|
+
self.output_hidden_states = kwargs.pop("output_hidden_states", None)
|
|
393
|
+
self.output_scores = kwargs.pop("output_scores", None)
|
|
394
|
+
self.output_logits = kwargs.pop("output_logits", None)
|
|
395
|
+
self.return_dict_in_generate = kwargs.pop("return_dict_in_generate", None)
|
|
393
396
|
|
|
394
397
|
# Special tokens that can be used at generation time
|
|
395
398
|
self.pad_token_id = kwargs.pop("pad_token_id", None)
|
|
@@ -401,7 +404,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
401
404
|
self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)
|
|
402
405
|
|
|
403
406
|
# Assistant generation
|
|
404
|
-
self.is_assistant = kwargs.pop("is_assistant",
|
|
407
|
+
self.is_assistant = kwargs.pop("is_assistant", None)
|
|
405
408
|
self.num_assistant_tokens = kwargs.pop("num_assistant_tokens", None)
|
|
406
409
|
self.num_assistant_tokens_schedule = kwargs.pop("num_assistant_tokens_schedule", None)
|
|
407
410
|
self.assistant_confidence_threshold = kwargs.pop("assistant_confidence_threshold", None)
|
|
@@ -413,7 +416,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
413
416
|
|
|
414
417
|
# Performance
|
|
415
418
|
self.compile_config = kwargs.pop("compile_config", None)
|
|
416
|
-
self.disable_compile = kwargs.pop("disable_compile",
|
|
419
|
+
self.disable_compile = kwargs.pop("disable_compile", None)
|
|
417
420
|
|
|
418
421
|
# Deprecated (moved to the Hub). TODO remove for v5
|
|
419
422
|
self.low_memory = kwargs.pop("low_memory", None)
|
|
@@ -483,7 +486,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
483
486
|
if self.constraints is not None or self.force_words_ids is not None:
|
|
484
487
|
generation_mode = GenerationMode.CONSTRAINED_BEAM_SEARCH
|
|
485
488
|
elif self.num_beams is None or self.num_beams == 1:
|
|
486
|
-
if
|
|
489
|
+
if self.do_sample is not True:
|
|
487
490
|
if (
|
|
488
491
|
self.top_k is not None
|
|
489
492
|
and self.top_k > 1
|
|
@@ -498,7 +501,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
498
501
|
else:
|
|
499
502
|
if self.num_beam_groups is not None and self.num_beam_groups > 1:
|
|
500
503
|
generation_mode = GenerationMode.GROUP_BEAM_SEARCH
|
|
501
|
-
elif self.do_sample:
|
|
504
|
+
elif self.do_sample is True:
|
|
502
505
|
generation_mode = GenerationMode.BEAM_SAMPLE
|
|
503
506
|
else:
|
|
504
507
|
generation_mode = GenerationMode.BEAM_SEARCH
|
|
@@ -537,6 +540,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
537
540
|
"max_length": 20,
|
|
538
541
|
"min_length": 0,
|
|
539
542
|
"do_sample": False,
|
|
543
|
+
"use_cache": True,
|
|
540
544
|
"early_stopping": False,
|
|
541
545
|
"num_beams": 1,
|
|
542
546
|
"temperature": 1.0,
|
|
@@ -616,7 +620,10 @@ class GenerationConfig(PushToHubMixin):
|
|
|
616
620
|
|
|
617
621
|
# 2. Validation of attribute combinations
|
|
618
622
|
# 2.1. detect sampling-only parameterization when not in sampling mode
|
|
619
|
-
|
|
623
|
+
|
|
624
|
+
# Note that we check `is not True` on purpose. Boolean fields can also be `None` so we
|
|
625
|
+
# have to be explicit. A value of `None` is the same as `False`, i.e. the default value
|
|
626
|
+
if self.do_sample is not True:
|
|
620
627
|
greedy_wrong_parameter_msg = (
|
|
621
628
|
"`do_sample` is set not to set `True`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only "
|
|
622
629
|
"used in sample-based generation modes. You should set `do_sample=True` or unset `{flag_name}`."
|
|
@@ -662,21 +669,25 @@ class GenerationConfig(PushToHubMixin):
|
|
|
662
669
|
)
|
|
663
670
|
|
|
664
671
|
# 2.4. check `num_return_sequences`
|
|
665
|
-
if self.num_return_sequences > 1:
|
|
672
|
+
if self.num_return_sequences is not None and self.num_return_sequences > 1:
|
|
666
673
|
if self.num_beams is None or self.num_beams == 1:
|
|
667
674
|
if not self.do_sample:
|
|
668
675
|
raise ValueError(
|
|
669
676
|
"Greedy methods (do_sample != True) without beam search do not support "
|
|
670
677
|
f"`num_return_sequences` different than 1 (got {self.num_return_sequences})."
|
|
671
678
|
)
|
|
672
|
-
elif
|
|
679
|
+
elif (
|
|
680
|
+
self.num_beams is not None
|
|
681
|
+
and self.num_return_sequences is not None
|
|
682
|
+
and self.num_return_sequences > self.num_beams
|
|
683
|
+
):
|
|
673
684
|
raise ValueError(
|
|
674
685
|
f"`num_return_sequences` ({self.num_return_sequences}) has to be smaller or equal to `num_beams` "
|
|
675
686
|
f"({self.num_beams})."
|
|
676
687
|
)
|
|
677
688
|
|
|
678
689
|
# 2.5. check cache-related arguments
|
|
679
|
-
if
|
|
690
|
+
if self.use_cache is False:
|
|
680
691
|
# In this case, all cache-related arguments should be unset. However, since `use_cache=False` is often used
|
|
681
692
|
# passed to `generate` directly to hot-fix cache issues, let's raise a warning instead of an error
|
|
682
693
|
# (otherwise a user might need to overwrite several parameters).
|
|
@@ -691,9 +702,9 @@ class GenerationConfig(PushToHubMixin):
|
|
|
691
702
|
)
|
|
692
703
|
|
|
693
704
|
# 2.6. other incorrect combinations
|
|
694
|
-
if
|
|
705
|
+
if self.return_dict_in_generate is not True:
|
|
695
706
|
for extra_output_flag in self.extra_output_flags:
|
|
696
|
-
if getattr(self, extra_output_flag):
|
|
707
|
+
if getattr(self, extra_output_flag) is True:
|
|
697
708
|
minor_issues[extra_output_flag] = (
|
|
698
709
|
f"`return_dict_in_generate` is NOT set to `True`, but `{extra_output_flag}` is. When "
|
|
699
710
|
f"`return_dict_in_generate` is not `True`, `{extra_output_flag}` is ignored."
|
|
@@ -1182,7 +1193,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
1182
1193
|
generation_config._original_object_hash = hash(generation_config)
|
|
1183
1194
|
return generation_config
|
|
1184
1195
|
|
|
1185
|
-
def update(self, defaults_only=False, **kwargs):
|
|
1196
|
+
def update(self, defaults_only=False, allow_custom_entries=False, **kwargs):
|
|
1186
1197
|
"""
|
|
1187
1198
|
Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes,
|
|
1188
1199
|
returning all the unused kwargs.
|
|
@@ -1190,6 +1201,8 @@ class GenerationConfig(PushToHubMixin):
|
|
|
1190
1201
|
Args:
|
|
1191
1202
|
defaults_only (`bool`, *optional*, defaults to `False`):
|
|
1192
1203
|
Whether to update all keys in config with `kwargs` or only those that are set to `None` (i.e. default value).
|
|
1204
|
+
allow_custom_entries (`bool`, *optional*, defaults to `False`):
|
|
1205
|
+
Whether to allow updating custom entries into the config with `kwargs` if not present in the current config.
|
|
1193
1206
|
kwargs (`dict[str, Any]`):
|
|
1194
1207
|
Dictionary of attributes to tentatively update this class.
|
|
1195
1208
|
|
|
@@ -1198,7 +1211,10 @@ class GenerationConfig(PushToHubMixin):
|
|
|
1198
1211
|
"""
|
|
1199
1212
|
to_remove = []
|
|
1200
1213
|
for key, value in kwargs.items():
|
|
1201
|
-
if hasattr(self, key):
|
|
1214
|
+
if allow_custom_entries and not hasattr(self, key):
|
|
1215
|
+
setattr(self, key, value)
|
|
1216
|
+
to_remove.append(key)
|
|
1217
|
+
elif hasattr(self, key):
|
|
1202
1218
|
if not defaults_only or getattr(self, key) is None:
|
|
1203
1219
|
setattr(self, key, value)
|
|
1204
1220
|
to_remove.append(key)
|
|
@@ -1261,8 +1277,7 @@ class BaseWatermarkingConfig(ABC):
|
|
|
1261
1277
|
return output
|
|
1262
1278
|
|
|
1263
1279
|
def __iter__(self):
|
|
1264
|
-
|
|
1265
|
-
yield attr, value
|
|
1280
|
+
yield from copy.deepcopy(self.__dict__).items()
|
|
1266
1281
|
|
|
1267
1282
|
def __repr__(self):
|
|
1268
1283
|
return f"{self.__class__.__name__} {self.to_json_string()}"
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
1
|
# Copyright 2025 The HuggingFace Inc. team.
|
|
3
2
|
#
|
|
4
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -223,11 +222,18 @@ class PagedAttentionCache:
|
|
|
223
222
|
# Block management data structures
|
|
224
223
|
self.allow_block_sharing = allow_block_sharing
|
|
225
224
|
self.group_cache_managers: list[CacheAllocator] = []
|
|
225
|
+
self.num_full_attention_groups = 0
|
|
226
|
+
self.num_sliding_attention_groups = 0
|
|
227
|
+
self.max_sliding_window_blocks_per_request = 0
|
|
228
|
+
|
|
226
229
|
for i, group_type in enumerate(group_types):
|
|
227
230
|
if group_type == "full_attention":
|
|
228
231
|
cm = FullAttentionCacheAllocator(i, self.block_size, allow_block_sharing=allow_block_sharing)
|
|
232
|
+
self.num_full_attention_groups += 1
|
|
229
233
|
elif group_type == "sliding_attention":
|
|
230
234
|
cm = SlidingAttentionCacheAllocator(i, self.block_size, config.sliding_window)
|
|
235
|
+
self.num_sliding_attention_groups += 1
|
|
236
|
+
self.max_sliding_window_blocks_per_request = cm._max_blocks_per_request
|
|
231
237
|
else:
|
|
232
238
|
raise ValueError(f"Invalid group type: {group_type}")
|
|
233
239
|
self.group_cache_managers.append(cm)
|
|
@@ -238,16 +244,38 @@ class PagedAttentionCache:
|
|
|
238
244
|
self.blocks_to_complete: dict[str, int] = {}
|
|
239
245
|
self._total_prefix_length: int = 0 # a counter to measure the impact of prefix sharing, also used in tests
|
|
240
246
|
|
|
247
|
+
def will_allocation_be_successful(self, num_requested_blocks: int, allocated_blocks: int) -> bool:
|
|
248
|
+
"""Returns a boolean indicating if the allocation of (num_requested_blocks) blocks will be successful. The
|
|
249
|
+
number of newly allocated blocks needed is predicted by the following rules:
|
|
250
|
+
- for full attention groups: since there is no sliding window for full attention layers, one requested block is
|
|
251
|
+
always equivalent to one newly allocated block for EACH full attention group
|
|
252
|
+
- for sliding window groups: because of the sliding window, the number of blocks allocated to a request is
|
|
253
|
+
capped. Using the number of already allocated blocks (allocated_blocks), we can compute the number of new blocks to actually
|
|
254
|
+
allocate to the request, which can be lower than the number of requested blocks. That number is the same for
|
|
255
|
+
all sliding window groups, as only one sliding window size is supported.
|
|
256
|
+
"""
|
|
257
|
+
# This is not in a branch, because it is very rare to have zero full attention layer
|
|
258
|
+
needed_blocks = num_requested_blocks * self.num_full_attention_groups
|
|
259
|
+
# Only take this branch if the model has sliding window attention layers
|
|
260
|
+
if self.num_sliding_attention_groups:
|
|
261
|
+
blocks_left = max(self.max_sliding_window_blocks_per_request - allocated_blocks, 0)
|
|
262
|
+
needed_blocks += min(blocks_left, num_requested_blocks) * self.num_sliding_attention_groups
|
|
263
|
+
return needed_blocks <= self.get_num_free_blocks()
|
|
264
|
+
|
|
241
265
|
@traced
|
|
242
|
-
def allocate_blocks(self, n_blocks: int,
|
|
266
|
+
def allocate_blocks(self, n_blocks: int, request_id: str, allocated_blocks: int) -> int | None:
|
|
243
267
|
"""Allocate cache blocks across all layer groups for a given request. Actual allocation is done by the cache
|
|
244
268
|
managers, and this method only returns the maximum number of blocks actually allocated across all managers."""
|
|
269
|
+
# First check allocation will be successful before starting, to avoid partial allocations
|
|
270
|
+
if not self.will_allocation_be_successful(n_blocks, allocated_blocks):
|
|
271
|
+
return None
|
|
272
|
+
# Allocate blocks across all cache managers
|
|
245
273
|
max_allocated = 0
|
|
246
274
|
for cm in self.group_cache_managers:
|
|
247
|
-
|
|
248
|
-
if
|
|
249
|
-
|
|
250
|
-
max_allocated = max(max_allocated,
|
|
275
|
+
num_allocated_blocks = cm.allocate_blocks(n_blocks, request_id, self._block_manager)
|
|
276
|
+
if num_allocated_blocks is None:
|
|
277
|
+
raise ValueError(f"Failed to allocate {n_blocks} blocks for request {request_id}")
|
|
278
|
+
max_allocated = max(max_allocated, num_allocated_blocks)
|
|
251
279
|
return max_allocated
|
|
252
280
|
|
|
253
281
|
@traced
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
1
|
# Copyright 2025 The HuggingFace Inc. team.
|
|
3
2
|
#
|
|
4
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -207,6 +206,15 @@ class BlockManager:
|
|
|
207
206
|
else:
|
|
208
207
|
self._uninit_block_ids.extend(blocks)
|
|
209
208
|
|
|
209
|
+
def uninitialize_unshared_block(self, block_id: int) -> None:
|
|
210
|
+
"""Marks a block as uninitialized. Raises an error if the block has more than one reference."""
|
|
211
|
+
# Make sure the block has only one reference and remove it from the block table
|
|
212
|
+
block = self._id_to_block.pop(block_id)
|
|
213
|
+
if block.ref_count > 1:
|
|
214
|
+
raise RuntimeError(f"Block {block_id} has more than one reference: {block.ref_count = }")
|
|
215
|
+
# Add the block to the uninitialized blocks queue
|
|
216
|
+
self._uninit_block_ids.append(block_id)
|
|
217
|
+
|
|
210
218
|
def mark_shareable_blocks_as_complete(
|
|
211
219
|
self, num_complete_blocks: int, allocated_blocks: list[int], prompt_ids: list[int]
|
|
212
220
|
) -> None:
|
|
@@ -242,13 +250,17 @@ class BlockManager:
|
|
|
242
250
|
block.hash = self.compute_hash(parent_hash, tokens, block.group_id)
|
|
243
251
|
|
|
244
252
|
existing_block_id = self._hash_to_id.get(block.hash)
|
|
245
|
-
# If
|
|
253
|
+
# If there was a different block with the same hash, we reference the existing block instead
|
|
246
254
|
if existing_block_id is not None:
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
255
|
+
if existing_block_id == block.id:
|
|
256
|
+
# This should not happen, but is not a problem in itself, so we just log a warning
|
|
257
|
+
logger.warning(f"Block {block.id} was marked as complete more than once")
|
|
258
|
+
else:
|
|
259
|
+
logger.debug(f"Found existing block {existing_block_id} for block {block.id}")
|
|
260
|
+
allocated_blocks[i] = existing_block_id
|
|
261
|
+
new_parent_id = existing_block_id
|
|
262
|
+
self.increase_ref_count(existing_block_id)
|
|
263
|
+
self.uninitialize_unshared_block(block.id)
|
|
252
264
|
|
|
253
265
|
# Otherwise, we add the completed block to the hash table
|
|
254
266
|
else:
|
|
@@ -349,16 +361,17 @@ class FullAttentionCacheAllocator(CacheAllocator):
|
|
|
349
361
|
allocated if successful and None otherwise. For group of full attention layers, we always allocate the number of
|
|
350
362
|
requested blocks."""
|
|
351
363
|
# Make sure the request_id is in the block table and get the first block id
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
last_block_id =
|
|
364
|
+
block_table = self.block_table.get(request_id, [])
|
|
365
|
+
if block_table:
|
|
366
|
+
last_block_id = block_table[-1]
|
|
355
367
|
else:
|
|
356
|
-
|
|
368
|
+
self.block_table[request_id] = block_table # TODO: check the impact of making this a deque
|
|
369
|
+
last_block_id = None
|
|
357
370
|
# Actual allocation, return early if failed
|
|
358
371
|
allocated_blocks = block_manager.get_free_blocks(n_blocks, last_block_id, self.uses_block_sharing, self._index)
|
|
359
372
|
if allocated_blocks is None:
|
|
360
373
|
return None
|
|
361
|
-
|
|
374
|
+
block_table.extend(allocated_blocks)
|
|
362
375
|
return n_blocks
|
|
363
376
|
|
|
364
377
|
def get_read_indices(self, request_id: str, past_length: int, query_length: int) -> list[int]:
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
1
|
# Copyright 2024 The HuggingFace Inc. team.
|
|
3
2
|
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
4
3
|
#
|
|
@@ -34,7 +33,7 @@ from ...generation.logits_process import LogitsProcessor
|
|
|
34
33
|
from ...utils.logging import logging
|
|
35
34
|
from ...utils.metrics import ContinuousBatchProcessorMetrics, attach_tracer, traced
|
|
36
35
|
from .cache import PagedAttentionCache
|
|
37
|
-
from .requests import GenerationOutput, RequestState, RequestStatus,
|
|
36
|
+
from .requests import GenerationOutput, RequestState, RequestStatus, logger
|
|
38
37
|
from .scheduler import SCHEDULER_MAPPING, FIFOScheduler, Scheduler
|
|
39
38
|
|
|
40
39
|
|
|
@@ -250,7 +249,7 @@ class ContinuousBatchProcessor:
|
|
|
250
249
|
def setup_static_tensors(self, num_groups: int) -> None:
|
|
251
250
|
"""Setup the static tensors that are used for storage during the generation step. No other tensor will be
|
|
252
251
|
allowed for the inputs or the outputs of the generation step."""
|
|
253
|
-
num_pages = self.cache.num_blocks * self.cache.block_size
|
|
252
|
+
self.num_pages = self.cache.num_blocks * self.cache.block_size
|
|
254
253
|
self.tensor_metadata = {"dtype": torch.int32, "device": self.model_device}
|
|
255
254
|
|
|
256
255
|
# Some tensors always have the same shape regardless of the model
|
|
@@ -275,7 +274,7 @@ class ContinuousBatchProcessor:
|
|
|
275
274
|
|
|
276
275
|
if attn_mask_is_needed(self.config):
|
|
277
276
|
attn_mask_kwargs = {
|
|
278
|
-
"size": (1, 1, self.max_batch_tokens, num_pages + self.max_batch_tokens),
|
|
277
|
+
"size": (1, 1, self.max_batch_tokens, self.num_pages + self.max_batch_tokens),
|
|
279
278
|
"dtype": self.model_dtype,
|
|
280
279
|
"device": self.model_device,
|
|
281
280
|
}
|
|
@@ -288,7 +287,7 @@ class ContinuousBatchProcessor:
|
|
|
288
287
|
torch.empty((self.max_batch_tokens,), **self.tensor_metadata) for _ in range(num_groups)
|
|
289
288
|
]
|
|
290
289
|
self.read_index_storage = [
|
|
291
|
-
torch.empty((num_pages + self.max_batch_tokens), **self.tensor_metadata) for _ in range(num_groups)
|
|
290
|
+
torch.empty((self.num_pages + self.max_batch_tokens), **self.tensor_metadata) for _ in range(num_groups)
|
|
292
291
|
]
|
|
293
292
|
# For read index, the +T is because there are -1 for seqlen_q when model uses a sliding window
|
|
294
293
|
|
|
@@ -428,6 +427,33 @@ class ContinuousBatchProcessor:
|
|
|
428
427
|
self.metrics.record_request_completion(state.created_time, state.request_id)
|
|
429
428
|
self.output_queue.put(state.to_generation_output())
|
|
430
429
|
|
|
430
|
+
# TODO: there should be a way to choose the offloading policy: biggest request, oldest request, etc.
|
|
431
|
+
# Including a policy to not allow offloading and crashing the generation
|
|
432
|
+
def soft_reset_one_request(self) -> None:
|
|
433
|
+
"""Soft resets one active request by removing it from active requests and re-adding it to the waiting queue.
|
|
434
|
+
|
|
435
|
+
The generated tokens are kept as part of the new request's initial prompt. When `block_new_requests` is False,
|
|
436
|
+
the oldest request is offloaded; when True, the newest request is offloaded. This method also sets
|
|
437
|
+
`block_new_requests` to True to prevent infinite loops of offloading and re-scheduling requests.
|
|
438
|
+
"""
|
|
439
|
+
# The offloaded request is the newest (resp. oldest) if block_new_requests is True (resp. False)
|
|
440
|
+
if self.scheduler.block_new_requests:
|
|
441
|
+
request_id, state = self.scheduler.active_requests.popitem()
|
|
442
|
+
else:
|
|
443
|
+
request_id, state = next(iter(self.scheduler.active_requests.items()))
|
|
444
|
+
logger.info(
|
|
445
|
+
f"Soft resetting request {request_id} with {len(state.initial_tokens)} initial tokens and "
|
|
446
|
+
f"{len(state.generated_tokens)} generated tokens"
|
|
447
|
+
)
|
|
448
|
+
# Create a copy of the offloaded request keeping the generated tokens as addition to the initial prompt
|
|
449
|
+
new_state = state.create_equivalent_initial_request()
|
|
450
|
+
# Actual offloading of the request
|
|
451
|
+
self.scheduler.finish_request(request_id, evict_from_cache=True)
|
|
452
|
+
self.scheduler.add_waiting_request(new_state)
|
|
453
|
+
# This flag blocks any new requests from being scheduled until one request is finished. This ensures that we
|
|
454
|
+
# don't enter an offload / schedule loop
|
|
455
|
+
self.scheduler.block_new_requests = True
|
|
456
|
+
|
|
431
457
|
@traced
|
|
432
458
|
def prepare_next_batch(self) -> bool:
|
|
433
459
|
"""Prepare tensors and metadata for the next model forward pass. Returns True if there are requests to process,
|
|
@@ -441,9 +467,18 @@ class ContinuousBatchProcessor:
|
|
|
441
467
|
self.metrics.record_queue_metrics(len(self.scheduler.active_requests), len(self.scheduler.waiting_requests))
|
|
442
468
|
|
|
443
469
|
# Schedule the next batch of requests, stop if there are no requests in the batch
|
|
444
|
-
self.requests_in_batch = self.scheduler.schedule_batch(self.max_batch_tokens)
|
|
470
|
+
self.requests_in_batch = self.scheduler.schedule_batch(self.max_batch_tokens, self.num_pages)
|
|
471
|
+
|
|
472
|
+
# If requests_in_batch is None, it means we need to offload some requests if possible
|
|
473
|
+
if self.requests_in_batch is None:
|
|
474
|
+
if len(self.scheduler.active_requests) > 1:
|
|
475
|
+
self.soft_reset_one_request()
|
|
476
|
+
else:
|
|
477
|
+
raise RuntimeError("No requests can be scheduled and no request can be offloaded.")
|
|
478
|
+
# If it's an empty list, it means we have no requests to process
|
|
445
479
|
if not self.requests_in_batch:
|
|
446
480
|
return False
|
|
481
|
+
# Otherwise, we can continue with the non-empty batch
|
|
447
482
|
self.metrics.record_batch_metrics(self.requests_in_batch)
|
|
448
483
|
|
|
449
484
|
# Reset the static tensors used for storage
|
|
@@ -592,6 +627,7 @@ class ContinuousBatchProcessor:
|
|
|
592
627
|
if is_finished:
|
|
593
628
|
self.metrics.record_request_completion(state.created_time, state.request_id)
|
|
594
629
|
self.scheduler.finish_request(state.request_id, evict_from_cache=(not self.manual_eviction))
|
|
630
|
+
self.scheduler.block_new_requests = False
|
|
595
631
|
self._maybe_send_output(state)
|
|
596
632
|
# Otherwise, the request is still prefilling, but the prefill has been split
|
|
597
633
|
elif state.status == RequestStatus.PREFILLING_SPLIT:
|
|
@@ -621,9 +657,6 @@ class ContinuousBatchProcessor:
|
|
|
621
657
|
if copy_source:
|
|
622
658
|
self.cache.copy_cache(copy_source, copy_destination)
|
|
623
659
|
|
|
624
|
-
if self.cache.get_num_free_blocks() == 0:
|
|
625
|
-
raise ValueError("No more free blocks")
|
|
626
|
-
|
|
627
660
|
@traced
|
|
628
661
|
def has_pending_requests(self) -> bool:
|
|
629
662
|
"""Check if there are any active or waiting requests."""
|
|
@@ -679,11 +712,8 @@ class ContinuousBatchProcessor:
|
|
|
679
712
|
if self._pad_inputs:
|
|
680
713
|
padded_q = pad_by_intervals(self.actual_query_length, self.max_batch_tokens, self.q_padding_intervals)
|
|
681
714
|
max_read_index_size = max(self.actual_index_sizes[i][0] for i in range(self.cache.num_groups))
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
self.cache.num_blocks * self.cache.block_size,
|
|
685
|
-
self.kv_padding_intervals,
|
|
686
|
-
)
|
|
715
|
+
# The space planned for query tokens will be added later, so we remove it from the space planned for KV
|
|
716
|
+
padded_read_index_size = pad_by_intervals(max_read_index_size, self.num_pages, self.kv_padding_intervals)
|
|
687
717
|
else:
|
|
688
718
|
padded_q, padded_read_index_size = 0, 0
|
|
689
719
|
# Retrieve the model kwargs with or without padding
|
|
@@ -811,7 +841,8 @@ class ContinuousBatchingManager:
|
|
|
811
841
|
self.log_prob_generation = getattr(generation_config, "log_prob_generation", False)
|
|
812
842
|
self.do_sample = getattr(generation_config, "do_sample", True)
|
|
813
843
|
self.logit_processor = self.model._get_logits_processor(generation_config)
|
|
814
|
-
|
|
844
|
+
num_return_sequences = getattr(generation_config, "num_return_sequences", None)
|
|
845
|
+
self.num_return_sequences = num_return_sequences if num_return_sequences is not None else 1
|
|
815
846
|
|
|
816
847
|
# self.model.generation_config.top_p = None NOTE: figure out why this was here
|
|
817
848
|
|
|
@@ -1108,13 +1139,13 @@ class ContinuousBatchingManager:
|
|
|
1108
1139
|
# Loop body ends if there is no requests in the batch
|
|
1109
1140
|
if not batch_processor.prepare_next_batch():
|
|
1110
1141
|
return
|
|
1111
|
-
# Debug logging of the current memory usage
|
|
1112
|
-
if logger.level
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1142
|
+
# Debug logging of the current memory usage -- commented out because it's often not used even in debug
|
|
1143
|
+
# if logger.level < logging.DEBUG:
|
|
1144
|
+
# device, total, reserved, allocated = get_device_and_memory_breakdown()
|
|
1145
|
+
# available_memory = total - max(allocated, reserved)
|
|
1146
|
+
# logger.debug(
|
|
1147
|
+
# f"[Memory] Device: {device}, Total: {total}, Reserved: {reserved}, Allocated: {allocated}, Available: {available_memory}"
|
|
1148
|
+
# )
|
|
1118
1149
|
|
|
1119
1150
|
self._generation_step()
|
|
1120
1151
|
batch_processor.update_batch()
|
|
@@ -1262,7 +1293,7 @@ class ContinuousMixin:
|
|
|
1262
1293
|
# Initialize manager with the batch inputs
|
|
1263
1294
|
results = {}
|
|
1264
1295
|
gen_cfg = self.generation_config if generation_config is None else generation_config
|
|
1265
|
-
num_requests = len(inputs) * gen_cfg.num_return_sequences
|
|
1296
|
+
num_requests = len(inputs) * (gen_cfg.num_return_sequences if gen_cfg.num_return_sequences is not None else 1)
|
|
1266
1297
|
# Prepare context managers for the main loop
|
|
1267
1298
|
manager_cm = self.continuous_batching_context_manager(
|
|
1268
1299
|
generation_config=generation_config,
|