transformers 5.0.0rc1__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +20 -1
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +68 -5
- transformers/core_model_loading.py +201 -35
- transformers/dependency_versions_table.py +1 -1
- transformers/feature_extraction_utils.py +54 -22
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +162 -122
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +101 -64
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +2 -12
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +12 -0
- transformers/integrations/accelerate.py +44 -111
- transformers/integrations/aqlm.py +3 -5
- transformers/integrations/awq.py +2 -5
- transformers/integrations/bitnet.py +5 -8
- transformers/integrations/bitsandbytes.py +16 -15
- transformers/integrations/deepspeed.py +18 -3
- transformers/integrations/eetq.py +3 -5
- transformers/integrations/fbgemm_fp8.py +1 -1
- transformers/integrations/finegrained_fp8.py +6 -16
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/higgs.py +2 -5
- transformers/integrations/hub_kernels.py +23 -5
- transformers/integrations/integration_utils.py +35 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +4 -10
- transformers/integrations/peft.py +5 -0
- transformers/integrations/quanto.py +5 -2
- transformers/integrations/spqr.py +3 -5
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/vptq.py +3 -5
- transformers/modeling_gguf_pytorch_utils.py +66 -19
- transformers/modeling_rope_utils.py +78 -81
- transformers/modeling_utils.py +583 -503
- transformers/models/__init__.py +19 -0
- transformers/models/afmoe/modeling_afmoe.py +7 -16
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/align/modeling_align.py +12 -6
- transformers/models/altclip/modeling_altclip.py +7 -3
- transformers/models/apertus/modeling_apertus.py +4 -2
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +1 -1
- transformers/models/aria/modeling_aria.py +8 -4
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +27 -0
- transformers/models/auto/feature_extraction_auto.py +7 -3
- transformers/models/auto/image_processing_auto.py +4 -2
- transformers/models/auto/modeling_auto.py +31 -0
- transformers/models/auto/processing_auto.py +4 -0
- transformers/models/auto/tokenization_auto.py +132 -153
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +18 -19
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +9 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +3 -0
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
- transformers/models/bit/modeling_bit.py +5 -1
- transformers/models/bitnet/modeling_bitnet.py +1 -1
- transformers/models/blenderbot/modeling_blenderbot.py +7 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +8 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -0
- transformers/models/bloom/modeling_bloom.py +13 -44
- transformers/models/blt/modeling_blt.py +162 -2
- transformers/models/blt/modular_blt.py +168 -3
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +6 -0
- transformers/models/bros/modeling_bros.py +8 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/canine/modeling_canine.py +6 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +9 -4
- transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +25 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clipseg/modeling_clipseg.py +4 -0
- transformers/models/clvp/modeling_clvp.py +14 -3
- transformers/models/code_llama/tokenization_code_llama.py +1 -1
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/cohere/modeling_cohere.py +1 -1
- transformers/models/cohere2/modeling_cohere2.py +1 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
- transformers/models/convbert/modeling_convbert.py +3 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +3 -1
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +14 -2
- transformers/models/cvt/modeling_cvt.py +5 -1
- transformers/models/cwm/modeling_cwm.py +1 -1
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +46 -39
- transformers/models/d_fine/modular_d_fine.py +15 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +1 -1
- transformers/models/dac/modeling_dac.py +4 -4
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +1 -1
- transformers/models/deberta/modeling_deberta.py +2 -0
- transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
- transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
- transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
- transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +8 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +12 -1
- transformers/models/dia/modular_dia.py +11 -0
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +3 -3
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
- transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/doge/modeling_doge.py +1 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +16 -12
- transformers/models/dots1/modeling_dots1.py +14 -5
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +5 -2
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +5 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +8 -2
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt_fast.py +46 -14
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +6 -1
- transformers/models/evolla/modeling_evolla.py +9 -1
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +1 -1
- transformers/models/falcon/modeling_falcon.py +3 -3
- transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
- transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
- transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +14 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +4 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
- transformers/models/florence2/modeling_florence2.py +20 -3
- transformers/models/florence2/modular_florence2.py +13 -0
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +16 -0
- transformers/models/gemma/modeling_gemma.py +10 -12
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma2/modeling_gemma2.py +1 -1
- transformers/models/gemma2/modular_gemma2.py +1 -1
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +28 -7
- transformers/models/gemma3/modular_gemma3.py +26 -6
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +47 -9
- transformers/models/gemma3n/modular_gemma3n.py +51 -9
- transformers/models/git/modeling_git.py +181 -126
- transformers/models/glm/modeling_glm.py +1 -1
- transformers/models/glm4/modeling_glm4.py +1 -1
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +15 -5
- transformers/models/glm4v/modular_glm4v.py +11 -3
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
- transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +8 -5
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
- transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
- transformers/models/gptj/modeling_gptj.py +15 -6
- transformers/models/granite/modeling_granite.py +1 -1
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +2 -3
- transformers/models/granitemoe/modular_granitemoe.py +1 -2
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
- transformers/models/groupvit/modeling_groupvit.py +6 -1
- transformers/models/helium/modeling_helium.py +1 -1
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
- transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
- transformers/models/hubert/modeling_hubert.py +4 -0
- transformers/models/hubert/modular_hubert.py +4 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +16 -0
- transformers/models/idefics/modeling_idefics.py +10 -0
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +9 -2
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +11 -8
- transformers/models/internvl/modular_internvl.py +5 -9
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +24 -19
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +15 -7
- transformers/models/janus/modular_janus.py +16 -7
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +14 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/configuration_lasr.py +4 -0
- transformers/models/lasr/modeling_lasr.py +3 -2
- transformers/models/lasr/modular_lasr.py +8 -1
- transformers/models/lasr/processing_lasr.py +0 -2
- transformers/models/layoutlm/modeling_layoutlm.py +5 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +18 -0
- transformers/models/lfm2/modeling_lfm2.py +1 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lilt/modeling_lilt.py +19 -15
- transformers/models/llama/modeling_llama.py +1 -1
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +8 -4
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
- transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
- transformers/models/longt5/modeling_longt5.py +0 -4
- transformers/models/m2m_100/modeling_m2m_100.py +10 -0
- transformers/models/mamba/modeling_mamba.py +2 -1
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +3 -0
- transformers/models/markuplm/modeling_markuplm.py +5 -8
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +9 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +9 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mimi/modeling_mimi.py +25 -4
- transformers/models/minimax/modeling_minimax.py +16 -3
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +1 -1
- transformers/models/mistral/modeling_mistral.py +1 -1
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +12 -4
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +13 -2
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +4 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
- transformers/models/modernbert/modeling_modernbert.py +12 -1
- transformers/models/modernbert/modular_modernbert.py +12 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
- transformers/models/moonshine/modeling_moonshine.py +1 -1
- transformers/models/moshi/modeling_moshi.py +21 -51
- transformers/models/mpnet/modeling_mpnet.py +2 -0
- transformers/models/mra/modeling_mra.py +4 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +0 -10
- transformers/models/musicgen/modeling_musicgen.py +5 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +1 -1
- transformers/models/nemotron/modeling_nemotron.py +3 -3
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +11 -16
- transformers/models/nystromformer/modeling_nystromformer.py +7 -0
- transformers/models/olmo/modeling_olmo.py +1 -1
- transformers/models/olmo2/modeling_olmo2.py +1 -1
- transformers/models/olmo3/modeling_olmo3.py +1 -1
- transformers/models/olmoe/modeling_olmoe.py +12 -4
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +7 -38
- transformers/models/openai/modeling_openai.py +12 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +7 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +7 -3
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/modeling_parakeet.py +5 -0
- transformers/models/parakeet/modular_parakeet.py +5 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
- transformers/models/patchtst/modeling_patchtst.py +5 -4
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/models/pe_audio/processing_pe_audio.py +24 -0
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +3 -0
- transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +5 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +1 -1
- transformers/models/phi/modeling_phi.py +1 -1
- transformers/models/phi3/modeling_phi3.py +1 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +12 -4
- transformers/models/phimoe/modular_phimoe.py +1 -1
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +1 -1
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +7 -0
- transformers/models/plbart/modular_plbart.py +6 -0
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +11 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prophetnet/modeling_prophetnet.py +2 -1
- transformers/models/qwen2/modeling_qwen2.py +1 -1
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
- transformers/models/qwen3/modeling_qwen3.py +1 -1
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
- transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +7 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
- transformers/models/reformer/modeling_reformer.py +9 -1
- transformers/models/regnet/modeling_regnet.py +4 -0
- transformers/models/rembert/modeling_rembert.py +7 -1
- transformers/models/resnet/modeling_resnet.py +8 -3
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +4 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +1 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +5 -1
- transformers/models/sam2/modular_sam2.py +5 -1
- transformers/models/sam2_video/modeling_sam2_video.py +51 -43
- transformers/models/sam2_video/modular_sam2_video.py +31 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +23 -0
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
- transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +3 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
- transformers/models/seed_oss/modeling_seed_oss.py +1 -1
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +2 -2
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +63 -41
- transformers/models/smollm3/modeling_smollm3.py +1 -1
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
- transformers/models/speecht5/modeling_speecht5.py +28 -0
- transformers/models/splinter/modeling_splinter.py +9 -3
- transformers/models/squeezebert/modeling_squeezebert.py +2 -0
- transformers/models/stablelm/modeling_stablelm.py +1 -1
- transformers/models/starcoder2/modeling_starcoder2.py +1 -1
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/swiftformer/modeling_swiftformer.py +4 -0
- transformers/models/swin/modeling_swin.py +16 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +49 -33
- transformers/models/swinv2/modeling_swinv2.py +41 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +1 -7
- transformers/models/t5gemma/modeling_t5gemma.py +1 -1
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +1 -1
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/timesfm/modeling_timesfm.py +12 -0
- transformers/models/timesfm/modular_timesfm.py +12 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
- transformers/models/trocr/modeling_trocr.py +1 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +4 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +3 -7
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +0 -6
- transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +7 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/visual_bert/modeling_visual_bert.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +4 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +5 -3
- transformers/models/x_clip/modeling_x_clip.py +2 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +10 -0
- transformers/models/xlm/modeling_xlm.py +13 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +4 -1
- transformers/models/zamba/modeling_zamba.py +2 -1
- transformers/models/zamba2/modeling_zamba2.py +3 -2
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +7 -0
- transformers/pipelines/__init__.py +9 -6
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +1 -1
- transformers/pipelines/document_question_answering.py +1 -1
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +127 -56
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +9 -64
- transformers/quantizers/quantizer_aqlm.py +1 -18
- transformers/quantizers/quantizer_auto_round.py +1 -10
- transformers/quantizers/quantizer_awq.py +3 -8
- transformers/quantizers/quantizer_bitnet.py +1 -6
- transformers/quantizers/quantizer_bnb_4bit.py +9 -49
- transformers/quantizers/quantizer_bnb_8bit.py +9 -19
- transformers/quantizers/quantizer_compressed_tensors.py +1 -4
- transformers/quantizers/quantizer_eetq.py +2 -12
- transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
- transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
- transformers/quantizers/quantizer_fp_quant.py +4 -4
- transformers/quantizers/quantizer_gptq.py +1 -4
- transformers/quantizers/quantizer_higgs.py +2 -6
- transformers/quantizers/quantizer_mxfp4.py +2 -28
- transformers/quantizers/quantizer_quanto.py +14 -14
- transformers/quantizers/quantizer_spqr.py +3 -8
- transformers/quantizers/quantizer_torchao.py +28 -124
- transformers/quantizers/quantizer_vptq.py +1 -10
- transformers/testing_utils.py +28 -12
- transformers/tokenization_mistral_common.py +3 -2
- transformers/tokenization_utils_base.py +3 -2
- transformers/tokenization_utils_tokenizers.py +25 -2
- transformers/trainer.py +24 -2
- transformers/trainer_callback.py +8 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/training_args.py +8 -10
- transformers/utils/__init__.py +4 -0
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +34 -25
- transformers/utils/generic.py +20 -0
- transformers/utils/import_utils.py +51 -9
- transformers/utils/kernel_config.py +71 -18
- transformers/utils/quantization_config.py +8 -8
- transformers/video_processing_utils.py +16 -12
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -20,12 +20,11 @@ import os
|
|
|
20
20
|
from abc import ABC, abstractmethod
|
|
21
21
|
from collections.abc import Callable
|
|
22
22
|
from dataclasses import dataclass, is_dataclass
|
|
23
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
23
|
+
from typing import TYPE_CHECKING, Any, Optional, Union
|
|
24
24
|
|
|
25
25
|
from huggingface_hub import create_repo
|
|
26
26
|
|
|
27
27
|
from .. import __version__
|
|
28
|
-
from ..configuration_utils import PreTrainedConfig
|
|
29
28
|
from ..utils import (
|
|
30
29
|
GENERATION_CONFIG_NAME,
|
|
31
30
|
ExplicitEnum,
|
|
@@ -38,6 +37,7 @@ from ..utils import (
|
|
|
38
37
|
|
|
39
38
|
|
|
40
39
|
if TYPE_CHECKING:
|
|
40
|
+
from ..configuration_utils import PreTrainedConfig
|
|
41
41
|
from ..modeling_utils import PreTrainedModel
|
|
42
42
|
|
|
43
43
|
|
|
@@ -104,18 +104,18 @@ class GenerationConfig(PushToHubMixin):
|
|
|
104
104
|
Arg:
|
|
105
105
|
> Parameters that control the length of the output
|
|
106
106
|
|
|
107
|
-
max_length (`int`, *optional
|
|
107
|
+
max_length (`int`, *optional*):
|
|
108
108
|
`max_new_tokens` is recommended for controlling how many tokens the model generates.
|
|
109
109
|
`max_length` remains for backward compatibility.
|
|
110
110
|
|
|
111
111
|
max_new_tokens (`int`, *optional*):
|
|
112
112
|
The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
|
|
113
|
-
min_length (`int`, *optional
|
|
113
|
+
min_length (`int`, *optional*):
|
|
114
114
|
The minimum length of the sequence to be generated. Corresponds to the length of the input prompt +
|
|
115
115
|
`min_new_tokens`. Its effect is overridden by `min_new_tokens`, if also set.
|
|
116
116
|
min_new_tokens (`int`, *optional*):
|
|
117
117
|
The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.
|
|
118
|
-
early_stopping (`bool` or `str`, *optional
|
|
118
|
+
early_stopping (`bool` or `str`, *optional*):
|
|
119
119
|
Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
|
|
120
120
|
`True`, where the generation stops as soon as there are `num_beams` complete candidates; `False`, where an
|
|
121
121
|
heuristic is applied and the generation stops when is it very unlikely to find better candidates;
|
|
@@ -129,17 +129,17 @@ class GenerationConfig(PushToHubMixin):
|
|
|
129
129
|
|
|
130
130
|
> Parameters that control the generation strategy used
|
|
131
131
|
|
|
132
|
-
do_sample (`bool`,
|
|
132
|
+
do_sample (`bool`, defaults to `False`):
|
|
133
133
|
Whether or not to use sampling ; use greedy decoding otherwise.
|
|
134
|
-
num_beams (`int`, *optional
|
|
134
|
+
num_beams (`int`, *optional*):
|
|
135
135
|
Number of beams for beam search. 1 means no beam search.
|
|
136
136
|
|
|
137
137
|
> Parameters that control the cache
|
|
138
138
|
|
|
139
|
-
use_cache (`bool`,
|
|
139
|
+
use_cache (`bool`, defaults to `True`):
|
|
140
140
|
Whether or not the model should use the past last key/values attentions (if applicable to the model) to
|
|
141
141
|
speed up decoding.
|
|
142
|
-
cache_implementation (`str`, *optional
|
|
142
|
+
cache_implementation (`str`, *optional*):
|
|
143
143
|
Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are:
|
|
144
144
|
|
|
145
145
|
- `"dynamic"`: [`DynamicCache`]
|
|
@@ -155,11 +155,11 @@ class GenerationConfig(PushToHubMixin):
|
|
|
155
155
|
|
|
156
156
|
> Parameters for manipulation of the model output logits
|
|
157
157
|
|
|
158
|
-
temperature (`float`, *optional
|
|
158
|
+
temperature (`float`, *optional*):
|
|
159
159
|
The value used to module the next token probabilities. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 1.0
|
|
160
|
-
top_k (`int`, *optional
|
|
160
|
+
top_k (`int`, *optional*):
|
|
161
161
|
The number of highest probability vocabulary tokens to keep for top-k-filtering. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 50.
|
|
162
|
-
top_p (`float`, *optional
|
|
162
|
+
top_p (`float`, *optional*):
|
|
163
163
|
If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
|
|
164
164
|
`top_p` or higher are kept for generation. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 1.0
|
|
165
165
|
min_p (`float`, *optional*):
|
|
@@ -172,41 +172,41 @@ class GenerationConfig(PushToHubMixin):
|
|
|
172
172
|
is kept whose *renormalized* entropy is less than or equal to `top_h` times the entropy of the full distribution.
|
|
173
173
|
Smaller values (e.g., 0.2–0.5) lead to more focused, deterministic outputs, while values closer to 1.0 allow more
|
|
174
174
|
randomness and diversity. Typical values are in the 0.3–0.6 range.
|
|
175
|
-
typical_p (`float`, *optional
|
|
175
|
+
typical_p (`float`, *optional*):
|
|
176
176
|
Local typicality measures how similar the conditional probability of predicting a target token next is to
|
|
177
177
|
the expected conditional probability of predicting a random token next, given the partial text already
|
|
178
178
|
generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that
|
|
179
179
|
add up to `typical_p` or higher are kept for generation. See [this
|
|
180
180
|
paper](https://huggingface.co/papers/2202.00666) for more details.
|
|
181
|
-
epsilon_cutoff (`float`, *optional
|
|
181
|
+
epsilon_cutoff (`float`, *optional*):
|
|
182
182
|
If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
|
|
183
183
|
`epsilon_cutoff` will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the
|
|
184
184
|
size of the model. See [Truncation Sampling as Language Model
|
|
185
185
|
Desmoothing](https://huggingface.co/papers/2210.15191) for more details.
|
|
186
|
-
eta_cutoff (`float`, *optional
|
|
186
|
+
eta_cutoff (`float`, *optional*):
|
|
187
187
|
Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between
|
|
188
188
|
0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) *
|
|
189
189
|
exp(-entropy(softmax(next_token_logits)))`. The latter term is intuitively the expected next token
|
|
190
190
|
probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3,
|
|
191
191
|
depending on the size of the model. See [Truncation Sampling as Language Model
|
|
192
192
|
Desmoothing](https://huggingface.co/papers/2210.15191) for more details.
|
|
193
|
-
repetition_penalty (`float`, *optional
|
|
193
|
+
repetition_penalty (`float`, *optional*):
|
|
194
194
|
The parameter for repetition penalty. 1.0 means no penalty. See [this
|
|
195
195
|
paper](https://huggingface.co/papers/1909.05858) for more details.
|
|
196
|
-
encoder_repetition_penalty (`float`, *optional
|
|
196
|
+
encoder_repetition_penalty (`float`, *optional*):
|
|
197
197
|
The parameter for encoder_repetition_penalty. An exponential penalty on sequences that are not in the
|
|
198
198
|
original input. 1.0 means no penalty.
|
|
199
|
-
length_penalty (`float`, *optional
|
|
199
|
+
length_penalty (`float`, *optional*):
|
|
200
200
|
Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
|
|
201
201
|
the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
|
|
202
202
|
likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
|
|
203
203
|
`length_penalty` < 0.0 encourages shorter sequences.
|
|
204
|
-
no_repeat_ngram_size (`int`, *optional
|
|
204
|
+
no_repeat_ngram_size (`int`, *optional*):
|
|
205
205
|
If set to int > 0, all ngrams of that size can only occur once.
|
|
206
206
|
bad_words_ids (`list[list[int]]`, *optional*):
|
|
207
207
|
List of list of token ids that are not allowed to be generated. Check
|
|
208
208
|
[`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
|
|
209
|
-
renormalize_logits (`bool`,
|
|
209
|
+
renormalize_logits (`bool`, defaults to `False`):
|
|
210
210
|
Whether to renormalize the logits after applying all the logits processors (including the custom
|
|
211
211
|
ones). It's highly recommended to set this flag to `True` as the search algorithms suppose the score logits
|
|
212
212
|
are normalized but some logit processors break the normalization.
|
|
@@ -217,7 +217,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
217
217
|
forced_eos_token_id (`int` or list[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
|
|
218
218
|
The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
|
|
219
219
|
list to set multiple *end-of-sequence* tokens.
|
|
220
|
-
remove_invalid_values (`bool`,
|
|
220
|
+
remove_invalid_values (`bool`, defaults to `model.config.remove_invalid_values`):
|
|
221
221
|
Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash.
|
|
222
222
|
Note that using `remove_invalid_values` can slow down generation.
|
|
223
223
|
exponential_decay_length_penalty (`tuple(int, float)`, *optional*):
|
|
@@ -234,7 +234,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
234
234
|
Dictionary that maps a sequence of tokens to its bias term. Positive biases increase the odds of the
|
|
235
235
|
sequence being selected, while negative biases do the opposite. Check
|
|
236
236
|
[`~generation.SequenceBiasLogitsProcessor`] for further documentation and examples.
|
|
237
|
-
token_healing (`bool`,
|
|
237
|
+
token_healing (`bool`, defaults to `False`):
|
|
238
238
|
Heal tail tokens of prompts by replacing them with their appropriate extensions.
|
|
239
239
|
This enhances the quality of completions for prompts affected by greedy tokenization bias.
|
|
240
240
|
guidance_scale (`float`, *optional*):
|
|
@@ -250,18 +250,18 @@ class GenerationConfig(PushToHubMixin):
|
|
|
250
250
|
|
|
251
251
|
num_return_sequences (`int`, *optional*, defaults to 1):
|
|
252
252
|
The number of independently computed returned sequences for each element in the batch.
|
|
253
|
-
output_attentions (`bool`,
|
|
253
|
+
output_attentions (`bool`, defaults to `False`):
|
|
254
254
|
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
|
|
255
255
|
tensors for more details.
|
|
256
|
-
output_hidden_states (`bool`,
|
|
256
|
+
output_hidden_states (`bool`, defaults to `False`):
|
|
257
257
|
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
|
|
258
258
|
more details.
|
|
259
|
-
output_scores (`bool`,
|
|
259
|
+
output_scores (`bool`, defaults to `False`):
|
|
260
260
|
Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
|
|
261
|
-
output_logits (`bool`,
|
|
261
|
+
output_logits (`bool`, defaults to `False`):
|
|
262
262
|
Whether or not to return the unprocessed prediction logit scores. See `logits` under returned tensors for
|
|
263
263
|
more details.
|
|
264
|
-
return_dict_in_generate (`bool`,
|
|
264
|
+
return_dict_in_generate (`bool`, defaults to `False`):
|
|
265
265
|
Whether or not to return a [`~utils.ModelOutput`], as opposed to returning exclusively the generated
|
|
266
266
|
sequence. This flag must be set to `True` to return the generation cache (when `use_cache` is `True`)
|
|
267
267
|
or optional outputs (see flags starting with `output_`)
|
|
@@ -277,7 +277,7 @@ class GenerationConfig(PushToHubMixin):
|
|
|
277
277
|
|
|
278
278
|
> Generation parameters exclusive to encoder-decoder models
|
|
279
279
|
|
|
280
|
-
encoder_no_repeat_ngram_size (`int`, *optional
|
|
280
|
+
encoder_no_repeat_ngram_size (`int`, *optional*):
|
|
281
281
|
If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
|
|
282
282
|
`decoder_input_ids`.
|
|
283
283
|
decoder_start_token_id (`int` or `list[int]`, *optional*):
|
|
@@ -286,20 +286,20 @@ class GenerationConfig(PushToHubMixin):
|
|
|
286
286
|
(e.g. multilingual models with different target languages in one batch)
|
|
287
287
|
|
|
288
288
|
> Generation parameters exclusive to assistant generation
|
|
289
|
-
is_assistant (`bool`,
|
|
289
|
+
is_assistant (`bool`, defaults to `False`):
|
|
290
290
|
Whether the model is an assistant (draft) model.
|
|
291
|
-
num_assistant_tokens (`int`, *optional
|
|
291
|
+
num_assistant_tokens (`int`, *optional*):
|
|
292
292
|
Defines the number of _speculative tokens_ that shall be generated by the assistant model before being
|
|
293
293
|
checked by the target model at each iteration. Higher values for `num_assistant_tokens` make the generation
|
|
294
294
|
more _speculative_ : If the assistant model is performant larger speed-ups can be reached, if the assistant
|
|
295
295
|
model requires lots of corrections, lower speed-ups are reached.
|
|
296
|
-
num_assistant_tokens_schedule (`str`, *optional
|
|
296
|
+
num_assistant_tokens_schedule (`str`, *optional*):
|
|
297
297
|
Defines the schedule at which max assistant tokens shall be changed during inference.
|
|
298
298
|
- `"heuristic"`: When all speculative tokens are correct, increase `num_assistant_tokens` by 2 else
|
|
299
299
|
reduce by 1. `num_assistant_tokens` value is persistent over multiple generation calls with the same assistant model.
|
|
300
300
|
- `"heuristic_transient"`: Same as `"heuristic"` but `num_assistant_tokens` is reset to its initial value after each generation call.
|
|
301
301
|
- `"constant"`: `num_assistant_tokens` stays unchanged during generation
|
|
302
|
-
assistant_confidence_threshold (`float`, *optional
|
|
302
|
+
assistant_confidence_threshold (`float`, *optional*):
|
|
303
303
|
The confidence threshold for the assistant model. If the assistant model's confidence in its prediction for the current token is lower
|
|
304
304
|
than this threshold, the assistant model stops the current token generation iteration, even if the number of _speculative tokens_
|
|
305
305
|
(defined by `num_assistant_tokens`) is not yet reached. The assistant's confidence threshold is adjusted throughout the speculative iterations to reduce the number of unnecessary draft and target forward passes, biased towards avoiding false negatives.
|
|
@@ -313,11 +313,11 @@ class GenerationConfig(PushToHubMixin):
|
|
|
313
313
|
assistant_early_exit(`int`, *optional*):
|
|
314
314
|
If set to a positive integer, early exit of the model will be used as an assistant. Can only be used with
|
|
315
315
|
models that support early exit (i.e. models where logits from intermediate layers can be interpreted by the LM head).
|
|
316
|
-
assistant_lookbehind(`int`, *optional
|
|
316
|
+
assistant_lookbehind(`int`, *optional*):
|
|
317
317
|
If set to a positive integer, the re-encodeing process will additionally consider the last `assistant_lookbehind` assistant tokens
|
|
318
318
|
to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
|
|
319
319
|
See this [blog](https://huggingface.co/blog/universal_assisted_generation) for more details.
|
|
320
|
-
target_lookbehind(`int`, *optional
|
|
320
|
+
target_lookbehind(`int`, *optional*):
|
|
321
321
|
If set to a positive integer, the re-encodeing process will additionally consider the last `target_lookbehind` target tokens
|
|
322
322
|
to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
|
|
323
323
|
See this [blog](https://huggingface.co/blog/universal_assisted_generation) for more details.
|
|
@@ -327,7 +327,7 @@ class GenerationConfig(PushToHubMixin):
         compile_config (CompileConfig, *optional*):
             If using a compilable cache, this controls how `generate` will `compile` the forward pass for faster
             inference.
-        disable_compile (`bool`,
+        disable_compile (`bool`, defaults to `False`):
             Whether to disable the automatic compilation of the forward pass. Automatic compilation happens when
             specific criteria are met, including using a compilable cache. Please open an issue if you find the
             need to use this flag.
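In practice `disable_compile` is only consulted when automatic compilation would otherwise trigger, for instance when a compilable (static-style) cache is requested. A minimal, hedged sketch of opting out:

```python
from transformers import GenerationConfig

# A compilable cache is one of the criteria that enables automatic compilation of the
# forward pass; `disable_compile=True` opts out. Values are illustrative.
gen_config = GenerationConfig(
    cache_implementation="static",
    disable_compile=True,
)
```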
@@ -337,38 +337,36 @@ class GenerationConfig(PushToHubMixin):

     def __init__(self, **kwargs):
         # Parameters that control the length of the output
-        self.max_length = kwargs.pop("max_length", 20)
+        self.max_length = kwargs.pop("max_length", None)
         self.max_new_tokens = kwargs.pop("max_new_tokens", None)
-        self.min_length = kwargs.pop("min_length", 0)
+        self.min_length = kwargs.pop("min_length", None)
         self.min_new_tokens = kwargs.pop("min_new_tokens", None)
-        self.early_stopping = kwargs.pop("early_stopping", False)
+        self.early_stopping = kwargs.pop("early_stopping", None)
         self.max_time = kwargs.pop("max_time", None)
         self.stop_strings = kwargs.pop("stop_strings", None)

         # Parameters that control the generation strategy used
         self.do_sample = kwargs.pop("do_sample", False)
-        self.num_beams = kwargs.pop("num_beams", 1)
+        self.num_beams = kwargs.pop("num_beams", None)

         # Parameters that control the cache
         self.use_cache = kwargs.pop("use_cache", True)
         self.cache_implementation = kwargs.pop("cache_implementation", None)
         self.cache_config = kwargs.pop("cache_config", None)

-        self.prefill_chunk_size = kwargs.pop("prefill_chunk_size", None)
-
         # Parameters for manipulation of the model output logits
-        self.temperature = kwargs.pop("temperature", 1.0)
-        self.top_k = kwargs.pop("top_k", 50)
-        self.top_p = kwargs.pop("top_p", 1.0)
+        self.temperature = kwargs.pop("temperature", None)
+        self.top_k = kwargs.pop("top_k", None)
+        self.top_p = kwargs.pop("top_p", None)
         self.min_p = kwargs.pop("min_p", None)
         self.top_h = kwargs.pop("top_h", None)
-        self.typical_p = kwargs.pop("typical_p", 1.0)
-        self.epsilon_cutoff = kwargs.pop("epsilon_cutoff", 0.0)
-        self.eta_cutoff = kwargs.pop("eta_cutoff", 0.0)
-        self.repetition_penalty = kwargs.pop("repetition_penalty", 1.0)
-        self.encoder_repetition_penalty = kwargs.pop("encoder_repetition_penalty", 1.0)
-        self.length_penalty = kwargs.pop("length_penalty", 1.0)
-        self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", 0)
+        self.typical_p = kwargs.pop("typical_p", None)
+        self.epsilon_cutoff = kwargs.pop("epsilon_cutoff", None)
+        self.eta_cutoff = kwargs.pop("eta_cutoff", None)
+        self.repetition_penalty = kwargs.pop("repetition_penalty", None)
+        self.encoder_repetition_penalty = kwargs.pop("encoder_repetition_penalty", None)
+        self.length_penalty = kwargs.pop("length_penalty", None)
+        self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", None)
         self.bad_words_ids = kwargs.pop("bad_words_ids", None)
         self.renormalize_logits = kwargs.pop("renormalize_logits", False)
         self.forced_bos_token_id = kwargs.pop("forced_bos_token_id", None)
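This block is the core behavioural change of the hunk: length, beam and logit-manipulation knobs are now stored as `None` ("unset") instead of hard-coded defaults such as `max_length=20` or `temperature=1.0`. A small sketch of what that means for code reading these attributes, assuming the rc2 constructor shown above:

```python
from transformers import GenerationConfig

cfg = GenerationConfig()
# With the rc2 constructor above these are unset rather than 20 / 1.0 / 50 / 1.0:
print(cfg.max_length, cfg.temperature, cfg.top_k, cfg.top_p)

# Downstream code should therefore treat None as "fall back to a default", e.g.:
effective_temperature = cfg.temperature if cfg.temperature is not None else 1.0
```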
@@ -381,20 +379,16 @@ class GenerationConfig(PushToHubMixin):
         self.token_healing = kwargs.pop("token_healing", False)
         self.guidance_scale = kwargs.pop("guidance_scale", None)

-        watermarking_config = kwargs.pop("watermarking_config", None)
-        if watermarking_config is None:
-            self.watermarking_config = None
-        elif isinstance(watermarking_config, BaseWatermarkingConfig):
-            self.watermarking_config = watermarking_config
-        else:
-            self.watermarking_config = WatermarkingConfig.from_dict(watermarking_config)
+        self.watermarking_config = kwargs.pop("watermarking_config", None)
+        if isinstance(self.watermarking_config, dict):
+            self.watermarking_config = WatermarkingConfig.from_dict(self.watermarking_config)

         # Parameters that define the output variables of `generate`
         self.num_return_sequences = kwargs.pop("num_return_sequences", 1)
         self.output_attentions = kwargs.pop("output_attentions", False)
         self.output_hidden_states = kwargs.pop("output_hidden_states", False)
         self.output_scores = kwargs.pop("output_scores", False)
-        self.output_logits = kwargs.pop("output_logits", None)
+        self.output_logits = kwargs.pop("output_logits", False)
         self.return_dict_in_generate = kwargs.pop("return_dict_in_generate", False)

         # Special tokens that can be used at generation time
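The watermarking branch above is simpler than before: a plain dict is converted through `WatermarkingConfig.from_dict`, while an already-built config object (or `None`) is stored as is. A hedged example — the field values are illustrative, not recommendations:

```python
from transformers import GenerationConfig, WatermarkingConfig

# Passing a plain dict: converted via WatermarkingConfig.from_dict per the hunk above.
cfg = GenerationConfig(watermarking_config={"greenlist_ratio": 0.25, "bias": 2.0})

# Passing a config object directly: stored unchanged.
cfg = GenerationConfig(watermarking_config=WatermarkingConfig(greenlist_ratio=0.25, bias=2.0))
```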
@@ -403,57 +397,57 @@ class GenerationConfig(PushToHubMixin):
         self.eos_token_id = kwargs.pop("eos_token_id", None)

         # Generation parameters exclusive to encoder-decoder models
-        self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", 0)
+        self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", None)
         self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)

         # Assistant generation
-        self.is_assistant = False
-        self.num_assistant_tokens = kwargs.pop("num_assistant_tokens", 20)
-        self.num_assistant_tokens_schedule = kwargs.pop("num_assistant_tokens_schedule", "constant")
-        self.assistant_confidence_threshold = kwargs.pop("assistant_confidence_threshold", 0.4)
+        self.is_assistant = kwargs.pop("is_assistant", False)
+        self.num_assistant_tokens = kwargs.pop("num_assistant_tokens", None)
+        self.num_assistant_tokens_schedule = kwargs.pop("num_assistant_tokens_schedule", None)
+        self.assistant_confidence_threshold = kwargs.pop("assistant_confidence_threshold", None)
         self.prompt_lookup_num_tokens = kwargs.pop("prompt_lookup_num_tokens", None)
         self.max_matching_ngram_size = kwargs.pop("max_matching_ngram_size", None)
         self.assistant_early_exit = kwargs.pop("assistant_early_exit", None)
-
-        self.assistant_lookbehind = kwargs.pop("assistant_lookbehind", 10)
-        self.target_lookbehind = kwargs.pop("target_lookbehind", 10)
+        self.assistant_lookbehind = kwargs.pop("assistant_lookbehind", None)
+        self.target_lookbehind = kwargs.pop("target_lookbehind", None)

         # Performance
         self.compile_config = kwargs.pop("compile_config", None)
         self.disable_compile = kwargs.pop("disable_compile", False)

-        # Deprecated (moved to the Hub). TODO
+        # Deprecated (moved to the Hub). TODO remove for v5
         self.low_memory = kwargs.pop("low_memory", None)
         self.penalty_alpha = kwargs.pop("penalty_alpha", None)
         self.dola_layers = kwargs.pop("dola_layers", None)
-        self.diversity_penalty = kwargs.pop("diversity_penalty", 0.0)
-        self.num_beam_groups = kwargs.pop("num_beam_groups", 1)
+        self.diversity_penalty = kwargs.pop("diversity_penalty", None)
+        self.num_beam_groups = kwargs.pop("num_beam_groups", None)
         self.constraints = kwargs.pop("constraints", None)
         self.force_words_ids = kwargs.pop("force_words_ids", None)

-
-        # interface.
-        self._from_model_config = kwargs.pop("_from_model_config", False)
-        self._commit_hash = kwargs.pop("_commit_hash", None)
-        self.transformers_version = kwargs.pop("transformers_version", __version__)
+        self.prefill_chunk_size = kwargs.pop("prefill_chunk_size", None)

-        #
-
-
-
-            f"Please make sure the generation config includes `forced_bos_token_id={self.bos_token_id}`. "
-            )
+        # Common attributes
+        self._commit_hash = kwargs.pop("_commit_hash", None)
+        self._from_model_config = kwargs.pop("_from_model_config", None)
+        self.transformers_version = kwargs.pop("transformers_version", None)

         # Additional attributes without default values
         if not self._from_model_config:
-            # we don't want to copy values from the model config if we're initializing
-            # model's default configuration file
+            # we don't want to copy values from the model config if we're initializing
+            # a `GenerationConfig` from a model's default configuration file
             for key, value in kwargs.items():
                 try:
                     setattr(self, key, value)
                 except AttributeError as err:
                     logger.error(f"Can't set {key} with value {value} for {self}")
                     raise err
+        else:
+            # Ensure backward compatibility for models that use `forced_bos_token_id` within their config
+            if kwargs.get("force_bos_token_to_be_generated", False):
+                self.forced_bos_token_id = self.bos_token_id
+                logger.warning_once(
+                    f"Please make sure the generation config includes `forced_bos_token_id={self.bos_token_id}`. "
+                )

         # Validate the values of the attributes
         self.validate()
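Two details from the tail of this hunk are easy to miss: when the config is not being built from a model config, any unknown keyword argument is simply attached as an attribute, and the legacy `force_bos_token_to_be_generated` flag is now only honoured in the `_from_model_config` branch, where it maps to `forced_bos_token_id`. A hedged sketch of the first behaviour, assuming the rc2 constructor above:

```python
from transformers import GenerationConfig

# Unknown keyword arguments are attached as plain attributes by the loop shown above.
cfg = GenerationConfig(max_new_tokens=32, my_experiment_tag="run-42")
print(cfg.my_experiment_tag)  # "run-42" (the attribute name is purely illustrative)
```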
@@ -488,8 +482,8 @@ class GenerationConfig(PushToHubMixin):
         # property and part of the `__repr__`
         if self.constraints is not None or self.force_words_ids is not None:
             generation_mode = GenerationMode.CONSTRAINED_BEAM_SEARCH
-        elif self.num_beams == 1:
-            if self.do_sample is False:
+        elif self.num_beams is None or self.num_beams == 1:
+            if not self.do_sample:
                 if (
                     self.top_k is not None
                     and self.top_k > 1
@@ -502,9 +496,9 @@ class GenerationConfig(PushToHubMixin):
             else:
                 generation_mode = GenerationMode.SAMPLE
         else:
-            if self.num_beam_groups > 1:
+            if self.num_beam_groups is not None and self.num_beam_groups > 1:
                 generation_mode = GenerationMode.GROUP_BEAM_SEARCH
-            elif self.do_sample is True:
+            elif self.do_sample:
                 generation_mode = GenerationMode.BEAM_SAMPLE
             else:
                 generation_mode = GenerationMode.BEAM_SEARCH
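With `num_beams` now allowed to be `None`, mode selection treats "unset" the same as a single beam. A hedged check of the resulting decision tree via the public `get_generation_mode()` helper, assuming the rc2 logic above:

```python
from transformers import GenerationConfig
from transformers.generation.configuration_utils import GenerationMode

assert GenerationConfig().get_generation_mode() == GenerationMode.GREEDY_SEARCH
assert GenerationConfig(do_sample=True).get_generation_mode() == GenerationMode.SAMPLE
assert GenerationConfig(num_beams=4).get_generation_mode() == GenerationMode.BEAM_SEARCH
assert GenerationConfig(num_beams=4, do_sample=True).get_generation_mode() == GenerationMode.BEAM_SAMPLE
```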
@@ -537,6 +531,45 @@ class GenerationConfig(PushToHubMixin):
             )
         return generation_mode

+    @staticmethod
+    def _get_default_generation_params() -> dict[str, Any]:
+        return {
+            "max_length": 20,
+            "min_length": 0,
+            "do_sample": False,
+            "early_stopping": False,
+            "num_beams": 1,
+            "temperature": 1.0,
+            "top_k": 50,
+            "top_p": 1.0,
+            "typical_p": 1.0,
+            "repetition_penalty": 1.0,
+            "length_penalty": 1.0,
+            "no_repeat_ngram_size": 0,
+            "encoder_no_repeat_ngram_size": 0,
+            "bad_words_ids": None,
+            "num_return_sequences": 1,
+            "output_scores": False,
+            "return_dict_in_generate": False,
+            "forced_bos_token_id": None,
+            "forced_eos_token_id": None,
+            "remove_invalid_values": False,
+            "exponential_decay_length_penalty": None,
+            "suppress_tokens": None,
+            "begin_suppress_tokens": None,
+            "epsilon_cutoff": 0.0,
+            "eta_cutoff": 0.0,
+            "encoder_repetition_penalty": 1.0,
+            "num_assistant_tokens": 20,
+            "num_assistant_tokens_schedule": "constant",
+            "assistant_confidence_threshold": 0.4,
+            "assistant_lookbehind": 10,
+            "target_lookbehind": 10,
+            # Deprecated arguments (moved to the Hub). TODO joao, manuel: remove in v4.62.0
+            "num_beam_groups": 1,
+            "diversity_penalty": 0.0,
+        }
+
     def validate(self, strict=False):
         """
         Validates the values of the attributes of the [`GenerationConfig`] instance. Raises exceptions in the presence
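The new `_get_default_generation_params()` table pairs naturally with the `defaults_only` switch added to `update()` further down in this file: attributes that are still `None` can be backfilled with the historical defaults without clobbering user-set values. A hedged sketch of that pattern (note it calls a private helper, so it may change without notice):

```python
from transformers import GenerationConfig

cfg = GenerationConfig(do_sample=True, temperature=0.2)       # one knob set, the rest left as None
defaults = GenerationConfig._get_default_generation_params()  # the table added above
cfg.update(defaults_only=True, **defaults)                    # fills only attributes still set to None

print(cfg.temperature)  # 0.2 -- user value preserved
print(cfg.top_k)        # 50  -- backfilled from the defaults table
```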
@@ -552,7 +585,7 @@ class GenerationConfig(PushToHubMixin):

         # 1. Validation of individual attributes
         # 1.1. Decoding attributes
-        if self.early_stopping not in {True, False, "never"}:
+        if self.early_stopping not in {None, True, False, "never"}:
             raise ValueError(f"`early_stopping` must be a boolean or 'never', but is {self.early_stopping}.")
         if self.max_new_tokens is not None and self.max_new_tokens <= 0:
             raise ValueError(f"`max_new_tokens` must be greater than 0, but is {self.max_new_tokens}.")
@@ -583,9 +616,9 @@ class GenerationConfig(PushToHubMixin):

         # 2. Validation of attribute combinations
         # 2.1. detect sampling-only parameterization when not in sampling mode
-        if self.do_sample is False:
+        if not self.do_sample:
             greedy_wrong_parameter_msg = (
-                "`do_sample` is set to `False`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only "
+                "`do_sample` is set not to set `True`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only "
                 "used in sample-based generation modes. You should set `do_sample=True` or unset `{flag_name}`."
             )
             if self.temperature is not None and self.temperature != 1.0:
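The relaxed condition above means sampling-only flags are now flagged whenever `do_sample` is falsy (unset or `False`), not only when it is literally `False`. For example, under the rc2 validation shown here:

```python
from transformers import GenerationConfig

# `temperature` is a sampling-only flag: with `do_sample` left unset, validate() records
# a minor issue and warns that the flag will have no effect.
GenerationConfig(temperature=0.7)

# The intended, warning-free configuration:
GenerationConfig(do_sample=True, temperature=0.7)
```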
@@ -614,42 +647,42 @@ class GenerationConfig(PushToHubMixin):
                 )

         # 2.2. detect beam-only parameterization when not in beam mode
-        if self.num_beams == 1:
+        if self.num_beams is None or self.num_beams == 1:
             single_beam_wrong_parameter_msg = (
-                "`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
+                "`num_beams` is set to {num_beams}. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
                 "in beam-based generation modes. You should set `num_beams>1` or unset `{flag_name}`."
             )
-            if self.early_stopping is not False:
+            if self.early_stopping is not None and self.early_stopping is not False:
                 minor_issues["early_stopping"] = single_beam_wrong_parameter_msg.format(
-                    flag_name="early_stopping", flag_value=self.early_stopping
+                    num_beams=self.num_beams, flag_name="early_stopping", flag_value=self.early_stopping
                 )
             if self.length_penalty is not None and self.length_penalty != 1.0:
                 minor_issues["length_penalty"] = single_beam_wrong_parameter_msg.format(
-                    flag_name="length_penalty", flag_value=self.length_penalty
+                    num_beams=self.num_beams, flag_name="length_penalty", flag_value=self.length_penalty
                 )

         # 2.4. check `num_return_sequences`
-        if self.num_return_sequences != 1:
-            if self.num_beams == 1:
-                if self.do_sample is False:
+        if self.num_return_sequences > 1:
+            if self.num_beams is None or self.num_beams == 1:
+                if not self.do_sample:
                     raise ValueError(
-                        "Greedy methods without beam search do not support `num_return_sequences` different than 1 "
-                        f"(got {self.num_return_sequences})."
+                        "Greedy methods (do_sample != True) without beam search do not support "
+                        f"`num_return_sequences` different than 1 (got {self.num_return_sequences})."
                     )
-            elif self.num_return_sequences > self.num_beams:
+            elif self.num_beams is not None and self.num_return_sequences > self.num_beams:
                 raise ValueError(
                     f"`num_return_sequences` ({self.num_return_sequences}) has to be smaller or equal to `num_beams` "
                     f"({self.num_beams})."
                 )

         # 2.5. check cache-related arguments
-        if self.use_cache is False:
+        if not self.use_cache:
             # In this case, all cache-related arguments should be unset. However, since `use_cache=False` is often used
             # passed to `generate` directly to hot-fix cache issues, let's raise a warning instead of an error
             # (otherwise a user might need to overwrite several parameters).
             no_cache_warning = (
-                "You have set `use_cache` to `False`, but {cache_arg} is set to {cache_arg_value}. {cache_arg} will "
-                "have no effect."
+                "You have not set `use_cache` to `True`, but {cache_arg} is set to {cache_arg_value}."
+                "{cache_arg} will have no effect."
             )
             for arg_name in ("cache_implementation", "cache_config"):
                 if getattr(self, arg_name) is not None:
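The same pattern applies to the beam, `num_return_sequences` and cache checks: `num_beams=None` counts as single-beam, returning more than one sequence still requires sampling or beams, and the cache warning now also fires when `use_cache` is merely left unset. A few hedged examples of inputs this validation reacts to:

```python
from transformers import GenerationConfig

# Beam-only flag without beam search (num_beams unset counts as a single beam): warned.
GenerationConfig(length_penalty=2.0)

# More than one returned sequence without sampling or beams: raises per the hunk above.
try:
    GenerationConfig(num_return_sequences=3)
except ValueError as err:
    print(err)

# Cache arguments while caching is disabled: warned, the arguments have no effect.
GenerationConfig(use_cache=False, cache_implementation="static")
```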
@@ -658,9 +691,9 @@ class GenerationConfig(PushToHubMixin):
                     )

         # 2.6. other incorrect combinations
-        if self.return_dict_in_generate is not True:
+        if not self.return_dict_in_generate:
             for extra_output_flag in self.extra_output_flags:
-                if getattr(self, extra_output_flag) is True:
+                if getattr(self, extra_output_flag):
                     minor_issues[extra_output_flag] = (
                         f"`return_dict_in_generate` is NOT set to `True`, but `{extra_output_flag}` is. When "
                         f"`return_dict_in_generate` is not `True`, `{extra_output_flag}` is ignored."
@@ -676,7 +709,6 @@ class GenerationConfig(PushToHubMixin):
             "streamer",
             "negative_prompt_ids",
             "negative_prompt_attention_mask",
-            "use_model_defaults",
         )
         for arg in generate_arguments:
             if hasattr(self, arg):
@@ -1101,7 +1133,7 @@ class GenerationConfig(PushToHubMixin):
             writer.write(self.to_json_string(use_diff=use_diff, keys_to_pop=keys_to_pop))

     @classmethod
-    def from_model_config(cls, model_config: PreTrainedConfig) -> "GenerationConfig":
+    def from_model_config(cls, model_config: Union["PreTrainedConfig", dict]) -> "GenerationConfig":
         """
         Instantiates a [`GenerationConfig`] from a [`PreTrainedConfig`]. This function is useful to convert legacy
         [`PreTrainedConfig`] objects, which may contain generation parameters, into a stand-alone [`GenerationConfig`].
@@ -1118,23 +1150,28 @@ class GenerationConfig(PushToHubMixin):

         # Removes all `None` from the model config dict -- this lets the generation config defaults to take hold
         config_dict = {key: value for key, value in config_dict.items() if value is not None}
-
         generation_config = cls.from_dict(config_dict, return_unused_kwargs=False, _from_model_config=True)

         # Special case: some models have generation attributes set in the decoder. Use them if still unset in the
         # generation config (which in turn is defined from the outer attributes of model config).
-        if
-
-
-
-
-
-
-
-
+        if isinstance(model_config, dict):
+            decoder_possible_text_config_names = ("decoder", "generator", "text_config")
+            for text_config_name in decoder_possible_text_config_names:
+                if text_config := model_config.get(text_config_name):
+                    model_config = text_config
+                    break
+        else:
+            model_config = model_config.get_text_config(decoder=True)
+            model_config = model_config.to_dict()
+
+        default_generation_config = GenerationConfig()
+        for attr in generation_config.to_dict():
+            is_unset = getattr(generation_config, attr) == getattr(default_generation_config, attr)
+            if attr in model_config and is_unset:
+                setattr(generation_config, attr, model_config[attr])

         # If any `output_...` flag is set to `True`, we ensure `return_dict_in_generate` is set to `True`.
-        if generation_config.return_dict_in_generate is False:
+        if not generation_config.return_dict_in_generate:
             if any(
                 getattr(generation_config, extra_output_flag, False)
                 for extra_output_flag in generation_config.extra_output_flags
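`from_model_config` now also accepts a plain config dict and digs into a nested text config (`decoder`, `generator` or `text_config`) before copying still-unset generation attributes. The long-standing object path is unchanged; a hedged example:

```python
from transformers import AutoConfig, GenerationConfig

model_config = AutoConfig.from_pretrained("gpt2")
gen_config = GenerationConfig.from_model_config(model_config)
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 50256 50256 for gpt2

# Per the hunk above, rc2 additionally accepts the dict form, e.g. model_config.to_dict().
```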
@@ -1145,12 +1182,14 @@ class GenerationConfig(PushToHubMixin):
         generation_config._original_object_hash = hash(generation_config)
         return generation_config

-    def update(self, **kwargs):
+    def update(self, defaults_only=False, **kwargs):
         """
         Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes,
         returning all the unused kwargs.

         Args:
+            defaults_only (`bool`, *optional*, defaults to `False`):
+                Whether to update all keys in config with `kwargs` or only those that are set to `None` (i.e. default value).
             kwargs (`dict[str, Any]`):
                 Dictionary of attributes to tentatively update this class.

@@ -1160,8 +1199,9 @@ class GenerationConfig(PushToHubMixin):
         to_remove = []
         for key, value in kwargs.items():
             if hasattr(self, key):
-                setattr(self, key, value)
-                to_remove.append(key)
+                if not defaults_only or getattr(self, key) is None:
+                    setattr(self, key, value)
+                    to_remove.append(key)

         # Confirm that the updated instance is still valid
         self.validate()
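Finally, the new `defaults_only` switch makes `update()` a backfilling tool: when it is `True`, only attributes that are still `None` are overwritten, and everything skipped (or unknown) comes back in the returned dict of unused kwargs, assuming the unchanged tail of `update()` still returns the kwargs it did not consume. A hedged sketch against the rc2 body above:

```python
from transformers import GenerationConfig

cfg = GenerationConfig(do_sample=True, temperature=0.2)

unused = cfg.update(defaults_only=True, temperature=1.0, top_p=0.9, not_a_real_flag=1)
print(cfg.temperature)  # 0.2 -- already set, left untouched
print(cfg.top_p)        # 0.9 -- was None, so it is filled in
print(unused)           # {'temperature': 1.0, 'not_a_real_flag': 1}
```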