transformers-5.0.0rc1-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (671)
  1. transformers/__init__.py +20 -1
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/configuration_utils.py +114 -70
  6. transformers/conversion_mapping.py +68 -5
  7. transformers/core_model_loading.py +201 -35
  8. transformers/dependency_versions_table.py +1 -1
  9. transformers/feature_extraction_utils.py +54 -22
  10. transformers/generation/candidate_generator.py +79 -31
  11. transformers/generation/configuration_utils.py +162 -122
  12. transformers/generation/continuous_batching/cache.py +47 -18
  13. transformers/generation/continuous_batching/cache_manager.py +131 -34
  14. transformers/generation/continuous_batching/continuous_api.py +101 -64
  15. transformers/generation/continuous_batching/requests.py +28 -1
  16. transformers/generation/continuous_batching/scheduler.py +11 -4
  17. transformers/generation/stopping_criteria.py +1 -1
  18. transformers/generation/utils.py +108 -110
  19. transformers/generation/watermarking.py +8 -5
  20. transformers/image_processing_base.py +2 -12
  21. transformers/image_processing_utils_fast.py +15 -4
  22. transformers/initialization.py +37 -0
  23. transformers/integrations/__init__.py +12 -0
  24. transformers/integrations/accelerate.py +44 -111
  25. transformers/integrations/aqlm.py +3 -5
  26. transformers/integrations/awq.py +2 -5
  27. transformers/integrations/bitnet.py +5 -8
  28. transformers/integrations/bitsandbytes.py +16 -15
  29. transformers/integrations/deepspeed.py +18 -3
  30. transformers/integrations/eetq.py +3 -5
  31. transformers/integrations/fbgemm_fp8.py +1 -1
  32. transformers/integrations/finegrained_fp8.py +6 -16
  33. transformers/integrations/flash_attention.py +2 -2
  34. transformers/integrations/higgs.py +2 -5
  35. transformers/integrations/hub_kernels.py +23 -5
  36. transformers/integrations/integration_utils.py +35 -0
  37. transformers/integrations/mistral.py +12 -0
  38. transformers/integrations/moe.py +240 -0
  39. transformers/integrations/mxfp4.py +4 -10
  40. transformers/integrations/peft.py +5 -0
  41. transformers/integrations/quanto.py +5 -2
  42. transformers/integrations/spqr.py +3 -5
  43. transformers/integrations/tensor_parallel.py +167 -221
  44. transformers/integrations/vptq.py +3 -5
  45. transformers/modeling_gguf_pytorch_utils.py +66 -19
  46. transformers/modeling_rope_utils.py +78 -81
  47. transformers/modeling_utils.py +583 -503
  48. transformers/models/__init__.py +19 -0
  49. transformers/models/afmoe/modeling_afmoe.py +7 -16
  50. transformers/models/afmoe/modular_afmoe.py +5 -13
  51. transformers/models/aimv2/modeling_aimv2.py +4 -0
  52. transformers/models/aimv2/modular_aimv2.py +4 -0
  53. transformers/models/albert/modeling_albert.py +3 -0
  54. transformers/models/align/modeling_align.py +12 -6
  55. transformers/models/altclip/modeling_altclip.py +7 -3
  56. transformers/models/apertus/modeling_apertus.py +4 -2
  57. transformers/models/apertus/modular_apertus.py +4 -1
  58. transformers/models/arcee/modeling_arcee.py +1 -1
  59. transformers/models/aria/modeling_aria.py +8 -4
  60. transformers/models/aria/modular_aria.py +7 -3
  61. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  62. transformers/models/auto/auto_factory.py +1 -1
  63. transformers/models/auto/configuration_auto.py +27 -0
  64. transformers/models/auto/feature_extraction_auto.py +7 -3
  65. transformers/models/auto/image_processing_auto.py +4 -2
  66. transformers/models/auto/modeling_auto.py +31 -0
  67. transformers/models/auto/processing_auto.py +4 -0
  68. transformers/models/auto/tokenization_auto.py +132 -153
  69. transformers/models/auto/video_processing_auto.py +5 -2
  70. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  71. transformers/models/bamba/modeling_bamba.py +18 -19
  72. transformers/models/bamba/modular_bamba.py +17 -16
  73. transformers/models/bark/modeling_bark.py +9 -0
  74. transformers/models/bart/configuration_bart.py +0 -1
  75. transformers/models/bart/modeling_bart.py +7 -0
  76. transformers/models/beit/image_processing_beit_fast.py +0 -1
  77. transformers/models/bert/modeling_bert.py +3 -0
  78. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  79. transformers/models/big_bird/modeling_big_bird.py +3 -0
  80. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
  81. transformers/models/bit/modeling_bit.py +5 -1
  82. transformers/models/bitnet/modeling_bitnet.py +1 -1
  83. transformers/models/blenderbot/modeling_blenderbot.py +7 -0
  84. transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
  85. transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
  86. transformers/models/blip/modeling_blip.py +2 -0
  87. transformers/models/blip/modeling_blip_text.py +8 -0
  88. transformers/models/blip_2/modeling_blip_2.py +2 -0
  89. transformers/models/bloom/modeling_bloom.py +13 -44
  90. transformers/models/blt/modeling_blt.py +162 -2
  91. transformers/models/blt/modular_blt.py +168 -3
  92. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  93. transformers/models/bridgetower/modeling_bridgetower.py +6 -0
  94. transformers/models/bros/modeling_bros.py +8 -0
  95. transformers/models/camembert/modeling_camembert.py +109 -106
  96. transformers/models/canine/modeling_canine.py +6 -0
  97. transformers/models/canine/tokenization_canine.py +2 -0
  98. transformers/models/chameleon/modeling_chameleon.py +9 -4
  99. transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
  100. transformers/models/clap/feature_extraction_clap.py +2 -2
  101. transformers/models/clap/modeling_clap.py +25 -15
  102. transformers/models/clip/modeling_clip.py +2 -0
  103. transformers/models/clipseg/modeling_clipseg.py +4 -0
  104. transformers/models/clvp/modeling_clvp.py +14 -3
  105. transformers/models/code_llama/tokenization_code_llama.py +1 -1
  106. transformers/models/codegen/modeling_codegen.py +13 -4
  107. transformers/models/cohere/modeling_cohere.py +1 -1
  108. transformers/models/cohere2/modeling_cohere2.py +1 -1
  109. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
  110. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  111. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  112. transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
  113. transformers/models/convbert/modeling_convbert.py +3 -0
  114. transformers/models/convnext/image_processing_convnext.py +2 -2
  115. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  116. transformers/models/csm/generation_csm.py +19 -22
  117. transformers/models/csm/modeling_csm.py +3 -1
  118. transformers/models/csm/modular_csm.py +2 -0
  119. transformers/models/ctrl/modeling_ctrl.py +14 -2
  120. transformers/models/cvt/modeling_cvt.py +5 -1
  121. transformers/models/cwm/modeling_cwm.py +1 -1
  122. transformers/models/d_fine/configuration_d_fine.py +3 -4
  123. transformers/models/d_fine/modeling_d_fine.py +46 -39
  124. transformers/models/d_fine/modular_d_fine.py +15 -4
  125. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  126. transformers/models/dab_detr/modeling_dab_detr.py +1 -1
  127. transformers/models/dac/modeling_dac.py +4 -4
  128. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  129. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  130. transformers/models/dbrx/configuration_dbrx.py +9 -1
  131. transformers/models/dbrx/modeling_dbrx.py +1 -1
  132. transformers/models/deberta/modeling_deberta.py +2 -0
  133. transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
  134. transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
  135. transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
  136. transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
  137. transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
  138. transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
  139. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  140. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  141. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  142. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  143. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  144. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  145. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  146. transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
  147. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  148. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  149. transformers/models/detr/configuration_detr.py +1 -1
  150. transformers/models/detr/modeling_detr.py +8 -1
  151. transformers/models/dia/generation_dia.py +3 -10
  152. transformers/models/dia/modeling_dia.py +12 -1
  153. transformers/models/dia/modular_dia.py +11 -0
  154. transformers/models/dia/processing_dia.py +1 -1
  155. transformers/models/diffllama/modeling_diffllama.py +3 -3
  156. transformers/models/diffllama/modular_diffllama.py +2 -2
  157. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  158. transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
  159. transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
  160. transformers/models/distilbert/modeling_distilbert.py +11 -9
  161. transformers/models/doge/modeling_doge.py +1 -1
  162. transformers/models/donut/image_processing_donut_fast.py +0 -1
  163. transformers/models/donut/modeling_donut_swin.py +16 -12
  164. transformers/models/dots1/modeling_dots1.py +14 -5
  165. transformers/models/dpt/configuration_dpt.py +1 -1
  166. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  167. transformers/models/dpt/modular_dpt.py +1 -2
  168. transformers/models/edgetam/configuration_edgetam.py +1 -1
  169. transformers/models/edgetam/modeling_edgetam.py +5 -2
  170. transformers/models/edgetam/modular_edgetam.py +15 -14
  171. transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
  172. transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
  173. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  174. transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
  175. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  176. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  177. transformers/models/efficientnet/modeling_efficientnet.py +5 -1
  178. transformers/models/electra/modeling_electra.py +7 -0
  179. transformers/models/emu3/modeling_emu3.py +8 -2
  180. transformers/models/emu3/modular_emu3.py +7 -1
  181. transformers/models/encodec/modeling_encodec.py +14 -0
  182. transformers/models/eomt/image_processing_eomt_fast.py +46 -14
  183. transformers/models/eomt/modeling_eomt.py +7 -0
  184. transformers/models/eomt/modular_eomt.py +7 -0
  185. transformers/models/ernie/modeling_ernie.py +6 -0
  186. transformers/models/ernie/modular_ernie.py +6 -0
  187. transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
  188. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
  189. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
  190. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  191. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  192. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  193. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  194. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  195. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  196. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  197. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  198. transformers/models/esm/modeling_esm.py +6 -0
  199. transformers/models/esm/modeling_esmfold.py +6 -1
  200. transformers/models/evolla/modeling_evolla.py +9 -1
  201. transformers/models/evolla/modular_evolla.py +8 -0
  202. transformers/models/exaone4/modeling_exaone4.py +1 -1
  203. transformers/models/falcon/modeling_falcon.py +3 -3
  204. transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
  205. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  206. transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
  207. transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
  208. transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
  209. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
  210. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  211. transformers/models/flaubert/modeling_flaubert.py +14 -15
  212. transformers/models/flava/image_processing_flava_fast.py +0 -2
  213. transformers/models/flava/modeling_flava.py +4 -1
  214. transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
  215. transformers/models/florence2/modeling_florence2.py +20 -3
  216. transformers/models/florence2/modular_florence2.py +13 -0
  217. transformers/models/fnet/modeling_fnet.py +7 -0
  218. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  219. transformers/models/fuyu/modeling_fuyu.py +3 -1
  220. transformers/models/fuyu/processing_fuyu.py +16 -0
  221. transformers/models/gemma/modeling_gemma.py +10 -12
  222. transformers/models/gemma/modular_gemma.py +9 -11
  223. transformers/models/gemma2/modeling_gemma2.py +1 -1
  224. transformers/models/gemma2/modular_gemma2.py +1 -1
  225. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  226. transformers/models/gemma3/modeling_gemma3.py +28 -7
  227. transformers/models/gemma3/modular_gemma3.py +26 -6
  228. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  229. transformers/models/gemma3n/modeling_gemma3n.py +47 -9
  230. transformers/models/gemma3n/modular_gemma3n.py +51 -9
  231. transformers/models/git/modeling_git.py +181 -126
  232. transformers/models/glm/modeling_glm.py +1 -1
  233. transformers/models/glm4/modeling_glm4.py +1 -1
  234. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  235. transformers/models/glm46v/modeling_glm46v.py +3 -1
  236. transformers/models/glm46v/modular_glm46v.py +3 -0
  237. transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
  238. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  239. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  240. transformers/models/glm4v/modeling_glm4v.py +15 -5
  241. transformers/models/glm4v/modular_glm4v.py +11 -3
  242. transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
  243. transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
  244. transformers/models/glmasr/__init__.py +30 -0
  245. transformers/models/glmasr/configuration_glmasr.py +197 -0
  246. transformers/models/glmasr/modeling_glmasr.py +512 -0
  247. transformers/models/glmasr/modular_glmasr.py +433 -0
  248. transformers/models/glmasr/processing_glmasr.py +332 -0
  249. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  250. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  251. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  252. transformers/models/gpt2/modeling_gpt2.py +8 -5
  253. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
  254. transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
  255. transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
  256. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
  257. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  258. transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
  259. transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
  260. transformers/models/gptj/modeling_gptj.py +15 -6
  261. transformers/models/granite/modeling_granite.py +1 -1
  262. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  263. transformers/models/granitemoe/modeling_granitemoe.py +2 -3
  264. transformers/models/granitemoe/modular_granitemoe.py +1 -2
  265. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  266. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
  267. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  268. transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
  269. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  270. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
  271. transformers/models/groupvit/modeling_groupvit.py +6 -1
  272. transformers/models/helium/modeling_helium.py +1 -1
  273. transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
  274. transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
  275. transformers/models/hubert/modeling_hubert.py +4 -0
  276. transformers/models/hubert/modular_hubert.py +4 -0
  277. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
  278. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  279. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  280. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
  281. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  282. transformers/models/ibert/modeling_ibert.py +16 -0
  283. transformers/models/idefics/modeling_idefics.py +10 -0
  284. transformers/models/idefics2/modeling_idefics2.py +7 -1
  285. transformers/models/idefics3/modeling_idefics3.py +5 -1
  286. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  287. transformers/models/imagegpt/modeling_imagegpt.py +9 -2
  288. transformers/models/instructblip/modeling_instructblip.py +2 -0
  289. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  290. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  291. transformers/models/internvl/modeling_internvl.py +11 -8
  292. transformers/models/internvl/modular_internvl.py +5 -9
  293. transformers/models/internvl/video_processing_internvl.py +0 -1
  294. transformers/models/jais2/__init__.py +27 -0
  295. transformers/models/jais2/configuration_jais2.py +152 -0
  296. transformers/models/jais2/modeling_jais2.py +486 -0
  297. transformers/models/jais2/modular_jais2.py +196 -0
  298. transformers/models/jamba/modeling_jamba.py +24 -19
  299. transformers/models/jamba/modular_jamba.py +17 -17
  300. transformers/models/janus/image_processing_janus_fast.py +0 -1
  301. transformers/models/janus/modeling_janus.py +15 -7
  302. transformers/models/janus/modular_janus.py +16 -7
  303. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  304. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  305. transformers/models/kosmos2/modeling_kosmos2.py +14 -2
  306. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  307. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  308. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
  309. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  310. transformers/models/lasr/configuration_lasr.py +4 -0
  311. transformers/models/lasr/modeling_lasr.py +3 -2
  312. transformers/models/lasr/modular_lasr.py +8 -1
  313. transformers/models/lasr/processing_lasr.py +0 -2
  314. transformers/models/layoutlm/modeling_layoutlm.py +5 -3
  315. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  316. transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
  317. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
  318. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  319. transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
  320. transformers/models/led/modeling_led.py +6 -0
  321. transformers/models/levit/modeling_levit.py +18 -0
  322. transformers/models/lfm2/modeling_lfm2.py +1 -1
  323. transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
  324. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  325. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  326. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  327. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  328. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  329. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  330. transformers/models/lilt/modeling_lilt.py +19 -15
  331. transformers/models/llama/modeling_llama.py +1 -1
  332. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  333. transformers/models/llama4/modeling_llama4.py +8 -4
  334. transformers/models/llava/image_processing_llava_fast.py +0 -1
  335. transformers/models/llava/modeling_llava.py +12 -7
  336. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  337. transformers/models/llava_next/modeling_llava_next.py +7 -3
  338. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  339. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  340. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  341. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  342. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  343. transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
  344. transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
  345. transformers/models/longt5/modeling_longt5.py +0 -4
  346. transformers/models/m2m_100/modeling_m2m_100.py +10 -0
  347. transformers/models/mamba/modeling_mamba.py +2 -1
  348. transformers/models/mamba2/modeling_mamba2.py +24 -23
  349. transformers/models/marian/configuration_marian.py +1 -1
  350. transformers/models/marian/modeling_marian.py +3 -0
  351. transformers/models/markuplm/modeling_markuplm.py +5 -8
  352. transformers/models/mask2former/configuration_mask2former.py +3 -3
  353. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  354. transformers/models/mask2former/modeling_mask2former.py +9 -0
  355. transformers/models/maskformer/configuration_maskformer.py +3 -3
  356. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  357. transformers/models/maskformer/modeling_maskformer.py +9 -1
  358. transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
  359. transformers/models/mbart/configuration_mbart.py +1 -0
  360. transformers/models/mbart/modeling_mbart.py +7 -0
  361. transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
  362. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  363. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  364. transformers/models/mimi/modeling_mimi.py +25 -4
  365. transformers/models/minimax/modeling_minimax.py +16 -3
  366. transformers/models/minimax/modular_minimax.py +12 -1
  367. transformers/models/ministral/modeling_ministral.py +1 -1
  368. transformers/models/ministral3/modeling_ministral3.py +1 -1
  369. transformers/models/mistral/modeling_mistral.py +1 -1
  370. transformers/models/mistral3/modeling_mistral3.py +10 -4
  371. transformers/models/mistral3/modular_mistral3.py +3 -1
  372. transformers/models/mixtral/modeling_mixtral.py +12 -4
  373. transformers/models/mixtral/modular_mixtral.py +6 -2
  374. transformers/models/mlcd/modeling_mlcd.py +6 -0
  375. transformers/models/mlcd/modular_mlcd.py +4 -0
  376. transformers/models/mllama/modeling_mllama.py +13 -2
  377. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  378. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
  379. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  380. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  381. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  382. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  383. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  384. transformers/models/mobilevit/modeling_mobilevit.py +4 -0
  385. transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
  386. transformers/models/modernbert/modeling_modernbert.py +12 -1
  387. transformers/models/modernbert/modular_modernbert.py +12 -1
  388. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
  389. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
  390. transformers/models/moonshine/modeling_moonshine.py +1 -1
  391. transformers/models/moshi/modeling_moshi.py +21 -51
  392. transformers/models/mpnet/modeling_mpnet.py +2 -0
  393. transformers/models/mra/modeling_mra.py +4 -1
  394. transformers/models/mt5/configuration_mt5.py +2 -3
  395. transformers/models/mt5/modeling_mt5.py +0 -10
  396. transformers/models/musicgen/modeling_musicgen.py +5 -9
  397. transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
  398. transformers/models/mvp/modeling_mvp.py +7 -0
  399. transformers/models/nanochat/modeling_nanochat.py +1 -1
  400. transformers/models/nemotron/modeling_nemotron.py +3 -3
  401. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  402. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  403. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  404. transformers/models/nougat/tokenization_nougat.py +11 -16
  405. transformers/models/nystromformer/modeling_nystromformer.py +7 -0
  406. transformers/models/olmo/modeling_olmo.py +1 -1
  407. transformers/models/olmo2/modeling_olmo2.py +1 -1
  408. transformers/models/olmo3/modeling_olmo3.py +1 -1
  409. transformers/models/olmoe/modeling_olmoe.py +12 -4
  410. transformers/models/olmoe/modular_olmoe.py +4 -2
  411. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  412. transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
  413. transformers/models/oneformer/configuration_oneformer.py +3 -3
  414. transformers/models/oneformer/modeling_oneformer.py +7 -38
  415. transformers/models/openai/modeling_openai.py +12 -0
  416. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  417. transformers/models/ovis2/modeling_ovis2.py +15 -3
  418. transformers/models/ovis2/modular_ovis2.py +8 -0
  419. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  420. transformers/models/owlv2/modeling_owlv2.py +7 -3
  421. transformers/models/owlv2/modular_owlv2.py +0 -2
  422. transformers/models/owlvit/modeling_owlvit.py +7 -3
  423. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
  424. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
  425. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
  426. transformers/models/paligemma/modeling_paligemma.py +25 -17
  427. transformers/models/parakeet/modeling_parakeet.py +5 -0
  428. transformers/models/parakeet/modular_parakeet.py +5 -0
  429. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  430. transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
  431. transformers/models/patchtst/modeling_patchtst.py +5 -4
  432. transformers/models/pe_audio/__init__.py +30 -0
  433. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  434. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  435. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  436. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  437. transformers/models/pe_audio/processing_pe_audio.py +24 -0
  438. transformers/models/pe_audio_video/__init__.py +29 -0
  439. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  440. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  441. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  442. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  443. transformers/models/pe_video/__init__.py +30 -0
  444. transformers/models/pe_video/configuration_pe_video.py +211 -0
  445. transformers/models/pe_video/modeling_pe_video.py +636 -0
  446. transformers/models/pe_video/modular_pe_video.py +219 -0
  447. transformers/models/pe_video/processing_pe_video.py +10 -0
  448. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  449. transformers/models/pegasus/configuration_pegasus.py +1 -0
  450. transformers/models/pegasus/modeling_pegasus.py +3 -0
  451. transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
  452. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  453. transformers/models/perceiver/modeling_perceiver.py +5 -1
  454. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  455. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  456. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  457. transformers/models/persimmon/modeling_persimmon.py +1 -1
  458. transformers/models/phi/modeling_phi.py +1 -1
  459. transformers/models/phi3/modeling_phi3.py +1 -1
  460. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
  461. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
  462. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  463. transformers/models/phimoe/modeling_phimoe.py +12 -4
  464. transformers/models/phimoe/modular_phimoe.py +1 -1
  465. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  466. transformers/models/pixio/__init__.py +30 -0
  467. transformers/models/pixio/configuration_pixio.py +151 -0
  468. transformers/models/pixio/modeling_pixio.py +507 -0
  469. transformers/models/pixio/modular_pixio.py +404 -0
  470. transformers/models/pixtral/modeling_pixtral.py +1 -1
  471. transformers/models/pixtral/processing_pixtral.py +3 -1
  472. transformers/models/plbart/configuration_plbart.py +1 -0
  473. transformers/models/plbart/modeling_plbart.py +7 -0
  474. transformers/models/plbart/modular_plbart.py +6 -0
  475. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  476. transformers/models/poolformer/modeling_poolformer.py +11 -1
  477. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  478. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  479. transformers/models/prophetnet/modeling_prophetnet.py +2 -1
  480. transformers/models/qwen2/modeling_qwen2.py +1 -1
  481. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
  482. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
  483. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
  484. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
  485. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
  486. transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
  487. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  488. transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
  489. transformers/models/qwen3/modeling_qwen3.py +1 -1
  490. transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
  491. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
  492. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  493. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
  494. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
  495. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  496. transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
  497. transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
  498. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  499. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
  500. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
  501. transformers/models/rag/configuration_rag.py +0 -8
  502. transformers/models/rag/modeling_rag.py +7 -9
  503. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
  504. transformers/models/reformer/modeling_reformer.py +9 -1
  505. transformers/models/regnet/modeling_regnet.py +4 -0
  506. transformers/models/rembert/modeling_rembert.py +7 -1
  507. transformers/models/resnet/modeling_resnet.py +8 -3
  508. transformers/models/roberta/modeling_roberta.py +3 -0
  509. transformers/models/roberta/modular_roberta.py +3 -0
  510. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  511. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  512. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  513. transformers/models/rt_detr/modeling_rt_detr.py +4 -0
  514. transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
  515. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  516. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
  517. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  518. transformers/models/rwkv/modeling_rwkv.py +1 -1
  519. transformers/models/sam/configuration_sam.py +1 -0
  520. transformers/models/sam/image_processing_sam_fast.py +0 -1
  521. transformers/models/sam/modeling_sam.py +4 -1
  522. transformers/models/sam2/configuration_sam2.py +1 -1
  523. transformers/models/sam2/modeling_sam2.py +5 -1
  524. transformers/models/sam2/modular_sam2.py +5 -1
  525. transformers/models/sam2_video/modeling_sam2_video.py +51 -43
  526. transformers/models/sam2_video/modular_sam2_video.py +31 -18
  527. transformers/models/sam3/configuration_sam3.py +21 -1
  528. transformers/models/sam3/modeling_sam3.py +23 -0
  529. transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
  530. transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
  531. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  532. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
  533. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  534. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  535. transformers/models/sam3_video/modeling_sam3_video.py +3 -3
  536. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  537. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  538. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  539. transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
  540. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
  541. transformers/models/seed_oss/modeling_seed_oss.py +1 -1
  542. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  543. transformers/models/segformer/modeling_segformer.py +2 -2
  544. transformers/models/segformer/modular_segformer.py +0 -1
  545. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  546. transformers/models/siglip/modeling_siglip.py +24 -2
  547. transformers/models/siglip2/modeling_siglip2.py +63 -41
  548. transformers/models/smollm3/modeling_smollm3.py +1 -1
  549. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  550. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  551. transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
  552. transformers/models/speecht5/modeling_speecht5.py +28 -0
  553. transformers/models/splinter/modeling_splinter.py +9 -3
  554. transformers/models/squeezebert/modeling_squeezebert.py +2 -0
  555. transformers/models/stablelm/modeling_stablelm.py +1 -1
  556. transformers/models/starcoder2/modeling_starcoder2.py +1 -1
  557. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  558. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  559. transformers/models/swiftformer/modeling_swiftformer.py +4 -0
  560. transformers/models/swin/modeling_swin.py +16 -12
  561. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  562. transformers/models/swin2sr/modeling_swin2sr.py +49 -33
  563. transformers/models/swinv2/modeling_swinv2.py +41 -33
  564. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  565. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  566. transformers/models/t5/configuration_t5.py +7 -1
  567. transformers/models/t5/modeling_t5.py +1 -7
  568. transformers/models/t5gemma/modeling_t5gemma.py +1 -1
  569. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  570. transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
  571. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  572. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  573. transformers/models/table_transformer/modeling_table_transformer.py +1 -1
  574. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  575. transformers/models/timesfm/modeling_timesfm.py +12 -0
  576. transformers/models/timesfm/modular_timesfm.py +12 -0
  577. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  578. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  579. transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
  580. transformers/models/trocr/modeling_trocr.py +1 -2
  581. transformers/models/tvp/configuration_tvp.py +5 -1
  582. transformers/models/tvp/modeling_tvp.py +4 -4
  583. transformers/models/udop/configuration_udop.py +1 -0
  584. transformers/models/udop/modeling_udop.py +3 -7
  585. transformers/models/umt5/configuration_umt5.py +2 -2
  586. transformers/models/umt5/modeling_umt5.py +0 -6
  587. transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
  588. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  589. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  590. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  591. transformers/models/video_llava/modeling_video_llava.py +7 -3
  592. transformers/models/vilt/configuration_vilt.py +2 -2
  593. transformers/models/vilt/modeling_vilt.py +7 -0
  594. transformers/models/vipllava/modeling_vipllava.py +7 -3
  595. transformers/models/visual_bert/modeling_visual_bert.py +2 -0
  596. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  597. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  598. transformers/models/vitmatte/modeling_vitmatte.py +4 -0
  599. transformers/models/vitpose/configuration_vitpose.py +1 -1
  600. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  601. transformers/models/voxtral/modeling_voxtral.py +2 -2
  602. transformers/models/voxtral/modular_voxtral.py +2 -2
  603. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
  604. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
  605. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
  606. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  607. transformers/models/whisper/generation_whisper.py +1 -0
  608. transformers/models/whisper/modeling_whisper.py +5 -3
  609. transformers/models/x_clip/modeling_x_clip.py +2 -0
  610. transformers/models/xcodec/modeling_xcodec.py +5 -0
  611. transformers/models/xglm/modeling_xglm.py +10 -0
  612. transformers/models/xlm/modeling_xlm.py +13 -14
  613. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  614. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  615. transformers/models/xlnet/modeling_xlnet.py +3 -1
  616. transformers/models/xmod/modeling_xmod.py +3 -0
  617. transformers/models/yoso/modeling_yoso.py +4 -1
  618. transformers/models/zamba/modeling_zamba.py +2 -1
  619. transformers/models/zamba2/modeling_zamba2.py +3 -2
  620. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  621. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  622. transformers/models/zoedepth/modeling_zoedepth.py +7 -0
  623. transformers/pipelines/__init__.py +9 -6
  624. transformers/pipelines/automatic_speech_recognition.py +20 -12
  625. transformers/pipelines/base.py +1 -1
  626. transformers/pipelines/document_question_answering.py +1 -1
  627. transformers/pipelines/question_answering.py +1 -1
  628. transformers/pipelines/text_to_audio.py +2 -2
  629. transformers/processing_utils.py +127 -56
  630. transformers/quantizers/auto.py +2 -4
  631. transformers/quantizers/base.py +9 -64
  632. transformers/quantizers/quantizer_aqlm.py +1 -18
  633. transformers/quantizers/quantizer_auto_round.py +1 -10
  634. transformers/quantizers/quantizer_awq.py +3 -8
  635. transformers/quantizers/quantizer_bitnet.py +1 -6
  636. transformers/quantizers/quantizer_bnb_4bit.py +9 -49
  637. transformers/quantizers/quantizer_bnb_8bit.py +9 -19
  638. transformers/quantizers/quantizer_compressed_tensors.py +1 -4
  639. transformers/quantizers/quantizer_eetq.py +2 -12
  640. transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
  641. transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
  642. transformers/quantizers/quantizer_fp_quant.py +4 -4
  643. transformers/quantizers/quantizer_gptq.py +1 -4
  644. transformers/quantizers/quantizer_higgs.py +2 -6
  645. transformers/quantizers/quantizer_mxfp4.py +2 -28
  646. transformers/quantizers/quantizer_quanto.py +14 -14
  647. transformers/quantizers/quantizer_spqr.py +3 -8
  648. transformers/quantizers/quantizer_torchao.py +28 -124
  649. transformers/quantizers/quantizer_vptq.py +1 -10
  650. transformers/testing_utils.py +28 -12
  651. transformers/tokenization_mistral_common.py +3 -2
  652. transformers/tokenization_utils_base.py +3 -2
  653. transformers/tokenization_utils_tokenizers.py +25 -2
  654. transformers/trainer.py +24 -2
  655. transformers/trainer_callback.py +8 -0
  656. transformers/trainer_seq2seq.py +4 -0
  657. transformers/training_args.py +8 -10
  658. transformers/utils/__init__.py +4 -0
  659. transformers/utils/attention_visualizer.py +4 -4
  660. transformers/utils/auto_docstring.py +34 -25
  661. transformers/utils/generic.py +20 -0
  662. transformers/utils/import_utils.py +51 -9
  663. transformers/utils/kernel_config.py +71 -18
  664. transformers/utils/quantization_config.py +8 -8
  665. transformers/video_processing_utils.py +16 -12
  666. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
  667. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
  668. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
  669. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  670. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
  671. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -20,12 +20,11 @@ import os
 from abc import ABC, abstractmethod
 from collections.abc import Callable
 from dataclasses import dataclass, is_dataclass
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Optional, Union

 from huggingface_hub import create_repo

 from .. import __version__
-from ..configuration_utils import PreTrainedConfig
 from ..utils import (
     GENERATION_CONFIG_NAME,
     ExplicitEnum,
@@ -38,6 +37,7 @@ from ..utils import (


 if TYPE_CHECKING:
+    from ..configuration_utils import PreTrainedConfig
     from ..modeling_utils import PreTrainedModel

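The first two hunks move the `PreTrainedConfig` import out of module scope and under `TYPE_CHECKING`, so the name stays available to type checkers without being imported at runtime. A minimal sketch of this pattern (the module path mirrors the diff; the function is illustrative):

    # Minimal sketch of the TYPE_CHECKING pattern adopted above: the import is
    # evaluated only by static type checkers, never at runtime, which avoids
    # circular-import chains and import-time cost.
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from transformers.configuration_utils import PreTrainedConfig  # type-checker only

    def describe(config: "PreTrainedConfig") -> str:
        # The annotation is a string literal, so the class need not exist at runtime.
        return type(config).__name__
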
@@ -104,18 +104,18 @@ class GenerationConfig(PushToHubMixin):
     Arg:
         > Parameters that control the length of the output

-        max_length (`int`, *optional*, defaults to 20):
+        max_length (`int`, *optional*):
             `max_new_tokens` is recommended for controlling how many tokens the model generates.
             `max_length` remains for backward compatibility.

         max_new_tokens (`int`, *optional*):
             The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
-        min_length (`int`, *optional*, defaults to 0):
+        min_length (`int`, *optional*):
             The minimum length of the sequence to be generated. Corresponds to the length of the input prompt +
             `min_new_tokens`. Its effect is overridden by `min_new_tokens`, if also set.
         min_new_tokens (`int`, *optional*):
             The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.
-        early_stopping (`bool` or `str`, *optional*, defaults to `False`):
+        early_stopping (`bool` or `str`, *optional*):
             Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
             `True`, where the generation stops as soon as there are `num_beams` complete candidates; `False`, where an
             heuristic is applied and the generation stops when is it very unlikely to find better candidates;
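With the hard-coded `max_length` default of 20 removed from the docstring (and, per the `__init__` hunk at the end of this diff, from the code), `max_new_tokens` is the recommended length control. A hedged usage sketch (the checkpoint name is illustrative, not taken from this diff):

    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative checkpoint
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    # Bound the generated continuation directly, independent of prompt length.
    gen_config = GenerationConfig(max_new_tokens=32, min_new_tokens=4)
    inputs = tokenizer("Hello, my name is", return_tensors="pt")
    outputs = model.generate(**inputs, generation_config=gen_config)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
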
@@ -129,17 +129,17 @@ class GenerationConfig(PushToHubMixin):

         > Parameters that control the generation strategy used

-        do_sample (`bool`, *optional*, defaults to `False`):
+        do_sample (`bool`, defaults to `False`):
             Whether or not to use sampling ; use greedy decoding otherwise.
-        num_beams (`int`, *optional*, defaults to 1):
+        num_beams (`int`, *optional*):
             Number of beams for beam search. 1 means no beam search.

         > Parameters that control the cache

-        use_cache (`bool`, *optional*, defaults to `True`):
+        use_cache (`bool`, defaults to `True`):
             Whether or not the model should use the past last key/values attentions (if applicable to the model) to
             speed up decoding.
-        cache_implementation (`str`, *optional*, default to `None`):
+        cache_implementation (`str`, *optional*):
             Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are:

             - `"dynamic"`: [`DynamicCache`]
@@ -155,11 +155,11 @@

         > Parameters for manipulation of the model output logits

-        temperature (`float`, *optional*, defaults to 1.0):
+        temperature (`float`, *optional*):
             The value used to module the next token probabilities. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 1.0
-        top_k (`int`, *optional*, defaults to 50):
+        top_k (`int`, *optional*):
             The number of highest probability vocabulary tokens to keep for top-k-filtering. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 50.
-        top_p (`float`, *optional*, defaults to 1.0):
+        top_p (`float`, *optional*):
             If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
             `top_p` or higher are kept for generation. This value is set in a model's `generation_config.json` file. If it isn't set, the default value is 1.0
         min_p (`float`, *optional*):
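The rewritten docstrings above say that unset sampling knobs now fall back to whatever the model's `generation_config.json` stores, rather than a library-wide default. Constructing a config explicitly still pins the values regardless of any file; a short sketch (values are illustrative):

    from transformers import GenerationConfig

    gen_config = GenerationConfig(
        do_sample=True,   # sample instead of greedy decoding
        temperature=0.7,  # sharpen the next-token distribution
        top_k=50,         # keep only the 50 most probable tokens
        top_p=0.9,        # nucleus sampling over 90% of the probability mass
    )
    print(gen_config.do_sample, gen_config.temperature, gen_config.top_k, gen_config.top_p)
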
@@ -172,41 +172,41 @@ class GenerationConfig(PushToHubMixin):
172
172
  is kept whose *renormalized* entropy is less than or equal to `top_h` times the entropy of the full distribution.
173
173
  Smaller values (e.g., 0.2–0.5) lead to more focused, deterministic outputs, while values closer to 1.0 allow more
174
174
  randomness and diversity. Typical values are in the 0.3–0.6 range.
175
- typical_p (`float`, *optional*, defaults to 1.0):
175
+ typical_p (`float`, *optional*):
176
176
  Local typicality measures how similar the conditional probability of predicting a target token next is to
177
177
  the expected conditional probability of predicting a random token next, given the partial text already
178
178
  generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that
179
179
  add up to `typical_p` or higher are kept for generation. See [this
180
180
  paper](https://huggingface.co/papers/2202.00666) for more details.
181
- epsilon_cutoff (`float`, *optional*, defaults to 0.0):
181
+ epsilon_cutoff (`float`, *optional*):
182
182
  If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
183
183
  `epsilon_cutoff` will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the
184
184
  size of the model. See [Truncation Sampling as Language Model
185
185
  Desmoothing](https://huggingface.co/papers/2210.15191) for more details.
186
- eta_cutoff (`float`, *optional*, defaults to 0.0):
186
+ eta_cutoff (`float`, *optional*):
187
187
  Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between
188
188
  0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) *
189
189
  exp(-entropy(softmax(next_token_logits)))`. The latter term is intuitively the expected next token
190
190
  probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3,
191
191
  depending on the size of the model. See [Truncation Sampling as Language Model
192
192
  Desmoothing](https://huggingface.co/papers/2210.15191) for more details.
193
- repetition_penalty (`float`, *optional*, defaults to 1.0):
193
+ repetition_penalty (`float`, *optional*):
194
194
  The parameter for repetition penalty. 1.0 means no penalty. See [this
195
195
  paper](https://huggingface.co/papers/1909.05858) for more details.
196
- encoder_repetition_penalty (`float`, *optional*, defaults to 1.0):
196
+ encoder_repetition_penalty (`float`, *optional*):
197
197
  The parameter for encoder_repetition_penalty. An exponential penalty on sequences that are not in the
198
198
  original input. 1.0 means no penalty.
199
- length_penalty (`float`, *optional*, defaults to 1.0):
199
+ length_penalty (`float`, *optional*):
200
200
  Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
201
201
  the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
202
202
  likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
203
203
  `length_penalty` < 0.0 encourages shorter sequences.
204
- no_repeat_ngram_size (`int`, *optional*, defaults to 0):
204
+ no_repeat_ngram_size (`int`, *optional*):
205
205
  If set to int > 0, all ngrams of that size can only occur once.
206
206
  bad_words_ids (`list[list[int]]`, *optional*):
207
207
  List of list of token ids that are not allowed to be generated. Check
208
208
  [`~generation.NoBadWordsLogitsProcessor`] for further documentation and examples.
209
- renormalize_logits (`bool`, *optional*, defaults to `False`):
209
+ renormalize_logits (`bool`, defaults to `False`):
210
210
  Whether to renormalize the logits after applying all the logits processors (including the custom
211
211
  ones). It's highly recommended to set this flag to `True` as the search algorithms suppose the score logits
212
212
  are normalized but some logit processors break the normalization.
@@ -217,7 +217,7 @@ class GenerationConfig(PushToHubMixin):
217
217
  forced_eos_token_id (`int` or list[int]`, *optional*, defaults to `model.config.forced_eos_token_id`):
218
218
  The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a
219
219
  list to set multiple *end-of-sequence* tokens.
220
- remove_invalid_values (`bool`, *optional*, defaults to `model.config.remove_invalid_values`):
220
+ remove_invalid_values (`bool`, defaults to `model.config.remove_invalid_values`):
221
221
  Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash.
222
222
  Note that using `remove_invalid_values` can slow down generation.
223
223
  exponential_decay_length_penalty (`tuple(int, float)`, *optional*):
@@ -234,7 +234,7 @@ class GenerationConfig(PushToHubMixin):
234
234
  Dictionary that maps a sequence of tokens to its bias term. Positive biases increase the odds of the
235
235
  sequence being selected, while negative biases do the opposite. Check
236
236
  [`~generation.SequenceBiasLogitsProcessor`] for further documentation and examples.
237
- token_healing (`bool`, *optional*, defaults to `False`):
237
+ token_healing (`bool`, defaults to `False`):
238
238
  Heal tail tokens of prompts by replacing them with their appropriate extensions.
239
239
  This enhances the quality of completions for prompts affected by greedy tokenization bias.
240
240
  guidance_scale (`float`, *optional*):
@@ -250,18 +250,18 @@ class GenerationConfig(PushToHubMixin):
250
250
 
251
251
  num_return_sequences (`int`, *optional*, defaults to 1):
252
252
  The number of independently computed returned sequences for each element in the batch.
253
- output_attentions (`bool`, *optional*, defaults to `False`):
253
+ output_attentions (`bool`, defaults to `False`):
254
254
  Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
255
255
  tensors for more details.
256
- output_hidden_states (`bool`, *optional*, defaults to `False`):
256
+ output_hidden_states (`bool`, defaults to `False`):
257
257
  Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
258
258
  more details.
259
- output_scores (`bool`, *optional*, defaults to `False`):
259
+ output_scores (`bool`, defaults to `False`):
260
260
  Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
261
- output_logits (`bool`, *optional*):
261
+ output_logits (`bool`, defaults to `False`):
262
262
  Whether or not to return the unprocessed prediction logit scores. See `logits` under returned tensors for
263
263
  more details.
264
- return_dict_in_generate (`bool`, *optional*, defaults to `False`):
264
+ return_dict_in_generate (`bool`, defaults to `False`):
265
265
  Whether or not to return a [`~utils.ModelOutput`], as opposed to returning exclusively the generated
266
266
  sequence. This flag must be set to `True` to return the generation cache (when `use_cache` is `True`)
267
267
  or optional outputs (see flags starting with `output_`)
@@ -277,7 +277,7 @@ class GenerationConfig(PushToHubMixin):
277
277
 
278
278
  > Generation parameters exclusive to encoder-decoder models
279
279
 
280
- encoder_no_repeat_ngram_size (`int`, *optional*, defaults to 0):
280
+ encoder_no_repeat_ngram_size (`int`, *optional*):
281
281
  If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the
282
282
  `decoder_input_ids`.
283
283
  decoder_start_token_id (`int` or `list[int]`, *optional*):
@@ -286,20 +286,20 @@ class GenerationConfig(PushToHubMixin):
      (e.g. multilingual models with different target languages in one batch)

  > Generation parameters exclusive to assistant generation
- is_assistant (`bool`, *optional*, defaults to `False`):
+ is_assistant (`bool`, defaults to `False`):
      Whether the model is an assistant (draft) model.
- num_assistant_tokens (`int`, *optional*, defaults to 20):
+ num_assistant_tokens (`int`, *optional*):
      Defines the number of _speculative tokens_ that shall be generated by the assistant model before being
      checked by the target model at each iteration. Higher values for `num_assistant_tokens` make the generation
      more _speculative_: if the assistant model is performant, larger speed-ups can be reached; if the assistant
      model requires lots of corrections, lower speed-ups are reached.
- num_assistant_tokens_schedule (`str`, *optional*, defaults to `"constant"`):
+ num_assistant_tokens_schedule (`str`, *optional*):
      Defines the schedule at which the maximum number of assistant tokens shall be changed during inference.
      - `"heuristic"`: When all speculative tokens are correct, increase `num_assistant_tokens` by 2, else
        reduce by 1. The `num_assistant_tokens` value is persistent over multiple generation calls with the same assistant model.
      - `"heuristic_transient"`: Same as `"heuristic"` but `num_assistant_tokens` is reset to its initial value after each generation call.
      - `"constant"`: `num_assistant_tokens` stays unchanged during generation.
- assistant_confidence_threshold (`float`, *optional*, defaults to 0.4):
+ assistant_confidence_threshold (`float`, *optional*):
      The confidence threshold for the assistant model. If the assistant model's confidence in its prediction for the current token is lower
      than this threshold, the assistant model stops the current token generation iteration, even if the number of _speculative tokens_
      (defined by `num_assistant_tokens`) is not yet reached. The assistant's confidence threshold is adjusted throughout the speculative iterations to reduce the number of unnecessary draft and target forward passes, biased towards avoiding false negatives.
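Illustrative usage (editor's note): a hedged sketch of an assisted-generation setup using the parameters above; the values are arbitrary and for demonstration only.

```python
from transformers import GenerationConfig

# Draft 8 speculative tokens per iteration and let the heuristic schedule adapt that number.
assisted_config = GenerationConfig(
    num_assistant_tokens=8,
    num_assistant_tokens_schedule="heuristic",
    assistant_confidence_threshold=0.4,
)
```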
@@ -313,11 +313,11 @@ class GenerationConfig(PushToHubMixin):
  assistant_early_exit (`int`, *optional*):
      If set to a positive integer, early exit of the model will be used as an assistant. Can only be used with
      models that support early exit (i.e. models where logits from intermediate layers can be interpreted by the LM head).
- assistant_lookbehind (`int`, *optional*, defaults to 10):
+ assistant_lookbehind (`int`, *optional*):
      If set to a positive integer, the re-encoding process will additionally consider the last `assistant_lookbehind` assistant tokens
      to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
      See this [blog](https://huggingface.co/blog/universal_assisted_generation) for more details.
- target_lookbehind (`int`, *optional*, defaults to 10):
+ target_lookbehind (`int`, *optional*):
      If set to a positive integer, the re-encoding process will additionally consider the last `target_lookbehind` target tokens
      to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
      See this [blog](https://huggingface.co/blog/universal_assisted_generation) for more details.
@@ -327,7 +327,7 @@ class GenerationConfig(PushToHubMixin):
  compile_config (`CompileConfig`, *optional*):
      If using a compilable cache, this controls how `generate` will `compile` the forward pass for faster
      inference.
- disable_compile (`bool`, *optional*):
+ disable_compile (`bool`, defaults to `False`):
      Whether to disable the automatic compilation of the forward pass. Automatic compilation happens when
      specific criteria are met, including using a compilable cache. Please open an issue if you find the
      need to use this flag.
@@ -337,38 +337,36 @@ class GenerationConfig(PushToHubMixin):

  def __init__(self, **kwargs):
      # Parameters that control the length of the output
-     self.max_length = kwargs.pop("max_length", 20)
+     self.max_length = kwargs.pop("max_length", None)
      self.max_new_tokens = kwargs.pop("max_new_tokens", None)
-     self.min_length = kwargs.pop("min_length", 0)
+     self.min_length = kwargs.pop("min_length", None)
      self.min_new_tokens = kwargs.pop("min_new_tokens", None)
-     self.early_stopping = kwargs.pop("early_stopping", False)
+     self.early_stopping = kwargs.pop("early_stopping", None)
      self.max_time = kwargs.pop("max_time", None)
      self.stop_strings = kwargs.pop("stop_strings", None)

      # Parameters that control the generation strategy used
      self.do_sample = kwargs.pop("do_sample", False)
-     self.num_beams = kwargs.pop("num_beams", 1)
+     self.num_beams = kwargs.pop("num_beams", None)

      # Parameters that control the cache
      self.use_cache = kwargs.pop("use_cache", True)
      self.cache_implementation = kwargs.pop("cache_implementation", None)
      self.cache_config = kwargs.pop("cache_config", None)

-     self.prefill_chunk_size = kwargs.pop("prefill_chunk_size", None)
-
      # Parameters for manipulation of the model output logits
-     self.temperature = kwargs.pop("temperature", 1.0)
-     self.top_k = kwargs.pop("top_k", 50)
-     self.top_p = kwargs.pop("top_p", 1.0)
+     self.temperature = kwargs.pop("temperature", None)
+     self.top_k = kwargs.pop("top_k", None)
+     self.top_p = kwargs.pop("top_p", None)
      self.min_p = kwargs.pop("min_p", None)
      self.top_h = kwargs.pop("top_h", None)
-     self.typical_p = kwargs.pop("typical_p", 1.0)
-     self.epsilon_cutoff = kwargs.pop("epsilon_cutoff", 0.0)
-     self.eta_cutoff = kwargs.pop("eta_cutoff", 0.0)
-     self.repetition_penalty = kwargs.pop("repetition_penalty", 1.0)
-     self.encoder_repetition_penalty = kwargs.pop("encoder_repetition_penalty", 1.0)
-     self.length_penalty = kwargs.pop("length_penalty", 1.0)
-     self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", 0)
+     self.typical_p = kwargs.pop("typical_p", None)
+     self.epsilon_cutoff = kwargs.pop("epsilon_cutoff", None)
+     self.eta_cutoff = kwargs.pop("eta_cutoff", None)
+     self.repetition_penalty = kwargs.pop("repetition_penalty", None)
+     self.encoder_repetition_penalty = kwargs.pop("encoder_repetition_penalty", None)
+     self.length_penalty = kwargs.pop("length_penalty", None)
+     self.no_repeat_ngram_size = kwargs.pop("no_repeat_ngram_size", None)
      self.bad_words_ids = kwargs.pop("bad_words_ids", None)
      self.renormalize_logits = kwargs.pop("renormalize_logits", False)
      self.forced_bos_token_id = kwargs.pop("forced_bos_token_id", None)
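Editor's note: the net effect of this hunk is that `GenerationConfig()` no longer bakes hard-coded defaults into the instance; unset knobs stay `None`, so "never touched" is distinguishable from "explicitly set to the default". A quick sketch, assuming the rc2 behavior shown above:

```python
from transformers import GenerationConfig

config = GenerationConfig()
# Under rc1 this printed 20 1 1.0 50; under rc2 each attribute is None until set.
print(config.max_length, config.num_beams, config.temperature, config.top_k)
```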
@@ -381,20 +379,16 @@ class GenerationConfig(PushToHubMixin):
      self.token_healing = kwargs.pop("token_healing", False)
      self.guidance_scale = kwargs.pop("guidance_scale", None)

-     watermarking_config = kwargs.pop("watermarking_config", None)
-     if watermarking_config is None:
-         self.watermarking_config = None
-     elif isinstance(watermarking_config, BaseWatermarkingConfig):
-         self.watermarking_config = watermarking_config
-     else:
-         self.watermarking_config = WatermarkingConfig.from_dict(watermarking_config)
+     self.watermarking_config = kwargs.pop("watermarking_config", None)
+     if isinstance(self.watermarking_config, dict):
+         self.watermarking_config = WatermarkingConfig.from_dict(self.watermarking_config)

      # Parameters that define the output variables of `generate`
      self.num_return_sequences = kwargs.pop("num_return_sequences", 1)
      self.output_attentions = kwargs.pop("output_attentions", False)
      self.output_hidden_states = kwargs.pop("output_hidden_states", False)
      self.output_scores = kwargs.pop("output_scores", False)
-     self.output_logits = kwargs.pop("output_logits", None)
+     self.output_logits = kwargs.pop("output_logits", False)
      self.return_dict_in_generate = kwargs.pop("return_dict_in_generate", False)

      # Special tokens that can be used at generation time
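Editor's note: with this hunk, a plain dict is coerced into a `WatermarkingConfig`, while an already-constructed config object passes through untouched. A sketch (the field names follow `WatermarkingConfig` and are illustrative):

```python
from transformers import GenerationConfig

config = GenerationConfig(watermarking_config={"greenlist_ratio": 0.25, "bias": 2.0})
print(type(config.watermarking_config).__name__)  # WatermarkingConfig
```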
@@ -403,57 +397,57 @@ class GenerationConfig(PushToHubMixin):
      self.eos_token_id = kwargs.pop("eos_token_id", None)

      # Generation parameters exclusive to encoder-decoder models
-     self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", 0)
+     self.encoder_no_repeat_ngram_size = kwargs.pop("encoder_no_repeat_ngram_size", None)
      self.decoder_start_token_id = kwargs.pop("decoder_start_token_id", None)

      # Assistant generation
-     self.is_assistant = False
-     self.num_assistant_tokens = kwargs.pop("num_assistant_tokens", 20)
-     self.num_assistant_tokens_schedule = kwargs.pop("num_assistant_tokens_schedule", "constant")
-     self.assistant_confidence_threshold = kwargs.pop("assistant_confidence_threshold", 0.4)
+     self.is_assistant = kwargs.pop("is_assistant", False)
+     self.num_assistant_tokens = kwargs.pop("num_assistant_tokens", None)
+     self.num_assistant_tokens_schedule = kwargs.pop("num_assistant_tokens_schedule", None)
+     self.assistant_confidence_threshold = kwargs.pop("assistant_confidence_threshold", None)
      self.prompt_lookup_num_tokens = kwargs.pop("prompt_lookup_num_tokens", None)
      self.max_matching_ngram_size = kwargs.pop("max_matching_ngram_size", None)
      self.assistant_early_exit = kwargs.pop("assistant_early_exit", None)
-     ## assistant generation for different tokenizers, the window size for assistant/target model
-     self.assistant_lookbehind = kwargs.pop("assistant_lookbehind", 10)
-     self.target_lookbehind = kwargs.pop("target_lookbehind", 10)
+     self.assistant_lookbehind = kwargs.pop("assistant_lookbehind", None)
+     self.target_lookbehind = kwargs.pop("target_lookbehind", None)

      # Performance
      self.compile_config = kwargs.pop("compile_config", None)
      self.disable_compile = kwargs.pop("disable_compile", False)

-     # Deprecated (moved to the Hub). TODO joao, manuel: remove in v4.62.0
+     # Deprecated (moved to the Hub). TODO remove for v5
      self.low_memory = kwargs.pop("low_memory", None)
      self.penalty_alpha = kwargs.pop("penalty_alpha", None)
      self.dola_layers = kwargs.pop("dola_layers", None)
-     self.diversity_penalty = kwargs.pop("diversity_penalty", 0.0)
-     self.num_beam_groups = kwargs.pop("num_beam_groups", 1)
+     self.diversity_penalty = kwargs.pop("diversity_penalty", None)
+     self.num_beam_groups = kwargs.pop("num_beam_groups", None)
      self.constraints = kwargs.pop("constraints", None)
      self.force_words_ids = kwargs.pop("force_words_ids", None)

-     # The remaining attributes do not parametrize `.generate()`, but are informative and/or used by the hub
-     # interface.
-     self._from_model_config = kwargs.pop("_from_model_config", False)
-     self._commit_hash = kwargs.pop("_commit_hash", None)
-     self.transformers_version = kwargs.pop("transformers_version", __version__)
+     self.prefill_chunk_size = kwargs.pop("prefill_chunk_size", None)

-     # Ensure backward compatibility for models that use `forced_bos_token_id` within their config
-     if self._from_model_config and kwargs.get("force_bos_token_to_be_generated", False):
-         self.forced_bos_token_id = self.bos_token_id
-         logger.warning_once(
-             f"Please make sure the generation config includes `forced_bos_token_id={self.bos_token_id}`. "
-         )
+     # Common attributes
+     self._commit_hash = kwargs.pop("_commit_hash", None)
+     self._from_model_config = kwargs.pop("_from_model_config", None)
+     self.transformers_version = kwargs.pop("transformers_version", None)

      # Additional attributes without default values
      if not self._from_model_config:
-         # we don't want to copy values from the model config if we're initializing a `GenerationConfig` from a
-         # model's default configuration file
+         # we don't want to copy values from the model config if we're initializing
+         # a `GenerationConfig` from a model's default configuration file
          for key, value in kwargs.items():
              try:
                  setattr(self, key, value)
              except AttributeError as err:
                  logger.error(f"Can't set {key} with value {value} for {self}")
                  raise err
+     else:
+         # Ensure backward compatibility for models that use `forced_bos_token_id` within their config
+         if kwargs.get("force_bos_token_to_be_generated", False):
+             self.forced_bos_token_id = self.bos_token_id
+             logger.warning_once(
+                 f"Please make sure the generation config includes `forced_bos_token_id={self.bos_token_id}`. "
+             )

      # Validate the values of the attributes
      self.validate()
@@ -488,8 +482,8 @@ class GenerationConfig(PushToHubMixin):
      # property and part of the `__repr__`
      if self.constraints is not None or self.force_words_ids is not None:
          generation_mode = GenerationMode.CONSTRAINED_BEAM_SEARCH
-     elif self.num_beams == 1:
-         if self.do_sample is False:
+     elif self.num_beams is None or self.num_beams == 1:
+         if not self.do_sample:
              if (
                  self.top_k is not None
                  and self.top_k > 1
@@ -502,9 +496,9 @@ class GenerationConfig(PushToHubMixin):
          else:
              generation_mode = GenerationMode.SAMPLE
      else:
-         if self.num_beam_groups > 1:
+         if self.num_beam_groups is not None and self.num_beam_groups > 1:
              generation_mode = GenerationMode.GROUP_BEAM_SEARCH
-         elif self.do_sample is True:
+         elif self.do_sample:
              generation_mode = GenerationMode.BEAM_SAMPLE
          else:
              generation_mode = GenerationMode.BEAM_SEARCH
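Editor's note: the `None`-aware checks mean an unset `num_beams` now resolves exactly like `num_beams=1`. A sketch via the public `get_generation_mode()` accessor, assuming it wraps the logic in this hunk:

```python
from transformers import GenerationConfig

print(GenerationConfig().get_generation_mode())                # greedy; num_beams is None, treated as 1
print(GenerationConfig(do_sample=True).get_generation_mode())  # sample
print(GenerationConfig(num_beams=4).get_generation_mode())     # beam search
```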
@@ -537,6 +531,45 @@ class GenerationConfig(PushToHubMixin):
          )
      return generation_mode

+ @staticmethod
+ def _get_default_generation_params() -> dict[str, Any]:
+     return {
+         "max_length": 20,
+         "min_length": 0,
+         "do_sample": False,
+         "early_stopping": False,
+         "num_beams": 1,
+         "temperature": 1.0,
+         "top_k": 50,
+         "top_p": 1.0,
+         "typical_p": 1.0,
+         "repetition_penalty": 1.0,
+         "length_penalty": 1.0,
+         "no_repeat_ngram_size": 0,
+         "encoder_no_repeat_ngram_size": 0,
+         "bad_words_ids": None,
+         "num_return_sequences": 1,
+         "output_scores": False,
+         "return_dict_in_generate": False,
+         "forced_bos_token_id": None,
+         "forced_eos_token_id": None,
+         "remove_invalid_values": False,
+         "exponential_decay_length_penalty": None,
+         "suppress_tokens": None,
+         "begin_suppress_tokens": None,
+         "epsilon_cutoff": 0.0,
+         "eta_cutoff": 0.0,
+         "encoder_repetition_penalty": 1.0,
+         "num_assistant_tokens": 20,
+         "num_assistant_tokens_schedule": "constant",
+         "assistant_confidence_threshold": 0.4,
+         "assistant_lookbehind": 10,
+         "target_lookbehind": 10,
+         # Deprecated arguments (moved to the Hub). TODO joao, manuel: remove in v4.62.0
+         "num_beam_groups": 1,
+         "diversity_penalty": 0.0,
+     }
+
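Editor's note: this new private helper centralizes the legacy defaults that `__init__` no longer applies. A sketch of how it pairs with the `update(defaults_only=True)` change later in this diff (`_get_default_generation_params` is private and may change without notice):

```python
from transformers import GenerationConfig

config = GenerationConfig(do_sample=True, temperature=0.7)  # explicit values
config.update(defaults_only=True, **config._get_default_generation_params())
print(config.temperature, config.top_k)  # 0.7 50, only unset (None) fields were filled
```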

  def validate(self, strict=False):
      """
      Validates the values of the attributes of the [`GenerationConfig`] instance. Raises exceptions in the presence
@@ -552,7 +585,7 @@ class GenerationConfig(PushToHubMixin):

      # 1. Validation of individual attributes
      # 1.1. Decoding attributes
-     if self.early_stopping not in {True, False, "never"}:
+     if self.early_stopping not in {None, True, False, "never"}:
          raise ValueError(f"`early_stopping` must be a boolean or 'never', but is {self.early_stopping}.")
      if self.max_new_tokens is not None and self.max_new_tokens <= 0:
          raise ValueError(f"`max_new_tokens` must be greater than 0, but is {self.max_new_tokens}.")
@@ -583,9 +616,9 @@ class GenerationConfig(PushToHubMixin):

      # 2. Validation of attribute combinations
      # 2.1. detect sampling-only parameterization when not in sampling mode
-     if self.do_sample is False:
+     if not self.do_sample:
          greedy_wrong_parameter_msg = (
-             "`do_sample` is set to `False`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only "
+             "`do_sample` is not set to `True`. However, `{flag_name}` is set to `{flag_value}` -- this flag is only "
              "used in sample-based generation modes. You should set `do_sample=True` or unset `{flag_name}`."
          )
          if self.temperature is not None and self.temperature != 1.0:
@@ -614,42 +647,42 @@ class GenerationConfig(PushToHubMixin):
              )

      # 2.2. detect beam-only parameterization when not in beam mode
-     if self.num_beams == 1:
+     if self.num_beams is None or self.num_beams == 1:
          single_beam_wrong_parameter_msg = (
-             "`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
+             "`num_beams` is set to {num_beams}. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
              "in beam-based generation modes. You should set `num_beams>1` or unset `{flag_name}`."
          )
-         if self.early_stopping is not False:
+         if self.early_stopping is not None and self.early_stopping is not False:
              minor_issues["early_stopping"] = single_beam_wrong_parameter_msg.format(
-                 flag_name="early_stopping", flag_value=self.early_stopping
+                 num_beams=self.num_beams, flag_name="early_stopping", flag_value=self.early_stopping
              )
          if self.length_penalty is not None and self.length_penalty != 1.0:
              minor_issues["length_penalty"] = single_beam_wrong_parameter_msg.format(
-                 flag_name="length_penalty", flag_value=self.length_penalty
+                 num_beams=self.num_beams, flag_name="length_penalty", flag_value=self.length_penalty
              )

      # 2.4. check `num_return_sequences`
-     if self.num_return_sequences != 1:
-         if self.num_beams == 1:
-             if self.do_sample is False:
+     if self.num_return_sequences > 1:
+         if self.num_beams is None or self.num_beams == 1:
+             if not self.do_sample:
                  raise ValueError(
-                     "Greedy methods without beam search do not support `num_return_sequences` different than 1 "
-                     f"(got {self.num_return_sequences})."
+                     "Greedy methods (do_sample != True) without beam search do not support "
+                     f"`num_return_sequences` different than 1 (got {self.num_return_sequences})."
                  )
-         elif self.num_return_sequences > self.num_beams:
+         elif self.num_beams is not None and self.num_return_sequences > self.num_beams:
              raise ValueError(
                  f"`num_return_sequences` ({self.num_return_sequences}) has to be smaller or equal to `num_beams` "
                  f"({self.num_beams})."
              )

      # 2.5. check cache-related arguments
-     if self.use_cache is False:
+     if not self.use_cache:
          # In this case, all cache-related arguments should be unset. However, since `use_cache=False` is often
          # passed to `generate` directly to hot-fix cache issues, let's raise a warning instead of an error
          # (otherwise a user might need to overwrite several parameters).
          no_cache_warning = (
-             "You have set `use_cache` to `False`, but {cache_arg} is set to {cache_arg_value}. {cache_arg} will "
-             "have no effect."
+             "You have not set `use_cache` to `True`, but {cache_arg} is set to {cache_arg_value}. "
+             "{cache_arg} will have no effect."
          )
          for arg_name in ("cache_implementation", "cache_config"):
              if getattr(self, arg_name) is not None:
@@ -658,9 +691,9 @@ class GenerationConfig(PushToHubMixin):
                  )

      # 2.6. other incorrect combinations
-     if self.return_dict_in_generate is not True:
+     if not self.return_dict_in_generate:
          for extra_output_flag in self.extra_output_flags:
-             if getattr(self, extra_output_flag) is True:
+             if getattr(self, extra_output_flag):
                  minor_issues[extra_output_flag] = (
                      f"`return_dict_in_generate` is NOT set to `True`, but `{extra_output_flag}` is. When "
                      f"`return_dict_in_generate` is not `True`, `{extra_output_flag}` is ignored."
@@ -676,7 +709,6 @@ class GenerationConfig(PushToHubMixin):
          "streamer",
          "negative_prompt_ids",
          "negative_prompt_attention_mask",
-         "use_model_defaults",
      )
      for arg in generate_arguments:
          if hasattr(self, arg):
@@ -1101,7 +1133,7 @@ class GenerationConfig(PushToHubMixin):
          writer.write(self.to_json_string(use_diff=use_diff, keys_to_pop=keys_to_pop))

  @classmethod
- def from_model_config(cls, model_config: PreTrainedConfig | dict) -> "GenerationConfig":
+ def from_model_config(cls, model_config: Union["PreTrainedConfig", dict]) -> "GenerationConfig":
      """
      Instantiates a [`GenerationConfig`] from a [`PreTrainedConfig`]. This function is useful to convert legacy
      [`PreTrainedConfig`] objects, which may contain generation parameters, into a stand-alone [`GenerationConfig`].
@@ -1118,23 +1150,28 @@ class GenerationConfig(PushToHubMixin):

      # Removes all `None` values from the model config dict -- this lets the generation config defaults take hold
      config_dict = {key: value for key, value in config_dict.items() if value is not None}
-
      generation_config = cls.from_dict(config_dict, return_unused_kwargs=False, _from_model_config=True)

      # Special case: some models have generation attributes set in the decoder. Use them if still unset in the
      # generation config (which in turn is defined from the outer attributes of model config).
-     if not isinstance(model_config, dict):
-         decoder_config = model_config.get_text_config(decoder=True)
-         if decoder_config is not model_config:
-             default_generation_config = GenerationConfig()
-             decoder_config_dict = decoder_config.to_dict()
-             for attr in generation_config.to_dict():
-                 is_unset = getattr(generation_config, attr) == getattr(default_generation_config, attr)
-                 if attr in decoder_config_dict and is_unset:
-                     setattr(generation_config, attr, decoder_config_dict[attr])
+     if isinstance(model_config, dict):
+         decoder_possible_text_config_names = ("decoder", "generator", "text_config")
+         for text_config_name in decoder_possible_text_config_names:
+             if text_config := model_config.get(text_config_name):
+                 model_config = text_config
+                 break
+     else:
+         model_config = model_config.get_text_config(decoder=True)
+         model_config = model_config.to_dict()
+
+     default_generation_config = GenerationConfig()
+     for attr in generation_config.to_dict():
+         is_unset = getattr(generation_config, attr) == getattr(default_generation_config, attr)
+         if attr in model_config and is_unset:
+             setattr(generation_config, attr, model_config[attr])

      # If any `output_...` flag is set to `True`, we ensure `return_dict_in_generate` is set to `True`.
-     if generation_config.return_dict_in_generate is False:
+     if not generation_config.return_dict_in_generate:
          if any(
              getattr(generation_config, extra_output_flag, False)
              for extra_output_flag in generation_config.extra_output_flags
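Editor's note: `from_model_config` now also accepts a plain dict and drills into nested `decoder`/`generator`/`text_config` blocks. A sketch with a hypothetical config dict (key values invented for illustration):

```python
from transformers import GenerationConfig

model_config = {"text_config": {"max_length": 128, "eos_token_id": 2}}
gen_config = GenerationConfig.from_model_config(model_config)
print(gen_config.max_length, gen_config.eos_token_id)  # 128 2
```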
@@ -1145,12 +1182,14 @@ class GenerationConfig(PushToHubMixin):
      generation_config._original_object_hash = hash(generation_config)
      return generation_config

- def update(self, **kwargs):
+ def update(self, defaults_only=False, **kwargs):
      """
      Updates attributes of this class instance with attributes from `kwargs` if they match existing attributes,
      returning all the unused kwargs.

      Args:
+         defaults_only (`bool`, *optional*, defaults to `False`):
+             Whether to update all matching keys with `kwargs`, or only those that are still `None` (i.e. at their default value).
          kwargs (`dict[str, Any]`):
              Dictionary of attributes to tentatively update this class.

@@ -1160,8 +1199,9 @@ class GenerationConfig(PushToHubMixin):
      to_remove = []
      for key, value in kwargs.items():
          if hasattr(self, key):
-             setattr(self, key, value)
-             to_remove.append(key)
+             if not defaults_only or getattr(self, key) is None:
+                 setattr(self, key, value)
+                 to_remove.append(key)

      # Confirm that the updated instance is still valid
      self.validate()
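Editor's note: a sketch of the new `defaults_only` switch: attributes that were explicitly set survive, and only `None` sentinels are overwritten.

```python
from transformers import GenerationConfig

config = GenerationConfig(do_sample=True, temperature=0.7)
config.update(defaults_only=True, temperature=1.0, top_k=50)
print(config.temperature, config.top_k)  # 0.7 50
```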