transformers 5.0.0rc1__py3-none-any.whl → 5.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1584) hide show
  1. transformers/__init__.py +27 -27
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +32 -33
  4. transformers/cache_utils.py +32 -139
  5. transformers/cli/chat.py +3 -3
  6. transformers/cli/serve.py +2 -2
  7. transformers/cli/transformers.py +2 -1
  8. transformers/configuration_utils.py +143 -101
  9. transformers/conversion_mapping.py +73 -6
  10. transformers/convert_slow_tokenizer.py +3 -8
  11. transformers/core_model_loading.py +215 -50
  12. transformers/data/processors/glue.py +0 -1
  13. transformers/data/processors/utils.py +0 -1
  14. transformers/data/processors/xnli.py +0 -1
  15. transformers/dependency_versions_table.py +5 -5
  16. transformers/distributed/configuration_utils.py +1 -2
  17. transformers/dynamic_module_utils.py +23 -23
  18. transformers/feature_extraction_sequence_utils.py +19 -23
  19. transformers/feature_extraction_utils.py +63 -31
  20. transformers/generation/candidate_generator.py +80 -33
  21. transformers/generation/configuration_utils.py +186 -131
  22. transformers/generation/continuous_batching/__init__.py +0 -1
  23. transformers/generation/continuous_batching/cache.py +81 -24
  24. transformers/generation/continuous_batching/cache_manager.py +155 -45
  25. transformers/generation/continuous_batching/continuous_api.py +152 -84
  26. transformers/generation/continuous_batching/requests.py +51 -3
  27. transformers/generation/continuous_batching/scheduler.py +127 -52
  28. transformers/generation/logits_process.py +0 -128
  29. transformers/generation/stopping_criteria.py +1 -1
  30. transformers/generation/streamers.py +0 -1
  31. transformers/generation/utils.py +107 -119
  32. transformers/generation/watermarking.py +8 -6
  33. transformers/hf_argparser.py +9 -13
  34. transformers/hyperparameter_search.py +1 -2
  35. transformers/image_processing_base.py +11 -21
  36. transformers/image_processing_utils.py +11 -12
  37. transformers/image_processing_utils_fast.py +68 -57
  38. transformers/image_transforms.py +29 -29
  39. transformers/image_utils.py +30 -32
  40. transformers/initialization.py +37 -0
  41. transformers/integrations/__init__.py +12 -0
  42. transformers/integrations/accelerate.py +44 -111
  43. transformers/integrations/aqlm.py +3 -5
  44. transformers/integrations/awq.py +3 -8
  45. transformers/integrations/bitnet.py +5 -8
  46. transformers/integrations/bitsandbytes.py +16 -15
  47. transformers/integrations/deepspeed.py +19 -4
  48. transformers/integrations/eetq.py +3 -6
  49. transformers/integrations/fbgemm_fp8.py +2 -3
  50. transformers/integrations/finegrained_fp8.py +14 -23
  51. transformers/integrations/flash_attention.py +2 -2
  52. transformers/integrations/flex_attention.py +1 -1
  53. transformers/integrations/fp_quant.py +4 -6
  54. transformers/integrations/ggml.py +0 -1
  55. transformers/integrations/higgs.py +2 -5
  56. transformers/integrations/hub_kernels.py +23 -5
  57. transformers/integrations/integration_utils.py +37 -3
  58. transformers/integrations/mistral.py +12 -0
  59. transformers/integrations/moe.py +240 -0
  60. transformers/integrations/mxfp4.py +9 -16
  61. transformers/integrations/peft.py +5 -0
  62. transformers/integrations/quanto.py +5 -2
  63. transformers/integrations/quark.py +2 -4
  64. transformers/integrations/spqr.py +3 -5
  65. transformers/integrations/tensor_parallel.py +167 -221
  66. transformers/integrations/torchao.py +4 -6
  67. transformers/integrations/vptq.py +3 -5
  68. transformers/loss/loss_lw_detr.py +356 -0
  69. transformers/loss/loss_utils.py +2 -0
  70. transformers/masking_utils.py +47 -51
  71. transformers/model_debugging_utils.py +4 -5
  72. transformers/modelcard.py +14 -192
  73. transformers/modeling_attn_mask_utils.py +19 -19
  74. transformers/modeling_flash_attention_utils.py +27 -27
  75. transformers/modeling_gguf_pytorch_utils.py +71 -24
  76. transformers/modeling_layers.py +21 -22
  77. transformers/modeling_outputs.py +242 -253
  78. transformers/modeling_rope_utils.py +110 -113
  79. transformers/modeling_utils.py +633 -576
  80. transformers/models/__init__.py +23 -0
  81. transformers/models/afmoe/configuration_afmoe.py +26 -29
  82. transformers/models/afmoe/modeling_afmoe.py +37 -49
  83. transformers/models/afmoe/modular_afmoe.py +21 -31
  84. transformers/models/aimv2/configuration_aimv2.py +2 -5
  85. transformers/models/aimv2/modeling_aimv2.py +24 -21
  86. transformers/models/aimv2/modular_aimv2.py +11 -9
  87. transformers/models/albert/configuration_albert.py +0 -1
  88. transformers/models/albert/modeling_albert.py +70 -69
  89. transformers/models/albert/tokenization_albert.py +1 -4
  90. transformers/models/align/configuration_align.py +0 -1
  91. transformers/models/align/modeling_align.py +73 -68
  92. transformers/models/align/processing_align.py +2 -30
  93. transformers/models/altclip/configuration_altclip.py +0 -1
  94. transformers/models/altclip/modeling_altclip.py +83 -80
  95. transformers/models/altclip/processing_altclip.py +2 -15
  96. transformers/models/apertus/__init__.py +0 -1
  97. transformers/models/apertus/configuration_apertus.py +18 -21
  98. transformers/models/apertus/modeling_apertus.py +35 -36
  99. transformers/models/apertus/modular_apertus.py +32 -31
  100. transformers/models/arcee/configuration_arcee.py +20 -23
  101. transformers/models/arcee/modeling_arcee.py +32 -35
  102. transformers/models/arcee/modular_arcee.py +20 -23
  103. transformers/models/aria/configuration_aria.py +20 -23
  104. transformers/models/aria/image_processing_aria.py +25 -27
  105. transformers/models/aria/modeling_aria.py +71 -70
  106. transformers/models/aria/modular_aria.py +85 -88
  107. transformers/models/aria/processing_aria.py +28 -35
  108. transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -1
  109. transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +3 -6
  110. transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +6 -8
  111. transformers/models/audioflamingo3/__init__.py +0 -1
  112. transformers/models/audioflamingo3/configuration_audioflamingo3.py +0 -1
  113. transformers/models/audioflamingo3/modeling_audioflamingo3.py +22 -23
  114. transformers/models/audioflamingo3/modular_audioflamingo3.py +12 -17
  115. transformers/models/audioflamingo3/processing_audioflamingo3.py +33 -30
  116. transformers/models/auto/auto_factory.py +5 -6
  117. transformers/models/auto/configuration_auto.py +53 -5
  118. transformers/models/auto/feature_extraction_auto.py +12 -10
  119. transformers/models/auto/image_processing_auto.py +17 -28
  120. transformers/models/auto/modeling_auto.py +38 -188
  121. transformers/models/auto/processing_auto.py +6 -1
  122. transformers/models/auto/tokenization_auto.py +147 -169
  123. transformers/models/auto/video_processing_auto.py +12 -10
  124. transformers/models/autoformer/configuration_autoformer.py +4 -7
  125. transformers/models/autoformer/modeling_autoformer.py +98 -100
  126. transformers/models/aya_vision/configuration_aya_vision.py +0 -1
  127. transformers/models/aya_vision/modeling_aya_vision.py +42 -40
  128. transformers/models/aya_vision/modular_aya_vision.py +26 -29
  129. transformers/models/aya_vision/processing_aya_vision.py +25 -53
  130. transformers/models/bamba/configuration_bamba.py +29 -32
  131. transformers/models/bamba/modeling_bamba.py +78 -83
  132. transformers/models/bamba/modular_bamba.py +68 -71
  133. transformers/models/bark/configuration_bark.py +4 -7
  134. transformers/models/bark/generation_configuration_bark.py +3 -5
  135. transformers/models/bark/modeling_bark.py +49 -55
  136. transformers/models/bark/processing_bark.py +19 -41
  137. transformers/models/bart/configuration_bart.py +0 -2
  138. transformers/models/bart/modeling_bart.py +122 -117
  139. transformers/models/barthez/tokenization_barthez.py +1 -4
  140. transformers/models/bartpho/tokenization_bartpho.py +6 -7
  141. transformers/models/beit/configuration_beit.py +0 -11
  142. transformers/models/beit/image_processing_beit.py +53 -56
  143. transformers/models/beit/image_processing_beit_fast.py +8 -10
  144. transformers/models/beit/modeling_beit.py +51 -53
  145. transformers/models/bert/configuration_bert.py +0 -1
  146. transformers/models/bert/modeling_bert.py +114 -122
  147. transformers/models/bert/tokenization_bert.py +2 -4
  148. transformers/models/bert/tokenization_bert_legacy.py +3 -5
  149. transformers/models/bert_generation/configuration_bert_generation.py +0 -1
  150. transformers/models/bert_generation/modeling_bert_generation.py +49 -49
  151. transformers/models/bert_generation/tokenization_bert_generation.py +2 -3
  152. transformers/models/bert_japanese/tokenization_bert_japanese.py +5 -6
  153. transformers/models/bertweet/tokenization_bertweet.py +1 -3
  154. transformers/models/big_bird/configuration_big_bird.py +0 -1
  155. transformers/models/big_bird/modeling_big_bird.py +110 -109
  156. transformers/models/big_bird/tokenization_big_bird.py +1 -4
  157. transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +0 -1
  158. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +116 -111
  159. transformers/models/biogpt/configuration_biogpt.py +0 -1
  160. transformers/models/biogpt/modeling_biogpt.py +69 -71
  161. transformers/models/biogpt/modular_biogpt.py +59 -61
  162. transformers/models/biogpt/tokenization_biogpt.py +3 -5
  163. transformers/models/bit/configuration_bit.py +0 -1
  164. transformers/models/bit/image_processing_bit.py +21 -24
  165. transformers/models/bit/image_processing_bit_fast.py +0 -1
  166. transformers/models/bit/modeling_bit.py +14 -12
  167. transformers/models/bitnet/configuration_bitnet.py +18 -21
  168. transformers/models/bitnet/modeling_bitnet.py +32 -35
  169. transformers/models/bitnet/modular_bitnet.py +4 -6
  170. transformers/models/blenderbot/configuration_blenderbot.py +0 -1
  171. transformers/models/blenderbot/modeling_blenderbot.py +71 -95
  172. transformers/models/blenderbot/tokenization_blenderbot.py +6 -8
  173. transformers/models/blenderbot_small/configuration_blenderbot_small.py +0 -1
  174. transformers/models/blenderbot_small/modeling_blenderbot_small.py +73 -68
  175. transformers/models/blenderbot_small/tokenization_blenderbot_small.py +1 -3
  176. transformers/models/blip/configuration_blip.py +0 -1
  177. transformers/models/blip/image_processing_blip.py +17 -20
  178. transformers/models/blip/image_processing_blip_fast.py +0 -1
  179. transformers/models/blip/modeling_blip.py +62 -71
  180. transformers/models/blip/modeling_blip_text.py +71 -65
  181. transformers/models/blip/processing_blip.py +5 -36
  182. transformers/models/blip_2/configuration_blip_2.py +0 -1
  183. transformers/models/blip_2/modeling_blip_2.py +72 -71
  184. transformers/models/blip_2/processing_blip_2.py +8 -38
  185. transformers/models/bloom/configuration_bloom.py +0 -1
  186. transformers/models/bloom/modeling_bloom.py +71 -103
  187. transformers/models/blt/configuration_blt.py +71 -74
  188. transformers/models/blt/modeling_blt.py +235 -78
  189. transformers/models/blt/modular_blt.py +225 -62
  190. transformers/models/bridgetower/configuration_bridgetower.py +0 -1
  191. transformers/models/bridgetower/image_processing_bridgetower.py +34 -35
  192. transformers/models/bridgetower/image_processing_bridgetower_fast.py +7 -10
  193. transformers/models/bridgetower/modeling_bridgetower.py +113 -109
  194. transformers/models/bridgetower/processing_bridgetower.py +2 -16
  195. transformers/models/bros/configuration_bros.py +0 -1
  196. transformers/models/bros/modeling_bros.py +86 -80
  197. transformers/models/bros/processing_bros.py +2 -12
  198. transformers/models/byt5/tokenization_byt5.py +4 -6
  199. transformers/models/camembert/configuration_camembert.py +0 -1
  200. transformers/models/camembert/modeling_camembert.py +196 -195
  201. transformers/models/camembert/modular_camembert.py +51 -54
  202. transformers/models/camembert/tokenization_camembert.py +1 -4
  203. transformers/models/canine/configuration_canine.py +0 -1
  204. transformers/models/canine/modeling_canine.py +79 -75
  205. transformers/models/canine/tokenization_canine.py +2 -1
  206. transformers/models/chameleon/configuration_chameleon.py +24 -27
  207. transformers/models/chameleon/image_processing_chameleon.py +21 -24
  208. transformers/models/chameleon/image_processing_chameleon_fast.py +0 -1
  209. transformers/models/chameleon/modeling_chameleon.py +62 -60
  210. transformers/models/chameleon/processing_chameleon.py +16 -41
  211. transformers/models/chinese_clip/configuration_chinese_clip.py +0 -1
  212. transformers/models/chinese_clip/image_processing_chinese_clip.py +21 -24
  213. transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +0 -1
  214. transformers/models/chinese_clip/modeling_chinese_clip.py +71 -69
  215. transformers/models/chinese_clip/processing_chinese_clip.py +2 -15
  216. transformers/models/clap/configuration_clap.py +0 -1
  217. transformers/models/clap/feature_extraction_clap.py +11 -12
  218. transformers/models/clap/modeling_clap.py +113 -104
  219. transformers/models/clap/processing_clap.py +2 -15
  220. transformers/models/clip/configuration_clip.py +0 -1
  221. transformers/models/clip/image_processing_clip.py +21 -24
  222. transformers/models/clip/image_processing_clip_fast.py +0 -1
  223. transformers/models/clip/modeling_clip.py +47 -46
  224. transformers/models/clip/processing_clip.py +2 -14
  225. transformers/models/clip/tokenization_clip.py +2 -5
  226. transformers/models/clipseg/configuration_clipseg.py +0 -1
  227. transformers/models/clipseg/modeling_clipseg.py +90 -87
  228. transformers/models/clipseg/processing_clipseg.py +8 -39
  229. transformers/models/clvp/configuration_clvp.py +1 -3
  230. transformers/models/clvp/feature_extraction_clvp.py +7 -10
  231. transformers/models/clvp/modeling_clvp.py +133 -118
  232. transformers/models/clvp/number_normalizer.py +1 -2
  233. transformers/models/clvp/processing_clvp.py +3 -20
  234. transformers/models/clvp/tokenization_clvp.py +0 -1
  235. transformers/models/code_llama/tokenization_code_llama.py +4 -7
  236. transformers/models/codegen/configuration_codegen.py +0 -1
  237. transformers/models/codegen/modeling_codegen.py +61 -52
  238. transformers/models/codegen/tokenization_codegen.py +5 -6
  239. transformers/models/cohere/configuration_cohere.py +20 -23
  240. transformers/models/cohere/modeling_cohere.py +36 -39
  241. transformers/models/cohere/modular_cohere.py +24 -28
  242. transformers/models/cohere/tokenization_cohere.py +5 -6
  243. transformers/models/cohere2/configuration_cohere2.py +21 -24
  244. transformers/models/cohere2/modeling_cohere2.py +35 -38
  245. transformers/models/cohere2/modular_cohere2.py +39 -41
  246. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +6 -8
  247. transformers/models/cohere2_vision/modeling_cohere2_vision.py +35 -33
  248. transformers/models/cohere2_vision/modular_cohere2_vision.py +21 -23
  249. transformers/models/cohere2_vision/processing_cohere2_vision.py +6 -36
  250. transformers/models/colpali/configuration_colpali.py +0 -1
  251. transformers/models/colpali/modeling_colpali.py +14 -16
  252. transformers/models/colpali/modular_colpali.py +11 -51
  253. transformers/models/colpali/processing_colpali.py +14 -52
  254. transformers/models/colqwen2/modeling_colqwen2.py +20 -22
  255. transformers/models/colqwen2/modular_colqwen2.py +29 -68
  256. transformers/models/colqwen2/processing_colqwen2.py +16 -52
  257. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -2
  258. transformers/models/conditional_detr/image_processing_conditional_detr.py +64 -66
  259. transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +22 -22
  260. transformers/models/conditional_detr/modeling_conditional_detr.py +82 -81
  261. transformers/models/conditional_detr/modular_conditional_detr.py +1 -3
  262. transformers/models/convbert/configuration_convbert.py +0 -1
  263. transformers/models/convbert/modeling_convbert.py +88 -87
  264. transformers/models/convbert/tokenization_convbert.py +0 -1
  265. transformers/models/convnext/configuration_convnext.py +0 -1
  266. transformers/models/convnext/image_processing_convnext.py +20 -23
  267. transformers/models/convnext/image_processing_convnext_fast.py +14 -19
  268. transformers/models/convnext/modeling_convnext.py +5 -8
  269. transformers/models/convnextv2/configuration_convnextv2.py +0 -1
  270. transformers/models/convnextv2/modeling_convnextv2.py +5 -8
  271. transformers/models/cpm/tokenization_cpm.py +6 -7
  272. transformers/models/cpm/tokenization_cpm_fast.py +3 -5
  273. transformers/models/cpmant/configuration_cpmant.py +0 -1
  274. transformers/models/cpmant/modeling_cpmant.py +38 -40
  275. transformers/models/cpmant/tokenization_cpmant.py +1 -3
  276. transformers/models/csm/configuration_csm.py +49 -51
  277. transformers/models/csm/generation_csm.py +31 -35
  278. transformers/models/csm/modeling_csm.py +81 -82
  279. transformers/models/csm/modular_csm.py +58 -58
  280. transformers/models/csm/processing_csm.py +25 -68
  281. transformers/models/ctrl/configuration_ctrl.py +0 -1
  282. transformers/models/ctrl/modeling_ctrl.py +52 -43
  283. transformers/models/ctrl/tokenization_ctrl.py +0 -1
  284. transformers/models/cvt/configuration_cvt.py +0 -1
  285. transformers/models/cvt/modeling_cvt.py +18 -16
  286. transformers/models/cwm/__init__.py +0 -1
  287. transformers/models/cwm/configuration_cwm.py +3 -5
  288. transformers/models/cwm/modeling_cwm.py +33 -35
  289. transformers/models/cwm/modular_cwm.py +10 -12
  290. transformers/models/d_fine/configuration_d_fine.py +3 -5
  291. transformers/models/d_fine/modeling_d_fine.py +127 -121
  292. transformers/models/d_fine/modular_d_fine.py +23 -13
  293. transformers/models/dab_detr/configuration_dab_detr.py +2 -3
  294. transformers/models/dab_detr/modeling_dab_detr.py +69 -71
  295. transformers/models/dac/configuration_dac.py +0 -1
  296. transformers/models/dac/feature_extraction_dac.py +6 -9
  297. transformers/models/dac/modeling_dac.py +21 -23
  298. transformers/models/data2vec/configuration_data2vec_audio.py +0 -1
  299. transformers/models/data2vec/configuration_data2vec_text.py +0 -1
  300. transformers/models/data2vec/configuration_data2vec_vision.py +0 -1
  301. transformers/models/data2vec/modeling_data2vec_audio.py +52 -56
  302. transformers/models/data2vec/modeling_data2vec_text.py +98 -93
  303. transformers/models/data2vec/modeling_data2vec_vision.py +41 -42
  304. transformers/models/data2vec/modular_data2vec_audio.py +6 -1
  305. transformers/models/data2vec/modular_data2vec_text.py +58 -54
  306. transformers/models/dbrx/configuration_dbrx.py +27 -20
  307. transformers/models/dbrx/modeling_dbrx.py +40 -43
  308. transformers/models/dbrx/modular_dbrx.py +31 -33
  309. transformers/models/deberta/configuration_deberta.py +0 -1
  310. transformers/models/deberta/modeling_deberta.py +59 -60
  311. transformers/models/deberta/tokenization_deberta.py +2 -5
  312. transformers/models/deberta_v2/configuration_deberta_v2.py +0 -1
  313. transformers/models/deberta_v2/modeling_deberta_v2.py +65 -65
  314. transformers/models/deberta_v2/tokenization_deberta_v2.py +1 -4
  315. transformers/models/decision_transformer/configuration_decision_transformer.py +0 -1
  316. transformers/models/decision_transformer/modeling_decision_transformer.py +56 -55
  317. transformers/models/deepseek_v2/configuration_deepseek_v2.py +34 -37
  318. transformers/models/deepseek_v2/modeling_deepseek_v2.py +39 -37
  319. transformers/models/deepseek_v2/modular_deepseek_v2.py +44 -44
  320. transformers/models/deepseek_v3/configuration_deepseek_v3.py +35 -38
  321. transformers/models/deepseek_v3/modeling_deepseek_v3.py +40 -38
  322. transformers/models/deepseek_v3/modular_deepseek_v3.py +10 -7
  323. transformers/models/deepseek_vl/configuration_deepseek_vl.py +2 -3
  324. transformers/models/deepseek_vl/image_processing_deepseek_vl.py +25 -26
  325. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +7 -7
  326. transformers/models/deepseek_vl/modeling_deepseek_vl.py +40 -36
  327. transformers/models/deepseek_vl/modular_deepseek_vl.py +14 -43
  328. transformers/models/deepseek_vl/processing_deepseek_vl.py +10 -41
  329. transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +3 -5
  330. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +35 -35
  331. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +16 -20
  332. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +42 -38
  333. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +80 -99
  334. transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +12 -44
  335. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -3
  336. transformers/models/deformable_detr/image_processing_deformable_detr.py +59 -61
  337. transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +17 -17
  338. transformers/models/deformable_detr/modeling_deformable_detr.py +67 -68
  339. transformers/models/deformable_detr/modular_deformable_detr.py +1 -3
  340. transformers/models/deit/configuration_deit.py +0 -1
  341. transformers/models/deit/image_processing_deit.py +18 -21
  342. transformers/models/deit/image_processing_deit_fast.py +0 -1
  343. transformers/models/deit/modeling_deit.py +16 -18
  344. transformers/models/depth_anything/configuration_depth_anything.py +2 -4
  345. transformers/models/depth_anything/modeling_depth_anything.py +5 -8
  346. transformers/models/depth_pro/configuration_depth_pro.py +0 -1
  347. transformers/models/depth_pro/image_processing_depth_pro.py +22 -23
  348. transformers/models/depth_pro/image_processing_depth_pro_fast.py +6 -8
  349. transformers/models/depth_pro/modeling_depth_pro.py +21 -23
  350. transformers/models/detr/configuration_detr.py +1 -2
  351. transformers/models/detr/image_processing_detr.py +64 -66
  352. transformers/models/detr/image_processing_detr_fast.py +22 -23
  353. transformers/models/detr/modeling_detr.py +78 -73
  354. transformers/models/dia/configuration_dia.py +5 -8
  355. transformers/models/dia/feature_extraction_dia.py +6 -9
  356. transformers/models/dia/generation_dia.py +42 -45
  357. transformers/models/dia/modeling_dia.py +73 -65
  358. transformers/models/dia/modular_dia.py +63 -54
  359. transformers/models/dia/processing_dia.py +39 -29
  360. transformers/models/dia/tokenization_dia.py +3 -6
  361. transformers/models/diffllama/configuration_diffllama.py +20 -23
  362. transformers/models/diffllama/modeling_diffllama.py +44 -47
  363. transformers/models/diffllama/modular_diffllama.py +17 -19
  364. transformers/models/dinat/configuration_dinat.py +0 -1
  365. transformers/models/dinat/modeling_dinat.py +40 -42
  366. transformers/models/dinov2/configuration_dinov2.py +0 -1
  367. transformers/models/dinov2/modeling_dinov2.py +11 -13
  368. transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +1 -1
  369. transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +12 -13
  370. transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +5 -7
  371. transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +4 -7
  372. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +3 -6
  373. transformers/models/dinov3_vit/configuration_dinov3_vit.py +5 -8
  374. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +5 -7
  375. transformers/models/dinov3_vit/modeling_dinov3_vit.py +17 -16
  376. transformers/models/dinov3_vit/modular_dinov3_vit.py +14 -13
  377. transformers/models/distilbert/configuration_distilbert.py +0 -1
  378. transformers/models/distilbert/modeling_distilbert.py +55 -55
  379. transformers/models/distilbert/tokenization_distilbert.py +0 -1
  380. transformers/models/doge/__init__.py +0 -1
  381. transformers/models/doge/configuration_doge.py +25 -28
  382. transformers/models/doge/modeling_doge.py +43 -46
  383. transformers/models/doge/modular_doge.py +57 -58
  384. transformers/models/donut/configuration_donut_swin.py +0 -1
  385. transformers/models/donut/image_processing_donut.py +26 -29
  386. transformers/models/donut/image_processing_donut_fast.py +5 -11
  387. transformers/models/donut/modeling_donut_swin.py +60 -58
  388. transformers/models/donut/processing_donut.py +5 -26
  389. transformers/models/dots1/configuration_dots1.py +27 -29
  390. transformers/models/dots1/modeling_dots1.py +45 -39
  391. transformers/models/dots1/modular_dots1.py +0 -1
  392. transformers/models/dpr/configuration_dpr.py +0 -1
  393. transformers/models/dpr/modeling_dpr.py +37 -39
  394. transformers/models/dpr/tokenization_dpr.py +7 -9
  395. transformers/models/dpr/tokenization_dpr_fast.py +7 -9
  396. transformers/models/dpt/configuration_dpt.py +1 -2
  397. transformers/models/dpt/image_processing_dpt.py +65 -66
  398. transformers/models/dpt/image_processing_dpt_fast.py +14 -16
  399. transformers/models/dpt/modeling_dpt.py +19 -21
  400. transformers/models/dpt/modular_dpt.py +11 -13
  401. transformers/models/edgetam/configuration_edgetam.py +1 -2
  402. transformers/models/edgetam/modeling_edgetam.py +44 -43
  403. transformers/models/edgetam/modular_edgetam.py +17 -20
  404. transformers/models/edgetam_video/__init__.py +0 -1
  405. transformers/models/edgetam_video/configuration_edgetam_video.py +0 -1
  406. transformers/models/edgetam_video/modeling_edgetam_video.py +131 -120
  407. transformers/models/edgetam_video/modular_edgetam_video.py +29 -37
  408. transformers/models/efficientloftr/configuration_efficientloftr.py +4 -5
  409. transformers/models/efficientloftr/image_processing_efficientloftr.py +14 -16
  410. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +5 -6
  411. transformers/models/efficientloftr/modeling_efficientloftr.py +41 -30
  412. transformers/models/efficientloftr/modular_efficientloftr.py +1 -3
  413. transformers/models/efficientnet/configuration_efficientnet.py +0 -1
  414. transformers/models/efficientnet/image_processing_efficientnet.py +28 -32
  415. transformers/models/efficientnet/image_processing_efficientnet_fast.py +15 -17
  416. transformers/models/efficientnet/modeling_efficientnet.py +17 -15
  417. transformers/models/electra/configuration_electra.py +0 -1
  418. transformers/models/electra/modeling_electra.py +108 -103
  419. transformers/models/emu3/configuration_emu3.py +5 -7
  420. transformers/models/emu3/image_processing_emu3.py +44 -39
  421. transformers/models/emu3/modeling_emu3.py +67 -64
  422. transformers/models/emu3/modular_emu3.py +39 -35
  423. transformers/models/emu3/processing_emu3.py +18 -43
  424. transformers/models/encodec/configuration_encodec.py +2 -4
  425. transformers/models/encodec/feature_extraction_encodec.py +10 -13
  426. transformers/models/encodec/modeling_encodec.py +39 -29
  427. transformers/models/encoder_decoder/configuration_encoder_decoder.py +0 -1
  428. transformers/models/encoder_decoder/modeling_encoder_decoder.py +17 -19
  429. transformers/models/eomt/configuration_eomt.py +0 -1
  430. transformers/models/eomt/image_processing_eomt.py +53 -55
  431. transformers/models/eomt/image_processing_eomt_fast.py +59 -28
  432. transformers/models/eomt/modeling_eomt.py +23 -18
  433. transformers/models/eomt/modular_eomt.py +18 -13
  434. transformers/models/ernie/configuration_ernie.py +0 -1
  435. transformers/models/ernie/modeling_ernie.py +127 -132
  436. transformers/models/ernie/modular_ernie.py +97 -103
  437. transformers/models/ernie4_5/configuration_ernie4_5.py +18 -20
  438. transformers/models/ernie4_5/modeling_ernie4_5.py +32 -34
  439. transformers/models/ernie4_5/modular_ernie4_5.py +1 -3
  440. transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +27 -29
  441. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +52 -51
  442. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +16 -44
  443. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  444. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +329 -0
  445. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +455 -0
  446. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +231 -0
  447. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1895 -0
  448. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1901 -0
  449. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +249 -0
  450. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +593 -0
  451. transformers/models/esm/configuration_esm.py +2 -4
  452. transformers/models/esm/modeling_esm.py +38 -34
  453. transformers/models/esm/modeling_esmfold.py +48 -45
  454. transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
  455. transformers/models/esm/openfold_utils/loss.py +1 -2
  456. transformers/models/esm/openfold_utils/protein.py +13 -13
  457. transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
  458. transformers/models/esm/tokenization_esm.py +2 -4
  459. transformers/models/evolla/configuration_evolla.py +29 -32
  460. transformers/models/evolla/modeling_evolla.py +67 -62
  461. transformers/models/evolla/modular_evolla.py +53 -47
  462. transformers/models/evolla/processing_evolla.py +23 -35
  463. transformers/models/exaone4/configuration_exaone4.py +19 -22
  464. transformers/models/exaone4/modeling_exaone4.py +33 -36
  465. transformers/models/exaone4/modular_exaone4.py +40 -42
  466. transformers/models/falcon/configuration_falcon.py +22 -25
  467. transformers/models/falcon/modeling_falcon.py +75 -78
  468. transformers/models/falcon_h1/configuration_falcon_h1.py +40 -43
  469. transformers/models/falcon_h1/modeling_falcon_h1.py +80 -78
  470. transformers/models/falcon_h1/modular_falcon_h1.py +54 -50
  471. transformers/models/falcon_mamba/configuration_falcon_mamba.py +0 -1
  472. transformers/models/falcon_mamba/modeling_falcon_mamba.py +50 -47
  473. transformers/models/falcon_mamba/modular_falcon_mamba.py +16 -14
  474. transformers/models/fast_vlm/configuration_fast_vlm.py +1 -0
  475. transformers/models/fast_vlm/modeling_fast_vlm.py +43 -39
  476. transformers/models/fast_vlm/modular_fast_vlm.py +2 -3
  477. transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +2 -5
  478. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +68 -57
  479. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +2 -3
  480. transformers/models/flaubert/configuration_flaubert.py +0 -1
  481. transformers/models/flaubert/modeling_flaubert.py +138 -143
  482. transformers/models/flaubert/tokenization_flaubert.py +3 -5
  483. transformers/models/flava/configuration_flava.py +5 -6
  484. transformers/models/flava/image_processing_flava.py +66 -67
  485. transformers/models/flava/image_processing_flava_fast.py +42 -45
  486. transformers/models/flava/modeling_flava.py +111 -107
  487. transformers/models/flava/processing_flava.py +2 -12
  488. transformers/models/flex_olmo/__init__.py +0 -1
  489. transformers/models/flex_olmo/configuration_flex_olmo.py +23 -25
  490. transformers/models/flex_olmo/modeling_flex_olmo.py +44 -43
  491. transformers/models/flex_olmo/modular_flex_olmo.py +35 -37
  492. transformers/models/florence2/configuration_florence2.py +0 -1
  493. transformers/models/florence2/modeling_florence2.py +59 -43
  494. transformers/models/florence2/modular_florence2.py +65 -81
  495. transformers/models/florence2/processing_florence2.py +18 -47
  496. transformers/models/fnet/configuration_fnet.py +0 -1
  497. transformers/models/fnet/modeling_fnet.py +76 -80
  498. transformers/models/fnet/tokenization_fnet.py +0 -1
  499. transformers/models/focalnet/configuration_focalnet.py +0 -1
  500. transformers/models/focalnet/modeling_focalnet.py +39 -41
  501. transformers/models/fsmt/configuration_fsmt.py +0 -1
  502. transformers/models/fsmt/modeling_fsmt.py +47 -48
  503. transformers/models/fsmt/tokenization_fsmt.py +3 -5
  504. transformers/models/funnel/configuration_funnel.py +0 -1
  505. transformers/models/funnel/modeling_funnel.py +91 -93
  506. transformers/models/funnel/tokenization_funnel.py +2 -5
  507. transformers/models/fuyu/configuration_fuyu.py +23 -26
  508. transformers/models/fuyu/image_processing_fuyu.py +29 -31
  509. transformers/models/fuyu/image_processing_fuyu_fast.py +12 -13
  510. transformers/models/fuyu/modeling_fuyu.py +29 -30
  511. transformers/models/fuyu/processing_fuyu.py +23 -34
  512. transformers/models/gemma/configuration_gemma.py +20 -23
  513. transformers/models/gemma/modeling_gemma.py +42 -46
  514. transformers/models/gemma/modular_gemma.py +37 -40
  515. transformers/models/gemma/tokenization_gemma.py +3 -6
  516. transformers/models/gemma2/configuration_gemma2.py +25 -28
  517. transformers/models/gemma2/modeling_gemma2.py +35 -38
  518. transformers/models/gemma2/modular_gemma2.py +56 -58
  519. transformers/models/gemma3/configuration_gemma3.py +28 -29
  520. transformers/models/gemma3/image_processing_gemma3.py +29 -31
  521. transformers/models/gemma3/image_processing_gemma3_fast.py +9 -11
  522. transformers/models/gemma3/modeling_gemma3.py +112 -94
  523. transformers/models/gemma3/modular_gemma3.py +110 -91
  524. transformers/models/gemma3/processing_gemma3.py +5 -5
  525. transformers/models/gemma3n/configuration_gemma3n.py +12 -10
  526. transformers/models/gemma3n/feature_extraction_gemma3n.py +9 -11
  527. transformers/models/gemma3n/modeling_gemma3n.py +127 -98
  528. transformers/models/gemma3n/modular_gemma3n.py +117 -84
  529. transformers/models/gemma3n/processing_gemma3n.py +12 -26
  530. transformers/models/git/configuration_git.py +0 -1
  531. transformers/models/git/modeling_git.py +250 -197
  532. transformers/models/git/processing_git.py +2 -14
  533. transformers/models/glm/configuration_glm.py +19 -21
  534. transformers/models/glm/modeling_glm.py +33 -36
  535. transformers/models/glm/modular_glm.py +4 -7
  536. transformers/models/glm4/configuration_glm4.py +19 -21
  537. transformers/models/glm4/modeling_glm4.py +36 -38
  538. transformers/models/glm4/modular_glm4.py +8 -10
  539. transformers/models/glm46v/configuration_glm46v.py +0 -1
  540. transformers/models/glm46v/image_processing_glm46v.py +35 -40
  541. transformers/models/glm46v/image_processing_glm46v_fast.py +7 -7
  542. transformers/models/glm46v/modeling_glm46v.py +54 -52
  543. transformers/models/glm46v/modular_glm46v.py +4 -3
  544. transformers/models/glm46v/processing_glm46v.py +7 -41
  545. transformers/models/glm46v/video_processing_glm46v.py +9 -11
  546. transformers/models/glm4_moe/configuration_glm4_moe.py +25 -28
  547. transformers/models/glm4_moe/modeling_glm4_moe.py +41 -40
  548. transformers/models/glm4_moe/modular_glm4_moe.py +27 -30
  549. transformers/models/glm4_moe_lite/__init__.py +28 -0
  550. transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +235 -0
  551. transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +740 -0
  552. transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +304 -0
  553. transformers/models/glm4v/configuration_glm4v.py +14 -17
  554. transformers/models/glm4v/image_processing_glm4v.py +34 -40
  555. transformers/models/glm4v/image_processing_glm4v_fast.py +6 -7
  556. transformers/models/glm4v/modeling_glm4v.py +148 -156
  557. transformers/models/glm4v/modular_glm4v.py +142 -185
  558. transformers/models/glm4v/processing_glm4v.py +7 -41
  559. transformers/models/glm4v/video_processing_glm4v.py +9 -11
  560. transformers/models/glm4v_moe/configuration_glm4v_moe.py +119 -122
  561. transformers/models/glm4v_moe/modeling_glm4v_moe.py +275 -319
  562. transformers/models/glm4v_moe/modular_glm4v_moe.py +66 -163
  563. transformers/models/glm_image/__init__.py +31 -0
  564. transformers/models/glm_image/configuration_glm_image.py +352 -0
  565. transformers/models/glm_image/image_processing_glm_image.py +503 -0
  566. transformers/models/glm_image/image_processing_glm_image_fast.py +296 -0
  567. transformers/models/glm_image/modeling_glm_image.py +1590 -0
  568. transformers/models/glm_image/modular_glm_image.py +1480 -0
  569. transformers/models/glm_image/processing_glm_image.py +217 -0
  570. transformers/models/glmasr/__init__.py +29 -0
  571. transformers/models/glmasr/configuration_glmasr.py +196 -0
  572. transformers/models/glmasr/modeling_glmasr.py +511 -0
  573. transformers/models/glmasr/modular_glmasr.py +431 -0
  574. transformers/models/glmasr/processing_glmasr.py +331 -0
  575. transformers/models/glpn/configuration_glpn.py +0 -1
  576. transformers/models/glpn/image_processing_glpn.py +11 -12
  577. transformers/models/glpn/image_processing_glpn_fast.py +8 -10
  578. transformers/models/glpn/modeling_glpn.py +10 -12
  579. transformers/models/got_ocr2/configuration_got_ocr2.py +5 -8
  580. transformers/models/got_ocr2/image_processing_got_ocr2.py +22 -24
  581. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +6 -8
  582. transformers/models/got_ocr2/modeling_got_ocr2.py +48 -45
  583. transformers/models/got_ocr2/modular_got_ocr2.py +31 -34
  584. transformers/models/got_ocr2/processing_got_ocr2.py +42 -63
  585. transformers/models/gpt2/configuration_gpt2.py +0 -1
  586. transformers/models/gpt2/modeling_gpt2.py +114 -113
  587. transformers/models/gpt2/tokenization_gpt2.py +6 -9
  588. transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +0 -1
  589. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +76 -88
  590. transformers/models/gpt_neo/configuration_gpt_neo.py +0 -1
  591. transformers/models/gpt_neo/modeling_gpt_neo.py +77 -66
  592. transformers/models/gpt_neox/configuration_gpt_neox.py +19 -22
  593. transformers/models/gpt_neox/modeling_gpt_neox.py +71 -73
  594. transformers/models/gpt_neox/modular_gpt_neox.py +64 -66
  595. transformers/models/gpt_neox/tokenization_gpt_neox.py +2 -5
  596. transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +15 -18
  597. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +42 -45
  598. transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +1 -3
  599. transformers/models/gpt_oss/configuration_gpt_oss.py +38 -24
  600. transformers/models/gpt_oss/modeling_gpt_oss.py +40 -44
  601. transformers/models/gpt_oss/modular_gpt_oss.py +22 -26
  602. transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
  603. transformers/models/gptj/configuration_gptj.py +0 -1
  604. transformers/models/gptj/modeling_gptj.py +96 -86
  605. transformers/models/granite/configuration_granite.py +23 -26
  606. transformers/models/granite/modeling_granite.py +40 -42
  607. transformers/models/granite/modular_granite.py +29 -31
  608. transformers/models/granite_speech/configuration_granite_speech.py +0 -1
  609. transformers/models/granite_speech/feature_extraction_granite_speech.py +1 -3
  610. transformers/models/granite_speech/modeling_granite_speech.py +36 -24
  611. transformers/models/granite_speech/processing_granite_speech.py +11 -4
  612. transformers/models/granitemoe/configuration_granitemoe.py +26 -29
  613. transformers/models/granitemoe/modeling_granitemoe.py +37 -40
  614. transformers/models/granitemoe/modular_granitemoe.py +22 -25
  615. transformers/models/granitemoehybrid/__init__.py +0 -1
  616. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +41 -40
  617. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +92 -86
  618. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +29 -21
  619. transformers/models/granitemoeshared/configuration_granitemoeshared.py +27 -30
  620. transformers/models/granitemoeshared/modeling_granitemoeshared.py +50 -55
  621. transformers/models/granitemoeshared/modular_granitemoeshared.py +19 -21
  622. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -4
  623. transformers/models/grounding_dino/image_processing_grounding_dino.py +60 -62
  624. transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +17 -18
  625. transformers/models/grounding_dino/modeling_grounding_dino.py +95 -97
  626. transformers/models/grounding_dino/modular_grounding_dino.py +2 -3
  627. transformers/models/grounding_dino/processing_grounding_dino.py +10 -38
  628. transformers/models/groupvit/configuration_groupvit.py +0 -1
  629. transformers/models/groupvit/modeling_groupvit.py +75 -71
  630. transformers/models/helium/configuration_helium.py +20 -22
  631. transformers/models/helium/modeling_helium.py +34 -37
  632. transformers/models/helium/modular_helium.py +3 -7
  633. transformers/models/herbert/tokenization_herbert.py +4 -6
  634. transformers/models/hgnet_v2/configuration_hgnet_v2.py +0 -1
  635. transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -9
  636. transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -9
  637. transformers/models/hiera/configuration_hiera.py +0 -1
  638. transformers/models/hiera/modeling_hiera.py +60 -62
  639. transformers/models/hubert/configuration_hubert.py +0 -1
  640. transformers/models/hubert/modeling_hubert.py +39 -37
  641. transformers/models/hubert/modular_hubert.py +12 -11
  642. transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +21 -24
  643. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +31 -34
  644. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +4 -6
  645. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  646. transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +25 -28
  647. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +44 -39
  648. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +9 -9
  649. transformers/models/ibert/configuration_ibert.py +0 -1
  650. transformers/models/ibert/modeling_ibert.py +76 -62
  651. transformers/models/ibert/quant_modules.py +0 -1
  652. transformers/models/idefics/configuration_idefics.py +0 -1
  653. transformers/models/idefics/image_processing_idefics.py +13 -15
  654. transformers/models/idefics/modeling_idefics.py +70 -61
  655. transformers/models/idefics/perceiver.py +1 -3
  656. transformers/models/idefics/processing_idefics.py +32 -48
  657. transformers/models/idefics/vision.py +22 -24
  658. transformers/models/idefics2/configuration_idefics2.py +0 -1
  659. transformers/models/idefics2/image_processing_idefics2.py +31 -32
  660. transformers/models/idefics2/image_processing_idefics2_fast.py +7 -8
  661. transformers/models/idefics2/modeling_idefics2.py +63 -59
  662. transformers/models/idefics2/processing_idefics2.py +10 -68
  663. transformers/models/idefics3/configuration_idefics3.py +0 -1
  664. transformers/models/idefics3/image_processing_idefics3.py +42 -43
  665. transformers/models/idefics3/image_processing_idefics3_fast.py +11 -12
  666. transformers/models/idefics3/modeling_idefics3.py +57 -55
  667. transformers/models/idefics3/processing_idefics3.py +15 -69
  668. transformers/models/ijepa/configuration_ijepa.py +0 -1
  669. transformers/models/ijepa/modeling_ijepa.py +10 -11
  670. transformers/models/ijepa/modular_ijepa.py +5 -7
  671. transformers/models/imagegpt/configuration_imagegpt.py +0 -1
  672. transformers/models/imagegpt/image_processing_imagegpt.py +17 -18
  673. transformers/models/imagegpt/image_processing_imagegpt_fast.py +9 -14
  674. transformers/models/imagegpt/modeling_imagegpt.py +66 -60
  675. transformers/models/informer/configuration_informer.py +6 -9
  676. transformers/models/informer/modeling_informer.py +84 -86
  677. transformers/models/informer/modular_informer.py +13 -16
  678. transformers/models/instructblip/configuration_instructblip.py +0 -1
  679. transformers/models/instructblip/modeling_instructblip.py +45 -44
  680. transformers/models/instructblip/processing_instructblip.py +10 -36
  681. transformers/models/instructblipvideo/configuration_instructblipvideo.py +0 -1
  682. transformers/models/instructblipvideo/modeling_instructblipvideo.py +107 -105
  683. transformers/models/instructblipvideo/modular_instructblipvideo.py +34 -36
  684. transformers/models/instructblipvideo/processing_instructblipvideo.py +14 -33
  685. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +4 -6
  686. transformers/models/internvl/configuration_internvl.py +0 -1
  687. transformers/models/internvl/modeling_internvl.py +52 -51
  688. transformers/models/internvl/modular_internvl.py +24 -30
  689. transformers/models/internvl/processing_internvl.py +12 -45
  690. transformers/models/internvl/video_processing_internvl.py +8 -10
  691. transformers/models/jais2/__init__.py +27 -0
  692. transformers/models/jais2/configuration_jais2.py +150 -0
  693. transformers/models/jais2/modeling_jais2.py +484 -0
  694. transformers/models/jais2/modular_jais2.py +194 -0
  695. transformers/models/jamba/configuration_jamba.py +0 -1
  696. transformers/models/jamba/modeling_jamba.py +67 -65
  697. transformers/models/jamba/modular_jamba.py +54 -55
  698. transformers/models/janus/configuration_janus.py +0 -1
  699. transformers/models/janus/image_processing_janus.py +35 -37
  700. transformers/models/janus/image_processing_janus_fast.py +12 -14
  701. transformers/models/janus/modeling_janus.py +56 -50
  702. transformers/models/janus/modular_janus.py +76 -70
  703. transformers/models/janus/processing_janus.py +17 -43
  704. transformers/models/jetmoe/configuration_jetmoe.py +20 -23
  705. transformers/models/jetmoe/modeling_jetmoe.py +41 -44
  706. transformers/models/jetmoe/modular_jetmoe.py +31 -33
  707. transformers/models/kosmos2/configuration_kosmos2.py +0 -1
  708. transformers/models/kosmos2/modeling_kosmos2.py +159 -148
  709. transformers/models/kosmos2/processing_kosmos2.py +40 -55
  710. transformers/models/kosmos2_5/__init__.py +0 -1
  711. transformers/models/kosmos2_5/configuration_kosmos2_5.py +0 -1
  712. transformers/models/kosmos2_5/image_processing_kosmos2_5.py +10 -12
  713. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +4 -13
  714. transformers/models/kosmos2_5/modeling_kosmos2_5.py +118 -110
  715. transformers/models/kosmos2_5/processing_kosmos2_5.py +8 -29
  716. transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +23 -25
  717. transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +12 -14
  718. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +67 -68
  719. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +28 -22
  720. transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +2 -8
  721. transformers/models/lasr/configuration_lasr.py +5 -3
  722. transformers/models/lasr/feature_extraction_lasr.py +10 -12
  723. transformers/models/lasr/modeling_lasr.py +21 -23
  724. transformers/models/lasr/modular_lasr.py +16 -11
  725. transformers/models/lasr/processing_lasr.py +12 -8
  726. transformers/models/lasr/tokenization_lasr.py +2 -4
  727. transformers/models/layoutlm/configuration_layoutlm.py +0 -1
  728. transformers/models/layoutlm/modeling_layoutlm.py +72 -72
  729. transformers/models/layoutlmv2/configuration_layoutlmv2.py +0 -1
  730. transformers/models/layoutlmv2/image_processing_layoutlmv2.py +18 -21
  731. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +5 -7
  732. transformers/models/layoutlmv2/modeling_layoutlmv2.py +60 -50
  733. transformers/models/layoutlmv2/processing_layoutlmv2.py +14 -44
  734. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +64 -74
  735. transformers/models/layoutlmv3/configuration_layoutlmv3.py +0 -1
  736. transformers/models/layoutlmv3/image_processing_layoutlmv3.py +24 -26
  737. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +7 -9
  738. transformers/models/layoutlmv3/modeling_layoutlmv3.py +78 -56
  739. transformers/models/layoutlmv3/processing_layoutlmv3.py +14 -46
  740. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +64 -75
  741. transformers/models/layoutxlm/configuration_layoutxlm.py +0 -1
  742. transformers/models/layoutxlm/modular_layoutxlm.py +0 -1
  743. transformers/models/layoutxlm/processing_layoutxlm.py +14 -44
  744. transformers/models/layoutxlm/tokenization_layoutxlm.py +65 -76
  745. transformers/models/led/configuration_led.py +1 -4
  746. transformers/models/led/modeling_led.py +119 -267
  747. transformers/models/levit/configuration_levit.py +0 -1
  748. transformers/models/levit/image_processing_levit.py +19 -21
  749. transformers/models/levit/image_processing_levit_fast.py +0 -1
  750. transformers/models/levit/modeling_levit.py +35 -19
  751. transformers/models/lfm2/configuration_lfm2.py +22 -23
  752. transformers/models/lfm2/modeling_lfm2.py +43 -45
  753. transformers/models/lfm2/modular_lfm2.py +29 -29
  754. transformers/models/lfm2_moe/__init__.py +0 -1
  755. transformers/models/lfm2_moe/configuration_lfm2_moe.py +1 -2
  756. transformers/models/lfm2_moe/modeling_lfm2_moe.py +58 -49
  757. transformers/models/lfm2_moe/modular_lfm2_moe.py +13 -37
  758. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -1
  759. transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +34 -5
  760. transformers/models/lfm2_vl/modeling_lfm2_vl.py +42 -38
  761. transformers/models/lfm2_vl/modular_lfm2_vl.py +28 -29
  762. transformers/models/lfm2_vl/processing_lfm2_vl.py +96 -76
  763. transformers/models/lightglue/image_processing_lightglue.py +16 -15
  764. transformers/models/lightglue/image_processing_lightglue_fast.py +5 -6
  765. transformers/models/lightglue/modeling_lightglue.py +28 -30
  766. transformers/models/lightglue/modular_lightglue.py +28 -28
  767. transformers/models/lighton_ocr/__init__.py +28 -0
  768. transformers/models/lighton_ocr/configuration_lighton_ocr.py +128 -0
  769. transformers/models/lighton_ocr/modeling_lighton_ocr.py +460 -0
  770. transformers/models/lighton_ocr/modular_lighton_ocr.py +403 -0
  771. transformers/models/lighton_ocr/processing_lighton_ocr.py +229 -0
  772. transformers/models/lilt/configuration_lilt.py +0 -1
  773. transformers/models/lilt/modeling_lilt.py +72 -70
  774. transformers/models/llama/configuration_llama.py +21 -24
  775. transformers/models/llama/modeling_llama.py +32 -35
  776. transformers/models/llama/tokenization_llama.py +2 -4
  777. transformers/models/llama4/configuration_llama4.py +20 -22
  778. transformers/models/llama4/image_processing_llama4_fast.py +9 -11
  779. transformers/models/llama4/modeling_llama4.py +78 -75
  780. transformers/models/llama4/processing_llama4.py +33 -57
  781. transformers/models/llava/configuration_llava.py +0 -1
  782. transformers/models/llava/image_processing_llava.py +25 -28
  783. transformers/models/llava/image_processing_llava_fast.py +6 -8
  784. transformers/models/llava/modeling_llava.py +47 -44
  785. transformers/models/llava/processing_llava.py +18 -51
  786. transformers/models/llava_next/configuration_llava_next.py +0 -1
  787. transformers/models/llava_next/image_processing_llava_next.py +43 -45
  788. transformers/models/llava_next/image_processing_llava_next_fast.py +5 -7
  789. transformers/models/llava_next/modeling_llava_next.py +49 -47
  790. transformers/models/llava_next/processing_llava_next.py +18 -47
  791. transformers/models/llava_next_video/configuration_llava_next_video.py +0 -1
  792. transformers/models/llava_next_video/modeling_llava_next_video.py +60 -58
  793. transformers/models/llava_next_video/modular_llava_next_video.py +51 -49
  794. transformers/models/llava_next_video/processing_llava_next_video.py +21 -63
  795. transformers/models/llava_next_video/video_processing_llava_next_video.py +0 -1
  796. transformers/models/llava_onevision/configuration_llava_onevision.py +0 -1
  797. transformers/models/llava_onevision/image_processing_llava_onevision.py +40 -42
  798. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +6 -8
  799. transformers/models/llava_onevision/modeling_llava_onevision.py +67 -65
  800. transformers/models/llava_onevision/modular_llava_onevision.py +58 -56
  801. transformers/models/llava_onevision/processing_llava_onevision.py +21 -53
  802. transformers/models/llava_onevision/video_processing_llava_onevision.py +0 -1
  803. transformers/models/longcat_flash/__init__.py +0 -1
  804. transformers/models/longcat_flash/configuration_longcat_flash.py +32 -35
  805. transformers/models/longcat_flash/modeling_longcat_flash.py +32 -32
  806. transformers/models/longcat_flash/modular_longcat_flash.py +18 -19
  807. transformers/models/longformer/configuration_longformer.py +1 -4
  808. transformers/models/longformer/modeling_longformer.py +99 -101
  809. transformers/models/longt5/configuration_longt5.py +0 -1
  810. transformers/models/longt5/modeling_longt5.py +43 -48
  811. transformers/models/luke/configuration_luke.py +0 -1
  812. transformers/models/luke/modeling_luke.py +179 -181
  813. transformers/models/luke/tokenization_luke.py +99 -105
  814. transformers/models/lw_detr/__init__.py +27 -0
  815. transformers/models/lw_detr/configuration_lw_detr.py +374 -0
  816. transformers/models/lw_detr/modeling_lw_detr.py +1698 -0
  817. transformers/models/lw_detr/modular_lw_detr.py +1611 -0
  818. transformers/models/lxmert/configuration_lxmert.py +0 -1
  819. transformers/models/lxmert/modeling_lxmert.py +63 -74
  820. transformers/models/m2m_100/configuration_m2m_100.py +0 -1
  821. transformers/models/m2m_100/modeling_m2m_100.py +79 -71
  822. transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
  823. transformers/models/mamba/configuration_mamba.py +0 -1
  824. transformers/models/mamba/modeling_mamba.py +44 -44
  825. transformers/models/mamba2/configuration_mamba2.py +0 -1
  826. transformers/models/mamba2/modeling_mamba2.py +67 -68
  827. transformers/models/marian/configuration_marian.py +1 -2
  828. transformers/models/marian/modeling_marian.py +87 -86
  829. transformers/models/marian/tokenization_marian.py +6 -6
  830. transformers/models/markuplm/configuration_markuplm.py +0 -1
  831. transformers/models/markuplm/feature_extraction_markuplm.py +1 -2
  832. transformers/models/markuplm/modeling_markuplm.py +65 -70
  833. transformers/models/markuplm/processing_markuplm.py +31 -38
  834. transformers/models/markuplm/tokenization_markuplm.py +67 -77
  835. transformers/models/mask2former/configuration_mask2former.py +5 -8
  836. transformers/models/mask2former/image_processing_mask2former.py +84 -85
  837. transformers/models/mask2former/image_processing_mask2former_fast.py +30 -33
  838. transformers/models/mask2former/modeling_mask2former.py +99 -92
  839. transformers/models/mask2former/modular_mask2former.py +6 -8
  840. transformers/models/maskformer/configuration_maskformer.py +6 -9
  841. transformers/models/maskformer/configuration_maskformer_swin.py +0 -1
  842. transformers/models/maskformer/image_processing_maskformer.py +84 -85
  843. transformers/models/maskformer/image_processing_maskformer_fast.py +29 -33
  844. transformers/models/maskformer/modeling_maskformer.py +65 -59
  845. transformers/models/maskformer/modeling_maskformer_swin.py +34 -32
  846. transformers/models/mbart/configuration_mbart.py +1 -1
  847. transformers/models/mbart/modeling_mbart.py +118 -113
  848. transformers/models/mbart/tokenization_mbart.py +2 -4
  849. transformers/models/mbart50/tokenization_mbart50.py +3 -5
  850. transformers/models/megatron_bert/configuration_megatron_bert.py +0 -1
  851. transformers/models/megatron_bert/modeling_megatron_bert.py +141 -150
  852. transformers/models/metaclip_2/modeling_metaclip_2.py +48 -46
  853. transformers/models/metaclip_2/modular_metaclip_2.py +21 -21
  854. transformers/models/mgp_str/configuration_mgp_str.py +0 -1
  855. transformers/models/mgp_str/modeling_mgp_str.py +14 -16
  856. transformers/models/mgp_str/processing_mgp_str.py +3 -20
  857. transformers/models/mgp_str/tokenization_mgp_str.py +1 -3
  858. transformers/models/mimi/configuration_mimi.py +38 -40
  859. transformers/models/mimi/modeling_mimi.py +100 -82
  860. transformers/models/minimax/__init__.py +0 -1
  861. transformers/models/minimax/configuration_minimax.py +32 -36
  862. transformers/models/minimax/modeling_minimax.py +57 -47
  863. transformers/models/minimax/modular_minimax.py +62 -54
  864. transformers/models/minimax_m2/__init__.py +28 -0
  865. transformers/models/minimax_m2/configuration_minimax_m2.py +211 -0
  866. transformers/models/minimax_m2/modeling_minimax_m2.py +704 -0
  867. transformers/models/minimax_m2/modular_minimax_m2.py +369 -0
  868. transformers/models/ministral/configuration_ministral.py +20 -22
  869. transformers/models/ministral/modeling_ministral.py +32 -34
  870. transformers/models/ministral/modular_ministral.py +27 -29
  871. transformers/models/ministral3/configuration_ministral3.py +19 -22
  872. transformers/models/ministral3/modeling_ministral3.py +32 -34
  873. transformers/models/ministral3/modular_ministral3.py +4 -5
  874. transformers/models/mistral/configuration_mistral.py +19 -22
  875. transformers/models/mistral/modeling_mistral.py +32 -34
  876. transformers/models/mistral/modular_mistral.py +11 -12
  877. transformers/models/mistral3/configuration_mistral3.py +0 -1
  878. transformers/models/mistral3/modeling_mistral3.py +53 -46
  879. transformers/models/mistral3/modular_mistral3.py +38 -36
  880. transformers/models/mixtral/configuration_mixtral.py +24 -27
  881. transformers/models/mixtral/modeling_mixtral.py +47 -42
  882. transformers/models/mixtral/modular_mixtral.py +32 -31
  883. transformers/models/mlcd/configuration_mlcd.py +0 -1
  884. transformers/models/mlcd/modeling_mlcd.py +16 -12
  885. transformers/models/mlcd/modular_mlcd.py +13 -11
  886. transformers/models/mllama/configuration_mllama.py +5 -8
  887. transformers/models/mllama/image_processing_mllama.py +23 -25
  888. transformers/models/mllama/image_processing_mllama_fast.py +5 -6
  889. transformers/models/mllama/modeling_mllama.py +94 -86
  890. transformers/models/mllama/processing_mllama.py +6 -55
  891. transformers/models/mluke/tokenization_mluke.py +97 -103
  892. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -3
  893. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +95 -97
  894. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -3
  895. transformers/models/mobilebert/configuration_mobilebert.py +0 -1
  896. transformers/models/mobilebert/modeling_mobilebert.py +77 -85
  897. transformers/models/mobilebert/tokenization_mobilebert.py +0 -1
  898. transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +0 -1
  899. transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +20 -23
  900. transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +0 -1
  901. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +13 -16
  902. transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +0 -1
  903. transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +48 -51
  904. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +10 -12
  905. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +17 -20
  906. transformers/models/mobilevit/configuration_mobilevit.py +0 -1
  907. transformers/models/mobilevit/image_processing_mobilevit.py +46 -49
  908. transformers/models/mobilevit/image_processing_mobilevit_fast.py +9 -11
  909. transformers/models/mobilevit/modeling_mobilevit.py +21 -19
  910. transformers/models/mobilevitv2/configuration_mobilevitv2.py +0 -1
  911. transformers/models/mobilevitv2/modeling_mobilevitv2.py +21 -20
  912. transformers/models/modernbert/configuration_modernbert.py +34 -34
  913. transformers/models/modernbert/modeling_modernbert.py +135 -126
  914. transformers/models/modernbert/modular_modernbert.py +167 -156
  915. transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +30 -32
  916. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +54 -48
  917. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +78 -71
  918. transformers/models/moonshine/configuration_moonshine.py +22 -24
  919. transformers/models/moonshine/modeling_moonshine.py +64 -66
  920. transformers/models/moonshine/modular_moonshine.py +72 -73
  921. transformers/models/moshi/configuration_moshi.py +18 -21
  922. transformers/models/moshi/modeling_moshi.py +150 -183
  923. transformers/models/mpnet/configuration_mpnet.py +0 -1
  924. transformers/models/mpnet/modeling_mpnet.py +57 -57
  925. transformers/models/mpnet/tokenization_mpnet.py +1 -4
  926. transformers/models/mpt/configuration_mpt.py +1 -9
  927. transformers/models/mpt/modeling_mpt.py +58 -60
  928. transformers/models/mra/configuration_mra.py +0 -1
  929. transformers/models/mra/modeling_mra.py +58 -57
  930. transformers/models/mt5/configuration_mt5.py +2 -4
  931. transformers/models/mt5/modeling_mt5.py +75 -87
  932. transformers/models/musicgen/configuration_musicgen.py +0 -1
  933. transformers/models/musicgen/modeling_musicgen.py +113 -120
  934. transformers/models/musicgen/processing_musicgen.py +3 -21
  935. transformers/models/musicgen_melody/configuration_musicgen_melody.py +0 -1
  936. transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +8 -9
  937. transformers/models/musicgen_melody/modeling_musicgen_melody.py +110 -109
  938. transformers/models/musicgen_melody/processing_musicgen_melody.py +3 -22
  939. transformers/models/mvp/configuration_mvp.py +0 -1
  940. transformers/models/mvp/modeling_mvp.py +122 -119
  941. transformers/models/myt5/tokenization_myt5.py +8 -10
  942. transformers/models/nanochat/configuration_nanochat.py +0 -1
  943. transformers/models/nanochat/modeling_nanochat.py +33 -36
  944. transformers/models/nanochat/modular_nanochat.py +12 -14
  945. transformers/models/nemotron/configuration_nemotron.py +20 -23
  946. transformers/models/nemotron/modeling_nemotron.py +51 -54
  947. transformers/models/nllb/tokenization_nllb.py +7 -9
  948. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -1
  949. transformers/models/nllb_moe/modeling_nllb_moe.py +77 -69
  950. transformers/models/nougat/image_processing_nougat.py +29 -32
  951. transformers/models/nougat/image_processing_nougat_fast.py +4 -6
  952. transformers/models/nougat/processing_nougat.py +37 -39
  953. transformers/models/nougat/tokenization_nougat.py +16 -23
  954. transformers/models/nystromformer/configuration_nystromformer.py +0 -1
  955. transformers/models/nystromformer/modeling_nystromformer.py +68 -63
  956. transformers/models/olmo/configuration_olmo.py +18 -21
  957. transformers/models/olmo/modeling_olmo.py +32 -35
  958. transformers/models/olmo/modular_olmo.py +5 -9
  959. transformers/models/olmo2/configuration_olmo2.py +18 -21
  960. transformers/models/olmo2/modeling_olmo2.py +33 -36
  961. transformers/models/olmo2/modular_olmo2.py +29 -31
  962. transformers/models/olmo3/__init__.py +0 -1
  963. transformers/models/olmo3/configuration_olmo3.py +20 -23
  964. transformers/models/olmo3/modeling_olmo3.py +32 -35
  965. transformers/models/olmo3/modular_olmo3.py +31 -33
  966. transformers/models/olmoe/configuration_olmoe.py +24 -26
  967. transformers/models/olmoe/modeling_olmoe.py +49 -43
  968. transformers/models/olmoe/modular_olmoe.py +16 -15
  969. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -3
  970. transformers/models/omdet_turbo/modeling_omdet_turbo.py +42 -40
  971. transformers/models/omdet_turbo/processing_omdet_turbo.py +19 -67
  972. transformers/models/oneformer/configuration_oneformer.py +5 -8
  973. transformers/models/oneformer/image_processing_oneformer.py +83 -84
  974. transformers/models/oneformer/image_processing_oneformer_fast.py +33 -34
  975. transformers/models/oneformer/modeling_oneformer.py +130 -162
  976. transformers/models/oneformer/processing_oneformer.py +28 -43
  977. transformers/models/openai/configuration_openai.py +0 -1
  978. transformers/models/openai/modeling_openai.py +62 -51
  979. transformers/models/openai/tokenization_openai.py +2 -5
  980. transformers/models/opt/configuration_opt.py +0 -1
  981. transformers/models/opt/modeling_opt.py +74 -75
  982. transformers/models/ovis2/__init__.py +0 -1
  983. transformers/models/ovis2/configuration_ovis2.py +0 -1
  984. transformers/models/ovis2/image_processing_ovis2.py +22 -24
  985. transformers/models/ovis2/image_processing_ovis2_fast.py +6 -8
  986. transformers/models/ovis2/modeling_ovis2.py +58 -48
  987. transformers/models/ovis2/modular_ovis2.py +38 -32
  988. transformers/models/ovis2/processing_ovis2.py +12 -40
  989. transformers/models/owlv2/configuration_owlv2.py +0 -1
  990. transformers/models/owlv2/image_processing_owlv2.py +20 -21
  991. transformers/models/owlv2/image_processing_owlv2_fast.py +7 -10
  992. transformers/models/owlv2/modeling_owlv2.py +89 -90
  993. transformers/models/owlv2/modular_owlv2.py +6 -9
  994. transformers/models/owlv2/processing_owlv2.py +20 -49
  995. transformers/models/owlvit/configuration_owlvit.py +0 -1
  996. transformers/models/owlvit/image_processing_owlvit.py +21 -22
  997. transformers/models/owlvit/image_processing_owlvit_fast.py +2 -3
  998. transformers/models/owlvit/modeling_owlvit.py +88 -89
  999. transformers/models/owlvit/processing_owlvit.py +20 -48
  1000. transformers/models/paddleocr_vl/__init__.py +0 -1
  1001. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +19 -19
  1002. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +37 -37
  1003. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +12 -12
  1004. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +104 -90
  1005. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +90 -80
  1006. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +1 -3
  1007. transformers/models/paligemma/configuration_paligemma.py +0 -1
  1008. transformers/models/paligemma/modeling_paligemma.py +73 -67
  1009. transformers/models/paligemma/processing_paligemma.py +13 -66
  1010. transformers/models/parakeet/configuration_parakeet.py +1 -4
  1011. transformers/models/parakeet/feature_extraction_parakeet.py +10 -12
  1012. transformers/models/parakeet/modeling_parakeet.py +23 -22
  1013. transformers/models/parakeet/modular_parakeet.py +21 -18
  1014. transformers/models/parakeet/processing_parakeet.py +12 -5
  1015. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +5 -7
  1016. transformers/models/patchtsmixer/configuration_patchtsmixer.py +5 -8
  1017. transformers/models/patchtsmixer/modeling_patchtsmixer.py +64 -62
  1018. transformers/models/patchtst/configuration_patchtst.py +6 -9
  1019. transformers/models/patchtst/modeling_patchtst.py +77 -78
  1020. transformers/models/pe_audio/__init__.py +29 -0
  1021. transformers/models/pe_audio/configuration_pe_audio.py +204 -0
  1022. transformers/models/pe_audio/feature_extraction_pe_audio.py +160 -0
  1023. transformers/models/pe_audio/modeling_pe_audio.py +819 -0
  1024. transformers/models/pe_audio/modular_pe_audio.py +298 -0
  1025. transformers/models/pe_audio/processing_pe_audio.py +23 -0
  1026. transformers/models/pe_audio_video/__init__.py +28 -0
  1027. transformers/models/pe_audio_video/configuration_pe_audio_video.py +223 -0
  1028. transformers/models/pe_audio_video/modeling_pe_audio_video.py +971 -0
  1029. transformers/models/pe_audio_video/modular_pe_audio_video.py +763 -0
  1030. transformers/models/pe_audio_video/processing_pe_audio_video.py +24 -0
  1031. transformers/models/pe_video/__init__.py +29 -0
  1032. transformers/models/pe_video/configuration_pe_video.py +209 -0
  1033. transformers/models/pe_video/modeling_pe_video.py +635 -0
  1034. transformers/models/pe_video/modular_pe_video.py +218 -0
  1035. transformers/models/pe_video/processing_pe_video.py +10 -0
  1036. transformers/models/pe_video/video_processing_pe_video.py +64 -0
  1037. transformers/models/pegasus/configuration_pegasus.py +1 -1
  1038. transformers/models/pegasus/modeling_pegasus.py +66 -65
  1039. transformers/models/pegasus/tokenization_pegasus.py +1 -4
  1040. transformers/models/pegasus_x/configuration_pegasus_x.py +0 -1
  1041. transformers/models/pegasus_x/modeling_pegasus_x.py +51 -52
  1042. transformers/models/perceiver/configuration_perceiver.py +0 -1
  1043. transformers/models/perceiver/image_processing_perceiver.py +22 -25
  1044. transformers/models/perceiver/image_processing_perceiver_fast.py +5 -7
  1045. transformers/models/perceiver/modeling_perceiver.py +140 -137
  1046. transformers/models/perceiver/tokenization_perceiver.py +3 -6
  1047. transformers/models/perception_lm/configuration_perception_lm.py +0 -1
  1048. transformers/models/perception_lm/image_processing_perception_lm_fast.py +8 -10
  1049. transformers/models/perception_lm/modeling_perception_lm.py +45 -43
  1050. transformers/models/perception_lm/modular_perception_lm.py +38 -36
  1051. transformers/models/perception_lm/processing_perception_lm.py +13 -47
  1052. transformers/models/perception_lm/video_processing_perception_lm.py +0 -1
  1053. transformers/models/persimmon/configuration_persimmon.py +18 -21
  1054. transformers/models/persimmon/modeling_persimmon.py +40 -43
  1055. transformers/models/phi/configuration_phi.py +19 -22
  1056. transformers/models/phi/modeling_phi.py +36 -38
  1057. transformers/models/phi/modular_phi.py +23 -23
  1058. transformers/models/phi3/configuration_phi3.py +23 -26
  1059. transformers/models/phi3/modeling_phi3.py +34 -37
  1060. transformers/models/phi3/modular_phi3.py +13 -17
  1061. transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +25 -26
  1062. transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +7 -9
  1063. transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +7 -7
  1064. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +58 -57
  1065. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +62 -60
  1066. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +7 -44
  1067. transformers/models/phimoe/configuration_phimoe.py +26 -29
  1068. transformers/models/phimoe/modeling_phimoe.py +47 -42
  1069. transformers/models/phimoe/modular_phimoe.py +1 -2
  1070. transformers/models/phobert/tokenization_phobert.py +4 -6
  1071. transformers/models/pix2struct/configuration_pix2struct.py +0 -1
  1072. transformers/models/pix2struct/image_processing_pix2struct.py +15 -19
  1073. transformers/models/pix2struct/image_processing_pix2struct_fast.py +7 -10
  1074. transformers/models/pix2struct/modeling_pix2struct.py +42 -45
  1075. transformers/models/pix2struct/processing_pix2struct.py +5 -30
  1076. transformers/models/pixio/__init__.py +29 -0
  1077. transformers/models/pixio/configuration_pixio.py +150 -0
  1078. transformers/models/pixio/modeling_pixio.py +505 -0
  1079. transformers/models/pixio/modular_pixio.py +401 -0
  1080. transformers/models/pixtral/configuration_pixtral.py +11 -14
  1081. transformers/models/pixtral/image_processing_pixtral.py +26 -28
  1082. transformers/models/pixtral/image_processing_pixtral_fast.py +5 -6
  1083. transformers/models/pixtral/modeling_pixtral.py +23 -26
  1084. transformers/models/pixtral/processing_pixtral.py +21 -53
  1085. transformers/models/plbart/configuration_plbart.py +1 -1
  1086. transformers/models/plbart/modeling_plbart.py +107 -102
  1087. transformers/models/plbart/modular_plbart.py +36 -32
  1088. transformers/models/plbart/tokenization_plbart.py +4 -5
  1089. transformers/models/poolformer/configuration_poolformer.py +0 -1
  1090. transformers/models/poolformer/image_processing_poolformer.py +21 -24
  1091. transformers/models/poolformer/image_processing_poolformer_fast.py +6 -8
  1092. transformers/models/poolformer/modeling_poolformer.py +21 -13
  1093. transformers/models/pop2piano/configuration_pop2piano.py +0 -2
  1094. transformers/models/pop2piano/feature_extraction_pop2piano.py +6 -9
  1095. transformers/models/pop2piano/modeling_pop2piano.py +22 -23
  1096. transformers/models/pop2piano/processing_pop2piano.py +25 -33
  1097. transformers/models/pop2piano/tokenization_pop2piano.py +15 -23
  1098. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +3 -3
  1099. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
  1100. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +14 -15
  1101. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +9 -10
  1102. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +9 -10
  1103. transformers/models/prophetnet/configuration_prophetnet.py +26 -28
  1104. transformers/models/prophetnet/modeling_prophetnet.py +111 -131
  1105. transformers/models/prophetnet/tokenization_prophetnet.py +14 -16
  1106. transformers/models/pvt/configuration_pvt.py +0 -1
  1107. transformers/models/pvt/image_processing_pvt.py +17 -20
  1108. transformers/models/pvt/image_processing_pvt_fast.py +0 -1
  1109. transformers/models/pvt/modeling_pvt.py +19 -21
  1110. transformers/models/pvt_v2/configuration_pvt_v2.py +2 -4
  1111. transformers/models/pvt_v2/modeling_pvt_v2.py +21 -23
  1112. transformers/models/qwen2/configuration_qwen2.py +18 -21
  1113. transformers/models/qwen2/modeling_qwen2.py +32 -34
  1114. transformers/models/qwen2/modular_qwen2.py +11 -12
  1115. transformers/models/qwen2/tokenization_qwen2.py +2 -5
  1116. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +20 -23
  1117. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +239 -192
  1118. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +174 -127
  1119. transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +41 -49
  1120. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +22 -25
  1121. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +112 -101
  1122. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +72 -107
  1123. transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +7 -43
  1124. transformers/models/qwen2_audio/configuration_qwen2_audio.py +0 -1
  1125. transformers/models/qwen2_audio/modeling_qwen2_audio.py +29 -31
  1126. transformers/models/qwen2_audio/processing_qwen2_audio.py +13 -42
  1127. transformers/models/qwen2_moe/configuration_qwen2_moe.py +28 -31
  1128. transformers/models/qwen2_moe/modeling_qwen2_moe.py +48 -43
  1129. transformers/models/qwen2_moe/modular_qwen2_moe.py +7 -10
  1130. transformers/models/qwen2_vl/configuration_qwen2_vl.py +22 -24
  1131. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +41 -42
  1132. transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +8 -9
  1133. transformers/models/qwen2_vl/modeling_qwen2_vl.py +108 -96
  1134. transformers/models/qwen2_vl/processing_qwen2_vl.py +7 -44
  1135. transformers/models/qwen2_vl/video_processing_qwen2_vl.py +35 -13
  1136. transformers/models/qwen3/configuration_qwen3.py +20 -23
  1137. transformers/models/qwen3/modeling_qwen3.py +32 -35
  1138. transformers/models/qwen3/modular_qwen3.py +4 -6
  1139. transformers/models/qwen3_moe/configuration_qwen3_moe.py +25 -28
  1140. transformers/models/qwen3_moe/modeling_qwen3_moe.py +48 -43
  1141. transformers/models/qwen3_moe/modular_qwen3_moe.py +10 -13
  1142. transformers/models/qwen3_next/configuration_qwen3_next.py +31 -34
  1143. transformers/models/qwen3_next/modeling_qwen3_next.py +43 -48
  1144. transformers/models/qwen3_next/modular_qwen3_next.py +33 -34
  1145. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +89 -88
  1146. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +199 -156
  1147. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +170 -152
  1148. transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +40 -48
  1149. transformers/models/qwen3_vl/configuration_qwen3_vl.py +21 -24
  1150. transformers/models/qwen3_vl/modeling_qwen3_vl.py +91 -81
  1151. transformers/models/qwen3_vl/modular_qwen3_vl.py +86 -112
  1152. transformers/models/qwen3_vl/processing_qwen3_vl.py +6 -42
  1153. transformers/models/qwen3_vl/video_processing_qwen3_vl.py +10 -12
  1154. transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +21 -25
  1155. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +174 -195
  1156. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +65 -117
  1157. transformers/models/rag/configuration_rag.py +0 -9
  1158. transformers/models/rag/modeling_rag.py +123 -127
  1159. transformers/models/rag/retrieval_rag.py +2 -4
  1160. transformers/models/rag/tokenization_rag.py +0 -50
  1161. transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +21 -24
  1162. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +34 -36
  1163. transformers/models/reformer/configuration_reformer.py +0 -1
  1164. transformers/models/reformer/modeling_reformer.py +76 -69
  1165. transformers/models/reformer/tokenization_reformer.py +3 -6
  1166. transformers/models/regnet/configuration_regnet.py +0 -1
  1167. transformers/models/regnet/modeling_regnet.py +11 -9
  1168. transformers/models/rembert/configuration_rembert.py +0 -1
  1169. transformers/models/rembert/modeling_rembert.py +115 -111
  1170. transformers/models/rembert/tokenization_rembert.py +1 -4
  1171. transformers/models/resnet/configuration_resnet.py +0 -1
  1172. transformers/models/resnet/modeling_resnet.py +16 -13
  1173. transformers/models/roberta/configuration_roberta.py +0 -1
  1174. transformers/models/roberta/modeling_roberta.py +94 -93
  1175. transformers/models/roberta/modular_roberta.py +58 -58
  1176. transformers/models/roberta/tokenization_roberta.py +2 -5
  1177. transformers/models/roberta/tokenization_roberta_old.py +2 -4
  1178. transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +0 -1
  1179. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +94 -93
  1180. transformers/models/roc_bert/configuration_roc_bert.py +0 -1
  1181. transformers/models/roc_bert/modeling_roc_bert.py +122 -121
  1182. transformers/models/roc_bert/tokenization_roc_bert.py +88 -94
  1183. transformers/models/roformer/configuration_roformer.py +0 -1
  1184. transformers/models/roformer/modeling_roformer.py +79 -81
  1185. transformers/models/roformer/tokenization_roformer.py +3 -6
  1186. transformers/models/roformer/tokenization_utils.py +0 -1
  1187. transformers/models/rt_detr/configuration_rt_detr.py +1 -2
  1188. transformers/models/rt_detr/configuration_rt_detr_resnet.py +0 -1
  1189. transformers/models/rt_detr/image_processing_rt_detr.py +54 -55
  1190. transformers/models/rt_detr/image_processing_rt_detr_fast.py +15 -15
  1191. transformers/models/rt_detr/modeling_rt_detr.py +84 -82
  1192. transformers/models/rt_detr/modeling_rt_detr_resnet.py +10 -7
  1193. transformers/models/rt_detr/modular_rt_detr.py +14 -14
  1194. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -4
  1195. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +86 -81
  1196. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +10 -7
  1197. transformers/models/rwkv/configuration_rwkv.py +0 -1
  1198. transformers/models/rwkv/modeling_rwkv.py +30 -32
  1199. transformers/models/sam/configuration_sam.py +1 -1
  1200. transformers/models/sam/image_processing_sam.py +59 -60
  1201. transformers/models/sam/image_processing_sam_fast.py +21 -23
  1202. transformers/models/sam/modeling_sam.py +37 -36
  1203. transformers/models/sam/processing_sam.py +39 -27
  1204. transformers/models/sam2/configuration_sam2.py +1 -2
  1205. transformers/models/sam2/image_processing_sam2_fast.py +14 -15
  1206. transformers/models/sam2/modeling_sam2.py +50 -48
  1207. transformers/models/sam2/modular_sam2.py +48 -45
  1208. transformers/models/sam2/processing_sam2.py +31 -47
  1209. transformers/models/sam2_video/configuration_sam2_video.py +0 -1
  1210. transformers/models/sam2_video/modeling_sam2_video.py +119 -112
  1211. transformers/models/sam2_video/modular_sam2_video.py +91 -97
  1212. transformers/models/sam2_video/processing_sam2_video.py +49 -66
  1213. transformers/models/sam2_video/video_processing_sam2_video.py +1 -4
  1214. transformers/models/sam3/configuration_sam3.py +21 -2
  1215. transformers/models/sam3/image_processing_sam3_fast.py +17 -20
  1216. transformers/models/sam3/modeling_sam3.py +77 -56
  1217. transformers/models/sam3/modular_sam3.py +3 -8
  1218. transformers/models/sam3/processing_sam3.py +29 -48
  1219. transformers/models/sam3_tracker/__init__.py +0 -1
  1220. transformers/models/sam3_tracker/configuration_sam3_tracker.py +0 -1
  1221. transformers/models/sam3_tracker/modeling_sam3_tracker.py +36 -36
  1222. transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -1
  1223. transformers/models/sam3_tracker/processing_sam3_tracker.py +31 -47
  1224. transformers/models/sam3_tracker_video/__init__.py +0 -1
  1225. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -1
  1226. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +96 -85
  1227. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +27 -6
  1228. transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +50 -66
  1229. transformers/models/sam3_video/configuration_sam3_video.py +14 -1
  1230. transformers/models/sam3_video/modeling_sam3_video.py +32 -34
  1231. transformers/models/sam3_video/processing_sam3_video.py +26 -46
  1232. transformers/models/sam_hq/__init__.py +1 -1
  1233. transformers/models/sam_hq/configuration_sam_hq.py +1 -1
  1234. transformers/models/sam_hq/modeling_sam_hq.py +65 -64
  1235. transformers/models/sam_hq/modular_sam_hq.py +17 -19
  1236. transformers/models/sam_hq/{processing_samhq.py → processing_sam_hq.py} +39 -28
  1237. transformers/models/seamless_m4t/configuration_seamless_m4t.py +0 -1
  1238. transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +8 -11
  1239. transformers/models/seamless_m4t/modeling_seamless_m4t.py +207 -193
  1240. transformers/models/seamless_m4t/processing_seamless_m4t.py +18 -39
  1241. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +15 -20
  1242. transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +0 -1
  1243. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +199 -195
  1244. transformers/models/seed_oss/configuration_seed_oss.py +23 -25
  1245. transformers/models/seed_oss/modeling_seed_oss.py +31 -33
  1246. transformers/models/seed_oss/modular_seed_oss.py +3 -4
  1247. transformers/models/segformer/configuration_segformer.py +0 -10
  1248. transformers/models/segformer/image_processing_segformer.py +39 -42
  1249. transformers/models/segformer/image_processing_segformer_fast.py +7 -9
  1250. transformers/models/segformer/modeling_segformer.py +26 -28
  1251. transformers/models/segformer/modular_segformer.py +5 -7
  1252. transformers/models/seggpt/configuration_seggpt.py +0 -1
  1253. transformers/models/seggpt/image_processing_seggpt.py +38 -41
  1254. transformers/models/seggpt/modeling_seggpt.py +28 -30
  1255. transformers/models/sew/configuration_sew.py +0 -1
  1256. transformers/models/sew/modeling_sew.py +33 -35
  1257. transformers/models/sew/modular_sew.py +10 -12
  1258. transformers/models/sew_d/configuration_sew_d.py +0 -1
  1259. transformers/models/sew_d/modeling_sew_d.py +28 -30
  1260. transformers/models/shieldgemma2/configuration_shieldgemma2.py +0 -1
  1261. transformers/models/shieldgemma2/modeling_shieldgemma2.py +16 -17
  1262. transformers/models/shieldgemma2/processing_shieldgemma2.py +3 -5
  1263. transformers/models/siglip/configuration_siglip.py +0 -1
  1264. transformers/models/siglip/image_processing_siglip.py +17 -20
  1265. transformers/models/siglip/image_processing_siglip_fast.py +0 -1
  1266. transformers/models/siglip/modeling_siglip.py +62 -41
  1267. transformers/models/siglip/processing_siglip.py +2 -14
  1268. transformers/models/siglip/tokenization_siglip.py +6 -7
  1269. transformers/models/siglip2/configuration_siglip2.py +1 -1
  1270. transformers/models/siglip2/image_processing_siglip2.py +15 -16
  1271. transformers/models/siglip2/image_processing_siglip2_fast.py +4 -5
  1272. transformers/models/siglip2/modeling_siglip2.py +114 -92
  1273. transformers/models/siglip2/modular_siglip2.py +23 -25
  1274. transformers/models/siglip2/processing_siglip2.py +2 -14
  1275. transformers/models/smollm3/configuration_smollm3.py +23 -26
  1276. transformers/models/smollm3/modeling_smollm3.py +32 -35
  1277. transformers/models/smollm3/modular_smollm3.py +27 -29
  1278. transformers/models/smolvlm/configuration_smolvlm.py +1 -1
  1279. transformers/models/smolvlm/image_processing_smolvlm.py +42 -43
  1280. transformers/models/smolvlm/image_processing_smolvlm_fast.py +12 -12
  1281. transformers/models/smolvlm/modeling_smolvlm.py +56 -53
  1282. transformers/models/smolvlm/modular_smolvlm.py +15 -17
  1283. transformers/models/smolvlm/processing_smolvlm.py +15 -76
  1284. transformers/models/smolvlm/video_processing_smolvlm.py +7 -9
  1285. transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +0 -1
  1286. transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +20 -23
  1287. transformers/models/speech_to_text/configuration_speech_to_text.py +0 -1
  1288. transformers/models/speech_to_text/feature_extraction_speech_to_text.py +10 -13
  1289. transformers/models/speech_to_text/modeling_speech_to_text.py +62 -54
  1290. transformers/models/speech_to_text/processing_speech_to_text.py +4 -30
  1291. transformers/models/speech_to_text/tokenization_speech_to_text.py +5 -6
  1292. transformers/models/speecht5/configuration_speecht5.py +0 -1
  1293. transformers/models/speecht5/feature_extraction_speecht5.py +16 -37
  1294. transformers/models/speecht5/modeling_speecht5.py +200 -174
  1295. transformers/models/speecht5/number_normalizer.py +0 -1
  1296. transformers/models/speecht5/processing_speecht5.py +3 -37
  1297. transformers/models/speecht5/tokenization_speecht5.py +4 -5
  1298. transformers/models/splinter/configuration_splinter.py +0 -1
  1299. transformers/models/splinter/modeling_splinter.py +63 -59
  1300. transformers/models/splinter/tokenization_splinter.py +2 -4
  1301. transformers/models/squeezebert/configuration_squeezebert.py +0 -1
  1302. transformers/models/squeezebert/modeling_squeezebert.py +62 -62
  1303. transformers/models/squeezebert/tokenization_squeezebert.py +0 -1
  1304. transformers/models/stablelm/configuration_stablelm.py +20 -23
  1305. transformers/models/stablelm/modeling_stablelm.py +40 -43
  1306. transformers/models/starcoder2/configuration_starcoder2.py +19 -22
  1307. transformers/models/starcoder2/modeling_starcoder2.py +34 -37
  1308. transformers/models/starcoder2/modular_starcoder2.py +13 -15
  1309. transformers/models/superglue/configuration_superglue.py +3 -3
  1310. transformers/models/superglue/image_processing_superglue.py +15 -15
  1311. transformers/models/superglue/image_processing_superglue_fast.py +5 -7
  1312. transformers/models/superglue/modeling_superglue.py +32 -33
  1313. transformers/models/superpoint/image_processing_superpoint.py +15 -15
  1314. transformers/models/superpoint/image_processing_superpoint_fast.py +5 -7
  1315. transformers/models/superpoint/modeling_superpoint.py +13 -14
  1316. transformers/models/swiftformer/configuration_swiftformer.py +0 -1
  1317. transformers/models/swiftformer/modeling_swiftformer.py +16 -14
  1318. transformers/models/swin/configuration_swin.py +0 -1
  1319. transformers/models/swin/modeling_swin.py +74 -82
  1320. transformers/models/swin2sr/configuration_swin2sr.py +0 -1
  1321. transformers/models/swin2sr/image_processing_swin2sr.py +10 -13
  1322. transformers/models/swin2sr/image_processing_swin2sr_fast.py +2 -6
  1323. transformers/models/swin2sr/modeling_swin2sr.py +75 -61
  1324. transformers/models/swinv2/configuration_swinv2.py +0 -1
  1325. transformers/models/swinv2/modeling_swinv2.py +96 -100
  1326. transformers/models/switch_transformers/configuration_switch_transformers.py +0 -1
  1327. transformers/models/switch_transformers/modeling_switch_transformers.py +34 -41
  1328. transformers/models/switch_transformers/modular_switch_transformers.py +31 -38
  1329. transformers/models/t5/configuration_t5.py +7 -2
  1330. transformers/models/t5/modeling_t5.py +76 -84
  1331. transformers/models/t5/tokenization_t5.py +1 -3
  1332. transformers/models/t5gemma/configuration_t5gemma.py +33 -34
  1333. transformers/models/t5gemma/modeling_t5gemma.py +97 -100
  1334. transformers/models/t5gemma/modular_t5gemma.py +117 -118
  1335. transformers/models/t5gemma2/configuration_t5gemma2.py +59 -96
  1336. transformers/models/t5gemma2/modeling_t5gemma2.py +109 -103
  1337. transformers/models/t5gemma2/modular_t5gemma2.py +375 -91
  1338. transformers/models/table_transformer/configuration_table_transformer.py +1 -2
  1339. transformers/models/table_transformer/modeling_table_transformer.py +47 -49
  1340. transformers/models/tapas/configuration_tapas.py +0 -1
  1341. transformers/models/tapas/modeling_tapas.py +64 -66
  1342. transformers/models/tapas/tokenization_tapas.py +115 -153
  1343. transformers/models/textnet/configuration_textnet.py +0 -1
  1344. transformers/models/textnet/image_processing_textnet.py +22 -25
  1345. transformers/models/textnet/image_processing_textnet_fast.py +5 -7
  1346. transformers/models/textnet/modeling_textnet.py +13 -14
  1347. transformers/models/time_series_transformer/configuration_time_series_transformer.py +5 -8
  1348. transformers/models/time_series_transformer/modeling_time_series_transformer.py +79 -81
  1349. transformers/models/timesfm/configuration_timesfm.py +0 -1
  1350. transformers/models/timesfm/modeling_timesfm.py +29 -19
  1351. transformers/models/timesfm/modular_timesfm.py +28 -18
  1352. transformers/models/timesformer/configuration_timesformer.py +0 -1
  1353. transformers/models/timesformer/modeling_timesformer.py +13 -16
  1354. transformers/models/timm_backbone/configuration_timm_backbone.py +0 -1
  1355. transformers/models/timm_backbone/modeling_timm_backbone.py +17 -15
  1356. transformers/models/timm_wrapper/configuration_timm_wrapper.py +5 -3
  1357. transformers/models/timm_wrapper/image_processing_timm_wrapper.py +4 -5
  1358. transformers/models/timm_wrapper/modeling_timm_wrapper.py +32 -28
  1359. transformers/models/trocr/configuration_trocr.py +0 -1
  1360. transformers/models/trocr/modeling_trocr.py +39 -42
  1361. transformers/models/trocr/processing_trocr.py +5 -25
  1362. transformers/models/tvp/configuration_tvp.py +5 -2
  1363. transformers/models/tvp/image_processing_tvp.py +50 -52
  1364. transformers/models/tvp/image_processing_tvp_fast.py +9 -10
  1365. transformers/models/tvp/modeling_tvp.py +25 -27
  1366. transformers/models/tvp/processing_tvp.py +2 -14
  1367. transformers/models/udop/configuration_udop.py +1 -1
  1368. transformers/models/udop/modeling_udop.py +63 -70
  1369. transformers/models/udop/processing_udop.py +7 -26
  1370. transformers/models/udop/tokenization_udop.py +80 -93
  1371. transformers/models/umt5/configuration_umt5.py +2 -3
  1372. transformers/models/umt5/modeling_umt5.py +80 -87
  1373. transformers/models/unispeech/configuration_unispeech.py +0 -1
  1374. transformers/models/unispeech/modeling_unispeech.py +47 -49
  1375. transformers/models/unispeech/modular_unispeech.py +20 -22
  1376. transformers/models/unispeech_sat/configuration_unispeech_sat.py +0 -1
  1377. transformers/models/unispeech_sat/modeling_unispeech_sat.py +63 -65
  1378. transformers/models/unispeech_sat/modular_unispeech_sat.py +21 -23
  1379. transformers/models/univnet/feature_extraction_univnet.py +14 -14
  1380. transformers/models/univnet/modeling_univnet.py +7 -8
  1381. transformers/models/upernet/configuration_upernet.py +0 -1
  1382. transformers/models/upernet/modeling_upernet.py +10 -13
  1383. transformers/models/vaultgemma/__init__.py +0 -1
  1384. transformers/models/vaultgemma/configuration_vaultgemma.py +24 -26
  1385. transformers/models/vaultgemma/modeling_vaultgemma.py +35 -37
  1386. transformers/models/vaultgemma/modular_vaultgemma.py +29 -31
  1387. transformers/models/video_llama_3/image_processing_video_llama_3.py +43 -42
  1388. transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +8 -8
  1389. transformers/models/video_llama_3/modeling_video_llama_3.py +77 -66
  1390. transformers/models/video_llama_3/modular_video_llama_3.py +110 -112
  1391. transformers/models/video_llama_3/processing_video_llama_3.py +5 -39
  1392. transformers/models/video_llama_3/video_processing_video_llama_3.py +18 -18
  1393. transformers/models/video_llava/configuration_video_llava.py +0 -1
  1394. transformers/models/video_llava/image_processing_video_llava.py +35 -38
  1395. transformers/models/video_llava/modeling_video_llava.py +59 -57
  1396. transformers/models/video_llava/processing_video_llava.py +38 -78
  1397. transformers/models/video_llava/video_processing_video_llava.py +0 -1
  1398. transformers/models/videomae/configuration_videomae.py +0 -1
  1399. transformers/models/videomae/image_processing_videomae.py +31 -34
  1400. transformers/models/videomae/modeling_videomae.py +13 -15
  1401. transformers/models/videomae/video_processing_videomae.py +0 -1
  1402. transformers/models/vilt/configuration_vilt.py +2 -3
  1403. transformers/models/vilt/image_processing_vilt.py +29 -30
  1404. transformers/models/vilt/image_processing_vilt_fast.py +9 -10
  1405. transformers/models/vilt/modeling_vilt.py +83 -78
  1406. transformers/models/vilt/processing_vilt.py +2 -14
  1407. transformers/models/vipllava/configuration_vipllava.py +0 -1
  1408. transformers/models/vipllava/modeling_vipllava.py +45 -42
  1409. transformers/models/vipllava/modular_vipllava.py +30 -32
  1410. transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +0 -1
  1411. transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +18 -21
  1412. transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +0 -1
  1413. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +18 -21
  1414. transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +2 -16
  1415. transformers/models/visual_bert/configuration_visual_bert.py +0 -1
  1416. transformers/models/visual_bert/modeling_visual_bert.py +92 -92
  1417. transformers/models/vit/configuration_vit.py +0 -1
  1418. transformers/models/vit/image_processing_vit.py +19 -22
  1419. transformers/models/vit/image_processing_vit_fast.py +0 -1
  1420. transformers/models/vit/modeling_vit.py +13 -15
  1421. transformers/models/vit_mae/configuration_vit_mae.py +0 -1
  1422. transformers/models/vit_mae/modeling_vit_mae.py +21 -23
  1423. transformers/models/vit_msn/configuration_vit_msn.py +0 -1
  1424. transformers/models/vit_msn/modeling_vit_msn.py +10 -12
  1425. transformers/models/vitdet/configuration_vitdet.py +0 -1
  1426. transformers/models/vitdet/modeling_vitdet.py +12 -14
  1427. transformers/models/vitmatte/configuration_vitmatte.py +2 -5
  1428. transformers/models/vitmatte/image_processing_vitmatte.py +15 -18
  1429. transformers/models/vitmatte/image_processing_vitmatte_fast.py +14 -16
  1430. transformers/models/vitmatte/modeling_vitmatte.py +13 -11
  1431. transformers/models/vitpose/configuration_vitpose.py +4 -7
  1432. transformers/models/vitpose/image_processing_vitpose.py +24 -25
  1433. transformers/models/vitpose/image_processing_vitpose_fast.py +9 -11
  1434. transformers/models/vitpose/modeling_vitpose.py +10 -12
  1435. transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +0 -1
  1436. transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +8 -10
  1437. transformers/models/vits/configuration_vits.py +0 -1
  1438. transformers/models/vits/modeling_vits.py +34 -35
  1439. transformers/models/vits/tokenization_vits.py +3 -4
  1440. transformers/models/vivit/configuration_vivit.py +0 -1
  1441. transformers/models/vivit/image_processing_vivit.py +36 -39
  1442. transformers/models/vivit/modeling_vivit.py +5 -7
  1443. transformers/models/vjepa2/__init__.py +0 -1
  1444. transformers/models/vjepa2/configuration_vjepa2.py +0 -1
  1445. transformers/models/vjepa2/modeling_vjepa2.py +30 -32
  1446. transformers/models/vjepa2/video_processing_vjepa2.py +0 -1
  1447. transformers/models/voxtral/__init__.py +0 -1
  1448. transformers/models/voxtral/configuration_voxtral.py +0 -1
  1449. transformers/models/voxtral/modeling_voxtral.py +19 -27
  1450. transformers/models/voxtral/modular_voxtral.py +12 -21
  1451. transformers/models/voxtral/processing_voxtral.py +25 -48
  1452. transformers/models/wav2vec2/configuration_wav2vec2.py +0 -1
  1453. transformers/models/wav2vec2/feature_extraction_wav2vec2.py +7 -10
  1454. transformers/models/wav2vec2/modeling_wav2vec2.py +67 -122
  1455. transformers/models/wav2vec2/processing_wav2vec2.py +6 -35
  1456. transformers/models/wav2vec2/tokenization_wav2vec2.py +20 -332
  1457. transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +0 -1
  1458. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +65 -62
  1459. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +52 -48
  1460. transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +6 -35
  1461. transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +0 -1
  1462. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +84 -77
  1463. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +37 -30
  1464. transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +16 -17
  1465. transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +36 -55
  1466. transformers/models/wavlm/configuration_wavlm.py +0 -1
  1467. transformers/models/wavlm/modeling_wavlm.py +45 -48
  1468. transformers/models/wavlm/modular_wavlm.py +4 -5
  1469. transformers/models/whisper/configuration_whisper.py +0 -1
  1470. transformers/models/whisper/english_normalizer.py +3 -4
  1471. transformers/models/whisper/feature_extraction_whisper.py +9 -24
  1472. transformers/models/whisper/generation_whisper.py +27 -48
  1473. transformers/models/whisper/modeling_whisper.py +73 -73
  1474. transformers/models/whisper/processing_whisper.py +3 -20
  1475. transformers/models/whisper/tokenization_whisper.py +9 -30
  1476. transformers/models/x_clip/configuration_x_clip.py +0 -1
  1477. transformers/models/x_clip/modeling_x_clip.py +70 -69
  1478. transformers/models/x_clip/processing_x_clip.py +2 -14
  1479. transformers/models/xcodec/configuration_xcodec.py +4 -6
  1480. transformers/models/xcodec/modeling_xcodec.py +20 -17
  1481. transformers/models/xglm/configuration_xglm.py +0 -1
  1482. transformers/models/xglm/modeling_xglm.py +59 -55
  1483. transformers/models/xglm/tokenization_xglm.py +1 -4
  1484. transformers/models/xlm/configuration_xlm.py +0 -1
  1485. transformers/models/xlm/modeling_xlm.py +139 -144
  1486. transformers/models/xlm/tokenization_xlm.py +3 -5
  1487. transformers/models/xlm_roberta/configuration_xlm_roberta.py +0 -1
  1488. transformers/models/xlm_roberta/modeling_xlm_roberta.py +195 -194
  1489. transformers/models/xlm_roberta/modular_xlm_roberta.py +50 -53
  1490. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +1 -4
  1491. transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +0 -1
  1492. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +94 -93
  1493. transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +67 -70
  1494. transformers/models/xlnet/configuration_xlnet.py +0 -11
  1495. transformers/models/xlnet/modeling_xlnet.py +152 -163
  1496. transformers/models/xlnet/tokenization_xlnet.py +1 -4
  1497. transformers/models/xlstm/configuration_xlstm.py +3 -5
  1498. transformers/models/xlstm/modeling_xlstm.py +62 -65
  1499. transformers/models/xmod/configuration_xmod.py +0 -1
  1500. transformers/models/xmod/modeling_xmod.py +101 -100
  1501. transformers/models/yolos/configuration_yolos.py +0 -1
  1502. transformers/models/yolos/image_processing_yolos.py +60 -62
  1503. transformers/models/yolos/image_processing_yolos_fast.py +18 -18
  1504. transformers/models/yolos/modeling_yolos.py +12 -14
  1505. transformers/models/yolos/modular_yolos.py +2 -4
  1506. transformers/models/yoso/configuration_yoso.py +0 -1
  1507. transformers/models/yoso/modeling_yoso.py +64 -63
  1508. transformers/models/zamba/configuration_zamba.py +0 -1
  1509. transformers/models/zamba/modeling_zamba.py +70 -70
  1510. transformers/models/zamba2/configuration_zamba2.py +36 -37
  1511. transformers/models/zamba2/modeling_zamba2.py +87 -89
  1512. transformers/models/zamba2/modular_zamba2.py +43 -45
  1513. transformers/models/zoedepth/configuration_zoedepth.py +1 -2
  1514. transformers/models/zoedepth/image_processing_zoedepth.py +28 -29
  1515. transformers/models/zoedepth/image_processing_zoedepth_fast.py +12 -15
  1516. transformers/models/zoedepth/modeling_zoedepth.py +21 -16
  1517. transformers/pipelines/__init__.py +59 -55
  1518. transformers/pipelines/any_to_any.py +14 -22
  1519. transformers/pipelines/audio_utils.py +1 -2
  1520. transformers/pipelines/automatic_speech_recognition.py +20 -12
  1521. transformers/pipelines/base.py +13 -17
  1522. transformers/pipelines/deprecated/__init__.py +0 -1
  1523. transformers/pipelines/document_question_answering.py +1 -1
  1524. transformers/pipelines/image_text_to_text.py +0 -1
  1525. transformers/pipelines/image_to_text.py +4 -44
  1526. transformers/pipelines/question_answering.py +5 -44
  1527. transformers/pipelines/text_classification.py +1 -14
  1528. transformers/pipelines/text_to_audio.py +2 -2
  1529. transformers/pipelines/token_classification.py +1 -22
  1530. transformers/pipelines/video_classification.py +1 -9
  1531. transformers/pipelines/zero_shot_audio_classification.py +0 -1
  1532. transformers/pipelines/zero_shot_classification.py +0 -6
  1533. transformers/pipelines/zero_shot_image_classification.py +0 -7
  1534. transformers/processing_utils.py +222 -151
  1535. transformers/quantizers/auto.py +2 -4
  1536. transformers/quantizers/base.py +19 -64
  1537. transformers/quantizers/quantizer_aqlm.py +1 -18
  1538. transformers/quantizers/quantizer_auto_round.py +1 -10
  1539. transformers/quantizers/quantizer_awq.py +3 -8
  1540. transformers/quantizers/quantizer_bitnet.py +1 -6
  1541. transformers/quantizers/quantizer_bnb_4bit.py +9 -49
  1542. transformers/quantizers/quantizer_bnb_8bit.py +9 -19
  1543. transformers/quantizers/quantizer_compressed_tensors.py +1 -4
  1544. transformers/quantizers/quantizer_eetq.py +2 -12
  1545. transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
  1546. transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
  1547. transformers/quantizers/quantizer_fp_quant.py +4 -4
  1548. transformers/quantizers/quantizer_gptq.py +1 -4
  1549. transformers/quantizers/quantizer_higgs.py +2 -6
  1550. transformers/quantizers/quantizer_mxfp4.py +2 -28
  1551. transformers/quantizers/quantizer_quanto.py +14 -14
  1552. transformers/quantizers/quantizer_quark.py +0 -1
  1553. transformers/quantizers/quantizer_spqr.py +3 -8
  1554. transformers/quantizers/quantizer_torchao.py +31 -127
  1555. transformers/quantizers/quantizer_vptq.py +1 -10
  1556. transformers/testing_utils.py +31 -49
  1557. transformers/tokenization_mistral_common.py +554 -902
  1558. transformers/tokenization_utils_base.py +112 -124
  1559. transformers/tokenization_utils_sentencepiece.py +5 -6
  1560. transformers/tokenization_utils_tokenizers.py +30 -7
  1561. transformers/trainer.py +30 -11
  1562. transformers/trainer_callback.py +8 -0
  1563. transformers/trainer_jit_checkpoint.py +1 -2
  1564. transformers/trainer_seq2seq.py +4 -0
  1565. transformers/training_args.py +11 -13
  1566. transformers/utils/__init__.py +4 -0
  1567. transformers/utils/attention_visualizer.py +5 -5
  1568. transformers/utils/auto_docstring.py +598 -37
  1569. transformers/utils/doc.py +1 -1
  1570. transformers/utils/dummy_pt_objects.py +0 -42
  1571. transformers/utils/generic.py +21 -1
  1572. transformers/utils/import_utils.py +51 -9
  1573. transformers/utils/kernel_config.py +71 -18
  1574. transformers/utils/loading_report.py +3 -3
  1575. transformers/utils/quantization_config.py +16 -18
  1576. transformers/video_processing_utils.py +35 -32
  1577. transformers/video_utils.py +18 -22
  1578. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc3.dist-info}/METADATA +23 -24
  1579. transformers-5.0.0rc3.dist-info/RECORD +2067 -0
  1580. transformers-5.0.0rc1.dist-info/RECORD +0 -2003
  1581. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc3.dist-info}/WHEEL +0 -0
  1582. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc3.dist-info}/entry_points.txt +0 -0
  1583. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc3.dist-info}/licenses/LICENSE +0 -0
  1584. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc3.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
1
- # coding=utf-8
2
1
  # Copyright 2025 The HuggingFace Inc. team
3
2
  #
4
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,7 +17,7 @@ from collections import deque
18
17
 
19
18
  from ...utils.metrics import attach_tracer, traced
20
19
  from .cache import PagedAttentionCache
21
- from .requests import RequestState, RequestStatus
20
+ from .requests import RequestState, RequestStatus, logger
22
21
 
23
22
 
24
23
  class Scheduler(ABC):
@@ -36,6 +35,11 @@ class Scheduler(ABC):
36
35
  self.retain_cache_on_finish = retain_cache_on_finish
37
36
  self._cancellation_lock = threading.Lock()
38
37
  self._requests_to_cancel: set[str] = set()
38
+ self._requests_to_fork: list[RequestState] = []
39
+ # This state is used to avoid infinite loops when offloading requests
40
+ self.block_new_requests = False
41
+ # This is to compute the cache used by a new request being scheduled
42
+ self.cache_budget_module = None if cache.num_full_attention_groups else cache.config.sliding_window
39
43
 
40
44
  @traced
41
45
  def add_waiting_request(self, state: RequestState):
@@ -51,10 +55,11 @@ class Scheduler(ABC):
51
55
  self.waiting_requests_order.append(state.request_id)
52
56
 
53
57
  @abstractmethod
54
- def schedule_batch(self, token_budget: int) -> list[RequestState]:
55
- """Schedules requests for the next batch based on available token budget. This method selects which requests
56
- should be processed in the current batch, considering the token budget and the scheduler's prioritization rules.
57
- The token_budget is the maximum number of tokens that can be processed in this batch."""
58
+ def schedule_batch(self, token_budget: int, cache_budget: int) -> list[RequestState]:
59
+ """Schedules requests for the next batch based on available token and cache budgets. This method selects which
60
+ requests should be processed in the current batch, considering the budgets and the scheduler's prioritization
61
+ rules. The token_budget is the maximum number of tokens that can be processed in a batch, and the cache_budget
62
+ is the maximum number of KV cache entries that can be read in a batch."""
58
63
 
59
64
  @traced
60
65
  def has_pending_requests(self) -> bool:
@@ -62,14 +67,13 @@ class Scheduler(ABC):
62
67
  return len(self.active_requests) or len(self.waiting_requests)
63
68
 
64
69
  @traced
65
- def finish_request(self, request_id: str, evict_from_cache: bool = True):
70
+ def finish_request(self, request_id: str, evict_from_cache: bool = True) -> None:
66
71
  """Completes processing of a request and optionally frees its allocated cache blocks. This method is called
67
72
  when a request has finished generation or encountered an error.
68
73
  """
69
74
  if evict_from_cache:
70
75
  self.cache.free_blocks(request_id)
71
- if request_id in self.active_requests:
72
- del self.active_requests[request_id]
76
+ self.active_requests.pop(request_id, None)
73
77
 
74
78
  @traced
75
79
  def get_active_request_static_outputs(self, request_id: str) -> list[int]:
@@ -89,10 +93,8 @@ class Scheduler(ABC):
89
93
  """Remove all cancelled requests from active and waiting queues."""
90
94
  with self._cancellation_lock:
91
95
  for request_id in self._requests_to_cancel:
92
- if request_id in self.active_requests:
93
- del self.active_requests[request_id]
94
- if request_id in self.waiting_requests:
95
- del self.waiting_requests[request_id]
96
+ self.active_requests.pop(request_id, None)
97
+ self.waiting_requests.pop(request_id, None)
96
98
  if request_id in self.waiting_requests_order:
97
99
  self.waiting_requests_order.remove(request_id)
98
100
  self.cache.free_blocks(request_id)
@@ -106,7 +108,7 @@ class Scheduler(ABC):
106
108
  )
107
109
 
108
110
  @traced
109
- def _allocate_blocks_if_needed(self, state: RequestState) -> bool:
111
+ def _allocate_blocks_if_needed(self, state: RequestState, len_next_tokens: int) -> bool:
110
112
  """Allocate additional cache blocks for a request if the currently allocated blocks are insufficient to
111
113
  accommodate the next tokens. It calculates how many blocks are needed based on the request's current
112
114
  cache occupancy and the number of tokens to be processed. The allocation itself is done by the CacheAllocator
@@ -115,20 +117,16 @@ class Scheduler(ABC):
115
117
  # 1. we check that the occupancy is less than the requested length
116
118
  # 2. we allocate enough blocks to cover the requested length
117
119
  current_len = state.current_len()
118
- len_next_tokens = len(state.tokens_to_process)
119
120
  occupancy = state.allocated_blocks * self.cache.block_size - current_len
120
121
  if occupancy < len_next_tokens or state.allocated_blocks == 0:
121
122
  blocks_needed = ((len_next_tokens - occupancy + 1) // self.cache.block_size) + 1
122
- allocated = self.cache.allocate_blocks(blocks_needed, state)
123
+ allocated = self.cache.allocate_blocks(blocks_needed, state.request_id, state.allocated_blocks)
123
124
  if allocated is None:
124
125
  return False
125
126
  state.allocated_blocks += allocated
126
127
  return True
127
128
 
128
- @traced(span_name="prepare_request")
129
- def _prepare_request_for_processing(
130
- self, state: RequestState, token_budget: int, request_ids_to_remove_from_waiting: set[str]
131
- ) -> None:
129
+ def _infer_request_tokens(self, state: RequestState, request_ids_to_remove_from_waiting: set[str]) -> list[int]:
132
130
  """Prepares a request for processing in the current batch. If prefix sharing is enabled, and the request was
133
131
  pending, this is where we look for a prefix match and split the request if found."""
134
132
  # If prefix sharing is enabled, we look for a prefix match and split the request if found
@@ -138,6 +136,8 @@ class Scheduler(ABC):
138
136
  self.active_requests[state.request_id] = state
139
137
  request_ids_to_remove_from_waiting.add(state.request_id)
140
138
  state.status = RequestStatus.SPLIT_PENDING_REMAINDER
139
+ # We keep track of the number of allocated blocks to avoid double allocation
140
+ state.allocated_blocks += prefill_length // self.cache.block_size
141
141
  # Even if we match the whole request, we keep at least 1 token to start decoding
142
142
  prefill_length = min(prefill_length, len(state.tokens_to_process) - 1)
143
143
  state.remaining_prefill_tokens = state.tokens_to_process[prefill_length:]
@@ -150,9 +150,25 @@ class Scheduler(ABC):
150
150
  # Otherwise, the tokens to process are the prompt ids, which are the full prompt or the last predicted tokens
151
151
  else:
152
152
  request_tokens = state.tokens_to_process
153
-
153
+ return request_tokens
154
+
155
+ def _schedule_request(
156
+ self,
157
+ state: RequestState,
158
+ request_tokens: list[int],
159
+ token_budget: int,
160
+ request_ids_to_remove_from_waiting: set[str],
161
+ ) -> None:
162
+ """Schedules a request for the current batch, updating the request's status according to the token budget left.
163
+ If the request has children (for parallel decoding), it ensures at least one token remains before the request is
164
+ forked."""
165
+ # If the request has one or more children we make sure not to prefill it entirely
166
+ if state.num_children > 0 and token_budget >= len(request_tokens) - 1:
167
+ token_budget = len(request_tokens) - 1
168
+ self._requests_to_fork.append(state)
169
+
170
+ # Case: we can process the entire prompt/remainder
154
171
  if len(request_tokens) < token_budget:
155
- # Can process the entire prompt/remainder
156
172
  if state.status == RequestStatus.PENDING:
157
173
  self.active_requests[state.request_id] = state
158
174
  state.status = RequestStatus.PREFILLING
@@ -161,8 +177,9 @@ class Scheduler(ABC):
161
177
  state.status = RequestStatus.PREFILLING
162
178
  state.tokens_to_process = state.remaining_prefill_tokens
163
179
  state.remaining_prefill_tokens = []
180
+
181
+ # Otherwise: we need to split the request
164
182
  else:
165
- # Need to split the request
166
183
  if state.status == RequestStatus.PENDING:
167
184
  self.active_requests[state.request_id] = state
168
185
  state.status = RequestStatus.PREFILLING_SPLIT
@@ -189,7 +206,7 @@ class FIFOScheduler(Scheduler):
189
206
  self.safety_margin = safety_margin
190
207
 
191
208
  @traced
192
- def schedule_batch(self, token_budget: int) -> list[RequestState]:
209
+ def schedule_batch(self, token_budget: int, cache_budget: int) -> list[RequestState] | None:
193
210
  priority_states: list[RequestState] = []
194
211
  second_priority_states: list[RequestState] = []
195
212
  scheduled_requests = []
@@ -201,35 +218,62 @@ class FIFOScheduler(Scheduler):
201
218
  second_priority_states.append(state)
202
219
 
203
220
  # Add waiting requests to second priority
204
- for req_id in self.waiting_requests_order:
205
- second_priority_states.append(self.waiting_requests[req_id])
221
+ if not self.block_new_requests:
222
+ for req_id in self.waiting_requests_order:
223
+ second_priority_states.append(self.waiting_requests[req_id])
206
224
 
207
225
  candidates = priority_states + second_priority_states
208
226
  request_ids_to_remove_from_waiting = set()
209
227
  safety_margins = self.safety_margin * self.cache.num_blocks
210
228
 
229
+ one_allocation_failed = False
230
+
211
231
  for state in candidates:
212
232
  # If we are out the safety margin, we only accept decoding requests or the first prefill request
213
233
  num_free_blocks = self.cache.get_num_free_blocks()
214
234
  outside_safety_margin = num_free_blocks < safety_margins
215
235
  if outside_safety_margin and scheduled_requests and state.status != RequestStatus.DECODING:
236
+ logger.info(
237
+ f"Outside safety margin, breaking out of scheduling loop. {num_free_blocks = } {safety_margins = }"
238
+ )
216
239
  break
217
240
 
218
- self._prepare_request_for_processing(state, token_budget, request_ids_to_remove_from_waiting)
219
- request_len = len(state.tokens_to_process)
220
- # If we can't allocate blocks, do not schedule the request and break if the cache is full
221
- if not self._allocate_blocks_if_needed(state):
222
- if self.cache.get_num_free_blocks() == 0:
241
+ # Check cache budget
242
+ cache_needed = state.current_len()
243
+ cache_needed = (
244
+ cache_needed if self.cache_budget_module is None else cache_needed % self.cache_budget_module
245
+ )
246
+ if cache_budget < cache_needed:
247
+ continue
248
+
249
+ # Infer the tokens that will be present in the batch if token budget is enough
250
+ request_tokens = self._infer_request_tokens(state, request_ids_to_remove_from_waiting)
251
+ # Account for token budget
252
+ request_len = min(len(request_tokens), token_budget)
253
+ # Check there will be enough cache for the new tokens
254
+ allocation_successful = self._allocate_blocks_if_needed(state, request_len)
255
+
256
+ # If the allocation would not be successful, we move on to the next request
257
+ if not allocation_successful:
258
+ one_allocation_failed = True
259
+ # If we have reached a request that was waiting, all subsequent requests are also waiting, and will need
260
+ # allocation as well. So if there is no more free blocks, we can safely break out of the loop.
261
+ if num_free_blocks == 0 and state.request_id in self.waiting_requests:
262
+ logger.info(f"Breaking mid-loop for request {state.request_id} because the cache is full")
223
263
  break
224
264
  continue
225
265
 
226
- # Add the request to the scheduled requests
266
+ # If this point is reached, it means we can safely schedule the request
267
+ self._schedule_request(state, request_tokens, token_budget, request_ids_to_remove_from_waiting)
268
+ request_len = len(state.tokens_to_process) # it may change after scheduling
227
269
  scheduled_requests.append(state)
228
270
 
229
- # Update the token budget
271
+ # Update the token and cache budgets
230
272
  token_budget -= request_len
273
+ cache_budget -= cache_needed
274
+
231
275
  # If using prefix sharing, we make note of the blocks that will be computed in the forward pass
232
- if self.cache.use_prefix_sharing:
276
+ if self.cache.allow_block_sharing:
233
277
  tokens_in_current_block = state.current_len() % self.cache.block_size
234
278
  tokens_after_forward = tokens_in_current_block + request_len
235
279
  complete_blocks = tokens_after_forward // self.cache.block_size
@@ -241,18 +285,24 @@ class FIFOScheduler(Scheduler):
241
285
  if was_waiting:
242
286
  request_ids_to_remove_from_waiting.add(req_id)
243
287
 
244
- # Early exit of the loop if we have no token budget left
245
- if token_budget == 0:
288
+ # Early exit of the loop if we have no budget left
289
+ if token_budget == 0 or cache_budget == 0:
246
290
  break
247
291
 
292
+ # We remove waiting requests before checking requests were scheduled, because there might have been prefill matches
248
293
  self.waiting_requests_order = deque(
249
294
  [req_id for req_id in self.waiting_requests_order if req_id not in request_ids_to_remove_from_waiting]
250
295
  )
251
296
 
297
+ # If no requests were scheduled and the cache is full, we signal it by returning None
298
+ if not scheduled_requests and one_allocation_failed:
299
+ return None
300
+
252
301
  return scheduled_requests
253
302
 
254
303
 
255
304
  # FIXME: prioritize adding from waiting reqs before scheduling `RequestStatus.DECODING` when cache space allows it
305
+ # TODO: further consolidate the code by making more of it common. The reference Scheduler is FIFO, not this one.
256
306
  @attach_tracer()
257
307
  class PrefillFirstScheduler(Scheduler):
258
308
  """Scheduler that prioritizes split prefill requests over decoding requests. This scheduler ensures that split
@@ -260,7 +310,7 @@ class PrefillFirstScheduler(Scheduler):
260
310
  decoding requests."""
261
311
 
262
312
  @traced
263
- def schedule_batch(self, token_budget: int) -> list[RequestState]:
313
+ def schedule_batch(self, token_budget: int, cache_budget: int) -> list[RequestState] | None:
264
314
  priority_states: list[RequestState] = []
265
315
  second_priority_states: list[RequestState] = []
266
316
  scheduled_requests = []
@@ -273,29 +323,49 @@ class PrefillFirstScheduler(Scheduler):
273
323
  second_priority_states.append(state)
274
324
 
275
325
  # Add waiting requests to second priority
276
- for req_id in self.waiting_requests_order:
277
- second_priority_states.append(self.waiting_requests[req_id])
326
+ if not self.block_new_requests:
327
+ for req_id in self.waiting_requests_order:
328
+ second_priority_states.append(self.waiting_requests[req_id])
278
329
 
279
330
  candidates = priority_states + second_priority_states
280
-
281
331
  request_ids_to_remove_from_waiting = set()
332
+ one_allocation_failed = False
282
333
 
283
334
  for state in candidates:
284
- self._prepare_request_for_processing(state, token_budget, request_ids_to_remove_from_waiting)
285
- request_len = len(state.tokens_to_process)
286
- # If we can't allocate blocks, do not schedule the request and break if the cache is full
287
- if not self._allocate_blocks_if_needed(state):
288
- if self.cache.get_num_free_blocks() == 0:
335
+ # Check cache budget
336
+ cache_needed = state.current_len()
337
+ cache_needed = (
338
+ cache_needed if self.cache_budget_module is None else cache_needed % self.cache_budget_module
339
+ )
340
+ if cache_budget < cache_needed:
341
+ continue
342
+
343
+ # Infer the tokens that will be present in the batch if token budget is enough
344
+ request_tokens = self._infer_request_tokens(state, request_ids_to_remove_from_waiting)
345
+ # Account for token budget
346
+ request_len = min(len(request_tokens), token_budget)
347
+ # Check there will be enough cache for the new tokens
348
+ allocation_successful = self._allocate_blocks_if_needed(state, request_len)
349
+
350
+ # If the allocation would not be successful, we move on to the next request
351
+ if not allocation_successful:
352
+ one_allocation_failed = True
353
+ # If the request was waiting, all requests afterwards will need allocation, so we break if the cache is full
354
+ if state.request_id in self.waiting_requests and self.cache.get_num_free_blocks() == 0:
289
355
  break
290
356
  continue
291
357
 
292
- # Add the request to the scheduled requests
358
+ # If this point is reached, it means we can safely schedule the request
359
+ self._schedule_request(state, request_tokens, token_budget, request_ids_to_remove_from_waiting)
360
+ request_len = len(state.tokens_to_process) # it may change after scheduling
293
361
  scheduled_requests.append(state)
294
362
 
295
- # Update the token budget
363
+ # Update the token and cache budgets
296
364
  token_budget -= request_len
365
+ cache_budget -= cache_needed
366
+
297
367
  # If using prefix sharing, we make note of the blocks that will be computed in the forward pass
298
- if self.cache.use_prefix_sharing:
368
+ if self.cache.allow_block_sharing:
299
369
  tokens_in_current_block = state.current_len() % self.cache.block_size
300
370
  tokens_after_forward = tokens_in_current_block + request_len
301
371
  complete_blocks = tokens_after_forward // self.cache.block_size
@@ -303,18 +373,23 @@ class PrefillFirstScheduler(Scheduler):
303
373
 
304
374
  # Remove the request from the waiting queue and mark it as removed
305
375
  req_id = state.request_id
306
- if req_id in self.waiting_requests:
307
- del self.waiting_requests[req_id]
376
+ was_waiting = self.waiting_requests.pop(req_id, None) is not None
377
+ if was_waiting:
308
378
  request_ids_to_remove_from_waiting.add(req_id)
309
379
 
310
- # Early exit of the loop if we have no token budget left
311
- if token_budget == 0:
380
+ # Early exit of the loop if we have no budget left
381
+ if token_budget == 0 or cache_budget == 0:
312
382
  break
313
383
 
384
+ # We remove waiting requests before checking requests were scheduled, because there might have been prefill matches
314
385
  self.waiting_requests_order = deque(
315
386
  [req_id for req_id in self.waiting_requests_order if req_id not in request_ids_to_remove_from_waiting]
316
387
  )
317
388
 
389
+ # If no requests were scheduled and the cache is full, we signal it by returning None
390
+ if not scheduled_requests and one_allocation_failed:
391
+ return None
392
+
318
393
  return scheduled_requests
319
394
 
320
395
 
@@ -1,4 +1,3 @@
1
- # coding=utf-8
2
1
  # Copyright 2024 The HuggingFace Inc. team and Google DeepMind.
3
2
  #
4
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -1543,133 +1542,6 @@ class PrefixConstrainedLogitsProcessor(LogitsProcessor):
1543
1542
  return scores_processed
1544
1543
 
1545
1544
 
1546
- class HammingDiversityLogitsProcessor(LogitsProcessor):
1547
- r"""
1548
- [`LogitsProcessor`] that enforces diverse beam search.
1549
- Note that this logits processor is only effective for [`PreTrainedModel.group_beam_search`]. See [Diverse Beam
1550
- Search: Decoding Diverse Solutions from Neural Sequence Models](https://huggingface.co/papers/1610.02424) for more
1551
- details.
1552
- Traditional beam search often generates very similar sequences across different beams.
1553
- `HammingDiversityLogitsProcessor` addresses this by penalizing beams that generate tokens already chosen by other
1554
- beams in the same time step.
1555
- Args:
1556
- diversity_penalty (`float`):
1557
- This value is subtracted from a beam's score if it generates a token same as any beam from other group at a
1558
- particular time. A higher `diversity_penalty` will enforce greater diversity among the beams. Adjusting
1559
- this value can help strike a balance between diversity and natural likelihood.
1560
- num_beams (`int`):
1561
- Number of beams for beam search. 1 means no beam search.
1562
- num_beam_groups (`int`):
1563
- Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
1564
- [this paper](https://huggingface.co/papers/1610.02424) for more details.
1565
- Examples:
1566
- ```python
1567
- >>> from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
1568
- >>> import torch
1569
- >>> # Initialize the model and tokenizer
1570
- >>> tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base")
1571
- >>> model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
1572
- >>> # A long text about the solar system
1573
- >>> text = (
1574
- ... "The Solar System is a gravitationally bound system comprising the Sun and the objects that orbit it, "
1575
- ... "either directly or indirectly. Of the objects that orbit the Sun directly, the largest are the eight "
1576
- ... "planets, with the remainder being smaller objects, such as the five dwarf planets and small Solar System "
1577
- ... "bodies. The Solar System formed 4.6 billion years ago from the gravitational collapse of a giant "
1578
- ... "interstellar molecular cloud."
1579
- ... )
1580
- >>> inputs = tokenizer("summarize: " + text, return_tensors="pt")
1581
- >>> # Generate diverse summary
1582
- >>> outputs_diverse = model.generate(
1583
- ... **inputs,
1584
- ... num_beam_groups=2,
1585
- ... diversity_penalty=10.0,
1586
- ... max_length=100,
1587
- ... num_beams=4,
1588
- ... num_return_sequences=2,
1589
- ... )
1590
- >>> summaries_diverse = tokenizer.batch_decode(outputs_diverse, skip_special_tokens=True)
1591
- >>> # Generate non-diverse summary
1592
- >>> outputs_non_diverse = model.generate(
1593
- ... **inputs,
1594
- ... max_length=100,
1595
- ... num_beams=4,
1596
- ... num_return_sequences=2,
1597
- ... )
1598
- >>> summary_non_diverse = tokenizer.batch_decode(outputs_non_diverse, skip_special_tokens=True)
1599
- >>> # With `diversity_penalty`, the resulting beams are much more diverse
1600
- >>> print(summary_non_diverse)
1601
- ['the solar system formed 4.6 billion years ago from the collapse of a giant interstellar molecular cloud. of the objects that orbit the Sun directly, the largest are the eight planets.',
1602
- 'the Solar System formed 4.6 billion years ago from the collapse of a giant interstellar molecular cloud. of the objects that orbit the Sun directly, the largest are the eight planets.']
1603
- >>> print(summaries_diverse)
1604
- ['the solar system formed 4.6 billion years ago from the collapse of a giant interstellar molecular cloud. of the objects that orbit the Sun directly, the largest are the eight planets.',
1605
- 'the solar system formed 4.6 billion years ago from the collapse of a giant interstellar molecular cloud. of the objects that orbit the Sun directly, the largest are the eight planets. the rest of the objects are smaller objects, such as the five dwarf planets and small solar system bodies.']
1606
- ```
1607
- """
1608
-
1609
- def __init__(self, diversity_penalty: float, num_beams: int, num_beam_groups: int):
1610
- logger.warning_once(
1611
- "`HammingDiversityLogitsProcessor` is deprecated and will be removed in v4.62.0, as constrained beam search has been moved to the Hub: https://hf.co/transformers-community/constrained-beam-search."
1612
- )
1613
- if not isinstance(diversity_penalty, float) or (not diversity_penalty > 0.0):
1614
- raise ValueError("`diversity_penalty` should be a float strictly larger than 0.")
1615
- self._diversity_penalty = diversity_penalty
1616
- if not isinstance(num_beams, int) or num_beams < 2:
1617
- raise ValueError("`num_beams` should be an integer strictly larger than 1.")
1618
- self._num_beams = num_beams
1619
- if not isinstance(num_beam_groups, int) or num_beam_groups < 2:
1620
- raise ValueError("`num_beam_groups` should be an integer strictly larger than 1.")
1621
- if num_beam_groups > num_beams:
1622
- raise ValueError("`beam_groups` has to be smaller or equal to `num_beams`.")
1623
- self._num_sub_beams = num_beams // num_beam_groups
1624
-
1625
- def __call__(
1626
- self,
1627
- input_ids: torch.LongTensor,
1628
- scores: torch.FloatTensor,
1629
- current_tokens: torch.LongTensor,
1630
- beam_group_idx: int,
1631
- ) -> torch.FloatTensor:
1632
- r"""
1633
- Args:
1634
- input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
1635
- Indices of input sequence tokens in the vocabulary. [What are input IDs?](../glossary#input-ids)
1636
- scores (`torch.FloatTensor` of shape `(batch_size, config.vocab_size)`):
1637
- Prediction scores of a language modeling head. These can be logits for each vocabulary when not using
1638
- beam search or log softmax for each vocabulary token when using beam search
1639
- current_tokens (`torch.LongTensor` of shape `(batch_size)`):
1640
- Indices of input sequence tokens in the vocabulary, corresponding to the tokens selected by the other
1641
- beam groups in the current generation step.
1642
- beam_group_idx (`int`):
1643
- The index of the beam group currently being processed.
1644
- Return:
1645
- `torch.FloatTensor` of shape `(batch_size, config.vocab_size)`:
1646
- The processed prediction scores.
1647
- """
1648
- # hamming diversity: penalise using same token in current group which was used in previous groups at
1649
- # the same time step
1650
- batch_size = current_tokens.shape[0] // self._num_beams
1651
- group_start_idx = beam_group_idx * self._num_sub_beams
1652
- group_end_idx = min(group_start_idx + self._num_sub_beams, self._num_beams)
1653
- group_size = group_end_idx - group_start_idx
1654
- vocab_size = scores.shape[-1]
1655
-
1656
- if group_start_idx == 0:
1657
- return scores
1658
-
1659
- scores_processed = scores.clone()
1660
- for batch_idx in range(batch_size):
1661
- # predicted tokens of last time step of previous groups
1662
- previous_group_tokens = current_tokens[
1663
- batch_idx * self._num_beams : batch_idx * self._num_beams + group_start_idx
1664
- ]
1665
- token_frequency = torch.bincount(previous_group_tokens, minlength=vocab_size).to(scores.device)
1666
- scores_processed[batch_idx * group_size : (batch_idx + 1) * group_size] -= (
1667
- self._diversity_penalty * token_frequency
1668
- )
1669
-
1670
- return scores_processed
1671
-
1672
-
1673
1545
  class ForcedBOSTokenLogitsProcessor(LogitsProcessor):
1674
1546
  r"""
1675
1547
  [`LogitsProcessor`] that enforces the specified token as the first generated token. Used with encoder-decoder
@@ -430,7 +430,7 @@ class StopStringCriteria(StoppingCriteria):
430
430
  initial_match = end_lengths > 0
431
431
 
432
432
  # Tokens continue the string if the cumsum() so far is one of the valid positions for that token
433
- # Note that we're actually tracking one cumsum() for for each possible end_length
433
+ # Note that we're actually tracking one cumsum() for each possible end_length
434
434
  later_match = torch.any(cumsum[:, :-1, :, None] == valid_positions[:, :, :, :, None], axis=-2)
435
435
 
436
436
  # The match vector is a boolean vector that indicates which positions have valid tokens
@@ -1,4 +1,3 @@
1
- # coding=utf-8
2
1
  # Copyright 2023 The HuggingFace Inc. team.
3
2
  #
4
3
  # Licensed under the Apache License, Version 2.0 (the "License");