transformers 5.0.0rc1__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (671) hide show
  1. transformers/__init__.py +20 -1
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/configuration_utils.py +114 -70
  6. transformers/conversion_mapping.py +68 -5
  7. transformers/core_model_loading.py +201 -35
  8. transformers/dependency_versions_table.py +1 -1
  9. transformers/feature_extraction_utils.py +54 -22
  10. transformers/generation/candidate_generator.py +79 -31
  11. transformers/generation/configuration_utils.py +162 -122
  12. transformers/generation/continuous_batching/cache.py +47 -18
  13. transformers/generation/continuous_batching/cache_manager.py +131 -34
  14. transformers/generation/continuous_batching/continuous_api.py +101 -64
  15. transformers/generation/continuous_batching/requests.py +28 -1
  16. transformers/generation/continuous_batching/scheduler.py +11 -4
  17. transformers/generation/stopping_criteria.py +1 -1
  18. transformers/generation/utils.py +108 -110
  19. transformers/generation/watermarking.py +8 -5
  20. transformers/image_processing_base.py +2 -12
  21. transformers/image_processing_utils_fast.py +15 -4
  22. transformers/initialization.py +37 -0
  23. transformers/integrations/__init__.py +12 -0
  24. transformers/integrations/accelerate.py +44 -111
  25. transformers/integrations/aqlm.py +3 -5
  26. transformers/integrations/awq.py +2 -5
  27. transformers/integrations/bitnet.py +5 -8
  28. transformers/integrations/bitsandbytes.py +16 -15
  29. transformers/integrations/deepspeed.py +18 -3
  30. transformers/integrations/eetq.py +3 -5
  31. transformers/integrations/fbgemm_fp8.py +1 -1
  32. transformers/integrations/finegrained_fp8.py +6 -16
  33. transformers/integrations/flash_attention.py +2 -2
  34. transformers/integrations/higgs.py +2 -5
  35. transformers/integrations/hub_kernels.py +23 -5
  36. transformers/integrations/integration_utils.py +35 -0
  37. transformers/integrations/mistral.py +12 -0
  38. transformers/integrations/moe.py +240 -0
  39. transformers/integrations/mxfp4.py +4 -10
  40. transformers/integrations/peft.py +5 -0
  41. transformers/integrations/quanto.py +5 -2
  42. transformers/integrations/spqr.py +3 -5
  43. transformers/integrations/tensor_parallel.py +167 -221
  44. transformers/integrations/vptq.py +3 -5
  45. transformers/modeling_gguf_pytorch_utils.py +66 -19
  46. transformers/modeling_rope_utils.py +78 -81
  47. transformers/modeling_utils.py +583 -503
  48. transformers/models/__init__.py +19 -0
  49. transformers/models/afmoe/modeling_afmoe.py +7 -16
  50. transformers/models/afmoe/modular_afmoe.py +5 -13
  51. transformers/models/aimv2/modeling_aimv2.py +4 -0
  52. transformers/models/aimv2/modular_aimv2.py +4 -0
  53. transformers/models/albert/modeling_albert.py +3 -0
  54. transformers/models/align/modeling_align.py +12 -6
  55. transformers/models/altclip/modeling_altclip.py +7 -3
  56. transformers/models/apertus/modeling_apertus.py +4 -2
  57. transformers/models/apertus/modular_apertus.py +4 -1
  58. transformers/models/arcee/modeling_arcee.py +1 -1
  59. transformers/models/aria/modeling_aria.py +8 -4
  60. transformers/models/aria/modular_aria.py +7 -3
  61. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  62. transformers/models/auto/auto_factory.py +1 -1
  63. transformers/models/auto/configuration_auto.py +27 -0
  64. transformers/models/auto/feature_extraction_auto.py +7 -3
  65. transformers/models/auto/image_processing_auto.py +4 -2
  66. transformers/models/auto/modeling_auto.py +31 -0
  67. transformers/models/auto/processing_auto.py +4 -0
  68. transformers/models/auto/tokenization_auto.py +132 -153
  69. transformers/models/auto/video_processing_auto.py +5 -2
  70. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  71. transformers/models/bamba/modeling_bamba.py +18 -19
  72. transformers/models/bamba/modular_bamba.py +17 -16
  73. transformers/models/bark/modeling_bark.py +9 -0
  74. transformers/models/bart/configuration_bart.py +0 -1
  75. transformers/models/bart/modeling_bart.py +7 -0
  76. transformers/models/beit/image_processing_beit_fast.py +0 -1
  77. transformers/models/bert/modeling_bert.py +3 -0
  78. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  79. transformers/models/big_bird/modeling_big_bird.py +3 -0
  80. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
  81. transformers/models/bit/modeling_bit.py +5 -1
  82. transformers/models/bitnet/modeling_bitnet.py +1 -1
  83. transformers/models/blenderbot/modeling_blenderbot.py +7 -0
  84. transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
  85. transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
  86. transformers/models/blip/modeling_blip.py +2 -0
  87. transformers/models/blip/modeling_blip_text.py +8 -0
  88. transformers/models/blip_2/modeling_blip_2.py +2 -0
  89. transformers/models/bloom/modeling_bloom.py +13 -44
  90. transformers/models/blt/modeling_blt.py +162 -2
  91. transformers/models/blt/modular_blt.py +168 -3
  92. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  93. transformers/models/bridgetower/modeling_bridgetower.py +6 -0
  94. transformers/models/bros/modeling_bros.py +8 -0
  95. transformers/models/camembert/modeling_camembert.py +109 -106
  96. transformers/models/canine/modeling_canine.py +6 -0
  97. transformers/models/canine/tokenization_canine.py +2 -0
  98. transformers/models/chameleon/modeling_chameleon.py +9 -4
  99. transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
  100. transformers/models/clap/feature_extraction_clap.py +2 -2
  101. transformers/models/clap/modeling_clap.py +25 -15
  102. transformers/models/clip/modeling_clip.py +2 -0
  103. transformers/models/clipseg/modeling_clipseg.py +4 -0
  104. transformers/models/clvp/modeling_clvp.py +14 -3
  105. transformers/models/code_llama/tokenization_code_llama.py +1 -1
  106. transformers/models/codegen/modeling_codegen.py +13 -4
  107. transformers/models/cohere/modeling_cohere.py +1 -1
  108. transformers/models/cohere2/modeling_cohere2.py +1 -1
  109. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
  110. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  111. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  112. transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
  113. transformers/models/convbert/modeling_convbert.py +3 -0
  114. transformers/models/convnext/image_processing_convnext.py +2 -2
  115. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  116. transformers/models/csm/generation_csm.py +19 -22
  117. transformers/models/csm/modeling_csm.py +3 -1
  118. transformers/models/csm/modular_csm.py +2 -0
  119. transformers/models/ctrl/modeling_ctrl.py +14 -2
  120. transformers/models/cvt/modeling_cvt.py +5 -1
  121. transformers/models/cwm/modeling_cwm.py +1 -1
  122. transformers/models/d_fine/configuration_d_fine.py +3 -4
  123. transformers/models/d_fine/modeling_d_fine.py +46 -39
  124. transformers/models/d_fine/modular_d_fine.py +15 -4
  125. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  126. transformers/models/dab_detr/modeling_dab_detr.py +1 -1
  127. transformers/models/dac/modeling_dac.py +4 -4
  128. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  129. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  130. transformers/models/dbrx/configuration_dbrx.py +9 -1
  131. transformers/models/dbrx/modeling_dbrx.py +1 -1
  132. transformers/models/deberta/modeling_deberta.py +2 -0
  133. transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
  134. transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
  135. transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
  136. transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
  137. transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
  138. transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
  139. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  140. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  141. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  142. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  143. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  144. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  145. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  146. transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
  147. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  148. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  149. transformers/models/detr/configuration_detr.py +1 -1
  150. transformers/models/detr/modeling_detr.py +8 -1
  151. transformers/models/dia/generation_dia.py +3 -10
  152. transformers/models/dia/modeling_dia.py +12 -1
  153. transformers/models/dia/modular_dia.py +11 -0
  154. transformers/models/dia/processing_dia.py +1 -1
  155. transformers/models/diffllama/modeling_diffllama.py +3 -3
  156. transformers/models/diffllama/modular_diffllama.py +2 -2
  157. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  158. transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
  159. transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
  160. transformers/models/distilbert/modeling_distilbert.py +11 -9
  161. transformers/models/doge/modeling_doge.py +1 -1
  162. transformers/models/donut/image_processing_donut_fast.py +0 -1
  163. transformers/models/donut/modeling_donut_swin.py +16 -12
  164. transformers/models/dots1/modeling_dots1.py +14 -5
  165. transformers/models/dpt/configuration_dpt.py +1 -1
  166. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  167. transformers/models/dpt/modular_dpt.py +1 -2
  168. transformers/models/edgetam/configuration_edgetam.py +1 -1
  169. transformers/models/edgetam/modeling_edgetam.py +5 -2
  170. transformers/models/edgetam/modular_edgetam.py +15 -14
  171. transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
  172. transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
  173. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  174. transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
  175. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  176. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  177. transformers/models/efficientnet/modeling_efficientnet.py +5 -1
  178. transformers/models/electra/modeling_electra.py +7 -0
  179. transformers/models/emu3/modeling_emu3.py +8 -2
  180. transformers/models/emu3/modular_emu3.py +7 -1
  181. transformers/models/encodec/modeling_encodec.py +14 -0
  182. transformers/models/eomt/image_processing_eomt_fast.py +46 -14
  183. transformers/models/eomt/modeling_eomt.py +7 -0
  184. transformers/models/eomt/modular_eomt.py +7 -0
  185. transformers/models/ernie/modeling_ernie.py +6 -0
  186. transformers/models/ernie/modular_ernie.py +6 -0
  187. transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
  188. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
  189. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
  190. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  191. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  192. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  193. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  194. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  195. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  196. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  197. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  198. transformers/models/esm/modeling_esm.py +6 -0
  199. transformers/models/esm/modeling_esmfold.py +6 -1
  200. transformers/models/evolla/modeling_evolla.py +9 -1
  201. transformers/models/evolla/modular_evolla.py +8 -0
  202. transformers/models/exaone4/modeling_exaone4.py +1 -1
  203. transformers/models/falcon/modeling_falcon.py +3 -3
  204. transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
  205. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  206. transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
  207. transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
  208. transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
  209. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
  210. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  211. transformers/models/flaubert/modeling_flaubert.py +14 -15
  212. transformers/models/flava/image_processing_flava_fast.py +0 -2
  213. transformers/models/flava/modeling_flava.py +4 -1
  214. transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
  215. transformers/models/florence2/modeling_florence2.py +20 -3
  216. transformers/models/florence2/modular_florence2.py +13 -0
  217. transformers/models/fnet/modeling_fnet.py +7 -0
  218. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  219. transformers/models/fuyu/modeling_fuyu.py +3 -1
  220. transformers/models/fuyu/processing_fuyu.py +16 -0
  221. transformers/models/gemma/modeling_gemma.py +10 -12
  222. transformers/models/gemma/modular_gemma.py +9 -11
  223. transformers/models/gemma2/modeling_gemma2.py +1 -1
  224. transformers/models/gemma2/modular_gemma2.py +1 -1
  225. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  226. transformers/models/gemma3/modeling_gemma3.py +28 -7
  227. transformers/models/gemma3/modular_gemma3.py +26 -6
  228. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  229. transformers/models/gemma3n/modeling_gemma3n.py +47 -9
  230. transformers/models/gemma3n/modular_gemma3n.py +51 -9
  231. transformers/models/git/modeling_git.py +181 -126
  232. transformers/models/glm/modeling_glm.py +1 -1
  233. transformers/models/glm4/modeling_glm4.py +1 -1
  234. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  235. transformers/models/glm46v/modeling_glm46v.py +3 -1
  236. transformers/models/glm46v/modular_glm46v.py +3 -0
  237. transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
  238. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  239. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  240. transformers/models/glm4v/modeling_glm4v.py +15 -5
  241. transformers/models/glm4v/modular_glm4v.py +11 -3
  242. transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
  243. transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
  244. transformers/models/glmasr/__init__.py +30 -0
  245. transformers/models/glmasr/configuration_glmasr.py +197 -0
  246. transformers/models/glmasr/modeling_glmasr.py +512 -0
  247. transformers/models/glmasr/modular_glmasr.py +433 -0
  248. transformers/models/glmasr/processing_glmasr.py +332 -0
  249. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  250. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  251. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  252. transformers/models/gpt2/modeling_gpt2.py +8 -5
  253. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
  254. transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
  255. transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
  256. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
  257. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  258. transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
  259. transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
  260. transformers/models/gptj/modeling_gptj.py +15 -6
  261. transformers/models/granite/modeling_granite.py +1 -1
  262. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  263. transformers/models/granitemoe/modeling_granitemoe.py +2 -3
  264. transformers/models/granitemoe/modular_granitemoe.py +1 -2
  265. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  266. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
  267. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  268. transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
  269. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  270. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
  271. transformers/models/groupvit/modeling_groupvit.py +6 -1
  272. transformers/models/helium/modeling_helium.py +1 -1
  273. transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
  274. transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
  275. transformers/models/hubert/modeling_hubert.py +4 -0
  276. transformers/models/hubert/modular_hubert.py +4 -0
  277. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
  278. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  279. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  280. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
  281. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  282. transformers/models/ibert/modeling_ibert.py +16 -0
  283. transformers/models/idefics/modeling_idefics.py +10 -0
  284. transformers/models/idefics2/modeling_idefics2.py +7 -1
  285. transformers/models/idefics3/modeling_idefics3.py +5 -1
  286. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  287. transformers/models/imagegpt/modeling_imagegpt.py +9 -2
  288. transformers/models/instructblip/modeling_instructblip.py +2 -0
  289. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  290. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  291. transformers/models/internvl/modeling_internvl.py +11 -8
  292. transformers/models/internvl/modular_internvl.py +5 -9
  293. transformers/models/internvl/video_processing_internvl.py +0 -1
  294. transformers/models/jais2/__init__.py +27 -0
  295. transformers/models/jais2/configuration_jais2.py +152 -0
  296. transformers/models/jais2/modeling_jais2.py +486 -0
  297. transformers/models/jais2/modular_jais2.py +196 -0
  298. transformers/models/jamba/modeling_jamba.py +24 -19
  299. transformers/models/jamba/modular_jamba.py +17 -17
  300. transformers/models/janus/image_processing_janus_fast.py +0 -1
  301. transformers/models/janus/modeling_janus.py +15 -7
  302. transformers/models/janus/modular_janus.py +16 -7
  303. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  304. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  305. transformers/models/kosmos2/modeling_kosmos2.py +14 -2
  306. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  307. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  308. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
  309. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  310. transformers/models/lasr/configuration_lasr.py +4 -0
  311. transformers/models/lasr/modeling_lasr.py +3 -2
  312. transformers/models/lasr/modular_lasr.py +8 -1
  313. transformers/models/lasr/processing_lasr.py +0 -2
  314. transformers/models/layoutlm/modeling_layoutlm.py +5 -3
  315. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  316. transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
  317. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
  318. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  319. transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
  320. transformers/models/led/modeling_led.py +6 -0
  321. transformers/models/levit/modeling_levit.py +18 -0
  322. transformers/models/lfm2/modeling_lfm2.py +1 -1
  323. transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
  324. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  325. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  326. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  327. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  328. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  329. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  330. transformers/models/lilt/modeling_lilt.py +19 -15
  331. transformers/models/llama/modeling_llama.py +1 -1
  332. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  333. transformers/models/llama4/modeling_llama4.py +8 -4
  334. transformers/models/llava/image_processing_llava_fast.py +0 -1
  335. transformers/models/llava/modeling_llava.py +12 -7
  336. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  337. transformers/models/llava_next/modeling_llava_next.py +7 -3
  338. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  339. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  340. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  341. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  342. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  343. transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
  344. transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
  345. transformers/models/longt5/modeling_longt5.py +0 -4
  346. transformers/models/m2m_100/modeling_m2m_100.py +10 -0
  347. transformers/models/mamba/modeling_mamba.py +2 -1
  348. transformers/models/mamba2/modeling_mamba2.py +24 -23
  349. transformers/models/marian/configuration_marian.py +1 -1
  350. transformers/models/marian/modeling_marian.py +3 -0
  351. transformers/models/markuplm/modeling_markuplm.py +5 -8
  352. transformers/models/mask2former/configuration_mask2former.py +3 -3
  353. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  354. transformers/models/mask2former/modeling_mask2former.py +9 -0
  355. transformers/models/maskformer/configuration_maskformer.py +3 -3
  356. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  357. transformers/models/maskformer/modeling_maskformer.py +9 -1
  358. transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
  359. transformers/models/mbart/configuration_mbart.py +1 -0
  360. transformers/models/mbart/modeling_mbart.py +7 -0
  361. transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
  362. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  363. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  364. transformers/models/mimi/modeling_mimi.py +25 -4
  365. transformers/models/minimax/modeling_minimax.py +16 -3
  366. transformers/models/minimax/modular_minimax.py +12 -1
  367. transformers/models/ministral/modeling_ministral.py +1 -1
  368. transformers/models/ministral3/modeling_ministral3.py +1 -1
  369. transformers/models/mistral/modeling_mistral.py +1 -1
  370. transformers/models/mistral3/modeling_mistral3.py +10 -4
  371. transformers/models/mistral3/modular_mistral3.py +3 -1
  372. transformers/models/mixtral/modeling_mixtral.py +12 -4
  373. transformers/models/mixtral/modular_mixtral.py +6 -2
  374. transformers/models/mlcd/modeling_mlcd.py +6 -0
  375. transformers/models/mlcd/modular_mlcd.py +4 -0
  376. transformers/models/mllama/modeling_mllama.py +13 -2
  377. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  378. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
  379. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  380. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  381. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  382. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  383. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  384. transformers/models/mobilevit/modeling_mobilevit.py +4 -0
  385. transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
  386. transformers/models/modernbert/modeling_modernbert.py +12 -1
  387. transformers/models/modernbert/modular_modernbert.py +12 -1
  388. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
  389. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
  390. transformers/models/moonshine/modeling_moonshine.py +1 -1
  391. transformers/models/moshi/modeling_moshi.py +21 -51
  392. transformers/models/mpnet/modeling_mpnet.py +2 -0
  393. transformers/models/mra/modeling_mra.py +4 -1
  394. transformers/models/mt5/configuration_mt5.py +2 -3
  395. transformers/models/mt5/modeling_mt5.py +0 -10
  396. transformers/models/musicgen/modeling_musicgen.py +5 -9
  397. transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
  398. transformers/models/mvp/modeling_mvp.py +7 -0
  399. transformers/models/nanochat/modeling_nanochat.py +1 -1
  400. transformers/models/nemotron/modeling_nemotron.py +3 -3
  401. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  402. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  403. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  404. transformers/models/nougat/tokenization_nougat.py +11 -16
  405. transformers/models/nystromformer/modeling_nystromformer.py +7 -0
  406. transformers/models/olmo/modeling_olmo.py +1 -1
  407. transformers/models/olmo2/modeling_olmo2.py +1 -1
  408. transformers/models/olmo3/modeling_olmo3.py +1 -1
  409. transformers/models/olmoe/modeling_olmoe.py +12 -4
  410. transformers/models/olmoe/modular_olmoe.py +4 -2
  411. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  412. transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
  413. transformers/models/oneformer/configuration_oneformer.py +3 -3
  414. transformers/models/oneformer/modeling_oneformer.py +7 -38
  415. transformers/models/openai/modeling_openai.py +12 -0
  416. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  417. transformers/models/ovis2/modeling_ovis2.py +15 -3
  418. transformers/models/ovis2/modular_ovis2.py +8 -0
  419. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  420. transformers/models/owlv2/modeling_owlv2.py +7 -3
  421. transformers/models/owlv2/modular_owlv2.py +0 -2
  422. transformers/models/owlvit/modeling_owlvit.py +7 -3
  423. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
  424. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
  425. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
  426. transformers/models/paligemma/modeling_paligemma.py +25 -17
  427. transformers/models/parakeet/modeling_parakeet.py +5 -0
  428. transformers/models/parakeet/modular_parakeet.py +5 -0
  429. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  430. transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
  431. transformers/models/patchtst/modeling_patchtst.py +5 -4
  432. transformers/models/pe_audio/__init__.py +30 -0
  433. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  434. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  435. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  436. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  437. transformers/models/pe_audio/processing_pe_audio.py +24 -0
  438. transformers/models/pe_audio_video/__init__.py +29 -0
  439. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  440. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  441. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  442. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  443. transformers/models/pe_video/__init__.py +30 -0
  444. transformers/models/pe_video/configuration_pe_video.py +211 -0
  445. transformers/models/pe_video/modeling_pe_video.py +636 -0
  446. transformers/models/pe_video/modular_pe_video.py +219 -0
  447. transformers/models/pe_video/processing_pe_video.py +10 -0
  448. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  449. transformers/models/pegasus/configuration_pegasus.py +1 -0
  450. transformers/models/pegasus/modeling_pegasus.py +3 -0
  451. transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
  452. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  453. transformers/models/perceiver/modeling_perceiver.py +5 -1
  454. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  455. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  456. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  457. transformers/models/persimmon/modeling_persimmon.py +1 -1
  458. transformers/models/phi/modeling_phi.py +1 -1
  459. transformers/models/phi3/modeling_phi3.py +1 -1
  460. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
  461. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
  462. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  463. transformers/models/phimoe/modeling_phimoe.py +12 -4
  464. transformers/models/phimoe/modular_phimoe.py +1 -1
  465. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  466. transformers/models/pixio/__init__.py +30 -0
  467. transformers/models/pixio/configuration_pixio.py +151 -0
  468. transformers/models/pixio/modeling_pixio.py +507 -0
  469. transformers/models/pixio/modular_pixio.py +404 -0
  470. transformers/models/pixtral/modeling_pixtral.py +1 -1
  471. transformers/models/pixtral/processing_pixtral.py +3 -1
  472. transformers/models/plbart/configuration_plbart.py +1 -0
  473. transformers/models/plbart/modeling_plbart.py +7 -0
  474. transformers/models/plbart/modular_plbart.py +6 -0
  475. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  476. transformers/models/poolformer/modeling_poolformer.py +11 -1
  477. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  478. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  479. transformers/models/prophetnet/modeling_prophetnet.py +2 -1
  480. transformers/models/qwen2/modeling_qwen2.py +1 -1
  481. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
  482. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
  483. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
  484. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
  485. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
  486. transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
  487. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  488. transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
  489. transformers/models/qwen3/modeling_qwen3.py +1 -1
  490. transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
  491. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
  492. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  493. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
  494. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
  495. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  496. transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
  497. transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
  498. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  499. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
  500. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
  501. transformers/models/rag/configuration_rag.py +0 -8
  502. transformers/models/rag/modeling_rag.py +7 -9
  503. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
  504. transformers/models/reformer/modeling_reformer.py +9 -1
  505. transformers/models/regnet/modeling_regnet.py +4 -0
  506. transformers/models/rembert/modeling_rembert.py +7 -1
  507. transformers/models/resnet/modeling_resnet.py +8 -3
  508. transformers/models/roberta/modeling_roberta.py +3 -0
  509. transformers/models/roberta/modular_roberta.py +3 -0
  510. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  511. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  512. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  513. transformers/models/rt_detr/modeling_rt_detr.py +4 -0
  514. transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
  515. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  516. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
  517. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  518. transformers/models/rwkv/modeling_rwkv.py +1 -1
  519. transformers/models/sam/configuration_sam.py +1 -0
  520. transformers/models/sam/image_processing_sam_fast.py +0 -1
  521. transformers/models/sam/modeling_sam.py +4 -1
  522. transformers/models/sam2/configuration_sam2.py +1 -1
  523. transformers/models/sam2/modeling_sam2.py +5 -1
  524. transformers/models/sam2/modular_sam2.py +5 -1
  525. transformers/models/sam2_video/modeling_sam2_video.py +51 -43
  526. transformers/models/sam2_video/modular_sam2_video.py +31 -18
  527. transformers/models/sam3/configuration_sam3.py +21 -1
  528. transformers/models/sam3/modeling_sam3.py +23 -0
  529. transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
  530. transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
  531. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  532. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
  533. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  534. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  535. transformers/models/sam3_video/modeling_sam3_video.py +3 -3
  536. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  537. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  538. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  539. transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
  540. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
  541. transformers/models/seed_oss/modeling_seed_oss.py +1 -1
  542. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  543. transformers/models/segformer/modeling_segformer.py +2 -2
  544. transformers/models/segformer/modular_segformer.py +0 -1
  545. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  546. transformers/models/siglip/modeling_siglip.py +24 -2
  547. transformers/models/siglip2/modeling_siglip2.py +63 -41
  548. transformers/models/smollm3/modeling_smollm3.py +1 -1
  549. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  550. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  551. transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
  552. transformers/models/speecht5/modeling_speecht5.py +28 -0
  553. transformers/models/splinter/modeling_splinter.py +9 -3
  554. transformers/models/squeezebert/modeling_squeezebert.py +2 -0
  555. transformers/models/stablelm/modeling_stablelm.py +1 -1
  556. transformers/models/starcoder2/modeling_starcoder2.py +1 -1
  557. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  558. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  559. transformers/models/swiftformer/modeling_swiftformer.py +4 -0
  560. transformers/models/swin/modeling_swin.py +16 -12
  561. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  562. transformers/models/swin2sr/modeling_swin2sr.py +49 -33
  563. transformers/models/swinv2/modeling_swinv2.py +41 -33
  564. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  565. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  566. transformers/models/t5/configuration_t5.py +7 -1
  567. transformers/models/t5/modeling_t5.py +1 -7
  568. transformers/models/t5gemma/modeling_t5gemma.py +1 -1
  569. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  570. transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
  571. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  572. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  573. transformers/models/table_transformer/modeling_table_transformer.py +1 -1
  574. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  575. transformers/models/timesfm/modeling_timesfm.py +12 -0
  576. transformers/models/timesfm/modular_timesfm.py +12 -0
  577. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  578. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  579. transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
  580. transformers/models/trocr/modeling_trocr.py +1 -2
  581. transformers/models/tvp/configuration_tvp.py +5 -1
  582. transformers/models/tvp/modeling_tvp.py +4 -4
  583. transformers/models/udop/configuration_udop.py +1 -0
  584. transformers/models/udop/modeling_udop.py +3 -7
  585. transformers/models/umt5/configuration_umt5.py +2 -2
  586. transformers/models/umt5/modeling_umt5.py +0 -6
  587. transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
  588. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  589. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  590. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  591. transformers/models/video_llava/modeling_video_llava.py +7 -3
  592. transformers/models/vilt/configuration_vilt.py +2 -2
  593. transformers/models/vilt/modeling_vilt.py +7 -0
  594. transformers/models/vipllava/modeling_vipllava.py +7 -3
  595. transformers/models/visual_bert/modeling_visual_bert.py +2 -0
  596. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  597. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  598. transformers/models/vitmatte/modeling_vitmatte.py +4 -0
  599. transformers/models/vitpose/configuration_vitpose.py +1 -1
  600. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  601. transformers/models/voxtral/modeling_voxtral.py +2 -2
  602. transformers/models/voxtral/modular_voxtral.py +2 -2
  603. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
  604. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
  605. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
  606. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  607. transformers/models/whisper/generation_whisper.py +1 -0
  608. transformers/models/whisper/modeling_whisper.py +5 -3
  609. transformers/models/x_clip/modeling_x_clip.py +2 -0
  610. transformers/models/xcodec/modeling_xcodec.py +5 -0
  611. transformers/models/xglm/modeling_xglm.py +10 -0
  612. transformers/models/xlm/modeling_xlm.py +13 -14
  613. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  614. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  615. transformers/models/xlnet/modeling_xlnet.py +3 -1
  616. transformers/models/xmod/modeling_xmod.py +3 -0
  617. transformers/models/yoso/modeling_yoso.py +4 -1
  618. transformers/models/zamba/modeling_zamba.py +2 -1
  619. transformers/models/zamba2/modeling_zamba2.py +3 -2
  620. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  621. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  622. transformers/models/zoedepth/modeling_zoedepth.py +7 -0
  623. transformers/pipelines/__init__.py +9 -6
  624. transformers/pipelines/automatic_speech_recognition.py +20 -12
  625. transformers/pipelines/base.py +1 -1
  626. transformers/pipelines/document_question_answering.py +1 -1
  627. transformers/pipelines/question_answering.py +1 -1
  628. transformers/pipelines/text_to_audio.py +2 -2
  629. transformers/processing_utils.py +127 -56
  630. transformers/quantizers/auto.py +2 -4
  631. transformers/quantizers/base.py +9 -64
  632. transformers/quantizers/quantizer_aqlm.py +1 -18
  633. transformers/quantizers/quantizer_auto_round.py +1 -10
  634. transformers/quantizers/quantizer_awq.py +3 -8
  635. transformers/quantizers/quantizer_bitnet.py +1 -6
  636. transformers/quantizers/quantizer_bnb_4bit.py +9 -49
  637. transformers/quantizers/quantizer_bnb_8bit.py +9 -19
  638. transformers/quantizers/quantizer_compressed_tensors.py +1 -4
  639. transformers/quantizers/quantizer_eetq.py +2 -12
  640. transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
  641. transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
  642. transformers/quantizers/quantizer_fp_quant.py +4 -4
  643. transformers/quantizers/quantizer_gptq.py +1 -4
  644. transformers/quantizers/quantizer_higgs.py +2 -6
  645. transformers/quantizers/quantizer_mxfp4.py +2 -28
  646. transformers/quantizers/quantizer_quanto.py +14 -14
  647. transformers/quantizers/quantizer_spqr.py +3 -8
  648. transformers/quantizers/quantizer_torchao.py +28 -124
  649. transformers/quantizers/quantizer_vptq.py +1 -10
  650. transformers/testing_utils.py +28 -12
  651. transformers/tokenization_mistral_common.py +3 -2
  652. transformers/tokenization_utils_base.py +3 -2
  653. transformers/tokenization_utils_tokenizers.py +25 -2
  654. transformers/trainer.py +24 -2
  655. transformers/trainer_callback.py +8 -0
  656. transformers/trainer_seq2seq.py +4 -0
  657. transformers/training_args.py +8 -10
  658. transformers/utils/__init__.py +4 -0
  659. transformers/utils/attention_visualizer.py +4 -4
  660. transformers/utils/auto_docstring.py +34 -25
  661. transformers/utils/generic.py +20 -0
  662. transformers/utils/import_utils.py +51 -9
  663. transformers/utils/kernel_config.py +71 -18
  664. transformers/utils/quantization_config.py +8 -8
  665. transformers/video_processing_utils.py +16 -12
  666. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
  667. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
  668. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
  669. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  670. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
  671. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -63,11 +63,10 @@ REGISTERED_FAST_ALIASES: dict[str, type[Any]] = {}
63
63
 
64
64
  TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
65
65
  [
66
- ("aimv2", "CLIPTokenizerFast" if is_tokenizers_available() else None),
66
+ ("aimv2", "CLIPTokenizer" if is_tokenizers_available() else None),
67
67
  ("albert", "AlbertTokenizer" if is_tokenizers_available() else None),
68
68
  ("align", "BertTokenizer" if is_tokenizers_available() else None),
69
- ("arcee", "LlamaTokenizer" if is_tokenizers_available() else None),
70
- ("aria", "LlamaTokenizer" if is_tokenizers_available() else None),
69
+ ("audioflamingo3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
71
70
  ("aya_vision", "CohereTokenizer" if is_tokenizers_available() else None),
72
71
  ("bark", "BertTokenizer" if is_tokenizers_available() else None),
73
72
  ("bart", "RobertaTokenizer" if is_tokenizers_available() else None),
@@ -80,19 +79,15 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
80
79
  ("big_bird", "BigBirdTokenizer" if is_tokenizers_available() else None),
81
80
  ("bigbird_pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
82
81
  ("biogpt", "BioGptTokenizer"),
83
- ("bitnet", "TokenizersBackend" if is_tokenizers_available() else None),
84
82
  ("blenderbot", "BlenderbotTokenizer" if is_tokenizers_available() else None),
85
83
  ("blenderbot-small", "BlenderbotSmallTokenizer"),
86
84
  ("blip", "BertTokenizer" if is_tokenizers_available() else None),
87
85
  ("blip-2", "GPT2Tokenizer" if is_tokenizers_available() else None),
88
- ("bloom", "TokenizersBackend" if is_tokenizers_available() else None),
89
- ("blt", "TokenizersBackend" if is_tokenizers_available() else None),
90
86
  ("bridgetower", "RobertaTokenizer"),
91
87
  ("bros", "BertTokenizer" if is_tokenizers_available() else None),
92
88
  ("byt5", "ByT5Tokenizer"),
93
89
  ("camembert", "CamembertTokenizer" if is_tokenizers_available() else None),
94
90
  ("canine", "CanineTokenizer"),
95
- ("chameleon", "LlamaTokenizer" if is_tokenizers_available() else None),
96
91
  ("chinese_clip", "BertTokenizer" if is_tokenizers_available() else None),
97
92
  ("clap", "RobertaTokenizer"),
98
93
  ("clip", "CLIPTokenizer" if is_tokenizers_available() else None),
@@ -102,186 +97,141 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
102
97
  ("codegen", "GPT2Tokenizer" if is_tokenizers_available() else None),
103
98
  ("cohere", "CohereTokenizer" if is_tokenizers_available() else None),
104
99
  ("cohere2", "CohereTokenizer" if is_tokenizers_available() else None),
105
- ("colpali", "LlamaTokenizer" if is_tokenizers_available() else None),
106
- ("colqwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
100
+ ("colqwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
107
101
  ("convbert", "BertTokenizer" if is_tokenizers_available() else None),
108
102
  ("cpm", "CpmTokenizer" if is_tokenizers_available() else None),
109
103
  ("cpmant", "CpmAntTokenizer"),
110
- ("csm", "TokenizersBackend" if is_tokenizers_available() else None),
111
104
  ("ctrl", "CTRLTokenizer"),
112
105
  ("data2vec-audio", "Wav2Vec2CTCTokenizer"),
113
106
  ("data2vec-text", "RobertaTokenizer"),
114
107
  ("dbrx", "GPT2Tokenizer" if is_tokenizers_available() else None),
115
108
  ("deberta", "DebertaTokenizer" if is_tokenizers_available() else None),
116
109
  ("deberta-v2", "DebertaV2Tokenizer" if is_tokenizers_available() else None),
117
- ("deepseek_v2", "LlamaTokenizer" if is_tokenizers_available() else None),
118
- ("deepseek_v3", "LlamaTokenizer" if is_tokenizers_available() else None),
119
- ("deepseek_vl", "LlamaTokenizer" if is_tokenizers_available() else None),
120
- ("deepseek_vl_hybrid", "LlamaTokenizer" if is_tokenizers_available() else None),
121
110
  ("dia", "DiaTokenizer"),
122
- ("diffllama", "LlamaTokenizer" if is_tokenizers_available() else None),
123
111
  ("distilbert", "BertTokenizer" if is_tokenizers_available() else None),
124
- ("dpr", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None),
112
+ ("dpr", "DPRQuestionEncoderTokenizer" if is_tokenizers_available() else None),
125
113
  ("electra", "BertTokenizer" if is_tokenizers_available() else None),
126
114
  ("emu3", "GPT2Tokenizer" if is_tokenizers_available() else None),
127
115
  ("ernie", "BertTokenizer" if is_tokenizers_available() else None),
128
- ("ernie4_5", "LlamaTokenizer" if is_tokenizers_available() else None),
129
- ("ernie4_5_moe", "LlamaTokenizer" if is_tokenizers_available() else None),
130
116
  ("esm", "EsmTokenizer"),
131
117
  ("exaone4", "GPT2Tokenizer" if is_tokenizers_available() else None),
132
- ("falcon", "TokenizersBackend" if is_tokenizers_available() else None),
133
- ("falcon_mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
118
+ ("falcon_mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
134
119
  ("fastspeech2_conformer", "FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None),
135
120
  ("flaubert", "FlaubertTokenizer"),
136
121
  ("flava", "BertTokenizer" if is_tokenizers_available() else None),
137
122
  ("flex_olmo", "GPT2Tokenizer" if is_tokenizers_available() else None),
138
123
  ("florence2", "BartTokenizer" if is_tokenizers_available() else None),
139
- ("fnet", "FNetTokenizerFast" if is_tokenizers_available() else None),
124
+ ("fnet", "FNetTokenizer" if is_tokenizers_available() else None),
140
125
  ("fsmt", "FSMTTokenizer"),
141
126
  ("funnel", "FunnelTokenizer" if is_tokenizers_available() else None),
142
- ("fuyu", "TokenizersBackend" if is_tokenizers_available() else None),
143
- ("gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
144
- ("gemma2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
145
- ("gemma3", "GemmaTokenizerFast" if is_tokenizers_available() else None),
146
- ("gemma3_text", "GemmaTokenizerFast" if is_tokenizers_available() else None),
147
- ("gemma3n", "GemmaTokenizerFast" if is_tokenizers_available() else None),
148
- ("gemma3n_text", "GemmaTokenizerFast" if is_tokenizers_available() else None),
127
+ ("gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
128
+ ("gemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
129
+ ("gemma3", "GemmaTokenizer" if is_tokenizers_available() else None),
130
+ ("gemma3_text", "GemmaTokenizer" if is_tokenizers_available() else None),
131
+ ("gemma3n", "GemmaTokenizer" if is_tokenizers_available() else None),
132
+ ("gemma3n_text", "GemmaTokenizer" if is_tokenizers_available() else None),
149
133
  ("git", "BertTokenizer" if is_tokenizers_available() else None),
150
- ("glm", "TokenizersBackend" if is_tokenizers_available() else None),
151
- ("glm4", "TokenizersBackend" if is_tokenizers_available() else None),
152
- ("glm4_moe", "TokenizersBackend" if is_tokenizers_available() else None),
153
- ("glm4v", "TokenizersBackend" if is_tokenizers_available() else None),
154
- ("glm4v_moe", "TokenizersBackend" if is_tokenizers_available() else None),
155
- ("got_ocr2", "TokenizersBackend" if is_tokenizers_available() else None),
156
134
  ("gpt-sw3", "GPTSw3Tokenizer" if is_sentencepiece_available() else None),
157
135
  ("gpt2", "GPT2Tokenizer" if is_tokenizers_available() else None),
158
136
  ("gpt_bigcode", "GPT2Tokenizer" if is_tokenizers_available() else None),
159
137
  ("gpt_neo", "GPT2Tokenizer" if is_tokenizers_available() else None),
160
138
  ("gpt_neox", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
161
139
  ("gpt_neox_japanese", "GPTNeoXJapaneseTokenizer"),
162
- ("gpt_oss", "TokenizersBackend" if is_tokenizers_available() else None),
163
140
  ("gptj", "GPT2Tokenizer" if is_tokenizers_available() else None),
164
141
  ("granite", "GPT2Tokenizer"),
165
142
  ("granitemoe", "GPT2Tokenizer"),
166
143
  ("granitemoehybrid", "GPT2Tokenizer"),
167
144
  ("granitemoeshared", "GPT2Tokenizer"),
168
145
  ("grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
169
- ("groupvit", "CLIPTokenizerFast" if is_tokenizers_available() else None),
170
- ("helium", "TokenizersBackend" if is_tokenizers_available() else None),
146
+ ("groupvit", "CLIPTokenizer" if is_tokenizers_available() else None),
171
147
  ("herbert", "HerbertTokenizer" if is_tokenizers_available() else None),
172
148
  ("hubert", "Wav2Vec2CTCTokenizer"),
173
149
  ("ibert", "RobertaTokenizer"),
174
150
  ("idefics", "LlamaTokenizer" if is_tokenizers_available() else None),
175
151
  ("idefics2", "LlamaTokenizer" if is_tokenizers_available() else None),
176
- ("idefics3", "LlamaTokenizer" if is_tokenizers_available() else None),
177
152
  ("instructblip", "GPT2Tokenizer" if is_tokenizers_available() else None),
178
153
  ("instructblipvideo", "GPT2Tokenizer" if is_tokenizers_available() else None),
179
- ("internvl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
180
- ("jamba", "LlamaTokenizer" if is_tokenizers_available() else None),
181
- ("janus", "LlamaTokenizer" if is_tokenizers_available() else None),
182
- ("jetmoe", "LlamaTokenizer" if is_tokenizers_available() else None),
154
+ ("internvl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
155
+ ("jais2", "GPT2Tokenizer" if is_tokenizers_available() else None),
183
156
  ("kosmos-2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
184
- ("kosmos-2.5", "TokenizersBackend" if is_tokenizers_available() else None),
157
+ ("lasr_ctc", "ParakeetTokenizer" if is_tokenizers_available() else None),
158
+ ("lasr_encoder", "ParakeetTokenizer" if is_tokenizers_available() else None),
185
159
  ("layoutlm", "BertTokenizer" if is_tokenizers_available() else None),
186
160
  ("layoutlmv2", "LayoutLMv2Tokenizer" if is_tokenizers_available() else None),
187
161
  ("layoutlmv3", "LayoutLMv3Tokenizer" if is_tokenizers_available() else None),
188
162
  ("layoutxlm", "LayoutXLMTokenizer" if is_tokenizers_available() else None),
189
163
  ("led", "LEDTokenizer" if is_tokenizers_available() else None),
190
- ("lfm2_vl", "TokenizersBackend" if is_tokenizers_available() else None),
191
164
  ("lilt", "RobertaTokenizer" if is_tokenizers_available() else None),
192
- ("llama", "LlamaTokenizer" if is_tokenizers_available() else None),
193
- ("llama4", "LlamaTokenizer" if is_tokenizers_available() else None),
194
- ("llama4_text", "LlamaTokenizer" if is_tokenizers_available() else None),
195
- ("llava", "LlamaTokenizer" if is_tokenizers_available() else None),
196
- ("llava_next", "LlamaTokenizer" if is_tokenizers_available() else None),
197
- ("llava_next_video", "LlamaTokenizer" if is_tokenizers_available() else None),
198
- ("llava_onevision", "LlamaTokenizer" if is_tokenizers_available() else None),
199
165
  ("longformer", "RobertaTokenizer" if is_tokenizers_available() else None),
200
166
  ("longt5", "T5Tokenizer" if is_tokenizers_available() else None),
201
167
  ("luke", "LukeTokenizer"),
202
168
  ("lxmert", "LxmertTokenizer" if is_tokenizers_available() else None),
203
169
  ("m2m_100", "M2M100Tokenizer" if is_sentencepiece_available() else None),
204
- ("mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
205
- ("mamba2", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
170
+ ("mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
171
+ ("mamba2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
206
172
  ("marian", "MarianTokenizer" if is_sentencepiece_available() else None),
173
+ ("markuplm", "MarkupLMTokenizer" if is_tokenizers_available() else None),
207
174
  ("mbart", "MBartTokenizer" if is_tokenizers_available() else None),
208
175
  ("mbart50", "MBart50Tokenizer" if is_tokenizers_available() else None),
209
176
  ("mega", "RobertaTokenizer"),
210
177
  ("megatron-bert", "BertTokenizer" if is_tokenizers_available() else None),
211
- ("metaclip_2", "XLMRobertaTokenizerFast" if is_tokenizers_available() else None),
178
+ ("metaclip_2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
212
179
  ("mgp-str", "MgpstrTokenizer"),
213
- ("minimax", "GPT2Tokenizer" if is_tokenizers_available() else None),
214
180
  (
215
181
  "ministral3",
216
- (
217
- "MistralCommonBackend"
218
- if is_mistral_common_available()
219
- else ("LlamaTokenizer" if is_sentencepiece_available() else None),
220
- "LlamaTokenizer" if is_tokenizers_available() and not is_mistral_common_available() else None,
221
- ),
182
+ "MistralCommonBackend"
183
+ if is_mistral_common_available()
184
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
222
185
  ),
223
186
  (
224
187
  "mistral",
225
188
  "MistralCommonBackend"
226
189
  if is_mistral_common_available()
227
- else ("LlamaTokenizer" if is_tokenizers_available() else None),
190
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
228
191
  ),
229
192
  (
230
193
  "mistral3",
231
- (
232
- "MistralCommonBackend"
233
- if is_mistral_common_available()
234
- else ("LlamaTokenizer" if is_sentencepiece_available() else None),
235
- "LlamaTokenizer" if is_tokenizers_available() and not is_mistral_common_available() else None,
236
- ),
194
+ "MistralCommonBackend"
195
+ if is_mistral_common_available()
196
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
237
197
  ),
238
198
  (
239
199
  "mixtral",
240
200
  "MistralCommonBackend"
241
201
  if is_mistral_common_available()
242
- else ("LlamaTokenizer" if is_tokenizers_available() else None),
202
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
243
203
  ),
244
- ("mllama", "LlamaTokenizer" if is_tokenizers_available() else None),
245
204
  ("mluke", "MLukeTokenizer" if is_sentencepiece_available() else None),
246
205
  ("mm-grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
247
206
  ("mobilebert", "MobileBertTokenizer" if is_tokenizers_available() else None),
248
- ("modernbert", "TokenizersBackend" if is_tokenizers_available() else None),
249
- ("moonshine", "TokenizersBackend" if is_tokenizers_available() else None),
250
- ("moshi", "TokenizersBackend" if is_tokenizers_available() else None),
251
207
  ("mpnet", "MPNetTokenizer" if is_tokenizers_available() else None),
252
- ("mpt", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
208
+ ("mpt", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
253
209
  ("mra", "RobertaTokenizer"),
254
210
  ("mt5", "T5Tokenizer" if is_tokenizers_available() else None),
255
211
  ("musicgen", "T5Tokenizer" if is_tokenizers_available() else None),
256
212
  ("musicgen_melody", "T5Tokenizer" if is_tokenizers_available() else None),
257
213
  ("mvp", "MvpTokenizer" if is_tokenizers_available() else None),
258
214
  ("myt5", "MyT5Tokenizer"),
259
- ("nemotron", "TokenizersBackend" if is_tokenizers_available() else None),
260
215
  ("nezha", "BertTokenizer" if is_tokenizers_available() else None),
261
216
  ("nllb", "NllbTokenizer" if is_tokenizers_available() else None),
262
217
  ("nllb-moe", "NllbTokenizer" if is_tokenizers_available() else None),
263
218
  ("nougat", "NougatTokenizer" if is_tokenizers_available() else None),
264
- ("nystromformer", "AlbertTokenizerFast" if is_tokenizers_available() else None),
265
- ("olmo", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
266
- ("olmo2", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
219
+ ("nystromformer", "AlbertTokenizer" if is_tokenizers_available() else None),
220
+ ("olmo", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
221
+ ("olmo2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
267
222
  ("olmo3", "GPT2Tokenizer" if is_tokenizers_available() else None),
268
- ("olmoe", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
269
- ("omdet-turbo", "CLIPTokenizerFast" if is_tokenizers_available() else None),
270
- ("oneformer", "CLIPTokenizerFast" if is_tokenizers_available() else None),
223
+ ("olmoe", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
224
+ ("omdet-turbo", "CLIPTokenizer" if is_tokenizers_available() else None),
225
+ ("oneformer", "CLIPTokenizer" if is_tokenizers_available() else None),
271
226
  ("openai-gpt", "OpenAIGPTTokenizer" if is_tokenizers_available() else None),
272
227
  ("opt", "GPT2Tokenizer" if is_tokenizers_available() else None),
273
- ("ovis2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
274
- ("owlv2", "CLIPTokenizerFast" if is_tokenizers_available() else None),
275
- ("owlvit", "CLIPTokenizerFast" if is_tokenizers_available() else None),
276
- ("paddleocr_vl", "TokenizersBackend" if is_tokenizers_available() else None),
277
- ("paligemma", "LlamaTokenizer" if is_tokenizers_available() else None),
228
+ ("ovis2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
229
+ ("owlv2", "CLIPTokenizer" if is_tokenizers_available() else None),
230
+ ("owlvit", "CLIPTokenizer" if is_tokenizers_available() else None),
278
231
  ("pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
279
232
  ("pegasus_x", "PegasusTokenizer" if is_tokenizers_available() else None),
280
233
  ("perceiver", "PerceiverTokenizer"),
281
- ("persimmon", "LlamaTokenizer" if is_tokenizers_available() else None),
282
234
  ("phi", "GPT2Tokenizer" if is_tokenizers_available() else None),
283
- ("phi3", "LlamaTokenizer" if is_tokenizers_available() else None),
284
- ("phimoe", "LlamaTokenizer" if is_tokenizers_available() else None),
285
235
  ("phobert", "PhobertTokenizer"),
286
236
  ("pix2struct", "T5Tokenizer" if is_tokenizers_available() else None),
287
237
  (
@@ -293,21 +243,21 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
293
243
  ("plbart", "PLBartTokenizer" if is_tokenizers_available() else None),
294
244
  ("prophetnet", "ProphetNetTokenizer"),
295
245
  ("qdqbert", "BertTokenizer" if is_tokenizers_available() else None),
296
- ("qwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
297
- ("qwen2_5_omni", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
298
- ("qwen2_5_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
299
- ("qwen2_audio", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
300
- ("qwen2_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
301
- ("qwen2_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
302
- ("qwen3", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
303
- ("qwen3_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
304
- ("qwen3_next", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
305
- ("qwen3_omni_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
306
- ("qwen3_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
307
- ("qwen3_vl_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
246
+ ("qwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
247
+ ("qwen2_5_omni", "Qwen2Tokenizer" if is_tokenizers_available() else None),
248
+ ("qwen2_5_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
249
+ ("qwen2_audio", "Qwen2Tokenizer" if is_tokenizers_available() else None),
250
+ ("qwen2_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
251
+ ("qwen2_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
252
+ ("qwen3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
253
+ ("qwen3_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
254
+ ("qwen3_next", "Qwen2Tokenizer" if is_tokenizers_available() else None),
255
+ ("qwen3_omni_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
256
+ ("qwen3_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
257
+ ("qwen3_vl_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
308
258
  ("rag", "RagTokenizer"),
309
259
  ("realm", "BertTokenizer" if is_tokenizers_available() else None),
310
- ("recurrent_gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
260
+ ("recurrent_gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
311
261
  ("reformer", "ReformerTokenizer" if is_tokenizers_available() else None),
312
262
  ("rembert", "RemBertTokenizer" if is_tokenizers_available() else None),
313
263
  ("retribert", "BertTokenizer" if is_tokenizers_available() else None),
@@ -315,54 +265,51 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
315
265
  ("roberta-prelayernorm", "RobertaTokenizer"),
316
266
  ("roc_bert", "RoCBertTokenizer"),
317
267
  ("roformer", "RoFormerTokenizer" if is_tokenizers_available() else None),
318
- ("rwkv", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
268
+ ("rwkv", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
319
269
  ("seamless_m4t", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
320
270
  ("seamless_m4t_v2", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
321
- ("shieldgemma2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
271
+ ("shieldgemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
322
272
  ("siglip", "SiglipTokenizer" if is_sentencepiece_available() else None),
323
- ("siglip2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
324
- ("smollm3", "TokenizersBackend" if is_tokenizers_available() else None),
273
+ ("siglip2", "GemmaTokenizer" if is_tokenizers_available() else None),
325
274
  ("speech_to_text", "Speech2TextTokenizer" if is_sentencepiece_available() else None),
326
275
  ("speecht5", "SpeechT5Tokenizer" if is_sentencepiece_available() else None),
327
276
  ("splinter", "SplinterTokenizer"),
328
277
  ("squeezebert", "BertTokenizer" if is_tokenizers_available() else None),
329
- ("stablelm", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
278
+ ("stablelm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
330
279
  ("starcoder2", "GPT2Tokenizer" if is_tokenizers_available() else None),
331
280
  ("switch_transformers", "T5Tokenizer" if is_tokenizers_available() else None),
332
281
  ("t5", "T5Tokenizer" if is_tokenizers_available() else None),
333
- ("t5gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
282
+ ("t5gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
334
283
  ("tapas", "TapasTokenizer"),
335
284
  ("trocr", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
336
285
  ("tvp", "BertTokenizer" if is_tokenizers_available() else None),
337
286
  ("udop", "UdopTokenizer" if is_tokenizers_available() else None),
338
287
  ("umt5", "T5Tokenizer" if is_tokenizers_available() else None),
339
- ("video_llava", "LlamaTokenizer" if is_tokenizers_available() else None),
288
+ ("unispeech", "Wav2Vec2CTCTokenizer"),
289
+ ("unispeech-sat", "Wav2Vec2CTCTokenizer"),
340
290
  ("vilt", "BertTokenizer" if is_tokenizers_available() else None),
341
- ("vipllava", "LlamaTokenizer" if is_tokenizers_available() else None),
342
291
  ("visual_bert", "BertTokenizer" if is_tokenizers_available() else None),
343
292
  ("vits", "VitsTokenizer"),
344
293
  (
345
294
  "voxtral",
346
295
  "MistralCommonBackend"
347
296
  if is_mistral_common_available()
348
- else ("LlamaTokenizer" if is_tokenizers_available() else None),
297
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
349
298
  ),
350
299
  ("wav2vec2", "Wav2Vec2CTCTokenizer"),
351
300
  ("wav2vec2-bert", "Wav2Vec2CTCTokenizer"),
352
301
  ("wav2vec2-conformer", "Wav2Vec2CTCTokenizer"),
353
302
  ("wav2vec2_phoneme", "Wav2Vec2PhonemeCTCTokenizer"),
354
303
  ("whisper", "WhisperTokenizer" if is_tokenizers_available() else None),
355
- ("xclip", "CLIPTokenizerFast" if is_tokenizers_available() else None),
304
+ ("xclip", "CLIPTokenizer" if is_tokenizers_available() else None),
356
305
  ("xglm", "XGLMTokenizer" if is_tokenizers_available() else None),
357
306
  ("xlm", "XLMTokenizer"),
358
307
  ("xlm-roberta", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
359
308
  ("xlm-roberta-xl", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
360
309
  ("xlnet", "XLNetTokenizer" if is_tokenizers_available() else None),
361
- ("xlstm", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
362
- ("xmod", "XLMRobertaTokenizerFast" if is_tokenizers_available() else None),
310
+ ("xlstm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
311
+ ("xmod", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
363
312
  ("yoso", "AlbertTokenizer" if is_tokenizers_available() else None),
364
- ("zamba", "LlamaTokenizer" if is_tokenizers_available() else None),
365
- ("zamba2", "LlamaTokenizer" if is_tokenizers_available() else None),
366
313
  ]
367
314
  )
368
315
 
@@ -662,11 +609,43 @@ class AutoTokenizer:
662
609
 
663
610
  return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
664
611
 
612
+ if gguf_file:
613
+ gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
614
+ config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
615
+ config = AutoConfig.for_model(**config_dict)
616
+ elif config is None:
617
+ try:
618
+ config = AutoConfig.from_pretrained(
619
+ pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
620
+ )
621
+ except Exception:
622
+ config = PreTrainedConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
623
+
624
+ config_model_type = config.model_type
625
+
665
626
  # Next, let's try to use the tokenizer_config file to get the tokenizer class.
666
627
  tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
628
+ tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
629
+ # If there is a config, check whether the tokenizer class registered for its model type differs from the one in the tokenizer config; if it does, assume we need to use `TokenizersBackend`
630
+ if (
631
+ tokenizer_config_class is not None
632
+ and config_model_type is not None
633
+ and config_model_type != ""
634
+ and TOKENIZER_MAPPING_NAMES.get(config_model_type, "").replace("Fast", "")
635
+ != tokenizer_config_class.replace("Fast", "")
636
+ ):
637
+ # new model, but we ignore it unless the model type is the same
638
+ try:
639
+ return TokenizersBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
640
+ except Exception:
641
+ return tokenizer_class_from_name(tokenizer_config_class).from_pretrained(
642
+ pretrained_model_name_or_path, *inputs, **kwargs
643
+ )
644
+
667
645
  if "_commit_hash" in tokenizer_config:
668
646
  kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
669
- config_tokenizer_class = tokenizer_config.get("tokenizer_class")
647
+
648
+ # Check for auto_map early to handle dynamic tokenizers properly
670
649
  tokenizer_auto_map = None
671
650
  if "auto_map" in tokenizer_config:
672
651
  if isinstance(tokenizer_config["auto_map"], (tuple, list)):
@@ -675,34 +654,15 @@ class AutoTokenizer:
675
654
  else:
676
655
  tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)
677
656
 
678
- # If that did not work, let's try to use the config.
679
- if config_tokenizer_class is None:
680
- if not isinstance(config, PreTrainedConfig):
681
- if gguf_file:
682
- gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
683
- config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
684
- config = AutoConfig.for_model(**config_dict)
685
- else:
686
- config = AutoConfig.from_pretrained(
687
- pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
688
- )
689
- config_tokenizer_class = config.tokenizer_class
690
- if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
691
- tokenizer_auto_map = config.auto_map["AutoTokenizer"]
692
-
693
- if (
694
- config_tokenizer_class is not None
695
- and config_tokenizer_class != "TokenizersBackend"
696
- and "Fast" in config_tokenizer_class
697
- ):
698
- config_tokenizer_class = config_tokenizer_class[:-4]
657
+ if tokenizer_config_class:
658
+ tokenizer_config_class = tokenizer_config_class.replace("Fast", "")
699
659
 
700
660
  has_remote_code = tokenizer_auto_map is not None
701
661
  has_local_code = type(config) in TOKENIZER_MAPPING or (
702
- config_tokenizer_class is not None
662
+ tokenizer_config_class is not None
703
663
  and (
704
- tokenizer_class_from_name(config_tokenizer_class) is not None
705
- or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
664
+ tokenizer_class_from_name(tokenizer_config_class) is not None
665
+ or tokenizer_class_from_name(tokenizer_config_class + "Fast") is not None
706
666
  )
707
667
  )
708
668
  if has_remote_code:
@@ -726,19 +686,24 @@ class AutoTokenizer:
726
686
  return tokenizer_class.from_pretrained(
727
687
  pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
728
688
  )
729
- elif config_tokenizer_class is not None:
730
- fast_tokenizer_class = None
731
- if fast_tokenizer_class is None:
732
- tokenizer_class_candidate = config_tokenizer_class
733
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
734
- if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
735
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
736
- if tokenizer_class.__name__ == "PythonBackend": # unless you inherit from it?
737
- tokenizer_class = TokenizersBackend
738
- else:
739
- tokenizer_class = fast_tokenizer_class
689
+ elif tokenizer_config_class is not None:
690
+ tokenizer_class_candidate = tokenizer_config_class
691
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
692
+ if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
693
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
694
+ if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
695
+ tokenizer_class = TokenizersBackend
696
+ # Fallback to TokenizersBackend if the class wasn't found
697
+ if tokenizer_class is None:
698
+ tokenizer_class = TokenizersBackend
740
699
 
741
700
  return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
701
+ elif getattr(config, "tokenizer_class"):
702
+ _class = config.tokenizer_class
703
+ if "PreTrainedTokenizerFast" not in _class:
704
+ _class = _class.replace("Fast", "")
705
+ tokenizer_class = tokenizer_class_from_name(_class)
706
+ return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
742
707
 
743
708
  # Otherwise we have to be creative.
744
709
  # if model is an encoder decoder, the encoder tokenizer class is used by default
@@ -752,12 +717,26 @@ class AutoTokenizer:
752
717
  )
753
718
  config = config.encoder
754
719
 
755
- model_type = config_class_to_model_type(type(config).__name__)
720
+ model_type = config_class_to_model_type(type(config).__name__) or config.get("model_type", None)
756
721
  if model_type is not None:
757
722
  tokenizer_class = TOKENIZER_MAPPING.get(type(config), TokenizersBackend)
758
723
  if tokenizer_class is not None:
759
724
  return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
760
725
 
726
+ # Fallback: try tokenizer_class from tokenizer_config.json
727
+ tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
728
+ if tokenizer_config_class is not None:
729
+ if tokenizer_config_class != "TokenizersBackend" and "Fast" in tokenizer_config_class:
730
+ tokenizer_config_class = tokenizer_config_class[:-4]
731
+ tokenizer_class = tokenizer_class_from_name(tokenizer_config_class)
732
+ if tokenizer_class is None and not tokenizer_config_class.endswith("Fast"):
733
+ tokenizer_class = tokenizer_class_from_name(tokenizer_config_class + "Fast")
734
+ if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
735
+ tokenizer_class = TokenizersBackend
736
+ if tokenizer_class is None:
737
+ tokenizer_class = TokenizersBackend
738
+ return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
739
+
761
740
  raise ValueError(
762
741
  f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"
763
742
  f"Model type should be one of {', '.join(c.__name__ for c in TOKENIZER_MAPPING)}."
@@ -53,6 +53,7 @@ if TYPE_CHECKING:
53
53
  else:
54
54
  VIDEO_PROCESSOR_MAPPING_NAMES = OrderedDict(
55
55
  [
56
+ ("ernie4_5_vl_moe", "Ernie4_5_VL_MoeVideoProcessor"),
56
57
  ("glm46v", "Glm46VVideoProcessor"),
57
58
  ("glm4v", "Glm4vVideoProcessor"),
58
59
  ("instructblip", "InstructBlipVideoVideoProcessor"),
@@ -60,6 +61,8 @@ else:
60
61
  ("internvl", "InternVLVideoProcessor"),
61
62
  ("llava_next_video", "LlavaNextVideoVideoProcessor"),
62
63
  ("llava_onevision", "LlavaOnevisionVideoProcessor"),
64
+ ("pe_audio_video", "PeVideoVideoProcessor"),
65
+ ("pe_video", "PeVideoVideoProcessor"),
63
66
  ("perception_lm", "PerceptionLMVideoProcessor"),
64
67
  ("qwen2_5_omni", "Qwen2VLVideoProcessor"),
65
68
  ("qwen2_5_vl", "Qwen2VLVideoProcessor"),
@@ -373,9 +376,9 @@ class AutoVideoProcessor:
373
376
  video_processor_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
374
377
  _ = kwargs.pop("code_revision", None)
375
378
  video_processor_class.register_for_auto_class()
376
- return video_processor_class.from_dict(config_dict, **kwargs)
379
+ return video_processor_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
377
380
  elif video_processor_class is not None:
378
- return video_processor_class.from_dict(config_dict, **kwargs)
381
+ return video_processor_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
379
382
  # Last try: we use the VIDEO_PROCESSOR_MAPPING.
380
383
  elif type(config) in VIDEO_PROCESSOR_MAPPING:
381
384
  video_processor_class = VIDEO_PROCESSOR_MAPPING[type(config)]
@@ -471,6 +471,7 @@ class AyaVisionForConditionalGeneration(AyaVisionPreTrainedModel, GenerationMixi
471
471
  attention_mask=None,
472
472
  cache_position=None,
473
473
  logits_to_keep=None,
474
+ is_first_iteration=False,
474
475
  **kwargs,
475
476
  ):
476
477
  # Overwritten -- in specific circumstances we don't want to forward image inputs to the model
@@ -482,12 +483,15 @@ class AyaVisionForConditionalGeneration(AyaVisionPreTrainedModel, GenerationMixi
482
483
  attention_mask=attention_mask,
483
484
  cache_position=cache_position,
484
485
  logits_to_keep=logits_to_keep,
486
+ is_first_iteration=is_first_iteration,
485
487
  **kwargs,
486
488
  )
487
489
 
488
- if cache_position[0] == 0:
489
- # If we're in cached decoding stage, pixel values should be None because input ids do not contain special image token anymore
490
- # Otherwise we need pixel values to be passed to model
490
+ if is_first_iteration or not kwargs.get("use_cache", True):
491
+ # Pixel values are used only in the first iteration if available
492
+ # In subsequent iterations, they are already merged with text and cached
493
+ # NOTE: first iteration doesn't have to be prefill, it can be the first
494
+ # iteration with a question and cached system prompt (continue generate from cache)
491
495
  model_inputs["pixel_values"] = pixel_values
492
496
 
493
497
  return model_inputs