transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (835)
  1. transformers/__init__.py +49 -3
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/cli/serve.py +47 -17
  6. transformers/configuration_utils.py +114 -70
  7. transformers/conversion_mapping.py +83 -7
  8. transformers/convert_slow_tokenizer.py +225 -10
  9. transformers/core_model_loading.py +374 -147
  10. transformers/data/data_collator.py +12 -4
  11. transformers/dependency_versions_table.py +2 -3
  12. transformers/dynamic_module_utils.py +1 -2
  13. transformers/feature_extraction_utils.py +55 -24
  14. transformers/file_utils.py +0 -1
  15. transformers/generation/__init__.py +11 -1
  16. transformers/generation/candidate_generator.py +79 -31
  17. transformers/generation/configuration_utils.py +165 -124
  18. transformers/generation/continuous_batching/__init__.py +4 -0
  19. transformers/generation/continuous_batching/cache.py +47 -18
  20. transformers/generation/continuous_batching/cache_manager.py +131 -34
  21. transformers/generation/continuous_batching/continuous_api.py +228 -136
  22. transformers/generation/continuous_batching/requests.py +28 -1
  23. transformers/generation/continuous_batching/scheduler.py +11 -4
  24. transformers/generation/stopping_criteria.py +1 -1
  25. transformers/generation/utils.py +108 -110
  26. transformers/generation/watermarking.py +8 -5
  27. transformers/image_processing_base.py +3 -14
  28. transformers/image_processing_utils_fast.py +15 -4
  29. transformers/initialization.py +37 -0
  30. transformers/integrations/__init__.py +16 -2
  31. transformers/integrations/accelerate.py +58 -113
  32. transformers/integrations/aqlm.py +36 -66
  33. transformers/integrations/awq.py +46 -515
  34. transformers/integrations/bitnet.py +47 -105
  35. transformers/integrations/bitsandbytes.py +91 -202
  36. transformers/integrations/deepspeed.py +18 -2
  37. transformers/integrations/eetq.py +84 -81
  38. transformers/integrations/fbgemm_fp8.py +191 -145
  39. transformers/integrations/finegrained_fp8.py +241 -208
  40. transformers/integrations/flash_attention.py +2 -2
  41. transformers/integrations/fp_quant.py +92 -0
  42. transformers/integrations/ggml.py +11 -1
  43. transformers/integrations/higgs.py +37 -62
  44. transformers/integrations/hub_kernels.py +65 -8
  45. transformers/integrations/integration_utils.py +45 -0
  46. transformers/integrations/mistral.py +12 -0
  47. transformers/integrations/moe.py +240 -0
  48. transformers/integrations/mxfp4.py +28 -74
  49. transformers/integrations/peft.py +12 -29
  50. transformers/integrations/quanto.py +77 -56
  51. transformers/integrations/quark.py +55 -0
  52. transformers/integrations/spqr.py +42 -90
  53. transformers/integrations/tensor_parallel.py +167 -221
  54. transformers/integrations/torchao.py +32 -38
  55. transformers/integrations/vptq.py +40 -59
  56. transformers/modelcard.py +1 -2
  57. transformers/modeling_gguf_pytorch_utils.py +74 -19
  58. transformers/modeling_rope_utils.py +107 -86
  59. transformers/modeling_utils.py +611 -527
  60. transformers/models/__init__.py +22 -0
  61. transformers/models/afmoe/modeling_afmoe.py +10 -19
  62. transformers/models/afmoe/modular_afmoe.py +5 -13
  63. transformers/models/aimv2/modeling_aimv2.py +4 -0
  64. transformers/models/aimv2/modular_aimv2.py +4 -0
  65. transformers/models/albert/modeling_albert.py +3 -0
  66. transformers/models/albert/tokenization_albert.py +6 -12
  67. transformers/models/align/modeling_align.py +14 -6
  68. transformers/models/altclip/modeling_altclip.py +11 -3
  69. transformers/models/apertus/modeling_apertus.py +8 -6
  70. transformers/models/apertus/modular_apertus.py +4 -1
  71. transformers/models/arcee/modeling_arcee.py +5 -5
  72. transformers/models/aria/modeling_aria.py +12 -8
  73. transformers/models/aria/modular_aria.py +7 -3
  74. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  75. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  76. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  77. transformers/models/auto/auto_factory.py +1 -1
  78. transformers/models/auto/configuration_auto.py +38 -0
  79. transformers/models/auto/feature_extraction_auto.py +9 -3
  80. transformers/models/auto/image_processing_auto.py +5 -2
  81. transformers/models/auto/modeling_auto.py +37 -0
  82. transformers/models/auto/processing_auto.py +22 -10
  83. transformers/models/auto/tokenization_auto.py +147 -566
  84. transformers/models/auto/video_processing_auto.py +5 -2
  85. transformers/models/autoformer/modeling_autoformer.py +4 -0
  86. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  87. transformers/models/bamba/modeling_bamba.py +21 -21
  88. transformers/models/bamba/modular_bamba.py +17 -16
  89. transformers/models/bark/modeling_bark.py +11 -0
  90. transformers/models/bart/configuration_bart.py +0 -1
  91. transformers/models/bart/modeling_bart.py +14 -0
  92. transformers/models/barthez/tokenization_barthez.py +5 -10
  93. transformers/models/beit/image_processing_beit_fast.py +0 -1
  94. transformers/models/beit/modeling_beit.py +6 -1
  95. transformers/models/bert/modeling_bert.py +3 -0
  96. transformers/models/bert/tokenization_bert.py +8 -21
  97. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  98. transformers/models/big_bird/modeling_big_bird.py +9 -0
  99. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  100. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
  101. transformers/models/biogpt/modeling_biogpt.py +2 -0
  102. transformers/models/biogpt/modular_biogpt.py +2 -0
  103. transformers/models/bit/modeling_bit.py +16 -3
  104. transformers/models/bitnet/modeling_bitnet.py +5 -5
  105. transformers/models/blenderbot/modeling_blenderbot.py +12 -0
  106. transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
  107. transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
  108. transformers/models/blip/modeling_blip.py +2 -0
  109. transformers/models/blip/modeling_blip_text.py +10 -0
  110. transformers/models/blip_2/modeling_blip_2.py +4 -1
  111. transformers/models/bloom/modeling_bloom.py +17 -44
  112. transformers/models/blt/modeling_blt.py +164 -4
  113. transformers/models/blt/modular_blt.py +170 -5
  114. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  115. transformers/models/bridgetower/modeling_bridgetower.py +11 -1
  116. transformers/models/bros/modeling_bros.py +12 -0
  117. transformers/models/camembert/modeling_camembert.py +109 -106
  118. transformers/models/camembert/tokenization_camembert.py +8 -12
  119. transformers/models/canine/modeling_canine.py +11 -0
  120. transformers/models/canine/tokenization_canine.py +2 -0
  121. transformers/models/chameleon/modeling_chameleon.py +11 -5
  122. transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
  123. transformers/models/clap/feature_extraction_clap.py +2 -2
  124. transformers/models/clap/modeling_clap.py +30 -15
  125. transformers/models/clip/modeling_clip.py +2 -0
  126. transformers/models/clip/tokenization_clip.py +22 -44
  127. transformers/models/clipseg/modeling_clipseg.py +9 -0
  128. transformers/models/clvp/modeling_clvp.py +19 -3
  129. transformers/models/clvp/tokenization_clvp.py +1 -63
  130. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  131. transformers/models/codegen/modeling_codegen.py +13 -4
  132. transformers/models/codegen/tokenization_codegen.py +14 -43
  133. transformers/models/cohere/modeling_cohere.py +5 -4
  134. transformers/models/cohere/modular_cohere.py +2 -1
  135. transformers/models/cohere/tokenization_cohere.py +12 -42
  136. transformers/models/cohere2/modeling_cohere2.py +8 -7
  137. transformers/models/cohere2/modular_cohere2.py +5 -5
  138. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
  139. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  140. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  141. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  142. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  143. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  144. transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
  145. transformers/models/convbert/modeling_convbert.py +9 -0
  146. transformers/models/convnext/image_processing_convnext.py +2 -2
  147. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  148. transformers/models/convnext/modeling_convnext.py +2 -4
  149. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  150. transformers/models/csm/generation_csm.py +19 -22
  151. transformers/models/csm/modeling_csm.py +7 -4
  152. transformers/models/csm/modular_csm.py +2 -0
  153. transformers/models/ctrl/modeling_ctrl.py +15 -2
  154. transformers/models/cvt/modeling_cvt.py +7 -1
  155. transformers/models/cwm/modeling_cwm.py +5 -5
  156. transformers/models/d_fine/configuration_d_fine.py +3 -4
  157. transformers/models/d_fine/modeling_d_fine.py +48 -39
  158. transformers/models/d_fine/modular_d_fine.py +16 -4
  159. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  160. transformers/models/dab_detr/modeling_dab_detr.py +5 -1
  161. transformers/models/dac/modeling_dac.py +6 -6
  162. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  163. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  164. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  165. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  166. transformers/models/dbrx/configuration_dbrx.py +9 -1
  167. transformers/models/dbrx/modeling_dbrx.py +3 -3
  168. transformers/models/deberta/modeling_deberta.py +7 -0
  169. transformers/models/deberta/tokenization_deberta.py +11 -20
  170. transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
  171. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  172. transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
  173. transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
  174. transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
  175. transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
  176. transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
  177. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  178. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  179. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  180. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  181. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  182. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  183. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  184. transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
  185. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  186. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  187. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  188. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  189. transformers/models/detr/configuration_detr.py +1 -1
  190. transformers/models/detr/modeling_detr.py +13 -1
  191. transformers/models/dia/generation_dia.py +3 -10
  192. transformers/models/dia/modeling_dia.py +16 -4
  193. transformers/models/dia/modular_dia.py +11 -1
  194. transformers/models/dia/processing_dia.py +1 -1
  195. transformers/models/diffllama/modeling_diffllama.py +5 -5
  196. transformers/models/diffllama/modular_diffllama.py +2 -2
  197. transformers/models/dinat/modeling_dinat.py +3 -0
  198. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  199. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  200. transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
  201. transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
  202. transformers/models/distilbert/modeling_distilbert.py +11 -9
  203. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  204. transformers/models/doge/modeling_doge.py +3 -4
  205. transformers/models/doge/modular_doge.py +0 -1
  206. transformers/models/donut/image_processing_donut_fast.py +0 -1
  207. transformers/models/donut/modeling_donut_swin.py +18 -12
  208. transformers/models/dots1/modeling_dots1.py +23 -11
  209. transformers/models/dots1/modular_dots1.py +5 -3
  210. transformers/models/dpr/modeling_dpr.py +5 -0
  211. transformers/models/dpr/tokenization_dpr.py +12 -0
  212. transformers/models/dpt/configuration_dpt.py +1 -1
  213. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  214. transformers/models/dpt/modular_dpt.py +1 -2
  215. transformers/models/edgetam/configuration_edgetam.py +1 -1
  216. transformers/models/edgetam/modeling_edgetam.py +6 -3
  217. transformers/models/edgetam/modular_edgetam.py +15 -14
  218. transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
  219. transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
  220. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  221. transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
  222. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  223. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  224. transformers/models/efficientnet/modeling_efficientnet.py +7 -1
  225. transformers/models/electra/modeling_electra.py +7 -0
  226. transformers/models/emu3/modeling_emu3.py +12 -6
  227. transformers/models/emu3/modular_emu3.py +7 -1
  228. transformers/models/encodec/modeling_encodec.py +14 -0
  229. transformers/models/eomt/image_processing_eomt.py +13 -1
  230. transformers/models/eomt/image_processing_eomt_fast.py +60 -16
  231. transformers/models/eomt/modeling_eomt.py +7 -0
  232. transformers/models/eomt/modular_eomt.py +7 -0
  233. transformers/models/ernie/modeling_ernie.py +6 -0
  234. transformers/models/ernie/modular_ernie.py +6 -0
  235. transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
  236. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  237. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
  238. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
  239. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  240. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  241. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  242. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  243. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  244. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  245. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  246. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  247. transformers/models/esm/modeling_esm.py +6 -0
  248. transformers/models/esm/modeling_esmfold.py +11 -5
  249. transformers/models/evolla/modeling_evolla.py +13 -5
  250. transformers/models/evolla/modular_evolla.py +8 -0
  251. transformers/models/exaone4/modeling_exaone4.py +3 -3
  252. transformers/models/exaone4/modular_exaone4.py +0 -1
  253. transformers/models/falcon/modeling_falcon.py +9 -4
  254. transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
  255. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  256. transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
  257. transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
  258. transformers/models/fast_vlm/__init__.py +27 -0
  259. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  260. transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
  261. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  262. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
  263. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  264. transformers/models/flaubert/modeling_flaubert.py +21 -15
  265. transformers/models/flava/image_processing_flava_fast.py +0 -2
  266. transformers/models/flava/modeling_flava.py +10 -2
  267. transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
  268. transformers/models/florence2/modeling_florence2.py +22 -4
  269. transformers/models/florence2/modular_florence2.py +15 -1
  270. transformers/models/fnet/modeling_fnet.py +14 -0
  271. transformers/models/focalnet/modeling_focalnet.py +4 -0
  272. transformers/models/fsmt/modeling_fsmt.py +2 -0
  273. transformers/models/funnel/modeling_funnel.py +8 -0
  274. transformers/models/funnel/tokenization_funnel.py +17 -24
  275. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  276. transformers/models/fuyu/modeling_fuyu.py +3 -1
  277. transformers/models/fuyu/processing_fuyu.py +19 -3
  278. transformers/models/gemma/modeling_gemma.py +14 -16
  279. transformers/models/gemma/modular_gemma.py +9 -11
  280. transformers/models/gemma/tokenization_gemma.py +10 -27
  281. transformers/models/gemma2/modeling_gemma2.py +5 -5
  282. transformers/models/gemma2/modular_gemma2.py +3 -2
  283. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  284. transformers/models/gemma3/modeling_gemma3.py +42 -91
  285. transformers/models/gemma3/modular_gemma3.py +38 -87
  286. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  287. transformers/models/gemma3n/modeling_gemma3n.py +65 -218
  288. transformers/models/gemma3n/modular_gemma3n.py +68 -68
  289. transformers/models/git/modeling_git.py +183 -126
  290. transformers/models/glm/modeling_glm.py +5 -5
  291. transformers/models/glm4/modeling_glm4.py +5 -5
  292. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  293. transformers/models/glm46v/modeling_glm46v.py +3 -1
  294. transformers/models/glm46v/modular_glm46v.py +3 -0
  295. transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
  296. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  297. transformers/models/glm4v/configuration_glm4v.py +3 -1
  298. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  299. transformers/models/glm4v/modeling_glm4v.py +18 -8
  300. transformers/models/glm4v/modular_glm4v.py +17 -7
  301. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  302. transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
  303. transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
  304. transformers/models/glmasr/__init__.py +30 -0
  305. transformers/models/glmasr/configuration_glmasr.py +197 -0
  306. transformers/models/glmasr/modeling_glmasr.py +512 -0
  307. transformers/models/glmasr/modular_glmasr.py +433 -0
  308. transformers/models/glmasr/processing_glmasr.py +332 -0
  309. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  310. transformers/models/glpn/modeling_glpn.py +2 -0
  311. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  312. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  313. transformers/models/gpt2/modeling_gpt2.py +13 -6
  314. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  315. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
  316. transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
  317. transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
  318. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  319. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  320. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
  321. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  322. transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
  323. transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
  324. transformers/models/gptj/modeling_gptj.py +18 -6
  325. transformers/models/granite/modeling_granite.py +5 -5
  326. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  327. transformers/models/granitemoe/modeling_granitemoe.py +6 -9
  328. transformers/models/granitemoe/modular_granitemoe.py +1 -4
  329. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  330. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
  331. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  332. transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
  333. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  334. transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
  335. transformers/models/groupvit/modeling_groupvit.py +9 -1
  336. transformers/models/helium/modeling_helium.py +5 -4
  337. transformers/models/herbert/tokenization_herbert.py +9 -25
  338. transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
  339. transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
  340. transformers/models/hiera/modeling_hiera.py +4 -0
  341. transformers/models/hubert/modeling_hubert.py +7 -0
  342. transformers/models/hubert/modular_hubert.py +5 -0
  343. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
  344. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  345. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  346. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
  347. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  348. transformers/models/ibert/modeling_ibert.py +22 -0
  349. transformers/models/idefics/modeling_idefics.py +15 -21
  350. transformers/models/idefics2/modeling_idefics2.py +7 -1
  351. transformers/models/idefics3/modeling_idefics3.py +5 -1
  352. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  353. transformers/models/imagegpt/modeling_imagegpt.py +11 -3
  354. transformers/models/informer/modeling_informer.py +4 -0
  355. transformers/models/informer/modular_informer.py +1 -0
  356. transformers/models/instructblip/modeling_instructblip.py +2 -0
  357. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  358. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  359. transformers/models/internvl/modeling_internvl.py +13 -12
  360. transformers/models/internvl/modular_internvl.py +7 -13
  361. transformers/models/internvl/video_processing_internvl.py +0 -1
  362. transformers/models/jais2/__init__.py +27 -0
  363. transformers/models/jais2/configuration_jais2.py +152 -0
  364. transformers/models/jais2/modeling_jais2.py +486 -0
  365. transformers/models/jais2/modular_jais2.py +196 -0
  366. transformers/models/jamba/modeling_jamba.py +25 -20
  367. transformers/models/jamba/modular_jamba.py +17 -17
  368. transformers/models/janus/image_processing_janus_fast.py +0 -1
  369. transformers/models/janus/modeling_janus.py +16 -7
  370. transformers/models/janus/modular_janus.py +17 -7
  371. transformers/models/jetmoe/modeling_jetmoe.py +4 -4
  372. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  373. transformers/models/kosmos2/modeling_kosmos2.py +15 -2
  374. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  375. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  376. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
  377. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  378. transformers/models/lasr/__init__.py +29 -0
  379. transformers/models/lasr/configuration_lasr.py +248 -0
  380. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  381. transformers/models/lasr/modeling_lasr.py +730 -0
  382. transformers/models/lasr/modular_lasr.py +576 -0
  383. transformers/models/lasr/processing_lasr.py +94 -0
  384. transformers/models/lasr/tokenization_lasr.py +186 -0
  385. transformers/models/layoutlm/modeling_layoutlm.py +10 -3
  386. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  387. transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
  388. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
  389. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  390. transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
  391. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  392. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  393. transformers/models/led/modeling_led.py +12 -0
  394. transformers/models/levit/modeling_levit.py +21 -0
  395. transformers/models/lfm2/modeling_lfm2.py +5 -6
  396. transformers/models/lfm2/modular_lfm2.py +0 -1
  397. transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
  398. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  399. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  400. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  401. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  402. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  403. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  404. transformers/models/lightglue/modeling_lightglue.py +3 -1
  405. transformers/models/lightglue/modular_lightglue.py +1 -0
  406. transformers/models/lilt/modeling_lilt.py +23 -15
  407. transformers/models/llama/modeling_llama.py +5 -5
  408. transformers/models/llama/tokenization_llama.py +15 -43
  409. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  410. transformers/models/llama4/modeling_llama4.py +11 -6
  411. transformers/models/llava/image_processing_llava_fast.py +0 -1
  412. transformers/models/llava/modeling_llava.py +12 -7
  413. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  414. transformers/models/llava_next/modeling_llava_next.py +7 -3
  415. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  416. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  417. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  418. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  419. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  420. transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
  421. transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
  422. transformers/models/longformer/modeling_longformer.py +6 -0
  423. transformers/models/longt5/modeling_longt5.py +4 -4
  424. transformers/models/luke/modeling_luke.py +9 -0
  425. transformers/models/luke/tokenization_luke.py +11 -38
  426. transformers/models/lxmert/modeling_lxmert.py +2 -0
  427. transformers/models/m2m_100/modeling_m2m_100.py +14 -0
  428. transformers/models/mamba/modeling_mamba.py +16 -23
  429. transformers/models/mamba2/modeling_mamba2.py +24 -23
  430. transformers/models/marian/configuration_marian.py +1 -1
  431. transformers/models/marian/modeling_marian.py +8 -0
  432. transformers/models/markuplm/modeling_markuplm.py +9 -8
  433. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  434. transformers/models/mask2former/configuration_mask2former.py +3 -3
  435. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  436. transformers/models/mask2former/modeling_mask2former.py +11 -0
  437. transformers/models/maskformer/configuration_maskformer.py +3 -3
  438. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  439. transformers/models/maskformer/modeling_maskformer.py +11 -1
  440. transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
  441. transformers/models/mbart/configuration_mbart.py +1 -0
  442. transformers/models/mbart/modeling_mbart.py +14 -0
  443. transformers/models/mbart/tokenization_mbart.py +11 -52
  444. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  445. transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
  446. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  447. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  448. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  449. transformers/models/mimi/modeling_mimi.py +28 -5
  450. transformers/models/minimax/modeling_minimax.py +19 -6
  451. transformers/models/minimax/modular_minimax.py +12 -1
  452. transformers/models/ministral/modeling_ministral.py +5 -5
  453. transformers/models/ministral3/configuration_ministral3.py +1 -1
  454. transformers/models/ministral3/modeling_ministral3.py +5 -4
  455. transformers/models/mistral/modeling_mistral.py +5 -4
  456. transformers/models/mistral3/modeling_mistral3.py +10 -4
  457. transformers/models/mistral3/modular_mistral3.py +3 -1
  458. transformers/models/mixtral/modeling_mixtral.py +15 -7
  459. transformers/models/mixtral/modular_mixtral.py +6 -2
  460. transformers/models/mlcd/modeling_mlcd.py +6 -0
  461. transformers/models/mlcd/modular_mlcd.py +4 -0
  462. transformers/models/mllama/modeling_mllama.py +15 -4
  463. transformers/models/mluke/tokenization_mluke.py +6 -6
  464. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  465. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
  466. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  467. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  468. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  469. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  470. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  471. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  472. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  473. transformers/models/mobilevit/modeling_mobilevit.py +7 -0
  474. transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
  475. transformers/models/modernbert/modeling_modernbert.py +16 -2
  476. transformers/models/modernbert/modular_modernbert.py +14 -1
  477. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
  478. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
  479. transformers/models/moonshine/modeling_moonshine.py +5 -3
  480. transformers/models/moshi/modeling_moshi.py +26 -53
  481. transformers/models/mpnet/modeling_mpnet.py +7 -0
  482. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  483. transformers/models/mpt/modeling_mpt.py +2 -0
  484. transformers/models/mra/modeling_mra.py +10 -1
  485. transformers/models/mt5/configuration_mt5.py +2 -3
  486. transformers/models/mt5/modeling_mt5.py +7 -10
  487. transformers/models/musicgen/modeling_musicgen.py +7 -9
  488. transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
  489. transformers/models/mvp/modeling_mvp.py +14 -0
  490. transformers/models/nanochat/modeling_nanochat.py +5 -5
  491. transformers/models/nemotron/modeling_nemotron.py +7 -5
  492. transformers/models/nllb/tokenization_nllb.py +8 -22
  493. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  494. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  495. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  496. transformers/models/nougat/tokenization_nougat.py +15 -68
  497. transformers/models/nystromformer/modeling_nystromformer.py +13 -0
  498. transformers/models/olmo/modeling_olmo.py +5 -5
  499. transformers/models/olmo/modular_olmo.py +2 -2
  500. transformers/models/olmo2/modeling_olmo2.py +5 -6
  501. transformers/models/olmo2/modular_olmo2.py +0 -1
  502. transformers/models/olmo3/modeling_olmo3.py +5 -5
  503. transformers/models/olmoe/modeling_olmoe.py +15 -7
  504. transformers/models/olmoe/modular_olmoe.py +4 -2
  505. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  506. transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
  507. transformers/models/oneformer/configuration_oneformer.py +3 -3
  508. transformers/models/oneformer/modeling_oneformer.py +11 -39
  509. transformers/models/openai/modeling_openai.py +15 -0
  510. transformers/models/openai/tokenization_openai.py +10 -46
  511. transformers/models/opt/modeling_opt.py +2 -0
  512. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  513. transformers/models/ovis2/modeling_ovis2.py +15 -3
  514. transformers/models/ovis2/modular_ovis2.py +8 -0
  515. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  516. transformers/models/owlv2/modeling_owlv2.py +11 -3
  517. transformers/models/owlv2/modular_owlv2.py +0 -2
  518. transformers/models/owlvit/modeling_owlvit.py +11 -3
  519. transformers/models/paddleocr_vl/__init__.py +32 -0
  520. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  521. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
  522. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  523. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
  524. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
  525. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  526. transformers/models/paligemma/modeling_paligemma.py +25 -17
  527. transformers/models/parakeet/configuration_parakeet.py +4 -6
  528. transformers/models/parakeet/modeling_parakeet.py +14 -6
  529. transformers/models/parakeet/modular_parakeet.py +7 -2
  530. transformers/models/parakeet/processing_parakeet.py +1 -0
  531. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  532. transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
  533. transformers/models/patchtst/modeling_patchtst.py +25 -6
  534. transformers/models/pe_audio/__init__.py +30 -0
  535. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  536. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  537. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  538. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  539. transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
  540. transformers/models/pe_audio_video/__init__.py +29 -0
  541. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  542. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  543. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  544. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  545. transformers/models/pe_video/__init__.py +30 -0
  546. transformers/models/pe_video/configuration_pe_video.py +211 -0
  547. transformers/models/pe_video/modeling_pe_video.py +636 -0
  548. transformers/models/pe_video/modular_pe_video.py +219 -0
  549. transformers/models/pe_video/processing_pe_video.py +10 -0
  550. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  551. transformers/models/pegasus/configuration_pegasus.py +1 -0
  552. transformers/models/pegasus/modeling_pegasus.py +8 -0
  553. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  554. transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
  555. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  556. transformers/models/perceiver/modeling_perceiver.py +13 -1
  557. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  558. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  559. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  560. transformers/models/persimmon/modeling_persimmon.py +3 -2
  561. transformers/models/phi/modeling_phi.py +5 -6
  562. transformers/models/phi/modular_phi.py +0 -1
  563. transformers/models/phi3/modeling_phi3.py +3 -2
  564. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
  565. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
  566. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  567. transformers/models/phimoe/modeling_phimoe.py +15 -7
  568. transformers/models/phimoe/modular_phimoe.py +3 -3
  569. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  570. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  571. transformers/models/pixio/__init__.py +30 -0
  572. transformers/models/pixio/configuration_pixio.py +151 -0
  573. transformers/models/pixio/modeling_pixio.py +507 -0
  574. transformers/models/pixio/modular_pixio.py +404 -0
  575. transformers/models/pixtral/modeling_pixtral.py +3 -2
  576. transformers/models/pixtral/processing_pixtral.py +3 -1
  577. transformers/models/plbart/configuration_plbart.py +1 -0
  578. transformers/models/plbart/modeling_plbart.py +13 -0
  579. transformers/models/plbart/modular_plbart.py +8 -0
  580. transformers/models/plbart/tokenization_plbart.py +0 -2
  581. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  582. transformers/models/poolformer/modeling_poolformer.py +13 -1
  583. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  584. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  585. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  586. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  587. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  588. transformers/models/prophetnet/modeling_prophetnet.py +5 -1
  589. transformers/models/pvt/modeling_pvt.py +2 -0
  590. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  591. transformers/models/qwen2/modeling_qwen2.py +5 -5
  592. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  593. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  594. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
  595. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
  596. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  597. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
  598. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
  599. transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
  600. transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
  601. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  602. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  603. transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
  604. transformers/models/qwen3/modeling_qwen3.py +5 -5
  605. transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
  606. transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
  607. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  608. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
  609. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
  610. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  611. transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
  612. transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
  613. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  614. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
  615. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
  616. transformers/models/rag/configuration_rag.py +0 -8
  617. transformers/models/rag/modeling_rag.py +8 -9
  618. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
  619. transformers/models/reformer/modeling_reformer.py +13 -1
  620. transformers/models/reformer/tokenization_reformer.py +11 -28
  621. transformers/models/regnet/modeling_regnet.py +10 -1
  622. transformers/models/rembert/modeling_rembert.py +13 -1
  623. transformers/models/rembert/tokenization_rembert.py +3 -10
  624. transformers/models/resnet/modeling_resnet.py +19 -5
  625. transformers/models/roberta/modeling_roberta.py +3 -0
  626. transformers/models/roberta/modular_roberta.py +3 -0
  627. transformers/models/roberta/tokenization_roberta.py +18 -27
  628. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  629. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  630. transformers/models/roformer/modeling_roformer.py +6 -0
  631. transformers/models/roformer/tokenization_roformer.py +77 -412
  632. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  633. transformers/models/rt_detr/modeling_rt_detr.py +6 -0
  634. transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
  635. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  636. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
  637. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  638. transformers/models/rwkv/modeling_rwkv.py +2 -1
  639. transformers/models/sam/configuration_sam.py +1 -0
  640. transformers/models/sam/image_processing_sam_fast.py +0 -1
  641. transformers/models/sam/modeling_sam.py +4 -1
  642. transformers/models/sam2/configuration_sam2.py +1 -1
  643. transformers/models/sam2/modeling_sam2.py +7 -3
  644. transformers/models/sam2/modular_sam2.py +7 -3
  645. transformers/models/sam2_video/modeling_sam2_video.py +52 -43
  646. transformers/models/sam2_video/modular_sam2_video.py +32 -18
  647. transformers/models/sam3/configuration_sam3.py +21 -1
  648. transformers/models/sam3/modeling_sam3.py +100 -80
  649. transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
  650. transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
  651. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  652. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
  653. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  654. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  655. transformers/models/sam3_video/modeling_sam3_video.py +4 -3
  656. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  657. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  658. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  659. transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
  660. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  661. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
  662. transformers/models/seed_oss/modeling_seed_oss.py +3 -3
  663. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  664. transformers/models/segformer/modeling_segformer.py +6 -3
  665. transformers/models/segformer/modular_segformer.py +0 -1
  666. transformers/models/seggpt/modeling_seggpt.py +2 -0
  667. transformers/models/sew/modeling_sew.py +3 -0
  668. transformers/models/sew/modular_sew.py +1 -0
  669. transformers/models/sew_d/modeling_sew_d.py +3 -0
  670. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  671. transformers/models/siglip/modeling_siglip.py +24 -2
  672. transformers/models/siglip2/modeling_siglip2.py +67 -41
  673. transformers/models/siglip2/modular_siglip2.py +4 -0
  674. transformers/models/smollm3/modeling_smollm3.py +5 -5
  675. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  676. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  677. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  678. transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
  679. transformers/models/speecht5/modeling_speecht5.py +41 -1
  680. transformers/models/splinter/modeling_splinter.py +12 -3
  681. transformers/models/splinter/tokenization_splinter.py +9 -28
  682. transformers/models/squeezebert/modeling_squeezebert.py +8 -0
  683. transformers/models/stablelm/modeling_stablelm.py +4 -2
  684. transformers/models/starcoder2/modeling_starcoder2.py +5 -4
  685. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  686. transformers/models/superglue/modeling_superglue.py +1 -0
  687. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  688. transformers/models/superpoint/modeling_superpoint.py +1 -0
  689. transformers/models/swiftformer/modeling_swiftformer.py +6 -0
  690. transformers/models/swin/modeling_swin.py +20 -12
  691. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  692. transformers/models/swin2sr/modeling_swin2sr.py +51 -33
  693. transformers/models/swinv2/modeling_swinv2.py +45 -33
  694. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  695. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  696. transformers/models/t5/configuration_t5.py +7 -1
  697. transformers/models/t5/modeling_t5.py +8 -7
  698. transformers/models/t5/tokenization_t5.py +4 -8
  699. transformers/models/t5gemma/modeling_t5gemma.py +6 -6
  700. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  701. transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
  702. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  703. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  704. transformers/models/table_transformer/modeling_table_transformer.py +5 -1
  705. transformers/models/tapas/modeling_tapas.py +3 -0
  706. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  707. transformers/models/textnet/modeling_textnet.py +11 -2
  708. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  709. transformers/models/timesfm/modeling_timesfm.py +14 -0
  710. transformers/models/timesfm/modular_timesfm.py +14 -0
  711. transformers/models/timesformer/modeling_timesformer.py +2 -0
  712. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  713. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  714. transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
  715. transformers/models/trocr/modeling_trocr.py +3 -2
  716. transformers/models/tvp/configuration_tvp.py +5 -1
  717. transformers/models/tvp/modeling_tvp.py +6 -4
  718. transformers/models/udop/configuration_udop.py +1 -0
  719. transformers/models/udop/modeling_udop.py +7 -7
  720. transformers/models/udop/tokenization_udop.py +5 -13
  721. transformers/models/umt5/configuration_umt5.py +2 -2
  722. transformers/models/umt5/modeling_umt5.py +7 -6
  723. transformers/models/unispeech/modeling_unispeech.py +4 -0
  724. transformers/models/unispeech/modular_unispeech.py +2 -0
  725. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  726. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  727. transformers/models/univnet/modeling_univnet.py +1 -0
  728. transformers/models/upernet/modeling_upernet.py +1 -0
  729. transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
  730. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  731. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  732. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  733. transformers/models/video_llava/modeling_video_llava.py +7 -3
  734. transformers/models/vilt/configuration_vilt.py +2 -2
  735. transformers/models/vilt/modeling_vilt.py +13 -0
  736. transformers/models/vipllava/modeling_vipllava.py +7 -3
  737. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  738. transformers/models/visual_bert/modeling_visual_bert.py +8 -0
  739. transformers/models/vitdet/modeling_vitdet.py +2 -0
  740. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  741. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  742. transformers/models/vitmatte/modeling_vitmatte.py +5 -0
  743. transformers/models/vitpose/configuration_vitpose.py +1 -1
  744. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  745. transformers/models/vits/modeling_vits.py +1 -0
  746. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  747. transformers/models/voxtral/modeling_voxtral.py +2 -2
  748. transformers/models/voxtral/modular_voxtral.py +2 -2
  749. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  750. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
  751. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
  752. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
  753. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  754. transformers/models/wavlm/modeling_wavlm.py +5 -0
  755. transformers/models/whisper/generation_whisper.py +1 -0
  756. transformers/models/whisper/modeling_whisper.py +11 -3
  757. transformers/models/whisper/tokenization_whisper.py +4 -15
  758. transformers/models/x_clip/modeling_x_clip.py +5 -0
  759. transformers/models/xcodec/modeling_xcodec.py +5 -0
  760. transformers/models/xglm/modeling_xglm.py +11 -0
  761. transformers/models/xglm/tokenization_xglm.py +4 -9
  762. transformers/models/xlm/modeling_xlm.py +18 -14
  763. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  764. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  765. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  766. transformers/models/xlnet/modeling_xlnet.py +3 -1
  767. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  768. transformers/models/xmod/modeling_xmod.py +3 -0
  769. transformers/models/yoso/modeling_yoso.py +10 -1
  770. transformers/models/zamba/modeling_zamba.py +4 -1
  771. transformers/models/zamba2/modeling_zamba2.py +7 -4
  772. transformers/models/zamba2/modular_zamba2.py +1 -1
  773. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  774. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  775. transformers/models/zoedepth/modeling_zoedepth.py +8 -0
  776. transformers/pipelines/__init__.py +11 -9
  777. transformers/pipelines/automatic_speech_recognition.py +20 -12
  778. transformers/pipelines/base.py +2 -10
  779. transformers/pipelines/document_question_answering.py +4 -2
  780. transformers/pipelines/question_answering.py +1 -1
  781. transformers/pipelines/text_generation.py +1 -1
  782. transformers/pipelines/text_to_audio.py +2 -2
  783. transformers/processing_utils.py +133 -50
  784. transformers/quantizers/auto.py +2 -4
  785. transformers/quantizers/base.py +44 -174
  786. transformers/quantizers/quantizer_aqlm.py +2 -23
  787. transformers/quantizers/quantizer_auto_round.py +2 -12
  788. transformers/quantizers/quantizer_awq.py +20 -89
  789. transformers/quantizers/quantizer_bitnet.py +4 -14
  790. transformers/quantizers/quantizer_bnb_4bit.py +18 -155
  791. transformers/quantizers/quantizer_bnb_8bit.py +24 -110
  792. transformers/quantizers/quantizer_compressed_tensors.py +2 -9
  793. transformers/quantizers/quantizer_eetq.py +16 -74
  794. transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
  795. transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
  796. transformers/quantizers/quantizer_fp_quant.py +52 -82
  797. transformers/quantizers/quantizer_gptq.py +8 -28
  798. transformers/quantizers/quantizer_higgs.py +42 -60
  799. transformers/quantizers/quantizer_hqq.py +144 -153
  800. transformers/quantizers/quantizer_mxfp4.py +14 -194
  801. transformers/quantizers/quantizer_quanto.py +35 -79
  802. transformers/quantizers/quantizer_quark.py +36 -17
  803. transformers/quantizers/quantizer_spqr.py +4 -12
  804. transformers/quantizers/quantizer_torchao.py +50 -325
  805. transformers/quantizers/quantizer_vptq.py +4 -27
  806. transformers/quantizers/quantizers_utils.py +20 -0
  807. transformers/testing_utils.py +324 -47
  808. transformers/tokenization_mistral_common.py +7 -2
  809. transformers/tokenization_utils_base.py +116 -224
  810. transformers/tokenization_utils_tokenizers.py +190 -106
  811. transformers/trainer.py +51 -32
  812. transformers/trainer_callback.py +8 -0
  813. transformers/trainer_jit_checkpoint.py +126 -0
  814. transformers/trainer_seq2seq.py +4 -0
  815. transformers/trainer_utils.py +1 -1
  816. transformers/training_args.py +74 -38
  817. transformers/utils/__init__.py +7 -4
  818. transformers/utils/attention_visualizer.py +4 -4
  819. transformers/utils/auto_docstring.py +35 -25
  820. transformers/utils/generic.py +47 -1
  821. transformers/utils/hub.py +5 -15
  822. transformers/utils/import_utils.py +112 -25
  823. transformers/utils/kernel_config.py +74 -19
  824. transformers/utils/loading_report.py +19 -10
  825. transformers/utils/quantization_config.py +78 -245
  826. transformers/video_processing_utils.py +17 -14
  827. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
  828. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
  829. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
  830. transformers/kernels/__init__.py +0 -0
  831. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  832. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  833. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  834. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
  835. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/models/auto/tokenization_auto.py

@@ -15,7 +15,6 @@
 """Auto Tokenizer class."""
 
 import importlib
-import inspect
 import json
 import os
 from collections import OrderedDict
@@ -26,8 +25,7 @@ from transformers.utils.import_utils import is_mistral_common_available
 from ...configuration_utils import PreTrainedConfig
 from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
 from ...modeling_gguf_pytorch_utils import load_gguf_checkpoint
-from ...tokenization_python import PreTrainedTokenizer, PythonBackend
-from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE, find_sentencepiece_model_file, load_vocab_and_merges
+from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE
 from ...utils import (
     extract_commit_hash,
     is_g2p_en_available,
@@ -35,7 +33,7 @@ from ...utils import (
     is_tokenizers_available,
     logging,
 )
-from ...utils.hub import cached_file, has_file
+from ...utils.hub import cached_file
 from ..encoder_decoder import EncoderDecoderConfig
 from .auto_factory import _LazyAutoMapping
 from .configuration_auto import (
@@ -65,11 +63,10 @@ REGISTERED_FAST_ALIASES: dict[str, type[Any]] = {}
 
 TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
     [
-        ("aimv2", "CLIPTokenizerFast" if is_tokenizers_available() else None),
+        ("aimv2", "CLIPTokenizer" if is_tokenizers_available() else None),
         ("albert", "AlbertTokenizer" if is_tokenizers_available() else None),
         ("align", "BertTokenizer" if is_tokenizers_available() else None),
-        ("arcee", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("aria", "LlamaTokenizerFast" if is_tokenizers_available() else None),
+        ("audioflamingo3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
         ("aya_vision", "CohereTokenizer" if is_tokenizers_available() else None),
         ("bark", "BertTokenizer" if is_tokenizers_available() else None),
         ("bart", "RobertaTokenizer" if is_tokenizers_available() else None),
@@ -82,19 +79,15 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
         ("big_bird", "BigBirdTokenizer" if is_tokenizers_available() else None),
         ("bigbird_pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
         ("biogpt", "BioGptTokenizer"),
-        ("bitnet", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("blenderbot", "BlenderbotTokenizer" if is_tokenizers_available() else None),
         ("blenderbot-small", "BlenderbotSmallTokenizer"),
         ("blip", "BertTokenizer" if is_tokenizers_available() else None),
         ("blip-2", "GPT2Tokenizer" if is_tokenizers_available() else None),
-        ("bloom", "TokenizersBackend" if is_tokenizers_available() else None),
-        ("blt", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("bridgetower", "RobertaTokenizer"),
         ("bros", "BertTokenizer" if is_tokenizers_available() else None),
         ("byt5", "ByT5Tokenizer"),
         ("camembert", "CamembertTokenizer" if is_tokenizers_available() else None),
         ("canine", "CanineTokenizer"),
-        ("chameleon", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("chinese_clip", "BertTokenizer" if is_tokenizers_available() else None),
         ("clap", "RobertaTokenizer"),
         ("clip", "CLIPTokenizer" if is_tokenizers_available() else None),
@@ -104,265 +97,219 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
         ("codegen", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("cohere", "CohereTokenizer" if is_tokenizers_available() else None),
         ("cohere2", "CohereTokenizer" if is_tokenizers_available() else None),
-        ("colpali", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("colqwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
+        ("colqwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
         ("convbert", "BertTokenizer" if is_tokenizers_available() else None),
         ("cpm", "CpmTokenizer" if is_tokenizers_available() else None),
         ("cpmant", "CpmAntTokenizer"),
-        ("csm", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
         ("ctrl", "CTRLTokenizer"),
         ("data2vec-audio", "Wav2Vec2CTCTokenizer"),
         ("data2vec-text", "RobertaTokenizer"),
         ("dbrx", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("deberta", "DebertaTokenizer" if is_tokenizers_available() else None),
         ("deberta-v2", "DebertaV2Tokenizer" if is_tokenizers_available() else None),
-        ("deepseek_v2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("deepseek_v3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("deepseek_vl", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("deepseek_vl_hybrid", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("dia", "DiaTokenizer"),
-        ("diffllama", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("distilbert", "BertTokenizer" if is_tokenizers_available() else None),
-        ("dpr", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None),
+        ("dpr", "DPRQuestionEncoderTokenizer" if is_tokenizers_available() else None),
         ("electra", "BertTokenizer" if is_tokenizers_available() else None),
         ("emu3", "GPT2Tokenizer" if is_tokenizers_available() else None),
         ("ernie", "BertTokenizer" if is_tokenizers_available() else None),
-        ("ernie4_5", "LlamaTokenizerFast" if is_tokenizers_available() else None),
-        ("ernie4_5_moe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
         ("esm", "EsmTokenizer"),
         ("exaone4", "GPT2Tokenizer" if is_tokenizers_available() else None),
-        ("falcon", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
-        ("falcon_mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
+        ("falcon_mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
         ("fastspeech2_conformer", "FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None),
137
120
  ("flaubert", "FlaubertTokenizer"),
138
121
  ("flava", "BertTokenizer" if is_tokenizers_available() else None),
139
122
  ("flex_olmo", "GPT2Tokenizer" if is_tokenizers_available() else None),
140
123
  ("florence2", "BartTokenizer" if is_tokenizers_available() else None),
141
- ("fnet", "FNetTokenizerFast" if is_tokenizers_available() else None),
124
+ ("fnet", "FNetTokenizer" if is_tokenizers_available() else None),
142
125
  ("fsmt", "FSMTTokenizer"),
143
126
  ("funnel", "FunnelTokenizer" if is_tokenizers_available() else None),
144
- ("gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
145
- ("gemma2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
146
- ("gemma3", "GemmaTokenizerFast" if is_tokenizers_available() else None),
147
- ("gemma3_text", "GemmaTokenizerFast" if is_tokenizers_available() else None),
148
- ("gemma3n", "GemmaTokenizerFast" if is_tokenizers_available() else None),
149
- ("gemma3n_text", "GemmaTokenizerFast" if is_tokenizers_available() else None),
127
+ ("gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
128
+ ("gemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
129
+ ("gemma3", "GemmaTokenizer" if is_tokenizers_available() else None),
130
+ ("gemma3_text", "GemmaTokenizer" if is_tokenizers_available() else None),
131
+ ("gemma3n", "GemmaTokenizer" if is_tokenizers_available() else None),
132
+ ("gemma3n_text", "GemmaTokenizer" if is_tokenizers_available() else None),
150
133
  ("git", "BertTokenizer" if is_tokenizers_available() else None),
151
- ("glm", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
152
- ("glm4", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
153
- ("glm4_moe", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
154
- ("glm4v", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
155
- ("glm4v_moe", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
156
- ("got_ocr2", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
157
134
  ("gpt-sw3", "GPTSw3Tokenizer" if is_sentencepiece_available() else None),
158
135
  ("gpt2", "GPT2Tokenizer" if is_tokenizers_available() else None),
159
136
  ("gpt_bigcode", "GPT2Tokenizer" if is_tokenizers_available() else None),
160
137
  ("gpt_neo", "GPT2Tokenizer" if is_tokenizers_available() else None),
161
138
  ("gpt_neox", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
162
139
  ("gpt_neox_japanese", "GPTNeoXJapaneseTokenizer"),
163
- ("gpt_oss", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
164
140
  ("gptj", "GPT2Tokenizer" if is_tokenizers_available() else None),
165
141
  ("granite", "GPT2Tokenizer"),
166
142
  ("granitemoe", "GPT2Tokenizer"),
167
143
  ("granitemoehybrid", "GPT2Tokenizer"),
168
144
  ("granitemoeshared", "GPT2Tokenizer"),
169
145
  ("grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
170
- ("groupvit", "CLIPTokenizerFast" if is_tokenizers_available() else None),
171
- ("helium", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
146
+ ("groupvit", "CLIPTokenizer" if is_tokenizers_available() else None),
172
147
  ("herbert", "HerbertTokenizer" if is_tokenizers_available() else None),
173
148
  ("hubert", "Wav2Vec2CTCTokenizer"),
174
149
  ("ibert", "RobertaTokenizer"),
175
- ("idefics", "LlamaTokenizerFast" if is_tokenizers_available() else None),
176
- ("idefics2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
177
- ("idefics3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
150
+ ("idefics", "LlamaTokenizer" if is_tokenizers_available() else None),
151
+ ("idefics2", "LlamaTokenizer" if is_tokenizers_available() else None),
178
152
  ("instructblip", "GPT2Tokenizer" if is_tokenizers_available() else None),
179
153
  ("instructblipvideo", "GPT2Tokenizer" if is_tokenizers_available() else None),
180
- ("internvl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
181
- ("jamba", "LlamaTokenizerFast" if is_tokenizers_available() else None),
182
- ("janus", "LlamaTokenizerFast" if is_tokenizers_available() else None),
183
- ("jetmoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
154
+ ("internvl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
155
+ ("jais2", "GPT2Tokenizer" if is_tokenizers_available() else None),
184
156
  ("kosmos-2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
185
- ("kosmos-2.5", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
157
+ ("lasr_ctc", "ParakeetTokenizer" if is_tokenizers_available() else None),
158
+ ("lasr_encoder", "ParakeetTokenizer" if is_tokenizers_available() else None),
186
159
  ("layoutlm", "BertTokenizer" if is_tokenizers_available() else None),
187
160
  ("layoutlmv2", "LayoutLMv2Tokenizer" if is_tokenizers_available() else None),
188
161
  ("layoutlmv3", "LayoutLMv3Tokenizer" if is_tokenizers_available() else None),
189
162
  ("layoutxlm", "LayoutXLMTokenizer" if is_tokenizers_available() else None),
190
163
  ("led", "LEDTokenizer" if is_tokenizers_available() else None),
191
- ("lfm2_vl", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
192
164
  ("lilt", "RobertaTokenizer" if is_tokenizers_available() else None),
193
- ("llama", "LlamaTokenizer" if is_tokenizers_available() else None),
194
- ("llama4", "LlamaTokenizerFast" if is_tokenizers_available() else None),
195
- ("llama4_text", "LlamaTokenizerFast" if is_tokenizers_available() else None),
196
- ("llava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
197
- ("llava_next", "LlamaTokenizerFast" if is_tokenizers_available() else None),
198
- ("llava_next_video", "LlamaTokenizerFast" if is_tokenizers_available() else None),
199
- ("llava_onevision", "LlamaTokenizerFast" if is_tokenizers_available() else None),
200
165
  ("longformer", "RobertaTokenizer" if is_tokenizers_available() else None),
201
166
  ("longt5", "T5Tokenizer" if is_tokenizers_available() else None),
202
167
  ("luke", "LukeTokenizer"),
203
168
  ("lxmert", "LxmertTokenizer" if is_tokenizers_available() else None),
204
169
  ("m2m_100", "M2M100Tokenizer" if is_sentencepiece_available() else None),
205
- ("mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
206
- ("mamba2", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
170
+ ("mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
171
+ ("mamba2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
207
172
  ("marian", "MarianTokenizer" if is_sentencepiece_available() else None),
173
+ ("markuplm", "MarkupLMTokenizer" if is_tokenizers_available() else None),
208
174
  ("mbart", "MBartTokenizer" if is_tokenizers_available() else None),
209
175
  ("mbart50", "MBart50Tokenizer" if is_tokenizers_available() else None),
210
176
  ("mega", "RobertaTokenizer"),
211
177
  ("megatron-bert", "BertTokenizer" if is_tokenizers_available() else None),
212
- ("metaclip_2", "XLMRobertaTokenizerFast" if is_tokenizers_available() else None),
178
+ ("metaclip_2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
213
179
  ("mgp-str", "MgpstrTokenizer"),
214
- ("minimax", "GPT2Tokenizer" if is_tokenizers_available() else None),
215
180
  (
216
181
  "ministral3",
217
- (
218
- "MistralCommonBackend"
219
- if is_mistral_common_available()
220
- else ("LlamaTokenizer" if is_sentencepiece_available() else None),
221
- "LlamaTokenizerFast" if is_tokenizers_available() and not is_mistral_common_available() else None,
222
- ),
182
+ "MistralCommonBackend"
183
+ if is_mistral_common_available()
184
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
223
185
  ),
224
186
  (
225
187
  "mistral",
226
188
  "MistralCommonBackend"
227
189
  if is_mistral_common_available()
228
- else ("LlamaTokenizerFast" if is_tokenizers_available() else None),
190
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
229
191
  ),
230
192
  (
231
193
  "mistral3",
232
- (
233
- "MistralCommonBackend"
234
- if is_mistral_common_available()
235
- else ("LlamaTokenizer" if is_sentencepiece_available() else None),
236
- "LlamaTokenizerFast" if is_tokenizers_available() and not is_mistral_common_available() else None,
237
- ),
194
+ "MistralCommonBackend"
195
+ if is_mistral_common_available()
196
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
238
197
  ),
239
198
  (
240
199
  "mixtral",
241
200
  "MistralCommonBackend"
242
201
  if is_mistral_common_available()
243
- else ("LlamaTokenizerFast" if is_tokenizers_available() else None),
202
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
244
203
  ),
245
- ("mllama", "LlamaTokenizerFast" if is_tokenizers_available() else None),
246
204
  ("mluke", "MLukeTokenizer" if is_sentencepiece_available() else None),
247
205
  ("mm-grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
248
206
  ("mobilebert", "MobileBertTokenizer" if is_tokenizers_available() else None),
249
- ("modernbert", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
250
- ("moonshine", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
251
- ("moshi", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
252
207
  ("mpnet", "MPNetTokenizer" if is_tokenizers_available() else None),
253
- ("mpt", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
208
+ ("mpt", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
254
209
  ("mra", "RobertaTokenizer"),
255
210
  ("mt5", "T5Tokenizer" if is_tokenizers_available() else None),
256
211
  ("musicgen", "T5Tokenizer" if is_tokenizers_available() else None),
257
212
  ("musicgen_melody", "T5Tokenizer" if is_tokenizers_available() else None),
258
213
  ("mvp", "MvpTokenizer" if is_tokenizers_available() else None),
259
214
  ("myt5", "MyT5Tokenizer"),
260
- ("nemotron", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
261
215
  ("nezha", "BertTokenizer" if is_tokenizers_available() else None),
262
216
  ("nllb", "NllbTokenizer" if is_tokenizers_available() else None),
263
217
  ("nllb-moe", "NllbTokenizer" if is_tokenizers_available() else None),
264
218
  ("nougat", "NougatTokenizer" if is_tokenizers_available() else None),
265
- ("nystromformer", "AlbertTokenizerFast" if is_tokenizers_available() else None),
266
- ("olmo", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
267
- ("olmo2", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
219
+ ("nystromformer", "AlbertTokenizer" if is_tokenizers_available() else None),
220
+ ("olmo", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
221
+ ("olmo2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
268
222
  ("olmo3", "GPT2Tokenizer" if is_tokenizers_available() else None),
269
- ("olmoe", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
270
- ("omdet-turbo", "CLIPTokenizerFast" if is_tokenizers_available() else None),
271
- ("oneformer", "CLIPTokenizerFast" if is_tokenizers_available() else None),
223
+ ("olmoe", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
224
+ ("omdet-turbo", "CLIPTokenizer" if is_tokenizers_available() else None),
225
+ ("oneformer", "CLIPTokenizer" if is_tokenizers_available() else None),
272
226
  ("openai-gpt", "OpenAIGPTTokenizer" if is_tokenizers_available() else None),
273
227
  ("opt", "GPT2Tokenizer" if is_tokenizers_available() else None),
274
- ("ovis2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
275
- ("owlv2", "CLIPTokenizerFast" if is_tokenizers_available() else None),
276
- ("owlvit", "CLIPTokenizerFast" if is_tokenizers_available() else None),
277
- ("paligemma", "LlamaTokenizerFast" if is_tokenizers_available() else None),
228
+ ("ovis2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
229
+ ("owlv2", "CLIPTokenizer" if is_tokenizers_available() else None),
230
+ ("owlvit", "CLIPTokenizer" if is_tokenizers_available() else None),
278
231
  ("pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
279
232
  ("pegasus_x", "PegasusTokenizer" if is_tokenizers_available() else None),
280
233
  ("perceiver", "PerceiverTokenizer"),
281
- ("persimmon", "LlamaTokenizerFast" if is_tokenizers_available() else None),
282
234
  ("phi", "GPT2Tokenizer" if is_tokenizers_available() else None),
283
- ("phi3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
284
- ("phimoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
285
235
  ("phobert", "PhobertTokenizer"),
286
236
  ("pix2struct", "T5Tokenizer" if is_tokenizers_available() else None),
287
237
  (
288
238
  "pixtral",
289
239
  "MistralCommonBackend"
290
240
  if is_mistral_common_available()
291
- else ("PreTrainedTokenizerFast" if is_tokenizers_available() else None),
241
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
292
242
  ),
293
243
  ("plbart", "PLBartTokenizer" if is_tokenizers_available() else None),
294
244
  ("prophetnet", "ProphetNetTokenizer"),
295
245
  ("qdqbert", "BertTokenizer" if is_tokenizers_available() else None),
296
- ("qwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
297
- ("qwen2_5_omni", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
298
- ("qwen2_5_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
299
- ("qwen2_audio", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
300
- ("qwen2_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
301
- ("qwen2_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
302
- ("qwen3", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
303
- ("qwen3_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
304
- ("qwen3_next", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
305
- ("qwen3_omni_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
306
- ("qwen3_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
307
- ("qwen3_vl_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
246
+ ("qwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
247
+ ("qwen2_5_omni", "Qwen2Tokenizer" if is_tokenizers_available() else None),
248
+ ("qwen2_5_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
249
+ ("qwen2_audio", "Qwen2Tokenizer" if is_tokenizers_available() else None),
250
+ ("qwen2_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
251
+ ("qwen2_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
252
+ ("qwen3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
253
+ ("qwen3_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
254
+ ("qwen3_next", "Qwen2Tokenizer" if is_tokenizers_available() else None),
255
+ ("qwen3_omni_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
256
+ ("qwen3_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
257
+ ("qwen3_vl_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
308
258
  ("rag", "RagTokenizer"),
309
259
  ("realm", "BertTokenizer" if is_tokenizers_available() else None),
310
- ("recurrent_gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
260
+ ("recurrent_gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
311
261
  ("reformer", "ReformerTokenizer" if is_tokenizers_available() else None),
312
262
  ("rembert", "RemBertTokenizer" if is_tokenizers_available() else None),
313
263
  ("retribert", "BertTokenizer" if is_tokenizers_available() else None),
314
264
  ("roberta", "RobertaTokenizer"),
315
265
  ("roberta-prelayernorm", "RobertaTokenizer"),
316
266
  ("roc_bert", "RoCBertTokenizer"),
317
- ("roformer", "RoFormerTokenizerFast" if is_tokenizers_available() else None),
318
- ("rwkv", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
267
+ ("roformer", "RoFormerTokenizer" if is_tokenizers_available() else None),
268
+ ("rwkv", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
319
269
  ("seamless_m4t", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
320
270
  ("seamless_m4t_v2", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
321
- ("shieldgemma2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
271
+ ("shieldgemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
322
272
  ("siglip", "SiglipTokenizer" if is_sentencepiece_available() else None),
323
- ("siglip2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
324
- ("smollm3", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
273
+ ("siglip2", "GemmaTokenizer" if is_tokenizers_available() else None),
325
274
  ("speech_to_text", "Speech2TextTokenizer" if is_sentencepiece_available() else None),
326
275
  ("speecht5", "SpeechT5Tokenizer" if is_sentencepiece_available() else None),
327
276
  ("splinter", "SplinterTokenizer"),
328
277
  ("squeezebert", "BertTokenizer" if is_tokenizers_available() else None),
329
- ("stablelm", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
278
+ ("stablelm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
330
279
  ("starcoder2", "GPT2Tokenizer" if is_tokenizers_available() else None),
331
280
  ("switch_transformers", "T5Tokenizer" if is_tokenizers_available() else None),
332
281
  ("t5", "T5Tokenizer" if is_tokenizers_available() else None),
333
- ("t5gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
282
+ ("t5gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
334
283
  ("tapas", "TapasTokenizer"),
335
284
  ("trocr", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
336
285
  ("tvp", "BertTokenizer" if is_tokenizers_available() else None),
337
286
  ("udop", "UdopTokenizer" if is_tokenizers_available() else None),
338
287
  ("umt5", "T5Tokenizer" if is_tokenizers_available() else None),
339
- ("video_llava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
288
+ ("unispeech", "Wav2Vec2CTCTokenizer"),
289
+ ("unispeech-sat", "Wav2Vec2CTCTokenizer"),
340
290
  ("vilt", "BertTokenizer" if is_tokenizers_available() else None),
341
- ("vipllava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
342
291
  ("visual_bert", "BertTokenizer" if is_tokenizers_available() else None),
343
292
  ("vits", "VitsTokenizer"),
344
293
  (
345
294
  "voxtral",
346
295
  "MistralCommonBackend"
347
296
  if is_mistral_common_available()
348
- else ("PreTrainedTokenizerFast" if is_tokenizers_available() else None),
297
+ else ("TokenizersBackend" if is_tokenizers_available() else None),
349
298
  ),
350
299
  ("wav2vec2", "Wav2Vec2CTCTokenizer"),
351
300
  ("wav2vec2-bert", "Wav2Vec2CTCTokenizer"),
352
301
  ("wav2vec2-conformer", "Wav2Vec2CTCTokenizer"),
353
302
  ("wav2vec2_phoneme", "Wav2Vec2PhonemeCTCTokenizer"),
354
303
  ("whisper", "WhisperTokenizer" if is_tokenizers_available() else None),
355
- ("xclip", "CLIPTokenizerFast" if is_tokenizers_available() else None),
304
+ ("xclip", "CLIPTokenizer" if is_tokenizers_available() else None),
356
305
  ("xglm", "XGLMTokenizer" if is_tokenizers_available() else None),
357
306
  ("xlm", "XLMTokenizer"),
358
307
  ("xlm-roberta", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
359
308
  ("xlm-roberta-xl", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
360
309
  ("xlnet", "XLNetTokenizer" if is_tokenizers_available() else None),
361
- ("xlstm", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
362
- ("xmod", "XLMRobertaTokenizerFast" if is_tokenizers_available() else None),
310
+ ("xlstm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
311
+ ("xmod", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
363
312
  ("yoso", "AlbertTokenizer" if is_tokenizers_available() else None),
364
- ("zamba", "LlamaTokenizerFast" if is_tokenizers_available() else None),
365
- ("zamba2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
366
313
  ]
367
314
  )
368
315
 
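
The flattened table above maps each model type to a single tokenizer class name; the old slow/fast split (and the `Fast` suffix) is gone. A minimal usage sketch, with the checkpoint name chosen purely for illustration:

    from transformers import AutoTokenizer

    # Per the mapping above, "qwen2" resolves to Qwen2Tokenizer when the
    # `tokenizers` library is installed (and to None, i.e. an error, otherwise).
    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
    print(type(tok).__name__)
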
@@ -389,13 +336,17 @@ def load_merges(merges_file):
389
336
 
390
337
 
391
338
  def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
339
+ # Bloom tokenizer classes were removed but should map to the fast backend for BC
340
+ if class_name in {"BloomTokenizer", "BloomTokenizerFast"}:
341
+ return TokenizersBackend
342
+
392
343
  if class_name in REGISTERED_FAST_ALIASES:
393
344
  return REGISTERED_FAST_ALIASES[class_name]
394
345
 
395
346
  if class_name in REGISTERED_TOKENIZER_CLASSES:
396
347
  return REGISTERED_TOKENIZER_CLASSES[class_name]
397
348
 
398
- if class_name == "PreTrainedTokenizerFast":
349
+ if class_name == "TokenizersBackend":
399
350
  return TokenizersBackend
400
351
 
401
352
  # V5: TOKENIZER_MAPPING_NAMES now maps to single strings, not tuples
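
The renamed check and the Bloom shim above keep legacy class names resolvable. A hedged sketch against the internal helper (the import path follows this module; internal APIs may change between release candidates):

    from transformers.models.auto.tokenization_auto import tokenizer_class_from_name

    # Legacy Bloom tokenizer names and the new backend name both resolve to the
    # tokenizers-backed class, per the branches added above.
    for name in ("BloomTokenizer", "BloomTokenizerFast", "TokenizersBackend"):
        print(name, "->", tokenizer_class_from_name(name))
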
@@ -404,7 +355,7 @@ def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
404
355
  module_name = model_type_to_module_name(module_name)
405
356
  if (
406
357
  module_name in ["mistral", "mistral3", "mixtral", "ministral", "ministral3", "pixtral", "voxtral"]
407
- and class_name == "MistralCommonTokenizer"
358
+ and class_name == "MistralCommonBackend"
408
359
  ):
409
360
  module = importlib.import_module(".tokenization_mistral_common", "transformers")
410
361
  else:
@@ -428,402 +379,6 @@ def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
428
379
  return None
429
380
 
430
381
 
431
- def _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
432
- # Delegate to shared helper to avoid duplication
433
- return find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
434
-
435
-
436
- def _load_tokenizers_backend(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
437
- """
438
- Load a tokenizer using only the tokenizers backend (no SentencePiece fallback).
439
-
440
- This function attempts to load with the following priority:
441
- 1. If tokenizer.json exists, load directly
442
- 2. If any .model file (SPM) exists, try extracting vocab and merges
443
- 3. If vocab.json and merges.txt exist, load with those
444
- 4. If vocab.txt exists (WordPiece models), load with that
445
-
446
- Args:
447
- tokenizer_class: The tokenizer class to instantiate
448
- pretrained_model_name_or_path: Path or model id
449
- inputs: Additional positional arguments for tokenizer init
450
- kwargs: Additional keyword arguments
451
-
452
- Returns:
453
- An instantiated tokenizer object
454
-
455
- Raises:
456
- ValueError: If tokenizer could not be loaded with tokenizers backend
457
- """
458
- files_loaded = []
459
-
460
- # Try tokenizer.json first
461
- try:
462
- tokenizer_json_exists = has_file(
463
- pretrained_model_name_or_path,
464
- "tokenizer.json",
465
- revision=kwargs.get("revision"),
466
- token=kwargs.get("token"),
467
- cache_dir=kwargs.get("cache_dir"),
468
- local_files_only=kwargs.get("local_files_only", False),
469
- )
470
- except Exception:
471
- tokenizer_json_exists = False
472
-
473
- if tokenizer_json_exists:
474
- files_loaded.append("tokenizer.json")
475
- kwargs["backend"] = "tokenizers"
476
- kwargs["files_loaded"] = files_loaded
477
- # Some old models have uploaded a tokenizer.json but haven't updated tokenizer_config.json to point to the correct tokenizer class
478
- tokenizer_class = (
479
- TokenizersBackend
480
- if tokenizer_class.__name__ in ("PythonBackend", "PreTrainedTokenizer")
481
- else tokenizer_class
482
- )
483
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
484
-
485
- # Try tekken.json (Mistral format)
486
- try:
487
- if has_file(
488
- pretrained_model_name_or_path,
489
- "tekken.json",
490
- revision=kwargs.get("revision"),
491
- token=kwargs.get("token"),
492
- cache_dir=kwargs.get("cache_dir"),
493
- local_files_only=kwargs.get("local_files_only", False),
494
- ):
495
- from ...integrations.mistral import convert_tekken_tokenizer
496
-
497
- tekken_file = cached_file(
498
- pretrained_model_name_or_path,
499
- "tekken.json",
500
- **{
501
- k: v
502
- for k, v in kwargs.items()
503
- if k
504
- in ["cache_dir", "force_download", "proxies", "token", "revision", "local_files_only", "subfolder"]
505
- },
506
- )
507
- if tekken_file is not None:
508
- files_loaded.append("tekken.json")
509
- kwargs["backend"] = "tokenizers"
510
- kwargs["files_loaded"] = files_loaded
511
- return convert_tekken_tokenizer(tekken_file)
512
- except (ImportError, Exception):
513
- pass
514
-
515
- # Try extracting from SentencePiece model
516
- spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
517
- if spm_file is not None:
518
- try:
519
- resolved_spm = cached_file(
520
- pretrained_model_name_or_path,
521
- spm_file,
522
- cache_dir=kwargs.get("cache_dir"),
523
- force_download=kwargs.get("force_download", False),
524
- proxies=kwargs.get("proxies"),
525
- token=kwargs.get("token"),
526
- revision=kwargs.get("revision"),
527
- local_files_only=kwargs.get("local_files_only", False),
528
- subfolder=kwargs.get("subfolder", ""),
529
- )
530
- except Exception:
531
- resolved_spm = None
532
-
533
- if resolved_spm is not None:
534
- try:
535
- from ...tokenization_utils_sentencepiece import SentencePieceExtractor
536
-
537
- fast_sig = inspect.signature(getattr(tokenizer_class, "__init__", tokenizer_class))
538
- if "vocab" in fast_sig.parameters:
539
- try:
540
- vocab_ids, vocab_scores, merges = SentencePieceExtractor(resolved_spm).extract()
541
- files_loaded.append(spm_file)
542
- kwargs["backend"] = "tokenizers"
543
- kwargs["files_loaded"] = files_loaded
544
- # If tokenizer needs both vocab and merges (BPE models)
545
- if "merges" in fast_sig.parameters:
546
- return tokenizer_class.from_pretrained(
547
- pretrained_model_name_or_path, *inputs, vocab=vocab_scores, merges=merges, **kwargs
548
- )
549
- # If tokenizer only needs vocab (Unigram models like NLLB, SeamlessM4T)
550
- else:
551
- return tokenizer_class.from_pretrained(
552
- pretrained_model_name_or_path, *inputs, vocab=vocab_scores, **kwargs
553
- )
554
- except Exception:
555
- pass
556
- except ImportError as e:
557
- if "sentencepiece" in str(e).lower() or "SentencePiece" in str(e):
558
- raise ImportError(
559
- f"This checkpoint only contains a SentencePiece model file ({spm_file}), but the `sentencepiece` library is not installed. "
560
- f"Please install sentencepiece to load this tokenizer: `pip install sentencepiece`"
561
- ) from e
562
- raise
563
- except Exception:
564
- pass
565
-
566
- vocab, merges, loaded = load_vocab_and_merges(pretrained_model_name_or_path, **kwargs)
567
- if vocab is not None:
568
- files_loaded.extend(loaded)
569
- if issubclass(tokenizer_class, PreTrainedTokenizer):
570
- kwargs["backend"] = "python"
571
- else:
572
- kwargs["backend"] = "tokenizers"
573
- kwargs["files_loaded"] = files_loaded
574
- if merges is not None:
575
- return tokenizer_class.from_pretrained(
576
- pretrained_model_name_or_path, *inputs, vocab=vocab, merges=merges, **kwargs
577
- )
578
- else:
579
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, vocab=vocab, **kwargs)
580
-
581
- # Try vocab.txt (WordPiece models like SplinterTokenizer)
582
- try:
583
- resolved_vocab_txt = cached_file(
584
- pretrained_model_name_or_path,
585
- "vocab.txt",
586
- cache_dir=kwargs.get("cache_dir"),
587
- force_download=kwargs.get("force_download", False),
588
- proxies=kwargs.get("proxies"),
589
- token=kwargs.get("token"),
590
- revision=kwargs.get("revision"),
591
- local_files_only=kwargs.get("local_files_only", False),
592
- subfolder=kwargs.get("subfolder", ""),
593
- )
594
- except Exception:
595
- resolved_vocab_txt = None
596
-
597
- if resolved_vocab_txt is not None:
598
- try:
599
- fast_sig = inspect.signature(getattr(tokenizer_class, "__init__", tokenizer_class))
600
- if "vocab" in fast_sig.parameters:
601
- # Load vocab.txt: each line is a token, line number is the ID
602
- vocab = OrderedDict()
603
- with open(resolved_vocab_txt, "r", encoding="utf-8") as reader:
604
- tokens = reader.readlines()
605
- for index, token in enumerate(tokens):
606
- token = token.rstrip("\n")
607
- vocab[token] = index
608
- files_loaded.append("vocab.txt")
609
- kwargs["backend"] = "tokenizers"
610
- kwargs["files_loaded"] = files_loaded
611
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, vocab=vocab, **kwargs)
612
- except Exception:
613
- pass
614
-
615
- # If all methods failed, raise an error
616
- raise ValueError(
617
- f"Could not load tokenizer from {pretrained_model_name_or_path} using tokenizers backend. "
618
- "No tokenizer.json, tekken.json, vocab.json+merges.txt, vocab.txt, or compatible SentencePiece model found."
619
- )
620
-
621
-
622
- def _try_load_tokenizer_with_fallbacks(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
623
- """
624
- Try to load a tokenizer with backend selection.
625
-
626
- This function routes to the appropriate backend based on the 'backend' parameter:
627
- - "tokenizers" (default): Uses HuggingFace tokenizers library backend
628
- - "sentencepiece": Uses SentencePiece backend
629
-
630
- For the tokenizers backend, attempts to load with the following priority:
631
- 1. If tokenizer.json exists, load directly
632
- 2. If any .model file (SPM) exists, try extracting vocab and merges
633
- 3. If vocab.json and merges.txt exist, load with those
634
- 4. Fallback to SentencePieceBackend if available
635
-
636
- Args:
637
- tokenizer_class: The tokenizer class to instantiate (can be None)
638
- pretrained_model_name_or_path: Path or model id
639
- inputs: Additional positional arguments for tokenizer init
640
- kwargs: Additional keyword arguments (may include 'backend' parameter, defaults to "tokenizers")
641
-
642
- Returns:
643
- An instantiated tokenizer object
644
-
645
- Raises:
646
- ValueError: If no tokenizer could be loaded
647
- """
648
- # Extract the backend parameter - default to "tokenizers" to prioritize tokenizers backend
649
- backend = kwargs.pop("backend", "tokenizers")
650
-
651
- # Validate backend parameter
652
- if backend not in ["sentencepiece", "tokenizers"]:
653
- logger.warning(
654
- f"Invalid backend '{backend}' specified. Valid options are 'tokenizers' or 'sentencepiece'. "
655
- "Defaulting to 'tokenizers' backend."
656
- )
657
- backend = "tokenizers"
658
-
659
- # Route to SentencePiece backend if requested
660
- if backend == "sentencepiece":
661
- if SentencePieceBackend is None:
662
- raise ValueError(
663
- "SentencePiece backend was requested but sentencepiece is not installed. "
664
- "Please install it with: pip install sentencepiece"
665
- )
666
- logger.info("Loading tokenizer with SentencePiece backend")
667
- # Track files loaded for SentencePiece backend
668
- spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
669
- files_loaded = [spm_file] if spm_file else []
670
- kwargs["backend"] = "sentencepiece"
671
- kwargs["files_loaded"] = files_loaded
672
- # Resolve the SPM file path and pass it as vocab_file
673
- if spm_file is not None:
674
- resolved_vocab_file = cached_file(
675
- pretrained_model_name_or_path,
676
- spm_file,
677
- cache_dir=kwargs.get("cache_dir"),
678
- force_download=kwargs.get("force_download", False),
679
- proxies=kwargs.get("proxies"),
680
- token=kwargs.get("token"),
681
- revision=kwargs.get("revision"),
682
- local_files_only=kwargs.get("local_files_only", False),
683
- subfolder=kwargs.get("subfolder", ""),
684
- )
685
- kwargs["vocab_file"] = resolved_vocab_file
686
- if isinstance(tokenizer_class, type) and issubclass(tokenizer_class, SentencePieceBackend):
687
- logger.info("Loading tokenizer with SentencePiece backend using tokenizer class")
688
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
689
- return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
690
-
691
- # Route to tokenizers backend (default)
692
- if backend == "tokenizers":
693
- if tokenizer_class is not None:
694
- # Check if tokenizer_class inherits from PreTrainedTokenizer (but not from TokenizersBackend/SentencePieceBackend)
695
- # These are edge cases with custom logic (e.g., BioGptTokenizer with Moses tokenization)
696
- from ...tokenization_python import PreTrainedTokenizer
697
-
698
- # Build list of backend classes to check against
699
- backend_classes = [TokenizersBackend] if TokenizersBackend else []
700
- if SentencePieceBackend:
701
- backend_classes.append(SentencePieceBackend)
702
-
703
- # Check if it's a custom PreTrainedTokenizer (not a backend class)
704
- is_custom_pre_trained = (
705
- isinstance(tokenizer_class, type)
706
- and issubclass(tokenizer_class, PreTrainedTokenizer)
707
- and not any(issubclass(tokenizer_class, bc) for bc in backend_classes)
708
- and tokenizer_class.__name__ not in ("PythonBackend", "PreTrainedTokenizer")
709
- )
710
-
711
- # Check if it's a completely custom tokenizer (not PreTrainedTokenizer, not backend class)
712
- # e.g., MistralCommonBackend which has its own from_pretrained logic
713
- inherits_from_backend = isinstance(tokenizer_class, type) and any(
714
- bc and issubclass(tokenizer_class, bc) for bc in backend_classes
715
- )
716
- is_completely_custom = (
717
- isinstance(tokenizer_class, type)
718
- and not issubclass(tokenizer_class, PythonBackend)
719
- and not inherits_from_backend
720
- )
721
-
722
- if is_custom_pre_trained:
723
- logger.info("Loading tokenizer with custom PreTrainedTokenizer backend (edge case)")
724
- # Track the backend type for custom tokenizers
725
- kwargs["backend"] = "custom"
726
- kwargs["files_loaded"] = [] # Custom tokenizers may load various files
727
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
728
-
729
- if is_completely_custom:
730
- # For completely custom tokenizers (like MistralCommonBackend), try calling from_pretrained directly
731
- logger.info("Loading tokenizer with custom tokenizer class (non-PreTrainedTokenizer)")
732
- # Filter out AutoTokenizer-specific kwargs that custom tokenizers don't accept
733
- custom_kwargs = {k: v for k, v in kwargs.items() if k not in ["backend", "files_loaded"]}
734
- custom_kwargs["_from_auto"] = True # Signal that this is called from AutoTokenizer
735
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **custom_kwargs)
736
-
737
- if TokenizersBackend is None:
738
- raise ValueError(
739
- "Tokenizers backend is the default but tokenizers library is not installed. "
740
- "Please install it with: pip install tokenizers"
741
- )
742
- logger.info("Loading tokenizer with tokenizers backend")
743
- try:
744
- return _load_tokenizers_backend(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs)
745
- except ValueError as e:
746
- # If tokenizers backend fails, try falling back to SentencePiece backend if available
747
- spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
748
- if spm_file is not None and SentencePieceBackend is not None:
749
- logger.info(
750
- f"Tokenizers backend failed: {e}. "
751
- f"Falling back to SentencePieceBackend since {spm_file} file was found."
752
- )
753
- files_loaded = [spm_file]
754
- kwargs["backend"] = "sentencepiece"
755
- kwargs["files_loaded"] = files_loaded
756
- # Resolve the SPM file path and pass it as vocab_file
757
- resolved_vocab_file = cached_file(
758
- pretrained_model_name_or_path,
759
- spm_file,
760
- cache_dir=kwargs.get("cache_dir"),
761
- force_download=kwargs.get("force_download", False),
762
- proxies=kwargs.get("proxies"),
763
- token=kwargs.get("token"),
764
- revision=kwargs.get("revision"),
765
- local_files_only=kwargs.get("local_files_only", False),
766
- subfolder=kwargs.get("subfolder", ""),
767
- )
768
- kwargs["vocab_file"] = resolved_vocab_file
769
- if tokenizer_class is not None and issubclass(tokenizer_class, SentencePieceBackend):
770
- logger.info(
771
- "Falling back to SentencePiece backend using tokenizer class that inherits from SentencePieceBackend."
772
- )
773
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
774
- return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
775
- # If no fallback available, try calling tokenizer class directly as last resort
776
- if hasattr(tokenizer_class, "from_pretrained"):
777
- logger.info(
778
- f"Tokenizers backend failed: {e}. Trying to load tokenizer directly from tokenizer class."
779
- )
780
- # Filter out AutoTokenizer-specific kwargs that custom tokenizers don't accept
781
- custom_kwargs = {k: v for k, v in kwargs.items() if k not in ["backend", "files_loaded"]}
782
- custom_kwargs["_from_auto"] = True # Signal that this is called from AutoTokenizer
783
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **custom_kwargs)
784
- # Re-raise if no fallback options available
785
- raise
786
-
787
- # If no tokenizer class but tokenizers backend requested, fall back to SentencePiece if available
788
- spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
789
- if spm_file is not None and SentencePieceBackend is not None:
790
- logger.info(
791
- f"Tokenizers backend was requested but no tokenizer class found. "
792
- f"Falling back to SentencePieceBackend since {spm_file} file was found."
793
- )
794
- files_loaded = [spm_file]
795
- kwargs["backend"] = "sentencepiece"
796
- kwargs["files_loaded"] = files_loaded
797
- # Resolve the SPM file path and pass it as vocab_file
798
- resolved_vocab_file = cached_file(
799
- pretrained_model_name_or_path,
800
- spm_file,
801
- cache_dir=kwargs.get("cache_dir"),
802
- force_download=kwargs.get("force_download", False),
803
- proxies=kwargs.get("proxies"),
804
- token=kwargs.get("token"),
805
- revision=kwargs.get("revision"),
806
- local_files_only=kwargs.get("local_files_only", False),
807
- subfolder=kwargs.get("subfolder", ""),
808
- )
809
- kwargs["vocab_file"] = resolved_vocab_file
810
- if (
811
- tokenizer_class is not None
812
- and SentencePieceBackend is not None
813
- and issubclass(tokenizer_class, SentencePieceBackend)
814
- ):
815
- logger.info(
816
- "Falling back to SentencePiece backend using tokenizer class that inherits from SentencePieceBackend."
817
- )
818
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
819
- return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
820
-
821
- raise ValueError(
822
- f"Could not load tokenizer from {pretrained_model_name_or_path}. "
823
- "No tokenizer class could be determined and no SentencePiece model found."
824
- )
825
-
826
-
827
382
  def get_tokenizer_config(
828
383
  pretrained_model_name_or_path: Union[str, os.PathLike[str]],
829
384
  cache_dir: Optional[Union[str, os.PathLike[str]]] = None,
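
The helpers removed above documented a fixed file-resolution order: tokenizer.json, then tekken.json, then a SentencePiece .model file, then vocab.json + merges.txt, then vocab.txt. The standalone sketch below is not transformers API; it only restates that priority over a local directory for reference:

    import os
    from typing import Optional

    def pick_tokenizer_file(model_dir: str) -> Optional[str]:
        # Mirrors the priority described in the removed _load_tokenizers_backend docstring.
        files = set(os.listdir(model_dir))
        for name in ("tokenizer.json", "tekken.json"):
            if name in files:
                return name
        spm = next((f for f in sorted(files) if f.endswith(".model")), None)
        if spm is not None:
            return spm
        if {"vocab.json", "merges.txt"} <= files:
            return "vocab.json"  # loaded together with merges.txt
        if "vocab.txt" in files:
            return "vocab.txt"
        return None
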
@@ -1054,11 +609,43 @@ class AutoTokenizer:
1054
609
 
1055
610
  return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
1056
611
 
612
+ if gguf_file:
613
+ gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
614
+ config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
615
+ config = AutoConfig.for_model(**config_dict)
616
+ elif config is None:
617
+ try:
618
+ config = AutoConfig.from_pretrained(
619
+ pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
620
+ )
621
+ except Exception:
622
+ config = PreTrainedConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
623
+
624
+ config_model_type = config.model_type
625
+
1057
626
  # Next, let's try to use the tokenizer_config file to get the tokenizer class.
1058
627
  tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
628
+ tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
629
+ # if there is a config, we can check that the tokenizer class != than model class and can thus assume we need to use `TokenizersBackend`
630
+ if (
631
+ tokenizer_config_class is not None
632
+ and config_model_type is not None
633
+ and config_model_type != ""
634
+ and TOKENIZER_MAPPING_NAMES.get(config_model_type, "").replace("Fast", "")
635
+ != tokenizer_config_class.replace("Fast", "")
636
+ ):
637
+ # new model, but we ignore it unless the model type is the same
638
+ try:
639
+ return TokenizersBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
640
+ except Exception:
641
+ return tokenizer_class_from_name(tokenizer_config_class).from_pretrained(
642
+ pretrained_model_name_or_path, *inputs, **kwargs
643
+ )
644
+
1059
645
  if "_commit_hash" in tokenizer_config:
1060
646
  kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
1061
- config_tokenizer_class = tokenizer_config.get("tokenizer_class")
647
+
648
+ # Check for auto_map early to handle dynamic tokenizers properly
1062
649
  tokenizer_auto_map = None
1063
650
  if "auto_map" in tokenizer_config:
1064
651
  if isinstance(tokenizer_config["auto_map"], (tuple, list)):
@@ -1067,34 +654,15 @@ class AutoTokenizer:
1067
654
  else:
1068
655
  tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)
1069
656
 
1070
- # If that did not work, let's try to use the config.
1071
- if config_tokenizer_class is None:
1072
- if not isinstance(config, PreTrainedConfig):
1073
- if gguf_file:
1074
- gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
1075
- config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
1076
- config = AutoConfig.for_model(**config_dict)
1077
- else:
1078
- config = AutoConfig.from_pretrained(
1079
- pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
1080
- )
1081
- config_tokenizer_class = config.tokenizer_class
1082
- if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
1083
- tokenizer_auto_map = config.auto_map["AutoTokenizer"]
1084
-
1085
- if (
1086
- config_tokenizer_class is not None
1087
- and config_tokenizer_class != "PreTrainedTokenizerFast"
1088
- and "Fast" in config_tokenizer_class
1089
- ):
1090
- config_tokenizer_class = config_tokenizer_class[:-4]
657
+ if tokenizer_config_class:
658
+ tokenizer_config_class = tokenizer_config_class.replace("Fast", "")
1091
659
 
1092
660
  has_remote_code = tokenizer_auto_map is not None
1093
661
  has_local_code = type(config) in TOKENIZER_MAPPING or (
1094
- config_tokenizer_class is not None
662
+ tokenizer_config_class is not None
1095
663
  and (
1096
- tokenizer_class_from_name(config_tokenizer_class) is not None
1097
- or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
664
+ tokenizer_class_from_name(tokenizer_config_class) is not None
665
+ or tokenizer_class_from_name(tokenizer_config_class + "Fast") is not None
1098
666
  )
1099
667
  )
1100
668
  if has_remote_code:
@@ -1118,17 +686,24 @@ class AutoTokenizer:
1118
686
  return tokenizer_class.from_pretrained(
1119
687
  pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
1120
688
  )
1121
- elif config_tokenizer_class is not None:
1122
- fast_tokenizer_class = None
1123
- if fast_tokenizer_class is None:
1124
- tokenizer_class_candidate = config_tokenizer_class
1125
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
1126
- if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
1127
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
1128
- else:
1129
- tokenizer_class = fast_tokenizer_class
689
+ elif tokenizer_config_class is not None:
690
+ tokenizer_class_candidate = tokenizer_config_class
691
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
692
+ if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
693
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
694
+ if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
695
+ tokenizer_class = TokenizersBackend
696
+ # Fallback to TokenizersBackend if the class wasn't found
697
+ if tokenizer_class is None:
698
+ tokenizer_class = TokenizersBackend
1130
699
 
1131
- return _try_load_tokenizer_with_fallbacks(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs)
700
+ return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
701
+ elif getattr(config, "tokenizer_class"):
702
+ _class = config.tokenizer_class
703
+ if "PreTrainedTokenizerFast" not in _class:
704
+ _class = _class.replace("Fast", "")
705
+ tokenizer_class = tokenizer_class_from_name(_class)
706
+ return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
1132
707
 
1133
708
  # Otherwise we have to be creative.
1134
709
  # if model is an encoder decoder, the encoder tokenizer class is used by default
@@ -1142,19 +717,25 @@ class AutoTokenizer:
1142
717
  )
1143
718
  config = config.encoder
1144
719
 
1145
- model_type = config_class_to_model_type(type(config).__name__)
720
+ model_type = config_class_to_model_type(type(config).__name__) or config.get("model_type", None)
1146
721
  if model_type is not None:
1147
- tokenizer_class = TOKENIZER_MAPPING[type(config)]
1148
-
722
+ tokenizer_class = TOKENIZER_MAPPING.get(type(config), TokenizersBackend)
1149
723
  if tokenizer_class is not None:
1150
- return _try_load_tokenizer_with_fallbacks(
1151
- tokenizer_class, pretrained_model_name_or_path, inputs, kwargs
1152
- )
1153
- else:
1154
- raise ValueError(
1155
- "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
1156
- "in order to use this tokenizer."
1157
- )
724
+ return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
725
+
726
+ # Fallback: try tokenizer_class from tokenizer_config.json
727
+ tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
728
+ if tokenizer_config_class is not None:
729
+ if tokenizer_config_class != "TokenizersBackend" and "Fast" in tokenizer_config_class:
730
+ tokenizer_config_class = tokenizer_config_class[:-4]
731
+ tokenizer_class = tokenizer_class_from_name(tokenizer_config_class)
732
+ if tokenizer_class is None and not tokenizer_config_class.endswith("Fast"):
733
+ tokenizer_class = tokenizer_class_from_name(tokenizer_config_class + "Fast")
734
+ if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
735
+ tokenizer_class = TokenizersBackend
736
+ if tokenizer_class is None:
737
+ tokenizer_class = TokenizersBackend
738
+ return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
1158
739
 
1159
740
  raise ValueError(
1160
741
  f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"