transformers 5.0.0__py3-none-any.whl → 5.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1606)
  1. transformers/__init__.py +36 -55
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +33 -32
  4. transformers/cache_utils.py +139 -32
  5. transformers/cli/chat.py +3 -3
  6. transformers/cli/serve.py +19 -49
  7. transformers/cli/transformers.py +1 -2
  8. transformers/configuration_utils.py +155 -129
  9. transformers/conversion_mapping.py +22 -158
  10. transformers/convert_slow_tokenizer.py +17 -227
  11. transformers/core_model_loading.py +185 -528
  12. transformers/data/data_collator.py +4 -12
  13. transformers/data/processors/glue.py +1 -0
  14. transformers/data/processors/utils.py +1 -0
  15. transformers/data/processors/xnli.py +1 -0
  16. transformers/dependency_versions_check.py +1 -0
  17. transformers/dependency_versions_table.py +7 -5
  18. transformers/distributed/configuration_utils.py +2 -1
  19. transformers/dynamic_module_utils.py +25 -24
  20. transformers/feature_extraction_sequence_utils.py +23 -19
  21. transformers/feature_extraction_utils.py +33 -64
  22. transformers/file_utils.py +1 -0
  23. transformers/generation/__init__.py +1 -11
  24. transformers/generation/candidate_generator.py +33 -80
  25. transformers/generation/configuration_utils.py +133 -189
  26. transformers/generation/continuous_batching/__init__.py +1 -4
  27. transformers/generation/continuous_batching/cache.py +25 -83
  28. transformers/generation/continuous_batching/cache_manager.py +45 -155
  29. transformers/generation/continuous_batching/continuous_api.py +147 -270
  30. transformers/generation/continuous_batching/requests.py +3 -51
  31. transformers/generation/continuous_batching/scheduler.py +105 -160
  32. transformers/generation/logits_process.py +128 -0
  33. transformers/generation/stopping_criteria.py +1 -1
  34. transformers/generation/streamers.py +1 -0
  35. transformers/generation/utils.py +123 -122
  36. transformers/generation/watermarking.py +6 -8
  37. transformers/hf_argparser.py +13 -9
  38. transformers/hyperparameter_search.py +2 -1
  39. transformers/image_processing_base.py +23 -12
  40. transformers/image_processing_utils.py +15 -11
  41. transformers/image_processing_utils_fast.py +75 -85
  42. transformers/image_transforms.py +42 -73
  43. transformers/image_utils.py +32 -30
  44. transformers/initialization.py +0 -37
  45. transformers/integrations/__init__.py +2 -16
  46. transformers/integrations/accelerate.py +113 -58
  47. transformers/integrations/aqlm.py +66 -36
  48. transformers/integrations/awq.py +516 -45
  49. transformers/integrations/bitnet.py +105 -47
  50. transformers/integrations/bitsandbytes.py +202 -91
  51. transformers/integrations/deepspeed.py +4 -161
  52. transformers/integrations/eetq.py +82 -84
  53. transformers/integrations/executorch.py +1 -1
  54. transformers/integrations/fbgemm_fp8.py +145 -190
  55. transformers/integrations/finegrained_fp8.py +215 -249
  56. transformers/integrations/flash_attention.py +3 -3
  57. transformers/integrations/flex_attention.py +1 -1
  58. transformers/integrations/fp_quant.py +0 -90
  59. transformers/integrations/ggml.py +2 -11
  60. transformers/integrations/higgs.py +62 -37
  61. transformers/integrations/hub_kernels.py +8 -65
  62. transformers/integrations/integration_utils.py +3 -47
  63. transformers/integrations/mistral.py +0 -12
  64. transformers/integrations/mxfp4.py +80 -33
  65. transformers/integrations/peft.py +191 -483
  66. transformers/integrations/quanto.py +56 -77
  67. transformers/integrations/spqr.py +90 -42
  68. transformers/integrations/tensor_parallel.py +221 -167
  69. transformers/integrations/torchao.py +43 -35
  70. transformers/integrations/vptq.py +59 -40
  71. transformers/kernels/__init__.py +0 -0
  72. transformers/{models/pe_audio_video/processing_pe_audio_video.py → kernels/falcon_mamba/__init__.py} +3 -12
  73. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +529 -0
  74. transformers/loss/loss_utils.py +0 -2
  75. transformers/masking_utils.py +55 -51
  76. transformers/model_debugging_utils.py +5 -4
  77. transformers/modelcard.py +194 -15
  78. transformers/modeling_attn_mask_utils.py +19 -19
  79. transformers/modeling_flash_attention_utils.py +27 -27
  80. transformers/modeling_gguf_pytorch_utils.py +24 -79
  81. transformers/modeling_layers.py +22 -21
  82. transformers/modeling_outputs.py +253 -242
  83. transformers/modeling_rope_utils.py +117 -138
  84. transformers/modeling_utils.py +739 -850
  85. transformers/models/__init__.py +0 -27
  86. transformers/models/afmoe/configuration_afmoe.py +33 -40
  87. transformers/models/afmoe/modeling_afmoe.py +54 -42
  88. transformers/models/afmoe/modular_afmoe.py +33 -23
  89. transformers/models/aimv2/configuration_aimv2.py +10 -2
  90. transformers/models/aimv2/modeling_aimv2.py +42 -47
  91. transformers/models/aimv2/modular_aimv2.py +19 -17
  92. transformers/models/albert/configuration_albert.py +2 -8
  93. transformers/models/albert/modeling_albert.py +69 -70
  94. transformers/models/albert/tokenization_albert.py +14 -5
  95. transformers/models/align/configuration_align.py +6 -8
  96. transformers/models/align/modeling_align.py +89 -94
  97. transformers/models/align/processing_align.py +30 -2
  98. transformers/models/altclip/configuration_altclip.py +7 -4
  99. transformers/models/altclip/modeling_altclip.py +103 -114
  100. transformers/models/altclip/processing_altclip.py +15 -2
  101. transformers/models/apertus/__init__.py +1 -0
  102. transformers/models/apertus/configuration_apertus.py +28 -23
  103. transformers/models/apertus/modeling_apertus.py +40 -39
  104. transformers/models/apertus/modular_apertus.py +38 -37
  105. transformers/models/arcee/configuration_arcee.py +30 -25
  106. transformers/models/arcee/modeling_arcee.py +39 -36
  107. transformers/models/arcee/modular_arcee.py +23 -20
  108. transformers/models/aria/configuration_aria.py +44 -31
  109. transformers/models/aria/image_processing_aria.py +27 -25
  110. transformers/models/aria/modeling_aria.py +106 -110
  111. transformers/models/aria/modular_aria.py +127 -118
  112. transformers/models/aria/processing_aria.py +35 -28
  113. transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +1 -0
  114. transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +6 -3
  115. transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +8 -6
  116. transformers/models/audioflamingo3/__init__.py +1 -0
  117. transformers/models/audioflamingo3/configuration_audioflamingo3.py +1 -0
  118. transformers/models/audioflamingo3/modeling_audioflamingo3.py +49 -58
  119. transformers/models/audioflamingo3/modular_audioflamingo3.py +43 -53
  120. transformers/models/audioflamingo3/processing_audioflamingo3.py +30 -33
  121. transformers/models/auto/auto_factory.py +7 -6
  122. transformers/models/auto/configuration_auto.py +5 -66
  123. transformers/models/auto/feature_extraction_auto.py +10 -14
  124. transformers/models/auto/image_processing_auto.py +41 -32
  125. transformers/models/auto/modeling_auto.py +188 -46
  126. transformers/models/auto/processing_auto.py +11 -24
  127. transformers/models/auto/tokenization_auto.py +588 -171
  128. transformers/models/auto/video_processing_auto.py +10 -12
  129. transformers/models/autoformer/configuration_autoformer.py +7 -4
  130. transformers/models/autoformer/modeling_autoformer.py +101 -104
  131. transformers/models/aya_vision/configuration_aya_vision.py +1 -4
  132. transformers/models/aya_vision/modeling_aya_vision.py +102 -71
  133. transformers/models/aya_vision/modular_aya_vision.py +74 -46
  134. transformers/models/aya_vision/processing_aya_vision.py +53 -25
  135. transformers/models/bamba/configuration_bamba.py +39 -34
  136. transformers/models/bamba/modeling_bamba.py +86 -82
  137. transformers/models/bamba/modular_bamba.py +72 -70
  138. transformers/models/bark/configuration_bark.py +8 -6
  139. transformers/models/bark/generation_configuration_bark.py +5 -3
  140. transformers/models/bark/modeling_bark.py +57 -54
  141. transformers/models/bark/processing_bark.py +41 -19
  142. transformers/models/bart/configuration_bart.py +6 -9
  143. transformers/models/bart/modeling_bart.py +126 -135
  144. transformers/models/barthez/tokenization_barthez.py +11 -3
  145. transformers/models/bartpho/tokenization_bartpho.py +7 -6
  146. transformers/models/beit/configuration_beit.py +11 -0
  147. transformers/models/beit/image_processing_beit.py +56 -53
  148. transformers/models/beit/image_processing_beit_fast.py +12 -10
  149. transformers/models/beit/modeling_beit.py +60 -69
  150. transformers/models/bert/configuration_bert.py +2 -12
  151. transformers/models/bert/modeling_bert.py +122 -114
  152. transformers/models/bert/tokenization_bert.py +23 -8
  153. transformers/models/bert/tokenization_bert_legacy.py +5 -3
  154. transformers/models/bert_generation/configuration_bert_generation.py +2 -17
  155. transformers/models/bert_generation/modeling_bert_generation.py +49 -49
  156. transformers/models/bert_generation/tokenization_bert_generation.py +3 -2
  157. transformers/models/bert_japanese/tokenization_bert_japanese.py +6 -5
  158. transformers/models/bertweet/tokenization_bertweet.py +3 -1
  159. transformers/models/big_bird/configuration_big_bird.py +9 -12
  160. transformers/models/big_bird/modeling_big_bird.py +109 -116
  161. transformers/models/big_bird/tokenization_big_bird.py +43 -16
  162. transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -9
  163. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +117 -130
  164. transformers/models/biogpt/configuration_biogpt.py +2 -8
  165. transformers/models/biogpt/modeling_biogpt.py +76 -72
  166. transformers/models/biogpt/modular_biogpt.py +66 -62
  167. transformers/models/biogpt/tokenization_biogpt.py +5 -3
  168. transformers/models/bit/configuration_bit.py +1 -0
  169. transformers/models/bit/image_processing_bit.py +24 -21
  170. transformers/models/bit/image_processing_bit_fast.py +1 -0
  171. transformers/models/bit/modeling_bit.py +12 -25
  172. transformers/models/bitnet/configuration_bitnet.py +28 -23
  173. transformers/models/bitnet/modeling_bitnet.py +39 -36
  174. transformers/models/bitnet/modular_bitnet.py +6 -4
  175. transformers/models/blenderbot/configuration_blenderbot.py +5 -8
  176. transformers/models/blenderbot/modeling_blenderbot.py +96 -77
  177. transformers/models/blenderbot/tokenization_blenderbot.py +24 -18
  178. transformers/models/blenderbot_small/configuration_blenderbot_small.py +5 -8
  179. transformers/models/blenderbot_small/modeling_blenderbot_small.py +69 -79
  180. transformers/models/blenderbot_small/tokenization_blenderbot_small.py +3 -1
  181. transformers/models/blip/configuration_blip.py +10 -9
  182. transformers/models/blip/image_processing_blip.py +20 -17
  183. transformers/models/blip/image_processing_blip_fast.py +1 -0
  184. transformers/models/blip/modeling_blip.py +108 -117
  185. transformers/models/blip/modeling_blip_text.py +65 -73
  186. transformers/models/blip/processing_blip.py +36 -5
  187. transformers/models/blip_2/configuration_blip_2.py +2 -2
  188. transformers/models/blip_2/modeling_blip_2.py +118 -146
  189. transformers/models/blip_2/processing_blip_2.py +38 -8
  190. transformers/models/bloom/configuration_bloom.py +2 -5
  191. transformers/models/bloom/modeling_bloom.py +104 -77
  192. transformers/models/blt/configuration_blt.py +86 -94
  193. transformers/models/blt/modeling_blt.py +81 -238
  194. transformers/models/blt/modular_blt.py +65 -228
  195. transformers/models/bridgetower/configuration_bridgetower.py +2 -7
  196. transformers/models/bridgetower/image_processing_bridgetower.py +35 -34
  197. transformers/models/bridgetower/image_processing_bridgetower_fast.py +16 -13
  198. transformers/models/bridgetower/modeling_bridgetower.py +119 -141
  199. transformers/models/bridgetower/processing_bridgetower.py +16 -2
  200. transformers/models/bros/configuration_bros.py +18 -24
  201. transformers/models/bros/modeling_bros.py +80 -90
  202. transformers/models/bros/processing_bros.py +12 -2
  203. transformers/models/byt5/tokenization_byt5.py +6 -4
  204. transformers/models/camembert/configuration_camembert.py +2 -8
  205. transformers/models/camembert/modeling_camembert.py +195 -196
  206. transformers/models/camembert/modular_camembert.py +54 -51
  207. transformers/models/camembert/tokenization_camembert.py +13 -6
  208. transformers/models/canine/configuration_canine.py +2 -4
  209. transformers/models/canine/modeling_canine.py +75 -84
  210. transformers/models/canine/tokenization_canine.py +1 -2
  211. transformers/models/chameleon/configuration_chameleon.py +34 -29
  212. transformers/models/chameleon/image_processing_chameleon.py +24 -21
  213. transformers/models/chameleon/image_processing_chameleon_fast.py +6 -5
  214. transformers/models/chameleon/modeling_chameleon.py +93 -142
  215. transformers/models/chameleon/processing_chameleon.py +41 -16
  216. transformers/models/chinese_clip/configuration_chinese_clip.py +8 -10
  217. transformers/models/chinese_clip/image_processing_chinese_clip.py +24 -21
  218. transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +1 -0
  219. transformers/models/chinese_clip/modeling_chinese_clip.py +92 -96
  220. transformers/models/chinese_clip/processing_chinese_clip.py +15 -2
  221. transformers/models/clap/configuration_clap.py +9 -4
  222. transformers/models/clap/feature_extraction_clap.py +12 -11
  223. transformers/models/clap/modeling_clap.py +123 -136
  224. transformers/models/clap/processing_clap.py +15 -2
  225. transformers/models/clip/configuration_clip.py +2 -4
  226. transformers/models/clip/image_processing_clip.py +24 -21
  227. transformers/models/clip/image_processing_clip_fast.py +1 -9
  228. transformers/models/clip/modeling_clip.py +65 -65
  229. transformers/models/clip/processing_clip.py +14 -2
  230. transformers/models/clip/tokenization_clip.py +46 -21
  231. transformers/models/clipseg/configuration_clipseg.py +2 -4
  232. transformers/models/clipseg/modeling_clipseg.py +109 -119
  233. transformers/models/clipseg/processing_clipseg.py +42 -19
  234. transformers/models/clvp/configuration_clvp.py +5 -15
  235. transformers/models/clvp/feature_extraction_clvp.py +10 -7
  236. transformers/models/clvp/modeling_clvp.py +146 -155
  237. transformers/models/clvp/number_normalizer.py +2 -1
  238. transformers/models/clvp/processing_clvp.py +20 -3
  239. transformers/models/clvp/tokenization_clvp.py +64 -1
  240. transformers/models/code_llama/tokenization_code_llama.py +44 -18
  241. transformers/models/codegen/configuration_codegen.py +4 -4
  242. transformers/models/codegen/modeling_codegen.py +53 -63
  243. transformers/models/codegen/tokenization_codegen.py +47 -17
  244. transformers/models/cohere/configuration_cohere.py +30 -25
  245. transformers/models/cohere/modeling_cohere.py +42 -40
  246. transformers/models/cohere/modular_cohere.py +29 -26
  247. transformers/models/cohere/tokenization_cohere.py +46 -15
  248. transformers/models/cohere2/configuration_cohere2.py +32 -31
  249. transformers/models/cohere2/modeling_cohere2.py +44 -42
  250. transformers/models/cohere2/modular_cohere2.py +54 -54
  251. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +14 -13
  252. transformers/models/cohere2_vision/modeling_cohere2_vision.py +58 -59
  253. transformers/models/cohere2_vision/modular_cohere2_vision.py +46 -45
  254. transformers/models/cohere2_vision/processing_cohere2_vision.py +36 -6
  255. transformers/models/colpali/configuration_colpali.py +1 -0
  256. transformers/models/colpali/modeling_colpali.py +16 -14
  257. transformers/models/colpali/modular_colpali.py +51 -11
  258. transformers/models/colpali/processing_colpali.py +52 -14
  259. transformers/models/colqwen2/modeling_colqwen2.py +28 -28
  260. transformers/models/colqwen2/modular_colqwen2.py +74 -37
  261. transformers/models/colqwen2/processing_colqwen2.py +52 -16
  262. transformers/models/conditional_detr/configuration_conditional_detr.py +2 -1
  263. transformers/models/conditional_detr/image_processing_conditional_detr.py +70 -67
  264. transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +36 -36
  265. transformers/models/conditional_detr/modeling_conditional_detr.py +87 -99
  266. transformers/models/conditional_detr/modular_conditional_detr.py +3 -49
  267. transformers/models/convbert/configuration_convbert.py +8 -11
  268. transformers/models/convbert/modeling_convbert.py +87 -94
  269. transformers/models/convbert/tokenization_convbert.py +1 -0
  270. transformers/models/convnext/configuration_convnext.py +1 -0
  271. transformers/models/convnext/image_processing_convnext.py +23 -20
  272. transformers/models/convnext/image_processing_convnext_fast.py +21 -16
  273. transformers/models/convnext/modeling_convnext.py +12 -9
  274. transformers/models/convnextv2/configuration_convnextv2.py +1 -0
  275. transformers/models/convnextv2/modeling_convnextv2.py +12 -9
  276. transformers/models/cpm/tokenization_cpm.py +7 -6
  277. transformers/models/cpm/tokenization_cpm_fast.py +5 -3
  278. transformers/models/cpmant/configuration_cpmant.py +1 -4
  279. transformers/models/cpmant/modeling_cpmant.py +40 -38
  280. transformers/models/cpmant/tokenization_cpmant.py +3 -1
  281. transformers/models/csm/configuration_csm.py +66 -58
  282. transformers/models/csm/generation_csm.py +35 -31
  283. transformers/models/csm/modeling_csm.py +85 -85
  284. transformers/models/csm/modular_csm.py +58 -58
  285. transformers/models/csm/processing_csm.py +68 -25
  286. transformers/models/ctrl/configuration_ctrl.py +1 -16
  287. transformers/models/ctrl/modeling_ctrl.py +44 -54
  288. transformers/models/ctrl/tokenization_ctrl.py +1 -0
  289. transformers/models/cvt/configuration_cvt.py +1 -0
  290. transformers/models/cvt/modeling_cvt.py +16 -20
  291. transformers/models/cwm/__init__.py +1 -0
  292. transformers/models/cwm/configuration_cwm.py +12 -8
  293. transformers/models/cwm/modeling_cwm.py +39 -37
  294. transformers/models/cwm/modular_cwm.py +12 -10
  295. transformers/models/d_fine/configuration_d_fine.py +5 -7
  296. transformers/models/d_fine/modeling_d_fine.py +128 -138
  297. transformers/models/d_fine/modular_d_fine.py +18 -33
  298. transformers/models/dab_detr/configuration_dab_detr.py +3 -6
  299. transformers/models/dab_detr/modeling_dab_detr.py +75 -81
  300. transformers/models/dac/configuration_dac.py +1 -0
  301. transformers/models/dac/feature_extraction_dac.py +9 -6
  302. transformers/models/dac/modeling_dac.py +26 -24
  303. transformers/models/data2vec/configuration_data2vec_audio.py +2 -4
  304. transformers/models/data2vec/configuration_data2vec_text.py +3 -11
  305. transformers/models/data2vec/configuration_data2vec_vision.py +1 -0
  306. transformers/models/data2vec/modeling_data2vec_audio.py +56 -57
  307. transformers/models/data2vec/modeling_data2vec_text.py +93 -98
  308. transformers/models/data2vec/modeling_data2vec_vision.py +45 -49
  309. transformers/models/data2vec/modular_data2vec_audio.py +1 -6
  310. transformers/models/data2vec/modular_data2vec_text.py +54 -58
  311. transformers/models/dbrx/configuration_dbrx.py +22 -36
  312. transformers/models/dbrx/modeling_dbrx.py +45 -42
  313. transformers/models/dbrx/modular_dbrx.py +33 -31
  314. transformers/models/deberta/configuration_deberta.py +1 -6
  315. transformers/models/deberta/modeling_deberta.py +60 -64
  316. transformers/models/deberta/tokenization_deberta.py +21 -9
  317. transformers/models/deberta_v2/configuration_deberta_v2.py +1 -6
  318. transformers/models/deberta_v2/modeling_deberta_v2.py +65 -71
  319. transformers/models/deberta_v2/tokenization_deberta_v2.py +29 -11
  320. transformers/models/decision_transformer/configuration_decision_transformer.py +2 -3
  321. transformers/models/decision_transformer/modeling_decision_transformer.py +56 -60
  322. transformers/models/deepseek_v2/configuration_deepseek_v2.py +44 -39
  323. transformers/models/deepseek_v2/modeling_deepseek_v2.py +43 -43
  324. transformers/models/deepseek_v2/modular_deepseek_v2.py +49 -48
  325. transformers/models/deepseek_v3/configuration_deepseek_v3.py +45 -40
  326. transformers/models/deepseek_v3/modeling_deepseek_v3.py +42 -45
  327. transformers/models/deepseek_v3/modular_deepseek_v3.py +9 -14
  328. transformers/models/deepseek_vl/configuration_deepseek_vl.py +3 -2
  329. transformers/models/deepseek_vl/image_processing_deepseek_vl.py +26 -25
  330. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +10 -10
  331. transformers/models/deepseek_vl/modeling_deepseek_vl.py +48 -57
  332. transformers/models/deepseek_vl/modular_deepseek_vl.py +43 -14
  333. transformers/models/deepseek_vl/processing_deepseek_vl.py +41 -10
  334. transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +5 -3
  335. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +35 -35
  336. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +24 -20
  337. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +61 -109
  338. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +118 -146
  339. transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +44 -12
  340. transformers/models/deformable_detr/configuration_deformable_detr.py +3 -2
  341. transformers/models/deformable_detr/image_processing_deformable_detr.py +61 -59
  342. transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +28 -28
  343. transformers/models/deformable_detr/modeling_deformable_detr.py +82 -88
  344. transformers/models/deformable_detr/modular_deformable_detr.py +3 -1
  345. transformers/models/deit/configuration_deit.py +1 -0
  346. transformers/models/deit/image_processing_deit.py +21 -18
  347. transformers/models/deit/image_processing_deit_fast.py +1 -0
  348. transformers/models/deit/modeling_deit.py +22 -24
  349. transformers/models/depth_anything/configuration_depth_anything.py +4 -2
  350. transformers/models/depth_anything/modeling_depth_anything.py +10 -10
  351. transformers/models/depth_pro/configuration_depth_pro.py +1 -0
  352. transformers/models/depth_pro/image_processing_depth_pro.py +23 -22
  353. transformers/models/depth_pro/image_processing_depth_pro_fast.py +10 -8
  354. transformers/models/depth_pro/modeling_depth_pro.py +27 -31
  355. transformers/models/detr/configuration_detr.py +2 -1
  356. transformers/models/detr/image_processing_detr.py +66 -64
  357. transformers/models/detr/image_processing_detr_fast.py +34 -33
  358. transformers/models/detr/modeling_detr.py +79 -95
  359. transformers/models/dia/configuration_dia.py +15 -9
  360. transformers/models/dia/feature_extraction_dia.py +9 -6
  361. transformers/models/dia/generation_dia.py +50 -48
  362. transformers/models/dia/modeling_dia.py +69 -78
  363. transformers/models/dia/modular_dia.py +56 -64
  364. transformers/models/dia/processing_dia.py +29 -39
  365. transformers/models/dia/tokenization_dia.py +6 -3
  366. transformers/models/diffllama/configuration_diffllama.py +30 -25
  367. transformers/models/diffllama/modeling_diffllama.py +49 -46
  368. transformers/models/diffllama/modular_diffllama.py +19 -17
  369. transformers/models/dinat/configuration_dinat.py +1 -0
  370. transformers/models/dinat/modeling_dinat.py +44 -47
  371. transformers/models/dinov2/configuration_dinov2.py +1 -0
  372. transformers/models/dinov2/modeling_dinov2.py +15 -15
  373. transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +1 -1
  374. transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +15 -16
  375. transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +9 -9
  376. transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +7 -4
  377. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +6 -3
  378. transformers/models/dinov3_vit/configuration_dinov3_vit.py +8 -5
  379. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +9 -7
  380. transformers/models/dinov3_vit/modeling_dinov3_vit.py +18 -19
  381. transformers/models/dinov3_vit/modular_dinov3_vit.py +15 -16
  382. transformers/models/distilbert/configuration_distilbert.py +2 -8
  383. transformers/models/distilbert/modeling_distilbert.py +55 -55
  384. transformers/models/distilbert/tokenization_distilbert.py +1 -13
  385. transformers/models/doge/__init__.py +1 -0
  386. transformers/models/doge/configuration_doge.py +32 -39
  387. transformers/models/doge/modeling_doge.py +49 -45
  388. transformers/models/doge/modular_doge.py +63 -71
  389. transformers/models/donut/configuration_donut_swin.py +1 -0
  390. transformers/models/donut/image_processing_donut.py +29 -26
  391. transformers/models/donut/image_processing_donut_fast.py +15 -9
  392. transformers/models/donut/modeling_donut_swin.py +58 -62
  393. transformers/models/donut/processing_donut.py +26 -5
  394. transformers/models/dots1/configuration_dots1.py +33 -41
  395. transformers/models/dots1/modeling_dots1.py +45 -54
  396. transformers/models/dots1/modular_dots1.py +4 -5
  397. transformers/models/dpr/configuration_dpr.py +2 -19
  398. transformers/models/dpr/modeling_dpr.py +39 -42
  399. transformers/models/dpr/tokenization_dpr.py +9 -19
  400. transformers/models/dpr/tokenization_dpr_fast.py +9 -7
  401. transformers/models/dpt/configuration_dpt.py +2 -1
  402. transformers/models/dpt/image_processing_dpt.py +66 -65
  403. transformers/models/dpt/image_processing_dpt_fast.py +20 -18
  404. transformers/models/dpt/modeling_dpt.py +30 -32
  405. transformers/models/dpt/modular_dpt.py +17 -15
  406. transformers/models/edgetam/configuration_edgetam.py +3 -2
  407. transformers/models/edgetam/modeling_edgetam.py +86 -86
  408. transformers/models/edgetam/modular_edgetam.py +26 -21
  409. transformers/models/edgetam_video/__init__.py +1 -0
  410. transformers/models/edgetam_video/configuration_edgetam_video.py +1 -0
  411. transformers/models/edgetam_video/modeling_edgetam_video.py +158 -169
  412. transformers/models/edgetam_video/modular_edgetam_video.py +37 -30
  413. transformers/models/efficientloftr/configuration_efficientloftr.py +5 -4
  414. transformers/models/efficientloftr/image_processing_efficientloftr.py +16 -14
  415. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +9 -9
  416. transformers/models/efficientloftr/modeling_efficientloftr.py +38 -59
  417. transformers/models/efficientloftr/modular_efficientloftr.py +3 -1
  418. transformers/models/efficientnet/configuration_efficientnet.py +1 -0
  419. transformers/models/efficientnet/image_processing_efficientnet.py +32 -28
  420. transformers/models/efficientnet/image_processing_efficientnet_fast.py +19 -17
  421. transformers/models/efficientnet/modeling_efficientnet.py +15 -19
  422. transformers/models/electra/configuration_electra.py +3 -13
  423. transformers/models/electra/modeling_electra.py +103 -108
  424. transformers/models/emu3/configuration_emu3.py +17 -13
  425. transformers/models/emu3/image_processing_emu3.py +39 -44
  426. transformers/models/emu3/modeling_emu3.py +108 -148
  427. transformers/models/emu3/modular_emu3.py +73 -115
  428. transformers/models/emu3/processing_emu3.py +43 -18
  429. transformers/models/encodec/configuration_encodec.py +4 -2
  430. transformers/models/encodec/feature_extraction_encodec.py +13 -10
  431. transformers/models/encodec/modeling_encodec.py +29 -39
  432. transformers/models/encoder_decoder/configuration_encoder_decoder.py +2 -12
  433. transformers/models/encoder_decoder/modeling_encoder_decoder.py +43 -37
  434. transformers/models/eomt/configuration_eomt.py +1 -0
  435. transformers/models/eomt/image_processing_eomt.py +56 -66
  436. transformers/models/eomt/image_processing_eomt_fast.py +33 -76
  437. transformers/models/eomt/modeling_eomt.py +18 -23
  438. transformers/models/eomt/modular_eomt.py +13 -18
  439. transformers/models/ernie/configuration_ernie.py +3 -24
  440. transformers/models/ernie/modeling_ernie.py +132 -127
  441. transformers/models/ernie/modular_ernie.py +103 -97
  442. transformers/models/ernie4_5/configuration_ernie4_5.py +27 -23
  443. transformers/models/ernie4_5/modeling_ernie4_5.py +38 -36
  444. transformers/models/ernie4_5/modular_ernie4_5.py +4 -3
  445. transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +36 -32
  446. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +55 -56
  447. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +46 -18
  448. transformers/models/esm/configuration_esm.py +15 -11
  449. transformers/models/esm/modeling_esm.py +34 -38
  450. transformers/models/esm/modeling_esmfold.py +49 -53
  451. transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
  452. transformers/models/esm/openfold_utils/loss.py +2 -1
  453. transformers/models/esm/openfold_utils/protein.py +16 -15
  454. transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
  455. transformers/models/esm/tokenization_esm.py +4 -2
  456. transformers/models/evolla/configuration_evolla.py +40 -50
  457. transformers/models/evolla/modeling_evolla.py +66 -71
  458. transformers/models/evolla/modular_evolla.py +47 -53
  459. transformers/models/evolla/processing_evolla.py +35 -23
  460. transformers/models/exaone4/configuration_exaone4.py +25 -23
  461. transformers/models/exaone4/modeling_exaone4.py +38 -35
  462. transformers/models/exaone4/modular_exaone4.py +46 -44
  463. transformers/models/falcon/configuration_falcon.py +26 -31
  464. transformers/models/falcon/modeling_falcon.py +80 -82
  465. transformers/models/falcon_h1/configuration_falcon_h1.py +51 -45
  466. transformers/models/falcon_h1/modeling_falcon_h1.py +82 -85
  467. transformers/models/falcon_h1/modular_falcon_h1.py +51 -56
  468. transformers/models/falcon_mamba/configuration_falcon_mamba.py +2 -1
  469. transformers/models/falcon_mamba/modeling_falcon_mamba.py +82 -75
  470. transformers/models/falcon_mamba/modular_falcon_mamba.py +45 -28
  471. transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +6 -2
  472. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +60 -76
  473. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +3 -2
  474. transformers/models/flaubert/configuration_flaubert.py +5 -10
  475. transformers/models/flaubert/modeling_flaubert.py +143 -145
  476. transformers/models/flaubert/tokenization_flaubert.py +5 -3
  477. transformers/models/flava/configuration_flava.py +6 -5
  478. transformers/models/flava/image_processing_flava.py +67 -66
  479. transformers/models/flava/image_processing_flava_fast.py +49 -46
  480. transformers/models/flava/modeling_flava.py +136 -153
  481. transformers/models/flava/processing_flava.py +12 -2
  482. transformers/models/flex_olmo/__init__.py +1 -0
  483. transformers/models/flex_olmo/configuration_flex_olmo.py +32 -28
  484. transformers/models/flex_olmo/modeling_flex_olmo.py +47 -47
  485. transformers/models/flex_olmo/modular_flex_olmo.py +44 -40
  486. transformers/models/florence2/configuration_florence2.py +1 -0
  487. transformers/models/florence2/modeling_florence2.py +69 -111
  488. transformers/models/florence2/modular_florence2.py +101 -104
  489. transformers/models/florence2/processing_florence2.py +47 -18
  490. transformers/models/fnet/configuration_fnet.py +2 -6
  491. transformers/models/fnet/modeling_fnet.py +80 -83
  492. transformers/models/fnet/tokenization_fnet.py +1 -0
  493. transformers/models/focalnet/configuration_focalnet.py +1 -0
  494. transformers/models/focalnet/modeling_focalnet.py +45 -51
  495. transformers/models/fsmt/configuration_fsmt.py +17 -12
  496. transformers/models/fsmt/modeling_fsmt.py +48 -49
  497. transformers/models/fsmt/tokenization_fsmt.py +5 -3
  498. transformers/models/funnel/configuration_funnel.py +1 -8
  499. transformers/models/funnel/modeling_funnel.py +93 -99
  500. transformers/models/funnel/tokenization_funnel.py +27 -17
  501. transformers/models/fuyu/configuration_fuyu.py +34 -28
  502. transformers/models/fuyu/image_processing_fuyu.py +31 -29
  503. transformers/models/fuyu/image_processing_fuyu_fast.py +17 -17
  504. transformers/models/fuyu/modeling_fuyu.py +53 -53
  505. transformers/models/fuyu/processing_fuyu.py +34 -23
  506. transformers/models/gemma/configuration_gemma.py +30 -25
  507. transformers/models/gemma/modeling_gemma.py +50 -46
  508. transformers/models/gemma/modular_gemma.py +47 -42
  509. transformers/models/gemma/tokenization_gemma.py +30 -10
  510. transformers/models/gemma2/configuration_gemma2.py +35 -30
  511. transformers/models/gemma2/modeling_gemma2.py +42 -39
  512. transformers/models/gemma2/modular_gemma2.py +66 -63
  513. transformers/models/gemma3/configuration_gemma3.py +44 -44
  514. transformers/models/gemma3/image_processing_gemma3.py +31 -29
  515. transformers/models/gemma3/image_processing_gemma3_fast.py +13 -11
  516. transformers/models/gemma3/modeling_gemma3.py +207 -159
  517. transformers/models/gemma3/modular_gemma3.py +204 -153
  518. transformers/models/gemma3/processing_gemma3.py +5 -5
  519. transformers/models/gemma3n/configuration_gemma3n.py +26 -36
  520. transformers/models/gemma3n/feature_extraction_gemma3n.py +11 -9
  521. transformers/models/gemma3n/modeling_gemma3n.py +356 -222
  522. transformers/models/gemma3n/modular_gemma3n.py +207 -230
  523. transformers/models/gemma3n/processing_gemma3n.py +26 -12
  524. transformers/models/git/configuration_git.py +8 -5
  525. transformers/models/git/modeling_git.py +204 -266
  526. transformers/models/git/processing_git.py +14 -2
  527. transformers/models/glm/configuration_glm.py +28 -24
  528. transformers/models/glm/modeling_glm.py +40 -37
  529. transformers/models/glm/modular_glm.py +7 -4
  530. transformers/models/glm4/configuration_glm4.py +28 -24
  531. transformers/models/glm4/modeling_glm4.py +42 -40
  532. transformers/models/glm4/modular_glm4.py +10 -8
  533. transformers/models/glm46v/configuration_glm46v.py +1 -0
  534. transformers/models/glm46v/image_processing_glm46v.py +40 -35
  535. transformers/models/glm46v/image_processing_glm46v_fast.py +9 -9
  536. transformers/models/glm46v/modeling_glm46v.py +90 -137
  537. transformers/models/glm46v/modular_glm46v.py +3 -4
  538. transformers/models/glm46v/processing_glm46v.py +41 -7
  539. transformers/models/glm46v/video_processing_glm46v.py +11 -9
  540. transformers/models/glm4_moe/configuration_glm4_moe.py +32 -40
  541. transformers/models/glm4_moe/modeling_glm4_moe.py +42 -45
  542. transformers/models/glm4_moe/modular_glm4_moe.py +34 -42
  543. transformers/models/glm4v/configuration_glm4v.py +20 -18
  544. transformers/models/glm4v/image_processing_glm4v.py +40 -34
  545. transformers/models/glm4v/image_processing_glm4v_fast.py +9 -8
  546. transformers/models/glm4v/modeling_glm4v.py +205 -254
  547. transformers/models/glm4v/modular_glm4v.py +224 -210
  548. transformers/models/glm4v/processing_glm4v.py +41 -7
  549. transformers/models/glm4v/video_processing_glm4v.py +11 -9
  550. transformers/models/glm4v_moe/configuration_glm4v_moe.py +125 -136
  551. transformers/models/glm4v_moe/modeling_glm4v_moe.py +368 -377
  552. transformers/models/glm4v_moe/modular_glm4v_moe.py +169 -83
  553. transformers/models/glpn/configuration_glpn.py +1 -0
  554. transformers/models/glpn/image_processing_glpn.py +12 -11
  555. transformers/models/glpn/image_processing_glpn_fast.py +13 -11
  556. transformers/models/glpn/modeling_glpn.py +14 -16
  557. transformers/models/got_ocr2/configuration_got_ocr2.py +12 -4
  558. transformers/models/got_ocr2/image_processing_got_ocr2.py +24 -22
  559. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +11 -9
  560. transformers/models/got_ocr2/modeling_got_ocr2.py +80 -77
  561. transformers/models/got_ocr2/modular_got_ocr2.py +51 -54
  562. transformers/models/got_ocr2/processing_got_ocr2.py +63 -42
  563. transformers/models/gpt2/configuration_gpt2.py +2 -13
  564. transformers/models/gpt2/modeling_gpt2.py +115 -120
  565. transformers/models/gpt2/tokenization_gpt2.py +46 -15
  566. transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +2 -5
  567. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +89 -79
  568. transformers/models/gpt_neo/configuration_gpt_neo.py +2 -9
  569. transformers/models/gpt_neo/modeling_gpt_neo.py +67 -83
  570. transformers/models/gpt_neox/configuration_gpt_neox.py +25 -25
  571. transformers/models/gpt_neox/modeling_gpt_neox.py +75 -76
  572. transformers/models/gpt_neox/modular_gpt_neox.py +66 -67
  573. transformers/models/gpt_neox/tokenization_gpt_neox.py +51 -9
  574. transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +19 -24
  575. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +47 -46
  576. transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +3 -1
  577. transformers/models/gpt_oss/configuration_gpt_oss.py +28 -46
  578. transformers/models/gpt_oss/modeling_gpt_oss.py +121 -83
  579. transformers/models/gpt_oss/modular_gpt_oss.py +103 -64
  580. transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
  581. transformers/models/gptj/configuration_gptj.py +4 -4
  582. transformers/models/gptj/modeling_gptj.py +87 -101
  583. transformers/models/granite/configuration_granite.py +33 -28
  584. transformers/models/granite/modeling_granite.py +46 -44
  585. transformers/models/granite/modular_granite.py +31 -29
  586. transformers/models/granite_speech/configuration_granite_speech.py +1 -0
  587. transformers/models/granite_speech/feature_extraction_granite_speech.py +3 -1
  588. transformers/models/granite_speech/modeling_granite_speech.py +52 -82
  589. transformers/models/granite_speech/processing_granite_speech.py +4 -11
  590. transformers/models/granitemoe/configuration_granitemoe.py +36 -31
  591. transformers/models/granitemoe/modeling_granitemoe.py +46 -41
  592. transformers/models/granitemoe/modular_granitemoe.py +27 -22
  593. transformers/models/granitemoehybrid/__init__.py +1 -0
  594. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +47 -46
  595. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +93 -97
  596. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +21 -54
  597. transformers/models/granitemoeshared/configuration_granitemoeshared.py +37 -33
  598. transformers/models/granitemoeshared/modeling_granitemoeshared.py +61 -54
  599. transformers/models/granitemoeshared/modular_granitemoeshared.py +21 -19
  600. transformers/models/grounding_dino/configuration_grounding_dino.py +4 -6
  601. transformers/models/grounding_dino/image_processing_grounding_dino.py +62 -60
  602. transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +29 -28
  603. transformers/models/grounding_dino/modeling_grounding_dino.py +140 -155
  604. transformers/models/grounding_dino/modular_grounding_dino.py +3 -2
  605. transformers/models/grounding_dino/processing_grounding_dino.py +38 -10
  606. transformers/models/groupvit/configuration_groupvit.py +2 -4
  607. transformers/models/groupvit/modeling_groupvit.py +93 -107
  608. transformers/models/helium/configuration_helium.py +29 -25
  609. transformers/models/helium/modeling_helium.py +40 -38
  610. transformers/models/helium/modular_helium.py +7 -3
  611. transformers/models/herbert/tokenization_herbert.py +28 -10
  612. transformers/models/hgnet_v2/configuration_hgnet_v2.py +1 -0
  613. transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -24
  614. transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -24
  615. transformers/models/hiera/configuration_hiera.py +1 -0
  616. transformers/models/hiera/modeling_hiera.py +66 -72
  617. transformers/models/hubert/configuration_hubert.py +2 -4
  618. transformers/models/hubert/modeling_hubert.py +37 -42
  619. transformers/models/hubert/modular_hubert.py +11 -13
  620. transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +31 -26
  621. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +38 -35
  622. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +6 -4
  623. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  624. transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +36 -31
  625. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +42 -47
  626. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +9 -9
  627. transformers/models/ibert/configuration_ibert.py +2 -4
  628. transformers/models/ibert/modeling_ibert.py +62 -82
  629. transformers/models/ibert/quant_modules.py +1 -0
  630. transformers/models/idefics/configuration_idefics.py +8 -5
  631. transformers/models/idefics/image_processing_idefics.py +15 -13
  632. transformers/models/idefics/modeling_idefics.py +82 -75
  633. transformers/models/idefics/perceiver.py +3 -1
  634. transformers/models/idefics/processing_idefics.py +48 -32
  635. transformers/models/idefics/vision.py +25 -24
  636. transformers/models/idefics2/configuration_idefics2.py +3 -1
  637. transformers/models/idefics2/image_processing_idefics2.py +32 -31
  638. transformers/models/idefics2/image_processing_idefics2_fast.py +8 -8
  639. transformers/models/idefics2/modeling_idefics2.py +101 -127
  640. transformers/models/idefics2/processing_idefics2.py +68 -10
  641. transformers/models/idefics3/configuration_idefics3.py +4 -1
  642. transformers/models/idefics3/image_processing_idefics3.py +43 -42
  643. transformers/models/idefics3/image_processing_idefics3_fast.py +15 -40
  644. transformers/models/idefics3/modeling_idefics3.py +90 -115
  645. transformers/models/idefics3/processing_idefics3.py +69 -15
  646. transformers/models/ijepa/configuration_ijepa.py +1 -0
  647. transformers/models/ijepa/modeling_ijepa.py +11 -10
  648. transformers/models/ijepa/modular_ijepa.py +7 -5
  649. transformers/models/imagegpt/configuration_imagegpt.py +2 -9
  650. transformers/models/imagegpt/image_processing_imagegpt.py +18 -17
  651. transformers/models/imagegpt/image_processing_imagegpt_fast.py +16 -11
  652. transformers/models/imagegpt/modeling_imagegpt.py +65 -76
  653. transformers/models/informer/configuration_informer.py +9 -6
  654. transformers/models/informer/modeling_informer.py +86 -88
  655. transformers/models/informer/modular_informer.py +16 -14
  656. transformers/models/instructblip/configuration_instructblip.py +2 -2
  657. transformers/models/instructblip/modeling_instructblip.py +63 -103
  658. transformers/models/instructblip/processing_instructblip.py +36 -10
  659. transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -2
  660. transformers/models/instructblipvideo/modeling_instructblipvideo.py +139 -157
  661. transformers/models/instructblipvideo/modular_instructblipvideo.py +64 -73
  662. transformers/models/instructblipvideo/processing_instructblipvideo.py +33 -14
  663. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +8 -6
  664. transformers/models/internvl/configuration_internvl.py +1 -0
  665. transformers/models/internvl/modeling_internvl.py +106 -85
  666. transformers/models/internvl/modular_internvl.py +67 -47
  667. transformers/models/internvl/processing_internvl.py +45 -12
  668. transformers/models/internvl/video_processing_internvl.py +12 -10
  669. transformers/models/jamba/configuration_jamba.py +8 -5
  670. transformers/models/jamba/modeling_jamba.py +66 -68
  671. transformers/models/jamba/modular_jamba.py +55 -54
  672. transformers/models/janus/configuration_janus.py +1 -0
  673. transformers/models/janus/image_processing_janus.py +37 -35
  674. transformers/models/janus/image_processing_janus_fast.py +20 -18
  675. transformers/models/janus/modeling_janus.py +191 -115
  676. transformers/models/janus/modular_janus.py +84 -133
  677. transformers/models/janus/processing_janus.py +43 -17
  678. transformers/models/jetmoe/configuration_jetmoe.py +26 -24
  679. transformers/models/jetmoe/modeling_jetmoe.py +46 -43
  680. transformers/models/jetmoe/modular_jetmoe.py +33 -31
  681. transformers/models/kosmos2/configuration_kosmos2.py +9 -10
  682. transformers/models/kosmos2/modeling_kosmos2.py +173 -208
  683. transformers/models/kosmos2/processing_kosmos2.py +55 -40
  684. transformers/models/kosmos2_5/__init__.py +1 -0
  685. transformers/models/kosmos2_5/configuration_kosmos2_5.py +9 -8
  686. transformers/models/kosmos2_5/image_processing_kosmos2_5.py +12 -10
  687. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +13 -4
  688. transformers/models/kosmos2_5/modeling_kosmos2_5.py +118 -132
  689. transformers/models/kosmos2_5/processing_kosmos2_5.py +29 -8
  690. transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +28 -31
  691. transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +14 -12
  692. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +100 -110
  693. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +22 -28
  694. transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +8 -2
  695. transformers/models/layoutlm/configuration_layoutlm.py +2 -14
  696. transformers/models/layoutlm/modeling_layoutlm.py +72 -77
  697. transformers/models/layoutlmv2/configuration_layoutlmv2.py +17 -14
  698. transformers/models/layoutlmv2/image_processing_layoutlmv2.py +21 -18
  699. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +9 -7
  700. transformers/models/layoutlmv2/modeling_layoutlmv2.py +50 -64
  701. transformers/models/layoutlmv2/processing_layoutlmv2.py +44 -14
  702. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +126 -73
  703. transformers/models/layoutlmv3/configuration_layoutlmv3.py +19 -16
  704. transformers/models/layoutlmv3/image_processing_layoutlmv3.py +26 -24
  705. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +11 -9
  706. transformers/models/layoutlmv3/modeling_layoutlmv3.py +56 -82
  707. transformers/models/layoutlmv3/processing_layoutlmv3.py +46 -14
  708. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +134 -74
  709. transformers/models/layoutxlm/configuration_layoutxlm.py +17 -14
  710. transformers/models/layoutxlm/modular_layoutxlm.py +1 -0
  711. transformers/models/layoutxlm/processing_layoutxlm.py +44 -14
  712. transformers/models/layoutxlm/tokenization_layoutxlm.py +113 -77
  713. transformers/models/led/configuration_led.py +12 -8
  714. transformers/models/led/modeling_led.py +266 -124
  715. transformers/models/levit/configuration_levit.py +1 -0
  716. transformers/models/levit/image_processing_levit.py +21 -19
  717. transformers/models/levit/image_processing_levit_fast.py +5 -4
  718. transformers/models/levit/modeling_levit.py +19 -38
  719. transformers/models/lfm2/configuration_lfm2.py +30 -27
  720. transformers/models/lfm2/modeling_lfm2.py +50 -47
  721. transformers/models/lfm2/modular_lfm2.py +30 -29
  722. transformers/models/lfm2_moe/__init__.py +1 -0
  723. transformers/models/lfm2_moe/configuration_lfm2_moe.py +9 -6
  724. transformers/models/lfm2_moe/modeling_lfm2_moe.py +53 -61
  725. transformers/models/lfm2_moe/modular_lfm2_moe.py +37 -13
  726. transformers/models/lfm2_vl/configuration_lfm2_vl.py +1 -4
  727. transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +12 -41
  728. transformers/models/lfm2_vl/modeling_lfm2_vl.py +66 -84
  729. transformers/models/lfm2_vl/modular_lfm2_vl.py +56 -70
  730. transformers/models/lfm2_vl/processing_lfm2_vl.py +76 -96
  731. transformers/models/lightglue/image_processing_lightglue.py +15 -16
  732. transformers/models/lightglue/image_processing_lightglue_fast.py +9 -9
  733. transformers/models/lightglue/modeling_lightglue.py +31 -31
  734. transformers/models/lightglue/modular_lightglue.py +28 -29
  735. transformers/models/lilt/configuration_lilt.py +2 -6
  736. transformers/models/lilt/modeling_lilt.py +70 -76
  737. transformers/models/llama/configuration_llama.py +31 -26
  738. transformers/models/llama/modeling_llama.py +39 -36
  739. transformers/models/llama/tokenization_llama.py +44 -14
  740. transformers/models/llama4/configuration_llama4.py +30 -27
  741. transformers/models/llama4/image_processing_llama4_fast.py +14 -12
  742. transformers/models/llama4/modeling_llama4.py +113 -120
  743. transformers/models/llama4/processing_llama4.py +57 -33
  744. transformers/models/llava/configuration_llava.py +1 -10
  745. transformers/models/llava/image_processing_llava.py +28 -25
  746. transformers/models/llava/image_processing_llava_fast.py +11 -9
  747. transformers/models/llava/modeling_llava.py +109 -85
  748. transformers/models/llava/processing_llava.py +51 -18
  749. transformers/models/llava_next/configuration_llava_next.py +2 -2
  750. transformers/models/llava_next/image_processing_llava_next.py +45 -43
  751. transformers/models/llava_next/image_processing_llava_next_fast.py +13 -11
  752. transformers/models/llava_next/modeling_llava_next.py +107 -110
  753. transformers/models/llava_next/processing_llava_next.py +47 -18
  754. transformers/models/llava_next_video/configuration_llava_next_video.py +7 -4
  755. transformers/models/llava_next_video/modeling_llava_next_video.py +158 -175
  756. transformers/models/llava_next_video/modular_llava_next_video.py +150 -155
  757. transformers/models/llava_next_video/processing_llava_next_video.py +63 -21
  758. transformers/models/llava_next_video/video_processing_llava_next_video.py +1 -0
  759. transformers/models/llava_onevision/configuration_llava_onevision.py +7 -4
  760. transformers/models/llava_onevision/image_processing_llava_onevision.py +42 -40
  761. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +15 -14
  762. transformers/models/llava_onevision/modeling_llava_onevision.py +169 -177
  763. transformers/models/llava_onevision/modular_llava_onevision.py +156 -163
  764. transformers/models/llava_onevision/processing_llava_onevision.py +53 -21
  765. transformers/models/llava_onevision/video_processing_llava_onevision.py +1 -0
  766. transformers/models/longcat_flash/__init__.py +1 -0
  767. transformers/models/longcat_flash/configuration_longcat_flash.py +42 -37
  768. transformers/models/longcat_flash/modeling_longcat_flash.py +36 -36
  769. transformers/models/longcat_flash/modular_longcat_flash.py +21 -21
  770. transformers/models/longformer/configuration_longformer.py +5 -5
  771. transformers/models/longformer/modeling_longformer.py +101 -105
  772. transformers/models/longt5/configuration_longt5.py +7 -9
  773. transformers/models/longt5/modeling_longt5.py +49 -49
  774. transformers/models/luke/configuration_luke.py +2 -8
  775. transformers/models/luke/modeling_luke.py +181 -188
  776. transformers/models/luke/tokenization_luke.py +140 -107
  777. transformers/models/lxmert/configuration_lxmert.py +1 -16
  778. transformers/models/lxmert/modeling_lxmert.py +74 -65
  779. transformers/models/m2m_100/configuration_m2m_100.py +9 -7
  780. transformers/models/m2m_100/modeling_m2m_100.py +71 -83
  781. transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
  782. transformers/models/mamba/configuration_mamba.py +2 -1
  783. transformers/models/mamba/modeling_mamba.py +66 -58
  784. transformers/models/mamba2/configuration_mamba2.py +8 -5
  785. transformers/models/mamba2/modeling_mamba2.py +69 -68
  786. transformers/models/marian/configuration_marian.py +5 -10
  787. transformers/models/marian/modeling_marian.py +87 -93
  788. transformers/models/marian/tokenization_marian.py +6 -6
  789. transformers/models/markuplm/configuration_markuplm.py +7 -4
  790. transformers/models/markuplm/feature_extraction_markuplm.py +2 -1
  791. transformers/models/markuplm/modeling_markuplm.py +70 -69
  792. transformers/models/markuplm/processing_markuplm.py +38 -31
  793. transformers/models/markuplm/tokenization_markuplm.py +136 -93
  794. transformers/models/mask2former/configuration_mask2former.py +8 -5
  795. transformers/models/mask2former/image_processing_mask2former.py +85 -84
  796. transformers/models/mask2former/image_processing_mask2former_fast.py +40 -37
  797. transformers/models/mask2former/modeling_mask2former.py +103 -118
  798. transformers/models/mask2former/modular_mask2former.py +8 -6
  799. transformers/models/maskformer/configuration_maskformer.py +9 -6
  800. transformers/models/maskformer/configuration_maskformer_swin.py +1 -0
  801. transformers/models/maskformer/image_processing_maskformer.py +85 -84
  802. transformers/models/maskformer/image_processing_maskformer_fast.py +40 -36
  803. transformers/models/maskformer/modeling_maskformer.py +65 -79
  804. transformers/models/maskformer/modeling_maskformer_swin.py +32 -36
  805. transformers/models/mbart/configuration_mbart.py +4 -9
  806. transformers/models/mbart/modeling_mbart.py +116 -131
  807. transformers/models/mbart/tokenization_mbart.py +54 -11
  808. transformers/models/mbart50/tokenization_mbart50.py +13 -8
  809. transformers/models/megatron_bert/configuration_megatron_bert.py +3 -13
  810. transformers/models/megatron_bert/modeling_megatron_bert.py +150 -148
  811. transformers/models/metaclip_2/configuration_metaclip_2.py +1 -4
  812. transformers/models/metaclip_2/modeling_metaclip_2.py +84 -91
  813. transformers/models/metaclip_2/modular_metaclip_2.py +45 -61
  814. transformers/models/mgp_str/configuration_mgp_str.py +1 -0
  815. transformers/models/mgp_str/modeling_mgp_str.py +18 -20
  816. transformers/models/mgp_str/processing_mgp_str.py +20 -3
  817. transformers/models/mgp_str/tokenization_mgp_str.py +3 -1
  818. transformers/models/mimi/configuration_mimi.py +40 -42
  819. transformers/models/mimi/modeling_mimi.py +113 -142
  820. transformers/models/minimax/__init__.py +1 -0
  821. transformers/models/minimax/configuration_minimax.py +43 -37
  822. transformers/models/minimax/modeling_minimax.py +51 -61
  823. transformers/models/minimax/modular_minimax.py +62 -68
  824. transformers/models/ministral/configuration_ministral.py +29 -25
  825. transformers/models/ministral/modeling_ministral.py +38 -36
  826. transformers/models/ministral/modular_ministral.py +37 -32
  827. transformers/models/ministral3/configuration_ministral3.py +27 -24
  828. transformers/models/ministral3/modeling_ministral3.py +37 -36
  829. transformers/models/ministral3/modular_ministral3.py +5 -4
  830. transformers/models/mistral/configuration_mistral.py +29 -24
  831. transformers/models/mistral/modeling_mistral.py +37 -36
  832. transformers/models/mistral/modular_mistral.py +12 -11
  833. transformers/models/mistral3/configuration_mistral3.py +1 -4
  834. transformers/models/mistral3/modeling_mistral3.py +86 -89
  835. transformers/models/mistral3/modular_mistral3.py +68 -69
  836. transformers/models/mixtral/configuration_mixtral.py +34 -29
  837. transformers/models/mixtral/modeling_mixtral.py +45 -50
  838. transformers/models/mixtral/modular_mixtral.py +31 -32
  839. transformers/models/mlcd/configuration_mlcd.py +1 -0
  840. transformers/models/mlcd/modeling_mlcd.py +14 -20
  841. transformers/models/mlcd/modular_mlcd.py +13 -17
  842. transformers/models/mllama/configuration_mllama.py +15 -10
  843. transformers/models/mllama/image_processing_mllama.py +25 -23
  844. transformers/models/mllama/image_processing_mllama_fast.py +11 -11
  845. transformers/models/mllama/modeling_mllama.py +94 -105
  846. transformers/models/mllama/processing_mllama.py +55 -6
  847. transformers/models/mluke/tokenization_mluke.py +107 -101
  848. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +3 -5
  849. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +140 -155
  850. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +3 -5
  851. transformers/models/mobilebert/configuration_mobilebert.py +2 -4
  852. transformers/models/mobilebert/modeling_mobilebert.py +85 -77
  853. transformers/models/mobilebert/tokenization_mobilebert.py +1 -0
  854. transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +1 -0
  855. transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +23 -20
  856. transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +1 -0
  857. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +16 -15
  858. transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +1 -0
  859. transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +51 -48
  860. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +15 -13
  861. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +22 -24
  862. transformers/models/mobilevit/configuration_mobilevit.py +1 -0
  863. transformers/models/mobilevit/image_processing_mobilevit.py +49 -46
  864. transformers/models/mobilevit/image_processing_mobilevit_fast.py +14 -12
  865. transformers/models/mobilevit/modeling_mobilevit.py +21 -28
  866. transformers/models/mobilevitv2/configuration_mobilevitv2.py +1 -0
  867. transformers/models/mobilevitv2/modeling_mobilevitv2.py +22 -28
  868. transformers/models/modernbert/configuration_modernbert.py +42 -44
  869. transformers/models/modernbert/modeling_modernbert.py +133 -145
  870. transformers/models/modernbert/modular_modernbert.py +170 -186
  871. transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +40 -40
  872. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +57 -62
  873. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +86 -94
  874. transformers/models/moonshine/configuration_moonshine.py +31 -34
  875. transformers/models/moonshine/modeling_moonshine.py +71 -71
  876. transformers/models/moonshine/modular_moonshine.py +83 -88
  877. transformers/models/moshi/configuration_moshi.py +23 -46
  878. transformers/models/moshi/modeling_moshi.py +187 -157
  879. transformers/models/mpnet/configuration_mpnet.py +2 -6
  880. transformers/models/mpnet/modeling_mpnet.py +57 -62
  881. transformers/models/mpnet/tokenization_mpnet.py +15 -4
  882. transformers/models/mpt/configuration_mpt.py +9 -5
  883. transformers/models/mpt/modeling_mpt.py +60 -60
  884. transformers/models/mra/configuration_mra.py +2 -8
  885. transformers/models/mra/modeling_mra.py +57 -64
  886. transformers/models/mt5/configuration_mt5.py +8 -10
  887. transformers/models/mt5/modeling_mt5.py +95 -87
  888. transformers/models/musicgen/configuration_musicgen.py +8 -12
  889. transformers/models/musicgen/modeling_musicgen.py +122 -118
  890. transformers/models/musicgen/processing_musicgen.py +21 -3
  891. transformers/models/musicgen_melody/configuration_musicgen_melody.py +8 -15
  892. transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +9 -8
  893. transformers/models/musicgen_melody/modeling_musicgen_melody.py +123 -117
  894. transformers/models/musicgen_melody/processing_musicgen_melody.py +22 -3
  895. transformers/models/mvp/configuration_mvp.py +5 -8
  896. transformers/models/mvp/modeling_mvp.py +123 -135
  897. transformers/models/myt5/tokenization_myt5.py +10 -8
  898. transformers/models/nanochat/configuration_nanochat.py +8 -5
  899. transformers/models/nanochat/modeling_nanochat.py +40 -37
  900. transformers/models/nanochat/modular_nanochat.py +14 -12
  901. transformers/models/nemotron/configuration_nemotron.py +30 -25
  902. transformers/models/nemotron/modeling_nemotron.py +57 -56
  903. transformers/models/nllb/tokenization_nllb.py +28 -12
  904. transformers/models/nllb_moe/configuration_nllb_moe.py +9 -7
  905. transformers/models/nllb_moe/modeling_nllb_moe.py +69 -77
  906. transformers/models/nougat/image_processing_nougat.py +32 -29
  907. transformers/models/nougat/image_processing_nougat_fast.py +14 -12
  908. transformers/models/nougat/processing_nougat.py +39 -37
  909. transformers/models/nougat/tokenization_nougat.py +73 -18
  910. transformers/models/nystromformer/configuration_nystromformer.py +2 -8
  911. transformers/models/nystromformer/modeling_nystromformer.py +63 -74
  912. transformers/models/olmo/configuration_olmo.py +28 -23
  913. transformers/models/olmo/modeling_olmo.py +39 -36
  914. transformers/models/olmo/modular_olmo.py +11 -7
  915. transformers/models/olmo2/configuration_olmo2.py +28 -23
  916. transformers/models/olmo2/modeling_olmo2.py +41 -37
  917. transformers/models/olmo2/modular_olmo2.py +32 -29
  918. transformers/models/olmo3/__init__.py +1 -0
  919. transformers/models/olmo3/configuration_olmo3.py +30 -26
  920. transformers/models/olmo3/modeling_olmo3.py +39 -36
  921. transformers/models/olmo3/modular_olmo3.py +40 -37
  922. transformers/models/olmoe/configuration_olmoe.py +33 -29
  923. transformers/models/olmoe/modeling_olmoe.py +46 -52
  924. transformers/models/olmoe/modular_olmoe.py +15 -16
  925. transformers/models/omdet_turbo/configuration_omdet_turbo.py +4 -2
  926. transformers/models/omdet_turbo/modeling_omdet_turbo.py +47 -53
  927. transformers/models/omdet_turbo/processing_omdet_turbo.py +67 -19
  928. transformers/models/oneformer/configuration_oneformer.py +8 -5
  929. transformers/models/oneformer/image_processing_oneformer.py +84 -83
  930. transformers/models/oneformer/image_processing_oneformer_fast.py +42 -41
  931. transformers/models/oneformer/modeling_oneformer.py +171 -147
  932. transformers/models/oneformer/processing_oneformer.py +43 -28
  933. transformers/models/openai/configuration_openai.py +1 -16
  934. transformers/models/openai/modeling_openai.py +51 -65
  935. transformers/models/openai/tokenization_openai.py +47 -8
  936. transformers/models/opt/configuration_opt.py +7 -6
  937. transformers/models/opt/modeling_opt.py +76 -78
  938. transformers/models/ovis2/__init__.py +1 -0
  939. transformers/models/ovis2/configuration_ovis2.py +1 -0
  940. transformers/models/ovis2/image_processing_ovis2.py +24 -22
  941. transformers/models/ovis2/image_processing_ovis2_fast.py +11 -9
  942. transformers/models/ovis2/modeling_ovis2.py +142 -111
  943. transformers/models/ovis2/modular_ovis2.py +45 -90
  944. transformers/models/ovis2/processing_ovis2.py +40 -12
  945. transformers/models/owlv2/configuration_owlv2.py +2 -4
  946. transformers/models/owlv2/image_processing_owlv2.py +21 -20
  947. transformers/models/owlv2/image_processing_owlv2_fast.py +15 -12
  948. transformers/models/owlv2/modeling_owlv2.py +117 -133
  949. transformers/models/owlv2/modular_owlv2.py +14 -11
  950. transformers/models/owlv2/processing_owlv2.py +49 -20
  951. transformers/models/owlvit/configuration_owlvit.py +2 -4
  952. transformers/models/owlvit/image_processing_owlvit.py +22 -21
  953. transformers/models/owlvit/image_processing_owlvit_fast.py +3 -2
  954. transformers/models/owlvit/modeling_owlvit.py +116 -132
  955. transformers/models/owlvit/processing_owlvit.py +48 -20
  956. transformers/models/paligemma/configuration_paligemma.py +1 -4
  957. transformers/models/paligemma/modeling_paligemma.py +93 -103
  958. transformers/models/paligemma/processing_paligemma.py +66 -13
  959. transformers/models/parakeet/configuration_parakeet.py +14 -7
  960. transformers/models/parakeet/feature_extraction_parakeet.py +12 -10
  961. transformers/models/parakeet/modeling_parakeet.py +28 -32
  962. transformers/models/parakeet/modular_parakeet.py +20 -23
  963. transformers/models/parakeet/processing_parakeet.py +5 -13
  964. transformers/models/parakeet/{tokenization_parakeet.py → tokenization_parakeet_fast.py} +7 -5
  965. transformers/models/patchtsmixer/configuration_patchtsmixer.py +8 -5
  966. transformers/models/patchtsmixer/modeling_patchtsmixer.py +62 -70
  967. transformers/models/patchtst/configuration_patchtst.py +9 -6
  968. transformers/models/patchtst/modeling_patchtst.py +80 -97
  969. transformers/models/pegasus/configuration_pegasus.py +5 -8
  970. transformers/models/pegasus/modeling_pegasus.py +66 -72
  971. transformers/models/pegasus/tokenization_pegasus.py +45 -15
  972. transformers/models/pegasus_x/configuration_pegasus_x.py +4 -5
  973. transformers/models/pegasus_x/modeling_pegasus_x.py +52 -55
  974. transformers/models/perceiver/configuration_perceiver.py +1 -0
  975. transformers/models/perceiver/image_processing_perceiver.py +25 -22
  976. transformers/models/perceiver/image_processing_perceiver_fast.py +9 -7
  977. transformers/models/perceiver/modeling_perceiver.py +146 -165
  978. transformers/models/perceiver/tokenization_perceiver.py +6 -3
  979. transformers/models/perception_lm/configuration_perception_lm.py +1 -0
  980. transformers/models/perception_lm/image_processing_perception_lm_fast.py +10 -8
  981. transformers/models/perception_lm/modeling_perception_lm.py +70 -71
  982. transformers/models/perception_lm/modular_perception_lm.py +61 -65
  983. transformers/models/perception_lm/processing_perception_lm.py +47 -13
  984. transformers/models/perception_lm/video_processing_perception_lm.py +1 -0
  985. transformers/models/persimmon/configuration_persimmon.py +28 -23
  986. transformers/models/persimmon/modeling_persimmon.py +45 -43
  987. transformers/models/phi/configuration_phi.py +28 -23
  988. transformers/models/phi/modeling_phi.py +43 -40
  989. transformers/models/phi/modular_phi.py +24 -23
  990. transformers/models/phi3/configuration_phi3.py +33 -28
  991. transformers/models/phi3/modeling_phi3.py +38 -36
  992. transformers/models/phi3/modular_phi3.py +17 -13
  993. transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +33 -30
  994. transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +9 -7
  995. transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +11 -11
  996. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +78 -95
  997. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +80 -98
  998. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +44 -7
  999. transformers/models/phimoe/configuration_phimoe.py +36 -31
  1000. transformers/models/phimoe/modeling_phimoe.py +45 -50
  1001. transformers/models/phimoe/modular_phimoe.py +4 -3
  1002. transformers/models/phobert/tokenization_phobert.py +6 -4
  1003. transformers/models/pix2struct/configuration_pix2struct.py +10 -12
  1004. transformers/models/pix2struct/image_processing_pix2struct.py +19 -15
  1005. transformers/models/pix2struct/image_processing_pix2struct_fast.py +15 -12
  1006. transformers/models/pix2struct/modeling_pix2struct.py +52 -58
  1007. transformers/models/pix2struct/processing_pix2struct.py +30 -5
  1008. transformers/models/pixtral/configuration_pixtral.py +14 -11
  1009. transformers/models/pixtral/image_processing_pixtral.py +28 -26
  1010. transformers/models/pixtral/image_processing_pixtral_fast.py +11 -10
  1011. transformers/models/pixtral/modeling_pixtral.py +34 -28
  1012. transformers/models/pixtral/processing_pixtral.py +53 -21
  1013. transformers/models/plbart/configuration_plbart.py +5 -8
  1014. transformers/models/plbart/modeling_plbart.py +106 -119
  1015. transformers/models/plbart/modular_plbart.py +33 -39
  1016. transformers/models/plbart/tokenization_plbart.py +7 -4
  1017. transformers/models/poolformer/configuration_poolformer.py +1 -0
  1018. transformers/models/poolformer/image_processing_poolformer.py +24 -21
  1019. transformers/models/poolformer/image_processing_poolformer_fast.py +15 -13
  1020. transformers/models/poolformer/modeling_poolformer.py +13 -23
  1021. transformers/models/pop2piano/configuration_pop2piano.py +8 -7
  1022. transformers/models/pop2piano/feature_extraction_pop2piano.py +9 -6
  1023. transformers/models/pop2piano/modeling_pop2piano.py +24 -26
  1024. transformers/models/pop2piano/processing_pop2piano.py +33 -25
  1025. transformers/models/pop2piano/tokenization_pop2piano.py +23 -15
  1026. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +3 -3
  1027. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
  1028. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +21 -20
  1029. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +13 -16
  1030. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +13 -16
  1031. transformers/models/prophetnet/configuration_prophetnet.py +38 -37
  1032. transformers/models/prophetnet/modeling_prophetnet.py +131 -114
  1033. transformers/models/prophetnet/tokenization_prophetnet.py +16 -14
  1034. transformers/models/pvt/configuration_pvt.py +1 -0
  1035. transformers/models/pvt/image_processing_pvt.py +27 -24
  1036. transformers/models/pvt/image_processing_pvt_fast.py +2 -1
  1037. transformers/models/pvt/modeling_pvt.py +21 -21
  1038. transformers/models/pvt_v2/configuration_pvt_v2.py +4 -2
  1039. transformers/models/pvt_v2/modeling_pvt_v2.py +25 -28
  1040. transformers/models/qwen2/configuration_qwen2.py +25 -32
  1041. transformers/models/qwen2/modeling_qwen2.py +38 -36
  1042. transformers/models/qwen2/modular_qwen2.py +12 -11
  1043. transformers/models/qwen2/tokenization_qwen2.py +23 -12
  1044. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +26 -32
  1045. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +277 -340
  1046. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +211 -278
  1047. transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +49 -41
  1048. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +35 -29
  1049. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +148 -203
  1050. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +118 -93
  1051. transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +43 -7
  1052. transformers/models/qwen2_audio/configuration_qwen2_audio.py +1 -0
  1053. transformers/models/qwen2_audio/modeling_qwen2_audio.py +40 -40
  1054. transformers/models/qwen2_audio/processing_qwen2_audio.py +42 -13
  1055. transformers/models/qwen2_moe/configuration_qwen2_moe.py +35 -42
  1056. transformers/models/qwen2_moe/modeling_qwen2_moe.py +46 -51
  1057. transformers/models/qwen2_moe/modular_qwen2_moe.py +10 -7
  1058. transformers/models/qwen2_vl/configuration_qwen2_vl.py +34 -29
  1059. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +42 -41
  1060. transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +15 -12
  1061. transformers/models/qwen2_vl/modeling_qwen2_vl.py +153 -199
  1062. transformers/models/qwen2_vl/processing_qwen2_vl.py +44 -7
  1063. transformers/models/qwen2_vl/video_processing_qwen2_vl.py +18 -38
  1064. transformers/models/qwen3/configuration_qwen3.py +27 -34
  1065. transformers/models/qwen3/modeling_qwen3.py +39 -36
  1066. transformers/models/qwen3/modular_qwen3.py +6 -4
  1067. transformers/models/qwen3_moe/configuration_qwen3_moe.py +32 -39
  1068. transformers/models/qwen3_moe/modeling_qwen3_moe.py +46 -51
  1069. transformers/models/qwen3_moe/modular_qwen3_moe.py +13 -10
  1070. transformers/models/qwen3_next/configuration_qwen3_next.py +35 -45
  1071. transformers/models/qwen3_next/modeling_qwen3_next.py +51 -47
  1072. transformers/models/qwen3_next/modular_qwen3_next.py +35 -34
  1073. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +101 -135
  1074. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +252 -355
  1075. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +196 -250
  1076. transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +48 -40
  1077. transformers/models/qwen3_vl/configuration_qwen3_vl.py +29 -27
  1078. transformers/models/qwen3_vl/modeling_qwen3_vl.py +155 -233
  1079. transformers/models/qwen3_vl/modular_qwen3_vl.py +179 -206
  1080. transformers/models/qwen3_vl/processing_qwen3_vl.py +42 -6
  1081. transformers/models/qwen3_vl/video_processing_qwen3_vl.py +12 -10
  1082. transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +30 -23
  1083. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +303 -358
  1084. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +124 -87
  1085. transformers/models/rag/configuration_rag.py +15 -6
  1086. transformers/models/rag/modeling_rag.py +130 -127
  1087. transformers/models/rag/retrieval_rag.py +5 -3
  1088. transformers/models/rag/tokenization_rag.py +50 -0
  1089. transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +30 -29
  1090. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +42 -53
  1091. transformers/models/reformer/configuration_reformer.py +8 -7
  1092. transformers/models/reformer/modeling_reformer.py +69 -80
  1093. transformers/models/reformer/tokenization_reformer.py +31 -11
  1094. transformers/models/regnet/configuration_regnet.py +1 -0
  1095. transformers/models/regnet/modeling_regnet.py +8 -15
  1096. transformers/models/rembert/configuration_rembert.py +2 -8
  1097. transformers/models/rembert/modeling_rembert.py +111 -121
  1098. transformers/models/rembert/tokenization_rembert.py +12 -2
  1099. transformers/models/resnet/configuration_resnet.py +1 -0
  1100. transformers/models/resnet/modeling_resnet.py +13 -27
  1101. transformers/models/roberta/configuration_roberta.py +3 -11
  1102. transformers/models/roberta/modeling_roberta.py +93 -94
  1103. transformers/models/roberta/modular_roberta.py +58 -58
  1104. transformers/models/roberta/tokenization_roberta.py +29 -17
  1105. transformers/models/roberta/tokenization_roberta_old.py +4 -2
  1106. transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +3 -11
  1107. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +93 -94
  1108. transformers/models/roc_bert/configuration_roc_bert.py +2 -8
  1109. transformers/models/roc_bert/modeling_roc_bert.py +121 -122
  1110. transformers/models/roc_bert/tokenization_roc_bert.py +94 -88
  1111. transformers/models/roformer/configuration_roformer.py +3 -13
  1112. transformers/models/roformer/modeling_roformer.py +81 -85
  1113. transformers/models/roformer/tokenization_roformer.py +412 -74
  1114. transformers/models/roformer/tokenization_roformer_fast.py +160 -0
  1115. transformers/models/roformer/tokenization_utils.py +1 -0
  1116. transformers/models/rt_detr/configuration_rt_detr.py +2 -1
  1117. transformers/models/rt_detr/configuration_rt_detr_resnet.py +1 -0
  1118. transformers/models/rt_detr/image_processing_rt_detr.py +55 -54
  1119. transformers/models/rt_detr/image_processing_rt_detr_fast.py +26 -26
  1120. transformers/models/rt_detr/modeling_rt_detr.py +90 -99
  1121. transformers/models/rt_detr/modeling_rt_detr_resnet.py +6 -13
  1122. transformers/models/rt_detr/modular_rt_detr.py +16 -16
  1123. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +4 -6
  1124. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +90 -101
  1125. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +12 -19
  1126. transformers/models/rwkv/configuration_rwkv.py +4 -2
  1127. transformers/models/rwkv/modeling_rwkv.py +32 -31
  1128. transformers/models/sam/configuration_sam.py +1 -3
  1129. transformers/models/sam/image_processing_sam.py +60 -59
  1130. transformers/models/sam/image_processing_sam_fast.py +27 -25
  1131. transformers/models/sam/modeling_sam.py +41 -47
  1132. transformers/models/sam/processing_sam.py +27 -39
  1133. transformers/models/sam2/configuration_sam2.py +3 -2
  1134. transformers/models/sam2/image_processing_sam2_fast.py +15 -14
  1135. transformers/models/sam2/modeling_sam2.py +90 -96
  1136. transformers/models/sam2/modular_sam2.py +91 -86
  1137. transformers/models/sam2/processing_sam2.py +47 -31
  1138. transformers/models/sam2_video/configuration_sam2_video.py +1 -0
  1139. transformers/models/sam2_video/modeling_sam2_video.py +144 -151
  1140. transformers/models/sam2_video/modular_sam2_video.py +104 -101
  1141. transformers/models/sam2_video/processing_sam2_video.py +66 -49
  1142. transformers/models/sam2_video/video_processing_sam2_video.py +4 -1
  1143. transformers/models/sam3/configuration_sam3.py +2 -21
  1144. transformers/models/sam3/image_processing_sam3_fast.py +20 -17
  1145. transformers/models/sam3/modeling_sam3.py +170 -184
  1146. transformers/models/sam3/modular_sam3.py +8 -3
  1147. transformers/models/sam3/processing_sam3.py +52 -37
  1148. transformers/models/sam3_tracker/__init__.py +1 -0
  1149. transformers/models/sam3_tracker/configuration_sam3_tracker.py +3 -1
  1150. transformers/models/sam3_tracker/modeling_sam3_tracker.py +77 -82
  1151. transformers/models/sam3_tracker/modular_sam3_tracker.py +3 -8
  1152. transformers/models/sam3_tracker/processing_sam3_tracker.py +48 -31
  1153. transformers/models/sam3_tracker_video/__init__.py +1 -0
  1154. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +1 -25
  1155. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +122 -135
  1156. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +26 -35
  1157. transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +66 -50
  1158. transformers/models/sam3_video/configuration_sam3_video.py +1 -14
  1159. transformers/models/sam3_video/modeling_sam3_video.py +34 -33
  1160. transformers/models/sam3_video/processing_sam3_video.py +46 -26
  1161. transformers/models/sam_hq/__init__.py +1 -1
  1162. transformers/models/sam_hq/configuration_sam_hq.py +1 -3
  1163. transformers/models/sam_hq/modeling_sam_hq.py +69 -74
  1164. transformers/models/sam_hq/modular_sam_hq.py +25 -23
  1165. transformers/models/sam_hq/{processing_sam_hq.py → processing_samhq.py} +29 -41
  1166. transformers/models/seamless_m4t/configuration_seamless_m4t.py +10 -8
  1167. transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +11 -8
  1168. transformers/models/seamless_m4t/modeling_seamless_m4t.py +194 -212
  1169. transformers/models/seamless_m4t/processing_seamless_m4t.py +39 -18
  1170. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +77 -40
  1171. transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +10 -8
  1172. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +196 -204
  1173. transformers/models/seed_oss/configuration_seed_oss.py +32 -28
  1174. transformers/models/seed_oss/modeling_seed_oss.py +35 -33
  1175. transformers/models/seed_oss/modular_seed_oss.py +4 -3
  1176. transformers/models/segformer/configuration_segformer.py +10 -0
  1177. transformers/models/segformer/image_processing_segformer.py +42 -39
  1178. transformers/models/segformer/image_processing_segformer_fast.py +12 -10
  1179. transformers/models/segformer/modeling_segformer.py +31 -34
  1180. transformers/models/segformer/modular_segformer.py +10 -8
  1181. transformers/models/seggpt/configuration_seggpt.py +1 -0
  1182. transformers/models/seggpt/image_processing_seggpt.py +41 -38
  1183. transformers/models/seggpt/modeling_seggpt.py +38 -50
  1184. transformers/models/sew/configuration_sew.py +2 -4
  1185. transformers/models/sew/modeling_sew.py +36 -38
  1186. transformers/models/sew/modular_sew.py +13 -13
  1187. transformers/models/sew_d/configuration_sew_d.py +2 -4
  1188. transformers/models/sew_d/modeling_sew_d.py +30 -31
  1189. transformers/models/shieldgemma2/configuration_shieldgemma2.py +1 -0
  1190. transformers/models/shieldgemma2/modeling_shieldgemma2.py +17 -16
  1191. transformers/models/shieldgemma2/processing_shieldgemma2.py +5 -3
  1192. transformers/models/siglip/configuration_siglip.py +2 -4
  1193. transformers/models/siglip/image_processing_siglip.py +20 -17
  1194. transformers/models/siglip/image_processing_siglip_fast.py +1 -0
  1195. transformers/models/siglip/modeling_siglip.py +75 -84
  1196. transformers/models/siglip/processing_siglip.py +14 -2
  1197. transformers/models/siglip/tokenization_siglip.py +7 -6
  1198. transformers/models/siglip2/configuration_siglip2.py +2 -5
  1199. transformers/models/siglip2/image_processing_siglip2.py +16 -15
  1200. transformers/models/siglip2/image_processing_siglip2_fast.py +7 -6
  1201. transformers/models/siglip2/modeling_siglip2.py +129 -143
  1202. transformers/models/siglip2/modular_siglip2.py +46 -47
  1203. transformers/models/siglip2/processing_siglip2.py +14 -2
  1204. transformers/models/smollm3/configuration_smollm3.py +32 -29
  1205. transformers/models/smollm3/modeling_smollm3.py +39 -36
  1206. transformers/models/smollm3/modular_smollm3.py +35 -33
  1207. transformers/models/smolvlm/configuration_smolvlm.py +4 -2
  1208. transformers/models/smolvlm/image_processing_smolvlm.py +43 -42
  1209. transformers/models/smolvlm/image_processing_smolvlm_fast.py +15 -41
  1210. transformers/models/smolvlm/modeling_smolvlm.py +94 -126
  1211. transformers/models/smolvlm/modular_smolvlm.py +39 -50
  1212. transformers/models/smolvlm/processing_smolvlm.py +83 -15
  1213. transformers/models/smolvlm/video_processing_smolvlm.py +18 -16
  1214. transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +1 -0
  1215. transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +27 -26
  1216. transformers/models/speech_to_text/configuration_speech_to_text.py +9 -9
  1217. transformers/models/speech_to_text/feature_extraction_speech_to_text.py +13 -10
  1218. transformers/models/speech_to_text/modeling_speech_to_text.py +54 -66
  1219. transformers/models/speech_to_text/processing_speech_to_text.py +30 -4
  1220. transformers/models/speech_to_text/tokenization_speech_to_text.py +6 -5
  1221. transformers/models/speecht5/configuration_speecht5.py +9 -7
  1222. transformers/models/speecht5/feature_extraction_speecht5.py +37 -16
  1223. transformers/models/speecht5/modeling_speecht5.py +175 -213
  1224. transformers/models/speecht5/number_normalizer.py +1 -0
  1225. transformers/models/speecht5/processing_speecht5.py +37 -3
  1226. transformers/models/speecht5/tokenization_speecht5.py +5 -4
  1227. transformers/models/splinter/configuration_splinter.py +7 -6
  1228. transformers/models/splinter/modeling_splinter.py +59 -71
  1229. transformers/models/splinter/tokenization_splinter.py +30 -9
  1230. transformers/models/squeezebert/configuration_squeezebert.py +2 -14
  1231. transformers/models/squeezebert/modeling_squeezebert.py +62 -68
  1232. transformers/models/squeezebert/tokenization_squeezebert.py +1 -0
  1233. transformers/models/stablelm/configuration_stablelm.py +29 -24
  1234. transformers/models/stablelm/modeling_stablelm.py +45 -44
  1235. transformers/models/starcoder2/configuration_starcoder2.py +27 -30
  1236. transformers/models/starcoder2/modeling_starcoder2.py +41 -39
  1237. transformers/models/starcoder2/modular_starcoder2.py +16 -14
  1238. transformers/models/superglue/configuration_superglue.py +3 -7
  1239. transformers/models/superglue/image_processing_superglue.py +15 -15
  1240. transformers/models/superglue/image_processing_superglue_fast.py +10 -9
  1241. transformers/models/superglue/modeling_superglue.py +37 -42
  1242. transformers/models/superpoint/image_processing_superpoint.py +15 -15
  1243. transformers/models/superpoint/image_processing_superpoint_fast.py +11 -8
  1244. transformers/models/superpoint/modeling_superpoint.py +16 -18
  1245. transformers/models/swiftformer/configuration_swiftformer.py +1 -0
  1246. transformers/models/swiftformer/modeling_swiftformer.py +14 -18
  1247. transformers/models/swin/configuration_swin.py +1 -0
  1248. transformers/models/swin/modeling_swin.py +86 -86
  1249. transformers/models/swin2sr/configuration_swin2sr.py +1 -0
  1250. transformers/models/swin2sr/image_processing_swin2sr.py +13 -10
  1251. transformers/models/swin2sr/image_processing_swin2sr_fast.py +8 -4
  1252. transformers/models/swin2sr/modeling_swin2sr.py +63 -81
  1253. transformers/models/swinv2/configuration_swinv2.py +1 -0
  1254. transformers/models/swinv2/modeling_swinv2.py +104 -108
  1255. transformers/models/switch_transformers/configuration_switch_transformers.py +7 -11
  1256. transformers/models/switch_transformers/modeling_switch_transformers.py +44 -37
  1257. transformers/models/switch_transformers/modular_switch_transformers.py +41 -34
  1258. transformers/models/t5/configuration_t5.py +8 -14
  1259. transformers/models/t5/modeling_t5.py +92 -88
  1260. transformers/models/t5/tokenization_t5.py +9 -3
  1261. transformers/models/t5gemma/configuration_t5gemma.py +41 -43
  1262. transformers/models/t5gemma/modeling_t5gemma.py +107 -104
  1263. transformers/models/t5gemma/modular_t5gemma.py +120 -124
  1264. transformers/models/t5gemma2/configuration_t5gemma2.py +120 -80
  1265. transformers/models/t5gemma2/modeling_t5gemma2.py +125 -141
  1266. transformers/models/t5gemma2/modular_t5gemma2.py +104 -393
  1267. transformers/models/table_transformer/configuration_table_transformer.py +2 -1
  1268. transformers/models/table_transformer/modeling_table_transformer.py +49 -51
  1269. transformers/models/tapas/configuration_tapas.py +2 -12
  1270. transformers/models/tapas/modeling_tapas.py +67 -68
  1271. transformers/models/tapas/tokenization_tapas.py +153 -115
  1272. transformers/models/textnet/configuration_textnet.py +1 -0
  1273. transformers/models/textnet/image_processing_textnet.py +25 -22
  1274. transformers/models/textnet/image_processing_textnet_fast.py +10 -8
  1275. transformers/models/textnet/modeling_textnet.py +16 -28
  1276. transformers/models/time_series_transformer/configuration_time_series_transformer.py +8 -5
  1277. transformers/models/time_series_transformer/modeling_time_series_transformer.py +81 -83
  1278. transformers/models/timesfm/configuration_timesfm.py +1 -0
  1279. transformers/models/timesfm/modeling_timesfm.py +22 -33
  1280. transformers/models/timesfm/modular_timesfm.py +21 -32
  1281. transformers/models/timesformer/configuration_timesformer.py +1 -0
  1282. transformers/models/timesformer/modeling_timesformer.py +16 -15
  1283. transformers/models/timm_backbone/configuration_timm_backbone.py +1 -0
  1284. transformers/models/timm_backbone/modeling_timm_backbone.py +15 -17
  1285. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -5
  1286. transformers/models/timm_wrapper/image_processing_timm_wrapper.py +5 -4
  1287. transformers/models/timm_wrapper/modeling_timm_wrapper.py +29 -34
  1288. transformers/models/trocr/configuration_trocr.py +8 -11
  1289. transformers/models/trocr/modeling_trocr.py +44 -45
  1290. transformers/models/trocr/processing_trocr.py +25 -5
  1291. transformers/models/tvp/configuration_tvp.py +2 -5
  1292. transformers/models/tvp/image_processing_tvp.py +52 -50
  1293. transformers/models/tvp/image_processing_tvp_fast.py +15 -15
  1294. transformers/models/tvp/modeling_tvp.py +27 -27
  1295. transformers/models/tvp/processing_tvp.py +14 -2
  1296. transformers/models/udop/configuration_udop.py +7 -16
  1297. transformers/models/udop/modeling_udop.py +73 -71
  1298. transformers/models/udop/processing_udop.py +26 -7
  1299. transformers/models/udop/tokenization_udop.py +105 -84
  1300. transformers/models/umt5/configuration_umt5.py +7 -8
  1301. transformers/models/umt5/modeling_umt5.py +90 -94
  1302. transformers/models/unispeech/configuration_unispeech.py +2 -4
  1303. transformers/models/unispeech/modeling_unispeech.py +49 -51
  1304. transformers/models/unispeech/modular_unispeech.py +22 -22
  1305. transformers/models/unispeech_sat/configuration_unispeech_sat.py +2 -4
  1306. transformers/models/unispeech_sat/modeling_unispeech_sat.py +65 -69
  1307. transformers/models/unispeech_sat/modular_unispeech_sat.py +23 -23
  1308. transformers/models/univnet/feature_extraction_univnet.py +14 -14
  1309. transformers/models/univnet/modeling_univnet.py +8 -8
  1310. transformers/models/upernet/configuration_upernet.py +1 -0
  1311. transformers/models/upernet/modeling_upernet.py +13 -11
  1312. transformers/models/vaultgemma/__init__.py +1 -0
  1313. transformers/models/vaultgemma/configuration_vaultgemma.py +33 -29
  1314. transformers/models/vaultgemma/modeling_vaultgemma.py +41 -39
  1315. transformers/models/vaultgemma/modular_vaultgemma.py +31 -29
  1316. transformers/models/video_llama_3/configuration_video_llama_3.py +0 -4
  1317. transformers/models/video_llama_3/image_processing_video_llama_3.py +42 -43
  1318. transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +14 -12
  1319. transformers/models/video_llama_3/modeling_video_llama_3.py +109 -157
  1320. transformers/models/video_llama_3/modular_video_llama_3.py +146 -155
  1321. transformers/models/video_llama_3/processing_video_llama_3.py +39 -5
  1322. transformers/models/video_llama_3/video_processing_video_llama_3.py +23 -42
  1323. transformers/models/video_llava/configuration_video_llava.py +1 -4
  1324. transformers/models/video_llava/image_processing_video_llava.py +38 -35
  1325. transformers/models/video_llava/modeling_video_llava.py +146 -146
  1326. transformers/models/video_llava/processing_video_llava.py +78 -38
  1327. transformers/models/video_llava/video_processing_video_llava.py +1 -0
  1328. transformers/models/videomae/configuration_videomae.py +1 -0
  1329. transformers/models/videomae/image_processing_videomae.py +34 -31
  1330. transformers/models/videomae/modeling_videomae.py +17 -14
  1331. transformers/models/videomae/video_processing_videomae.py +1 -0
  1332. transformers/models/vilt/configuration_vilt.py +4 -6
  1333. transformers/models/vilt/image_processing_vilt.py +30 -29
  1334. transformers/models/vilt/image_processing_vilt_fast.py +16 -15
  1335. transformers/models/vilt/modeling_vilt.py +90 -116
  1336. transformers/models/vilt/processing_vilt.py +14 -2
  1337. transformers/models/vipllava/configuration_vipllava.py +1 -4
  1338. transformers/models/vipllava/modeling_vipllava.py +70 -99
  1339. transformers/models/vipllava/modular_vipllava.py +54 -78
  1340. transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +1 -0
  1341. transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +27 -28
  1342. transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +1 -0
  1343. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +41 -46
  1344. transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +16 -2
  1345. transformers/models/visual_bert/configuration_visual_bert.py +2 -6
  1346. transformers/models/visual_bert/modeling_visual_bert.py +92 -98
  1347. transformers/models/vit/configuration_vit.py +1 -0
  1348. transformers/models/vit/image_processing_vit.py +22 -19
  1349. transformers/models/vit/image_processing_vit_fast.py +1 -0
  1350. transformers/models/vit/modeling_vit.py +17 -17
  1351. transformers/models/vit_mae/configuration_vit_mae.py +1 -0
  1352. transformers/models/vit_mae/modeling_vit_mae.py +27 -29
  1353. transformers/models/vit_msn/configuration_vit_msn.py +1 -0
  1354. transformers/models/vit_msn/modeling_vit_msn.py +16 -18
  1355. transformers/models/vitdet/configuration_vitdet.py +1 -0
  1356. transformers/models/vitdet/modeling_vitdet.py +14 -14
  1357. transformers/models/vitmatte/configuration_vitmatte.py +5 -2
  1358. transformers/models/vitmatte/image_processing_vitmatte.py +18 -15
  1359. transformers/models/vitmatte/image_processing_vitmatte_fast.py +18 -16
  1360. transformers/models/vitmatte/modeling_vitmatte.py +11 -14
  1361. transformers/models/vitpose/configuration_vitpose.py +7 -4
  1362. transformers/models/vitpose/image_processing_vitpose.py +25 -24
  1363. transformers/models/vitpose/image_processing_vitpose_fast.py +11 -9
  1364. transformers/models/vitpose/modeling_vitpose.py +14 -14
  1365. transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +1 -0
  1366. transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +10 -8
  1367. transformers/models/vits/configuration_vits.py +1 -4
  1368. transformers/models/vits/modeling_vits.py +42 -44
  1369. transformers/models/vits/tokenization_vits.py +4 -3
  1370. transformers/models/vivit/configuration_vivit.py +1 -0
  1371. transformers/models/vivit/image_processing_vivit.py +39 -36
  1372. transformers/models/vivit/modeling_vivit.py +8 -6
  1373. transformers/models/vjepa2/__init__.py +1 -0
  1374. transformers/models/vjepa2/configuration_vjepa2.py +1 -0
  1375. transformers/models/vjepa2/modeling_vjepa2.py +32 -31
  1376. transformers/models/vjepa2/video_processing_vjepa2.py +1 -0
  1377. transformers/models/voxtral/__init__.py +1 -0
  1378. transformers/models/voxtral/configuration_voxtral.py +2 -0
  1379. transformers/models/voxtral/modeling_voxtral.py +47 -40
  1380. transformers/models/voxtral/modular_voxtral.py +40 -37
  1381. transformers/models/voxtral/processing_voxtral.py +48 -25
  1382. transformers/models/wav2vec2/configuration_wav2vec2.py +2 -4
  1383. transformers/models/wav2vec2/feature_extraction_wav2vec2.py +10 -7
  1384. transformers/models/wav2vec2/modeling_wav2vec2.py +121 -73
  1385. transformers/models/wav2vec2/processing_wav2vec2.py +35 -6
  1386. transformers/models/wav2vec2/tokenization_wav2vec2.py +332 -20
  1387. transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +2 -4
  1388. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +62 -70
  1389. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +48 -57
  1390. transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +35 -6
  1391. transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +2 -4
  1392. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +77 -90
  1393. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +30 -37
  1394. transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +17 -16
  1395. transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +55 -36
  1396. transformers/models/wavlm/configuration_wavlm.py +2 -4
  1397. transformers/models/wavlm/modeling_wavlm.py +48 -50
  1398. transformers/models/wavlm/modular_wavlm.py +5 -4
  1399. transformers/models/whisper/configuration_whisper.py +5 -6
  1400. transformers/models/whisper/english_normalizer.py +4 -3
  1401. transformers/models/whisper/feature_extraction_whisper.py +24 -9
  1402. transformers/models/whisper/generation_whisper.py +48 -26
  1403. transformers/models/whisper/modeling_whisper.py +73 -79
  1404. transformers/models/whisper/processing_whisper.py +20 -3
  1405. transformers/models/whisper/tokenization_whisper.py +43 -11
  1406. transformers/models/x_clip/configuration_x_clip.py +2 -4
  1407. transformers/models/x_clip/modeling_x_clip.py +93 -96
  1408. transformers/models/x_clip/processing_x_clip.py +14 -2
  1409. transformers/models/xcodec/configuration_xcodec.py +6 -4
  1410. transformers/models/xcodec/modeling_xcodec.py +17 -20
  1411. transformers/models/xglm/configuration_xglm.py +8 -9
  1412. transformers/models/xglm/modeling_xglm.py +55 -60
  1413. transformers/models/xglm/tokenization_xglm.py +11 -3
  1414. transformers/models/xlm/configuration_xlm.py +8 -10
  1415. transformers/models/xlm/modeling_xlm.py +144 -144
  1416. transformers/models/xlm/tokenization_xlm.py +5 -3
  1417. transformers/models/xlm_roberta/configuration_xlm_roberta.py +3 -11
  1418. transformers/models/xlm_roberta/modeling_xlm_roberta.py +194 -195
  1419. transformers/models/xlm_roberta/modular_xlm_roberta.py +53 -50
  1420. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +18 -8
  1421. transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +2 -10
  1422. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +93 -94
  1423. transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +70 -67
  1424. transformers/models/xlnet/configuration_xlnet.py +12 -3
  1425. transformers/models/xlnet/modeling_xlnet.py +163 -152
  1426. transformers/models/xlnet/tokenization_xlnet.py +9 -2
  1427. transformers/models/xlstm/configuration_xlstm.py +12 -8
  1428. transformers/models/xlstm/modeling_xlstm.py +65 -62
  1429. transformers/models/xmod/configuration_xmod.py +3 -11
  1430. transformers/models/xmod/modeling_xmod.py +110 -108
  1431. transformers/models/yolos/configuration_yolos.py +1 -0
  1432. transformers/models/yolos/image_processing_yolos.py +62 -60
  1433. transformers/models/yolos/image_processing_yolos_fast.py +45 -42
  1434. transformers/models/yolos/modeling_yolos.py +16 -16
  1435. transformers/models/yolos/modular_yolos.py +19 -17
  1436. transformers/models/yoso/configuration_yoso.py +2 -8
  1437. transformers/models/yoso/modeling_yoso.py +63 -70
  1438. transformers/models/zamba/configuration_zamba.py +8 -5
  1439. transformers/models/zamba/modeling_zamba.py +78 -81
  1440. transformers/models/zamba2/configuration_zamba2.py +50 -44
  1441. transformers/models/zamba2/modeling_zamba2.py +97 -97
  1442. transformers/models/zamba2/modular_zamba2.py +48 -46
  1443. transformers/models/zoedepth/configuration_zoedepth.py +2 -1
  1444. transformers/models/zoedepth/image_processing_zoedepth.py +29 -28
  1445. transformers/models/zoedepth/image_processing_zoedepth_fast.py +24 -21
  1446. transformers/models/zoedepth/modeling_zoedepth.py +18 -26
  1447. transformers/pipelines/__init__.py +114 -57
  1448. transformers/pipelines/any_to_any.py +22 -14
  1449. transformers/pipelines/audio_utils.py +2 -1
  1450. transformers/pipelines/automatic_speech_recognition.py +12 -20
  1451. transformers/pipelines/base.py +27 -15
  1452. transformers/{models/pe_audio/processing_pe_audio.py → pipelines/deprecated/__init__.py} +3 -10
  1453. transformers/pipelines/deprecated/text2text_generation.py +408 -0
  1454. transformers/pipelines/document_question_answering.py +2 -4
  1455. transformers/pipelines/image_text_to_text.py +1 -0
  1456. transformers/pipelines/image_to_text.py +229 -0
  1457. transformers/pipelines/question_answering.py +44 -5
  1458. transformers/pipelines/text_classification.py +14 -1
  1459. transformers/pipelines/text_generation.py +1 -1
  1460. transformers/pipelines/text_to_audio.py +2 -2
  1461. transformers/pipelines/token_classification.py +22 -1
  1462. transformers/pipelines/video_classification.py +9 -1
  1463. transformers/pipelines/zero_shot_audio_classification.py +1 -0
  1464. transformers/pipelines/zero_shot_classification.py +6 -0
  1465. transformers/pipelines/zero_shot_image_classification.py +7 -0
  1466. transformers/processing_utils.py +145 -230
  1467. transformers/quantizers/auto.py +4 -2
  1468. transformers/quantizers/base.py +173 -53
  1469. transformers/quantizers/quantizer_aqlm.py +23 -2
  1470. transformers/quantizers/quantizer_auto_round.py +12 -2
  1471. transformers/quantizers/quantizer_awq.py +89 -20
  1472. transformers/quantizers/quantizer_bitnet.py +14 -4
  1473. transformers/quantizers/quantizer_bnb_4bit.py +155 -18
  1474. transformers/quantizers/quantizer_bnb_8bit.py +110 -24
  1475. transformers/quantizers/quantizer_compressed_tensors.py +9 -2
  1476. transformers/quantizers/quantizer_eetq.py +74 -16
  1477. transformers/quantizers/quantizer_fbgemm_fp8.py +138 -38
  1478. transformers/quantizers/quantizer_finegrained_fp8.py +113 -26
  1479. transformers/quantizers/quantizer_fp_quant.py +82 -52
  1480. transformers/quantizers/quantizer_gptq.py +28 -8
  1481. transformers/quantizers/quantizer_higgs.py +60 -42
  1482. transformers/quantizers/quantizer_hqq.py +153 -144
  1483. transformers/quantizers/quantizer_mxfp4.py +194 -14
  1484. transformers/quantizers/quantizer_quanto.py +79 -35
  1485. transformers/quantizers/quantizer_quark.py +18 -36
  1486. transformers/quantizers/quantizer_spqr.py +12 -4
  1487. transformers/quantizers/quantizer_torchao.py +325 -50
  1488. transformers/quantizers/quantizer_vptq.py +27 -4
  1489. transformers/quantizers/quantizers_utils.py +0 -20
  1490. transformers/safetensors_conversion.py +3 -9
  1491. transformers/testing_utils.py +82 -326
  1492. transformers/tokenization_mistral_common.py +903 -568
  1493. transformers/tokenization_utils_base.py +340 -220
  1494. transformers/tokenization_utils_sentencepiece.py +6 -5
  1495. transformers/tokenization_utils_tokenizers.py +113 -226
  1496. transformers/trainer.py +53 -60
  1497. transformers/trainer_callback.py +0 -8
  1498. transformers/trainer_seq2seq.py +1 -5
  1499. transformers/trainer_utils.py +1 -1
  1500. transformers/training_args.py +41 -77
  1501. transformers/utils/__init__.py +4 -8
  1502. transformers/utils/attention_visualizer.py +5 -5
  1503. transformers/utils/auto_docstring.py +37 -599
  1504. transformers/utils/doc.py +36 -4
  1505. transformers/utils/dummy_pt_objects.py +42 -0
  1506. transformers/utils/generic.py +28 -111
  1507. transformers/utils/hub.py +15 -5
  1508. transformers/utils/import_utils.py +32 -165
  1509. transformers/utils/kernel_config.py +19 -74
  1510. transformers/utils/loading_report.py +15 -25
  1511. transformers/utils/quantization_config.py +241 -72
  1512. transformers/video_processing_utils.py +39 -41
  1513. transformers/video_utils.py +22 -18
  1514. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/METADATA +236 -284
  1515. transformers-5.0.0rc0.dist-info/RECORD +1987 -0
  1516. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/WHEEL +1 -1
  1517. transformers/integrations/moe.py +0 -360
  1518. transformers/integrations/quark.py +0 -53
  1519. transformers/loss/loss_lw_detr.py +0 -356
  1520. transformers/models/ernie4_5_vl_moe/__init__.py +0 -31
  1521. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +0 -340
  1522. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +0 -455
  1523. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +0 -231
  1524. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +0 -1936
  1525. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +0 -1925
  1526. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +0 -249
  1527. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +0 -593
  1528. transformers/models/fast_vlm/__init__.py +0 -27
  1529. transformers/models/fast_vlm/configuration_fast_vlm.py +0 -137
  1530. transformers/models/fast_vlm/modeling_fast_vlm.py +0 -432
  1531. transformers/models/fast_vlm/modular_fast_vlm.py +0 -373
  1532. transformers/models/glm4_moe_lite/__init__.py +0 -28
  1533. transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +0 -233
  1534. transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +0 -740
  1535. transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +0 -302
  1536. transformers/models/glm_image/__init__.py +0 -31
  1537. transformers/models/glm_image/configuration_glm_image.py +0 -351
  1538. transformers/models/glm_image/image_processing_glm_image.py +0 -503
  1539. transformers/models/glm_image/image_processing_glm_image_fast.py +0 -294
  1540. transformers/models/glm_image/modeling_glm_image.py +0 -1642
  1541. transformers/models/glm_image/modular_glm_image.py +0 -1531
  1542. transformers/models/glm_image/processing_glm_image.py +0 -217
  1543. transformers/models/glmasr/__init__.py +0 -29
  1544. transformers/models/glmasr/configuration_glmasr.py +0 -196
  1545. transformers/models/glmasr/modeling_glmasr.py +0 -517
  1546. transformers/models/glmasr/modular_glmasr.py +0 -443
  1547. transformers/models/glmasr/processing_glmasr.py +0 -331
  1548. transformers/models/jais2/__init__.py +0 -27
  1549. transformers/models/jais2/configuration_jais2.py +0 -148
  1550. transformers/models/jais2/modeling_jais2.py +0 -484
  1551. transformers/models/jais2/modular_jais2.py +0 -194
  1552. transformers/models/lasr/__init__.py +0 -29
  1553. transformers/models/lasr/configuration_lasr.py +0 -244
  1554. transformers/models/lasr/feature_extraction_lasr.py +0 -275
  1555. transformers/models/lasr/modeling_lasr.py +0 -727
  1556. transformers/models/lasr/modular_lasr.py +0 -574
  1557. transformers/models/lasr/processing_lasr.py +0 -100
  1558. transformers/models/lasr/tokenization_lasr.py +0 -184
  1559. transformers/models/lighton_ocr/__init__.py +0 -28
  1560. transformers/models/lighton_ocr/configuration_lighton_ocr.py +0 -128
  1561. transformers/models/lighton_ocr/modeling_lighton_ocr.py +0 -463
  1562. transformers/models/lighton_ocr/modular_lighton_ocr.py +0 -404
  1563. transformers/models/lighton_ocr/processing_lighton_ocr.py +0 -229
  1564. transformers/models/lw_detr/__init__.py +0 -27
  1565. transformers/models/lw_detr/configuration_lw_detr.py +0 -374
  1566. transformers/models/lw_detr/modeling_lw_detr.py +0 -1702
  1567. transformers/models/lw_detr/modular_lw_detr.py +0 -1615
  1568. transformers/models/minimax_m2/__init__.py +0 -28
  1569. transformers/models/minimax_m2/configuration_minimax_m2.py +0 -188
  1570. transformers/models/minimax_m2/modeling_minimax_m2.py +0 -704
  1571. transformers/models/minimax_m2/modular_minimax_m2.py +0 -346
  1572. transformers/models/paddleocr_vl/__init__.py +0 -31
  1573. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +0 -335
  1574. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +0 -503
  1575. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +0 -209
  1576. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +0 -1683
  1577. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +0 -1380
  1578. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +0 -133
  1579. transformers/models/pe_audio/__init__.py +0 -29
  1580. transformers/models/pe_audio/configuration_pe_audio.py +0 -204
  1581. transformers/models/pe_audio/feature_extraction_pe_audio.py +0 -160
  1582. transformers/models/pe_audio/modeling_pe_audio.py +0 -819
  1583. transformers/models/pe_audio/modular_pe_audio.py +0 -298
  1584. transformers/models/pe_audio_video/__init__.py +0 -28
  1585. transformers/models/pe_audio_video/configuration_pe_audio_video.py +0 -223
  1586. transformers/models/pe_audio_video/modeling_pe_audio_video.py +0 -971
  1587. transformers/models/pe_audio_video/modular_pe_audio_video.py +0 -763
  1588. transformers/models/pe_video/__init__.py +0 -29
  1589. transformers/models/pe_video/configuration_pe_video.py +0 -209
  1590. transformers/models/pe_video/modeling_pe_video.py +0 -647
  1591. transformers/models/pe_video/modular_pe_video.py +0 -231
  1592. transformers/models/pe_video/processing_pe_video.py +0 -10
  1593. transformers/models/pe_video/video_processing_pe_video.py +0 -64
  1594. transformers/models/pixio/__init__.py +0 -29
  1595. transformers/models/pixio/configuration_pixio.py +0 -150
  1596. transformers/models/pixio/modeling_pixio.py +0 -507
  1597. transformers/models/pixio/modular_pixio.py +0 -403
  1598. transformers/models/solar_open/__init__.py +0 -27
  1599. transformers/models/solar_open/configuration_solar_open.py +0 -184
  1600. transformers/models/solar_open/modeling_solar_open.py +0 -642
  1601. transformers/models/solar_open/modular_solar_open.py +0 -224
  1602. transformers/trainer_jit_checkpoint.py +0 -125
  1603. transformers-5.0.0.dist-info/RECORD +0 -2068
  1604. {transformers-5.0.0.dist-info/licenses → transformers-5.0.0rc0.dist-info}/LICENSE +0 -0
  1605. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/entry_points.txt +0 -0
  1606. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ # coding=utf-8
1
2
  # Copyright 2018 The HuggingFace Inc. team.
2
3
  #
3
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,17 +15,19 @@
14
15
  """Auto Tokenizer class."""
15
16
 
16
17
  import importlib
18
+ import inspect
17
19
  import json
18
20
  import os
19
21
  from collections import OrderedDict
20
- from typing import Any
22
+ from typing import Any, Optional, Union
21
23
 
22
24
  from transformers.utils.import_utils import is_mistral_common_available
23
25
 
24
26
  from ...configuration_utils import PreTrainedConfig
25
27
  from ...dynamic_module_utils import get_class_from_dynamic_module, resolve_trust_remote_code
26
28
  from ...modeling_gguf_pytorch_utils import load_gguf_checkpoint
27
- from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE
29
+ from ...tokenization_python import PreTrainedTokenizer, PythonBackend
30
+ from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE, find_sentencepiece_model_file, load_vocab_and_merges
28
31
  from ...utils import (
29
32
  extract_commit_hash,
30
33
  is_g2p_en_available,
@@ -32,7 +35,7 @@ from ...utils import (
32
35
  is_tokenizers_available,
33
36
  logging,
34
37
  )
35
- from ...utils.hub import cached_file
38
+ from ...utils.hub import cached_file, has_file
36
39
  from ..encoder_decoder import EncoderDecoderConfig
37
40
  from .auto_factory import _LazyAutoMapping
38
41
  from .configuration_auto import (
@@ -60,12 +63,13 @@ logger = logging.get_logger(__name__)
60
63
  REGISTERED_TOKENIZER_CLASSES: dict[str, type[Any]] = {}
61
64
  REGISTERED_FAST_ALIASES: dict[str, type[Any]] = {}
62
65
 
63
- TOKENIZER_MAPPING_NAMES = OrderedDict[str, str | None](
66
+ TOKENIZER_MAPPING_NAMES = OrderedDict[str, Optional[str]](
64
67
  [
65
- ("aimv2", "CLIPTokenizer" if is_tokenizers_available() else None),
68
+ ("aimv2", "CLIPTokenizerFast" if is_tokenizers_available() else None),
66
69
  ("albert", "AlbertTokenizer" if is_tokenizers_available() else None),
67
70
  ("align", "BertTokenizer" if is_tokenizers_available() else None),
68
- ("audioflamingo3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
71
+ ("arcee", "LlamaTokenizerFast" if is_tokenizers_available() else None),
72
+ ("aria", "LlamaTokenizerFast" if is_tokenizers_available() else None),
69
73
  ("aya_vision", "CohereTokenizer" if is_tokenizers_available() else None),
70
74
  ("bark", "BertTokenizer" if is_tokenizers_available() else None),
71
75
  ("bart", "RobertaTokenizer" if is_tokenizers_available() else None),
@@ -78,15 +82,19 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, str | None](
78
82
  ("big_bird", "BigBirdTokenizer" if is_tokenizers_available() else None),
79
83
  ("bigbird_pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
80
84
  ("biogpt", "BioGptTokenizer"),
85
+ ("bitnet", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
81
86
  ("blenderbot", "BlenderbotTokenizer" if is_tokenizers_available() else None),
82
87
  ("blenderbot-small", "BlenderbotSmallTokenizer"),
83
88
  ("blip", "BertTokenizer" if is_tokenizers_available() else None),
84
89
  ("blip-2", "GPT2Tokenizer" if is_tokenizers_available() else None),
90
+ ("bloom", "TokenizersBackend" if is_tokenizers_available() else None),
91
+ ("blt", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
85
92
  ("bridgetower", "RobertaTokenizer"),
86
93
  ("bros", "BertTokenizer" if is_tokenizers_available() else None),
87
94
  ("byt5", "ByT5Tokenizer"),
88
95
  ("camembert", "CamembertTokenizer" if is_tokenizers_available() else None),
89
96
  ("canine", "CanineTokenizer"),
97
+ ("chameleon", "LlamaTokenizerFast" if is_tokenizers_available() else None),
90
98
  ("chinese_clip", "BertTokenizer" if is_tokenizers_available() else None),
91
99
  ("clap", "RobertaTokenizer"),
92
100
  ("clip", "CLIPTokenizer" if is_tokenizers_available() else None),
@@ -96,231 +104,265 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, str | None](
96
104
  ("codegen", "GPT2Tokenizer" if is_tokenizers_available() else None),
97
105
  ("cohere", "CohereTokenizer" if is_tokenizers_available() else None),
98
106
  ("cohere2", "CohereTokenizer" if is_tokenizers_available() else None),
99
- ("colqwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
107
+ ("colpali", "LlamaTokenizerFast" if is_tokenizers_available() else None),
108
+ ("colqwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
100
109
  ("convbert", "BertTokenizer" if is_tokenizers_available() else None),
101
110
  ("cpm", "CpmTokenizer" if is_tokenizers_available() else None),
102
111
  ("cpmant", "CpmAntTokenizer"),
112
+ ("csm", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
103
113
  ("ctrl", "CTRLTokenizer"),
104
114
  ("data2vec-audio", "Wav2Vec2CTCTokenizer"),
105
115
  ("data2vec-text", "RobertaTokenizer"),
106
116
  ("dbrx", "GPT2Tokenizer" if is_tokenizers_available() else None),
107
117
  ("deberta", "DebertaTokenizer" if is_tokenizers_available() else None),
108
118
  ("deberta-v2", "DebertaV2Tokenizer" if is_tokenizers_available() else None),
119
+ ("deepseek_v2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
120
+ ("deepseek_v3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
121
+ ("deepseek_vl", "LlamaTokenizerFast" if is_tokenizers_available() else None),
122
+ ("deepseek_vl_hybrid", "LlamaTokenizerFast" if is_tokenizers_available() else None),
109
123
  ("dia", "DiaTokenizer"),
124
+ ("diffllama", "LlamaTokenizerFast" if is_tokenizers_available() else None),
110
125
  ("distilbert", "BertTokenizer" if is_tokenizers_available() else None),
111
- ("dpr", "DPRQuestionEncoderTokenizer" if is_tokenizers_available() else None),
126
+ ("dpr", "DPRQuestionEncoderTokenizerFast" if is_tokenizers_available() else None),
112
127
  ("electra", "BertTokenizer" if is_tokenizers_available() else None),
113
128
  ("emu3", "GPT2Tokenizer" if is_tokenizers_available() else None),
114
129
  ("ernie", "BertTokenizer" if is_tokenizers_available() else None),
130
+ ("ernie4_5", "LlamaTokenizerFast" if is_tokenizers_available() else None),
131
+ ("ernie4_5_moe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
115
132
  ("esm", "EsmTokenizer"),
116
133
  ("exaone4", "GPT2Tokenizer" if is_tokenizers_available() else None),
117
- ("falcon_mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
134
+ ("falcon", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
135
+ ("falcon_mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
118
136
  ("fastspeech2_conformer", "FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None),
119
137
  ("flaubert", "FlaubertTokenizer"),
120
138
  ("flava", "BertTokenizer" if is_tokenizers_available() else None),
121
139
  ("flex_olmo", "GPT2Tokenizer" if is_tokenizers_available() else None),
122
140
  ("florence2", "BartTokenizer" if is_tokenizers_available() else None),
123
- ("fnet", "FNetTokenizer" if is_tokenizers_available() else None),
141
+ ("fnet", "FNetTokenizerFast" if is_tokenizers_available() else None),
124
142
  ("fsmt", "FSMTTokenizer"),
125
143
  ("funnel", "FunnelTokenizer" if is_tokenizers_available() else None),
126
- ("gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
127
- ("gemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
128
- ("gemma3", "GemmaTokenizer" if is_tokenizers_available() else None),
129
- ("gemma3_text", "GemmaTokenizer" if is_tokenizers_available() else None),
130
- ("gemma3n", "GemmaTokenizer" if is_tokenizers_available() else None),
131
- ("gemma3n_text", "GemmaTokenizer" if is_tokenizers_available() else None),
144
+ ("gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
145
+ ("gemma2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
146
+ ("gemma3", "GemmaTokenizerFast" if is_tokenizers_available() else None),
147
+ ("gemma3_text", "GemmaTokenizerFast" if is_tokenizers_available() else None),
148
+ ("gemma3n", "GemmaTokenizerFast" if is_tokenizers_available() else None),
149
+ ("gemma3n_text", "GemmaTokenizerFast" if is_tokenizers_available() else None),
132
150
  ("git", "BertTokenizer" if is_tokenizers_available() else None),
133
- ("glm", "TokenizersBackend" if is_tokenizers_available() else None),
134
- ("glm4", "TokenizersBackend" if is_tokenizers_available() else None),
135
- ("glm4_moe", "TokenizersBackend" if is_tokenizers_available() else None),
136
- ("glm4_moe_lite", "TokenizersBackend" if is_tokenizers_available() else None),
137
- ("glm4v", "TokenizersBackend" if is_tokenizers_available() else None),
138
- ("glm4v_moe", "TokenizersBackend" if is_tokenizers_available() else None),
139
- ("glm_image", "TokenizersBackend" if is_tokenizers_available() else None),
140
- ("glmasr", "TokenizersBackend" if is_tokenizers_available() else None),
141
- ("got_ocr2", "TokenizersBackend" if is_tokenizers_available() else None),
151
+ ("glm", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
152
+ ("glm4", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
153
+ ("glm4_moe", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
154
+ ("glm4v", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
155
+ ("glm4v_moe", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
156
+ ("got_ocr2", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
142
157
  ("gpt-sw3", "GPTSw3Tokenizer" if is_sentencepiece_available() else None),
143
158
  ("gpt2", "GPT2Tokenizer" if is_tokenizers_available() else None),
144
159
  ("gpt_bigcode", "GPT2Tokenizer" if is_tokenizers_available() else None),
145
160
  ("gpt_neo", "GPT2Tokenizer" if is_tokenizers_available() else None),
146
161
  ("gpt_neox", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
147
162
  ("gpt_neox_japanese", "GPTNeoXJapaneseTokenizer"),
163
+ ("gpt_oss", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
148
164
  ("gptj", "GPT2Tokenizer" if is_tokenizers_available() else None),
149
165
  ("granite", "GPT2Tokenizer"),
150
166
  ("granitemoe", "GPT2Tokenizer"),
151
167
  ("granitemoehybrid", "GPT2Tokenizer"),
152
168
  ("granitemoeshared", "GPT2Tokenizer"),
153
169
  ("grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
154
- ("groupvit", "CLIPTokenizer" if is_tokenizers_available() else None),
170
+ ("groupvit", "CLIPTokenizerFast" if is_tokenizers_available() else None),
171
+ ("helium", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
155
172
  ("herbert", "HerbertTokenizer" if is_tokenizers_available() else None),
156
173
  ("hubert", "Wav2Vec2CTCTokenizer"),
157
174
  ("ibert", "RobertaTokenizer"),
158
- ("idefics", "LlamaTokenizer" if is_tokenizers_available() else None),
159
- ("idefics2", "LlamaTokenizer" if is_tokenizers_available() else None),
175
+ ("idefics", "LlamaTokenizerFast" if is_tokenizers_available() else None),
176
+ ("idefics2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
177
+ ("idefics3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
160
178
  ("instructblip", "GPT2Tokenizer" if is_tokenizers_available() else None),
161
179
  ("instructblipvideo", "GPT2Tokenizer" if is_tokenizers_available() else None),
162
- ("internvl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
163
- ("jais2", "GPT2Tokenizer" if is_tokenizers_available() else None),
180
+ ("internvl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
181
+ ("jamba", "LlamaTokenizerFast" if is_tokenizers_available() else None),
182
+ ("janus", "LlamaTokenizerFast" if is_tokenizers_available() else None),
183
+ ("jetmoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
164
184
  ("kosmos-2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
165
- ("lasr_ctc", "ParakeetTokenizer" if is_tokenizers_available() else None),
166
- ("lasr_encoder", "ParakeetTokenizer" if is_tokenizers_available() else None),
185
+ ("kosmos-2.5", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
167
186
  ("layoutlm", "BertTokenizer" if is_tokenizers_available() else None),
168
187
  ("layoutlmv2", "LayoutLMv2Tokenizer" if is_tokenizers_available() else None),
169
188
  ("layoutlmv3", "LayoutLMv3Tokenizer" if is_tokenizers_available() else None),
170
189
  ("layoutxlm", "LayoutXLMTokenizer" if is_tokenizers_available() else None),
171
190
  ("led", "LEDTokenizer" if is_tokenizers_available() else None),
172
- ("lighton_ocr", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
191
+ ("lfm2_vl", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
173
192
  ("lilt", "RobertaTokenizer" if is_tokenizers_available() else None),
193
+ ("llama", "LlamaTokenizer" if is_tokenizers_available() else None),
194
+ ("llama4", "LlamaTokenizerFast" if is_tokenizers_available() else None),
195
+ ("llama4_text", "LlamaTokenizerFast" if is_tokenizers_available() else None),
196
+ ("llava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
197
+ ("llava_next", "LlamaTokenizerFast" if is_tokenizers_available() else None),
198
+ ("llava_next_video", "LlamaTokenizerFast" if is_tokenizers_available() else None),
199
+ ("llava_onevision", "LlamaTokenizerFast" if is_tokenizers_available() else None),
174
200
  ("longformer", "RobertaTokenizer" if is_tokenizers_available() else None),
175
201
  ("longt5", "T5Tokenizer" if is_tokenizers_available() else None),
176
202
  ("luke", "LukeTokenizer"),
177
203
  ("lxmert", "LxmertTokenizer" if is_tokenizers_available() else None),
178
204
  ("m2m_100", "M2M100Tokenizer" if is_sentencepiece_available() else None),
179
- ("mamba", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
180
- ("mamba2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
205
+ ("mamba", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
206
+ ("mamba2", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
181
207
  ("marian", "MarianTokenizer" if is_sentencepiece_available() else None),
182
- ("markuplm", "MarkupLMTokenizer" if is_tokenizers_available() else None),
183
208
  ("mbart", "MBartTokenizer" if is_tokenizers_available() else None),
184
209
  ("mbart50", "MBart50Tokenizer" if is_tokenizers_available() else None),
185
210
  ("mega", "RobertaTokenizer"),
186
211
  ("megatron-bert", "BertTokenizer" if is_tokenizers_available() else None),
187
- ("metaclip_2", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
212
+ ("metaclip_2", "XLMRobertaTokenizerFast" if is_tokenizers_available() else None),
188
213
  ("mgp-str", "MgpstrTokenizer"),
214
+ ("minimax", "GPT2Tokenizer" if is_tokenizers_available() else None),
189
215
  (
190
216
  "ministral3",
191
- "MistralCommonBackend"
192
- if is_mistral_common_available()
193
- else ("TokenizersBackend" if is_tokenizers_available() else None),
217
+ (
218
+ "MistralCommonBackend"
219
+ if is_mistral_common_available()
220
+ else ("LlamaTokenizer" if is_sentencepiece_available() else None),
221
+ "LlamaTokenizerFast" if is_tokenizers_available() and not is_mistral_common_available() else None,
222
+ ),
194
223
  ),
195
224
  (
196
225
  "mistral",
197
226
  "MistralCommonBackend"
198
227
  if is_mistral_common_available()
199
- else ("TokenizersBackend" if is_tokenizers_available() else None),
228
+ else ("LlamaTokenizerFast" if is_tokenizers_available() else None),
200
229
  ),
201
230
  (
202
231
  "mistral3",
203
- "MistralCommonBackend"
204
- if is_mistral_common_available()
205
- else ("TokenizersBackend" if is_tokenizers_available() else None),
232
+ (
233
+ "MistralCommonBackend"
234
+ if is_mistral_common_available()
235
+ else ("LlamaTokenizer" if is_sentencepiece_available() else None),
236
+ "LlamaTokenizerFast" if is_tokenizers_available() and not is_mistral_common_available() else None,
237
+ ),
206
238
  ),
207
239
  (
208
240
  "mixtral",
209
241
  "MistralCommonBackend"
210
242
  if is_mistral_common_available()
211
- else ("TokenizersBackend" if is_tokenizers_available() else None),
243
+ else ("LlamaTokenizerFast" if is_tokenizers_available() else None),
212
244
  ),
245
+ ("mllama", "LlamaTokenizerFast" if is_tokenizers_available() else None),
213
246
  ("mluke", "MLukeTokenizer" if is_sentencepiece_available() else None),
214
247
  ("mm-grounding-dino", "BertTokenizer" if is_tokenizers_available() else None),
215
248
  ("mobilebert", "MobileBertTokenizer" if is_tokenizers_available() else None),
249
+ ("modernbert", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
250
+ ("moonshine", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
251
+ ("moshi", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
216
252
  ("mpnet", "MPNetTokenizer" if is_tokenizers_available() else None),
217
- ("mpt", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
253
+ ("mpt", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
218
254
  ("mra", "RobertaTokenizer"),
219
255
  ("mt5", "T5Tokenizer" if is_tokenizers_available() else None),
220
256
  ("musicgen", "T5Tokenizer" if is_tokenizers_available() else None),
221
257
  ("musicgen_melody", "T5Tokenizer" if is_tokenizers_available() else None),
222
258
  ("mvp", "MvpTokenizer" if is_tokenizers_available() else None),
223
259
  ("myt5", "MyT5Tokenizer"),
260
+ ("nemotron", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
224
261
  ("nezha", "BertTokenizer" if is_tokenizers_available() else None),
225
262
  ("nllb", "NllbTokenizer" if is_tokenizers_available() else None),
226
263
  ("nllb-moe", "NllbTokenizer" if is_tokenizers_available() else None),
227
264
  ("nougat", "NougatTokenizer" if is_tokenizers_available() else None),
228
- ("nystromformer", "AlbertTokenizer" if is_tokenizers_available() else None),
229
- ("olmo", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
230
- ("olmo2", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
265
+ ("nystromformer", "AlbertTokenizerFast" if is_tokenizers_available() else None),
266
+ ("olmo", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
267
+ ("olmo2", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
231
268
  ("olmo3", "GPT2Tokenizer" if is_tokenizers_available() else None),
232
- ("olmoe", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
233
- ("omdet-turbo", "CLIPTokenizer" if is_tokenizers_available() else None),
234
- ("oneformer", "CLIPTokenizer" if is_tokenizers_available() else None),
269
+ ("olmoe", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
270
+ ("omdet-turbo", "CLIPTokenizerFast" if is_tokenizers_available() else None),
271
+ ("oneformer", "CLIPTokenizerFast" if is_tokenizers_available() else None),
235
272
  ("openai-gpt", "OpenAIGPTTokenizer" if is_tokenizers_available() else None),
236
273
  ("opt", "GPT2Tokenizer" if is_tokenizers_available() else None),
237
- ("ovis2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
238
- ("owlv2", "CLIPTokenizer" if is_tokenizers_available() else None),
239
- ("owlvit", "CLIPTokenizer" if is_tokenizers_available() else None),
274
+ ("ovis2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
275
+ ("owlv2", "CLIPTokenizerFast" if is_tokenizers_available() else None),
276
+ ("owlvit", "CLIPTokenizerFast" if is_tokenizers_available() else None),
277
+ ("paligemma", "LlamaTokenizerFast" if is_tokenizers_available() else None),
240
278
  ("pegasus", "PegasusTokenizer" if is_tokenizers_available() else None),
241
279
  ("pegasus_x", "PegasusTokenizer" if is_tokenizers_available() else None),
242
280
  ("perceiver", "PerceiverTokenizer"),
281
+ ("persimmon", "LlamaTokenizerFast" if is_tokenizers_available() else None),
243
282
  ("phi", "GPT2Tokenizer" if is_tokenizers_available() else None),
283
+ ("phi3", "LlamaTokenizerFast" if is_tokenizers_available() else None),
284
+ ("phimoe", "LlamaTokenizerFast" if is_tokenizers_available() else None),
244
285
  ("phobert", "PhobertTokenizer"),
245
286
  ("pix2struct", "T5Tokenizer" if is_tokenizers_available() else None),
246
287
  (
247
288
  "pixtral",
248
289
  "MistralCommonBackend"
249
290
  if is_mistral_common_available()
250
- else ("TokenizersBackend" if is_tokenizers_available() else None),
291
+ else ("PreTrainedTokenizerFast" if is_tokenizers_available() else None),
251
292
  ),
252
293
  ("plbart", "PLBartTokenizer" if is_tokenizers_available() else None),
253
294
  ("prophetnet", "ProphetNetTokenizer"),
254
295
  ("qdqbert", "BertTokenizer" if is_tokenizers_available() else None),
255
- ("qwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),
256
- ("qwen2_5_omni", "Qwen2Tokenizer" if is_tokenizers_available() else None),
257
- ("qwen2_5_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
258
- ("qwen2_audio", "Qwen2Tokenizer" if is_tokenizers_available() else None),
259
- ("qwen2_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
260
- ("qwen2_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
261
- ("qwen3", "Qwen2Tokenizer" if is_tokenizers_available() else None),
262
- ("qwen3_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
263
- ("qwen3_next", "Qwen2Tokenizer" if is_tokenizers_available() else None),
264
- ("qwen3_omni_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
265
- ("qwen3_vl", "Qwen2Tokenizer" if is_tokenizers_available() else None),
266
- ("qwen3_vl_moe", "Qwen2Tokenizer" if is_tokenizers_available() else None),
296
+ ("qwen2", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
297
+ ("qwen2_5_omni", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
298
+ ("qwen2_5_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
299
+ ("qwen2_audio", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
300
+ ("qwen2_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
301
+ ("qwen2_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
302
+ ("qwen3", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
303
+ ("qwen3_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
304
+ ("qwen3_next", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
305
+ ("qwen3_omni_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
306
+ ("qwen3_vl", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
307
+ ("qwen3_vl_moe", "Qwen2TokenizerFast" if is_tokenizers_available() else None),
267
308
  ("rag", "RagTokenizer"),
268
309
  ("realm", "BertTokenizer" if is_tokenizers_available() else None),
269
- ("recurrent_gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
310
+ ("recurrent_gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
270
311
  ("reformer", "ReformerTokenizer" if is_tokenizers_available() else None),
271
312
  ("rembert", "RemBertTokenizer" if is_tokenizers_available() else None),
272
313
  ("retribert", "BertTokenizer" if is_tokenizers_available() else None),
273
314
  ("roberta", "RobertaTokenizer"),
274
315
  ("roberta-prelayernorm", "RobertaTokenizer"),
275
316
  ("roc_bert", "RoCBertTokenizer"),
276
- ("roformer", "RoFormerTokenizer" if is_tokenizers_available() else None),
277
- ("rwkv", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
278
- ("sam3", "CLIPTokenizer" if is_tokenizers_available() else None),
279
- ("sam3_video", "CLIPTokenizer" if is_tokenizers_available() else None),
317
+ ("roformer", "RoFormerTokenizerFast" if is_tokenizers_available() else None),
318
+ ("rwkv", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
280
319
  ("seamless_m4t", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
281
320
  ("seamless_m4t_v2", "SeamlessM4TTokenizer" if is_tokenizers_available() else None),
282
- ("shieldgemma2", "GemmaTokenizer" if is_tokenizers_available() else None),
321
+ ("shieldgemma2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
283
322
  ("siglip", "SiglipTokenizer" if is_sentencepiece_available() else None),
284
- ("siglip2", "GemmaTokenizer" if is_tokenizers_available() else None),
323
+ ("siglip2", "GemmaTokenizerFast" if is_tokenizers_available() else None),
324
+ ("smollm3", "PreTrainedTokenizerFast" if is_tokenizers_available() else None),
285
325
  ("speech_to_text", "Speech2TextTokenizer" if is_sentencepiece_available() else None),
286
326
  ("speecht5", "SpeechT5Tokenizer" if is_sentencepiece_available() else None),
287
327
  ("splinter", "SplinterTokenizer"),
288
328
  ("squeezebert", "BertTokenizer" if is_tokenizers_available() else None),
289
- ("stablelm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
329
+ ("stablelm", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
290
330
  ("starcoder2", "GPT2Tokenizer" if is_tokenizers_available() else None),
291
331
  ("switch_transformers", "T5Tokenizer" if is_tokenizers_available() else None),
292
332
  ("t5", "T5Tokenizer" if is_tokenizers_available() else None),
293
- ("t5gemma", "GemmaTokenizer" if is_tokenizers_available() else None),
333
+ ("t5gemma", "GemmaTokenizerFast" if is_tokenizers_available() else None),
294
334
  ("tapas", "TapasTokenizer"),
295
335
  ("trocr", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
296
336
  ("tvp", "BertTokenizer" if is_tokenizers_available() else None),
297
337
  ("udop", "UdopTokenizer" if is_tokenizers_available() else None),
298
338
  ("umt5", "T5Tokenizer" if is_tokenizers_available() else None),
299
- ("unispeech", "Wav2Vec2CTCTokenizer"),
300
- ("unispeech-sat", "Wav2Vec2CTCTokenizer"),
339
+ ("video_llava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
301
340
  ("vilt", "BertTokenizer" if is_tokenizers_available() else None),
341
+ ("vipllava", "LlamaTokenizerFast" if is_tokenizers_available() else None),
302
342
  ("visual_bert", "BertTokenizer" if is_tokenizers_available() else None),
303
343
  ("vits", "VitsTokenizer"),
304
344
  (
305
345
  "voxtral",
306
346
  "MistralCommonBackend"
307
347
  if is_mistral_common_available()
308
- else ("TokenizersBackend" if is_tokenizers_available() else None),
348
+ else ("PreTrainedTokenizerFast" if is_tokenizers_available() else None),
309
349
  ),
310
350
  ("wav2vec2", "Wav2Vec2CTCTokenizer"),
311
351
  ("wav2vec2-bert", "Wav2Vec2CTCTokenizer"),
312
352
  ("wav2vec2-conformer", "Wav2Vec2CTCTokenizer"),
313
353
  ("wav2vec2_phoneme", "Wav2Vec2PhonemeCTCTokenizer"),
314
354
  ("whisper", "WhisperTokenizer" if is_tokenizers_available() else None),
315
- ("xclip", "CLIPTokenizer" if is_tokenizers_available() else None),
355
+ ("xclip", "CLIPTokenizerFast" if is_tokenizers_available() else None),
316
356
  ("xglm", "XGLMTokenizer" if is_tokenizers_available() else None),
317
357
  ("xlm", "XLMTokenizer"),
318
358
  ("xlm-roberta", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
319
359
  ("xlm-roberta-xl", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
320
360
  ("xlnet", "XLNetTokenizer" if is_tokenizers_available() else None),
321
- ("xlstm", "GPTNeoXTokenizer" if is_tokenizers_available() else None),
322
- ("xmod", "XLMRobertaTokenizer" if is_tokenizers_available() else None),
361
+ ("xlstm", "GPTNeoXTokenizerFast" if is_tokenizers_available() else None),
362
+ ("xmod", "XLMRobertaTokenizerFast" if is_tokenizers_available() else None),
323
363
  ("yoso", "AlbertTokenizer" if is_tokenizers_available() else None),
364
+ ("zamba", "LlamaTokenizerFast" if is_tokenizers_available() else None),
365
+ ("zamba2", "LlamaTokenizerFast" if is_tokenizers_available() else None),
324
366
  ]
325
367
  )
326
368
 
@@ -346,18 +388,14 @@ def load_merges(merges_file):
346
388
  return merges
347
389
 
348
390
 
349
- def tokenizer_class_from_name(class_name: str) -> type[Any] | None:
350
- # Bloom tokenizer classes were removed but should map to the fast backend for BC
351
- if class_name in {"BloomTokenizer", "BloomTokenizerFast"}:
352
- return TokenizersBackend
353
-
391
+ def tokenizer_class_from_name(class_name: str) -> Union[type[Any], None]:
354
392
  if class_name in REGISTERED_FAST_ALIASES:
355
393
  return REGISTERED_FAST_ALIASES[class_name]
356
394
 
357
395
  if class_name in REGISTERED_TOKENIZER_CLASSES:
358
396
  return REGISTERED_TOKENIZER_CLASSES[class_name]
359
397
 
360
- if class_name == "TokenizersBackend":
398
+ if class_name == "PreTrainedTokenizerFast":
361
399
  return TokenizersBackend
362
400
 
363
401
  # V5: TOKENIZER_MAPPING_NAMES now maps to single strings, not tuples
@@ -366,7 +404,7 @@ def tokenizer_class_from_name(class_name: str) -> type[Any] | None:
366
404
  module_name = model_type_to_module_name(module_name)
367
405
  if (
368
406
  module_name in ["mistral", "mistral3", "mixtral", "ministral", "ministral3", "pixtral", "voxtral"]
369
- and class_name == "MistralCommonBackend"
407
+ and class_name == "MistralCommonTokenizer"
370
408
  ):
371
409
  module = importlib.import_module(".tokenization_mistral_common", "transformers")
372
410
  else:
@@ -390,13 +428,409 @@ def tokenizer_class_from_name(class_name: str) -> type[Any] | None:
390
428
  return None
391
429
 
392
430
 
431
def _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
    """
    Look up the SentencePiece model file for a checkpoint.

    Thin module-private alias: all arguments are forwarded unchanged to the shared
    `find_sentencepiece_model_file` helper and its result is returned as-is.

    Args:
        pretrained_model_name_or_path: Path or model id, forwarded untouched.
        **kwargs: Forwarded untouched to the shared helper.

    Returns:
        Whatever `find_sentencepiece_model_file` returns. Callers in this module compare the
        result against `None`, so presumably a file name or `None` (confirm against the helper).
    """
    model_file = find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
    return model_file
434
+
435
+
436
def _has_hub_file(pretrained_model_name_or_path, filename, kwargs):
    """Return True when `has_file` reports `filename` for the checkpoint; any lookup error counts as absent."""
    try:
        return bool(
            has_file(
                pretrained_model_name_or_path,
                filename,
                revision=kwargs.get("revision"),
                token=kwargs.get("token"),
                cache_dir=kwargs.get("cache_dir"),
                local_files_only=kwargs.get("local_files_only", False),
            )
        )
    except Exception as err:
        # Best-effort probe: a failed lookup just means we move on to the next format.
        logger.debug(f"Could not check for {filename} in {pretrained_model_name_or_path}: {err}")
        return False


def _resolve_hub_file(pretrained_model_name_or_path, filename, kwargs):
    """Resolve `filename` through `cached_file`; return its result, or None when resolution raises."""
    try:
        return cached_file(
            pretrained_model_name_or_path,
            filename,
            cache_dir=kwargs.get("cache_dir"),
            force_download=kwargs.get("force_download", False),
            proxies=kwargs.get("proxies"),
            token=kwargs.get("token"),
            revision=kwargs.get("revision"),
            local_files_only=kwargs.get("local_files_only", False),
            subfolder=kwargs.get("subfolder", ""),
        )
    except Exception as err:
        logger.debug(f"Could not resolve {filename} in {pretrained_model_name_or_path}: {err}")
        return None


def _record_loaded_files(kwargs, backend, files):
    """Store the backend name and the files used by the current attempt in `kwargs` (mutated in place)."""
    kwargs["backend"] = backend
    # A fresh list per attempt, so files from an earlier failed attempt are never reported.
    kwargs["files_loaded"] = list(files)


def _load_tokenizers_backend(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
    """
    Load a tokenizer using only the tokenizers backend (no SentencePiece fallback).

    The following sources are tried in order, and the first one that succeeds is returned:
        1. `tokenizer.json`: load directly with `from_pretrained`.
        2. `tekken.json` (Mistral format): convert with `convert_tekken_tokenizer`.
        3. A SentencePiece model file: extract vocab (and merges) and pass them to `from_pretrained`.
        4. Whatever `load_vocab_and_merges` finds: pass vocab (and merges) to `from_pretrained`.
        5. `vocab.txt` (WordPiece models): one token per line, line number is the id.

    Steps 2, 3 and 5 are best effort: a failure is logged at debug level and the next step is tried.
    The one exception is a missing `sentencepiece` dependency in step 3, which is re-raised as an
    `ImportError` with an installation hint.

    Args:
        tokenizer_class: The tokenizer class to instantiate. Must not be None (`__name__` is read).
        pretrained_model_name_or_path: Path or model id.
        inputs: Additional positional arguments for tokenizer init.
        kwargs: Additional keyword arguments, given as a dict. It is mutated in place: the keys
            `"backend"` and `"files_loaded"` are set to describe the attempt being made.

    Returns:
        An instantiated tokenizer object.

    Raises:
        ImportError: If a SentencePiece model file was found but `sentencepiece` cannot be imported.
        ValueError: If tokenizer could not be loaded with tokenizers backend.
    """
    # 1. tokenizer.json
    if _has_hub_file(pretrained_model_name_or_path, "tokenizer.json", kwargs):
        _record_loaded_files(kwargs, "tokenizers", ["tokenizer.json"])
        # Some old models have uploaded a tokenizer.json but haven't updated tokenizer_config.json
        # to point to the correct tokenizer class
        if tokenizer_class.__name__ in ("PythonBackend", "PreTrainedTokenizer"):
            tokenizer_class = TokenizersBackend
        return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

    # 2. tekken.json (Mistral format)
    if _has_hub_file(pretrained_model_name_or_path, "tekken.json", kwargs):
        try:
            from ...integrations.mistral import convert_tekken_tokenizer

            hub_keys = ("cache_dir", "force_download", "proxies", "token", "revision", "local_files_only", "subfolder")
            tekken_file = cached_file(
                pretrained_model_name_or_path,
                "tekken.json",
                **{key: value for key, value in kwargs.items() if key in hub_keys},
            )
            if tekken_file is not None:
                # Convert first: the file is only recorded as loaded once conversion succeeded.
                tokenizer = convert_tekken_tokenizer(tekken_file)
                _record_loaded_files(kwargs, "tokenizers", ["tekken.json"])
                return tokenizer
        except Exception as err:
            logger.debug(f"Could not load tekken.json from {pretrained_model_name_or_path}: {err}")

    # 3. SentencePiece model converted to vocab (+ merges)
    spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
    resolved_spm = None
    if spm_file is not None:
        resolved_spm = _resolve_hub_file(pretrained_model_name_or_path, spm_file, kwargs)

    if resolved_spm is not None:
        try:
            from ...tokenization_utils_sentencepiece import SentencePieceExtractor

            init_signature = inspect.signature(getattr(tokenizer_class, "__init__", tokenizer_class))
            if "vocab" in init_signature.parameters:
                try:
                    _, vocab_scores, merges = SentencePieceExtractor(resolved_spm).extract()
                    _record_loaded_files(kwargs, "tokenizers", [spm_file])
                    extracted = {"vocab": vocab_scores}
                    # BPE-style classes take both vocab and merges; Unigram-style classes
                    # (e.g. NLLB, SeamlessM4T) only take vocab.
                    if "merges" in init_signature.parameters:
                        extracted["merges"] = merges
                    return tokenizer_class.from_pretrained(
                        pretrained_model_name_or_path, *inputs, **extracted, **kwargs
                    )
                except Exception as err:
                    logger.debug(f"Could not build a tokenizer from {spm_file}: {err}")
        except ImportError as e:
            if "sentencepiece" in str(e).lower():
                raise ImportError(
                    f"This checkpoint only contains a SentencePiece model file ({spm_file}), but the `sentencepiece` library is not installed. "
                    "Please install sentencepiece to load this tokenizer: `pip install sentencepiece`"
                ) from e
            raise
        except Exception as err:
            logger.debug(f"Could not inspect {tokenizer_class} for SentencePiece conversion: {err}")

    # 4. vocab (+ merges) files
    vocab, merges, loaded = load_vocab_and_merges(pretrained_model_name_or_path, **kwargs)
    if vocab is not None:
        backend = "python" if issubclass(tokenizer_class, PreTrainedTokenizer) else "tokenizers"
        _record_loaded_files(kwargs, backend, loaded)
        extracted = {"vocab": vocab}
        if merges is not None:
            extracted["merges"] = merges
        return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **extracted, **kwargs)

    # 5. vocab.txt (WordPiece models like SplinterTokenizer)
    resolved_vocab_txt = _resolve_hub_file(pretrained_model_name_or_path, "vocab.txt", kwargs)
    if resolved_vocab_txt is not None:
        try:
            init_signature = inspect.signature(getattr(tokenizer_class, "__init__", tokenizer_class))
            if "vocab" in init_signature.parameters:
                # Each line is a token, the line number is its id.
                with open(resolved_vocab_txt, "r", encoding="utf-8") as reader:
                    vocab = OrderedDict((line.rstrip("\n"), index) for index, line in enumerate(reader))
                _record_loaded_files(kwargs, "tokenizers", ["vocab.txt"])
                return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, vocab=vocab, **kwargs)
        except Exception as err:
            logger.debug(f"Could not load vocab.txt from {pretrained_model_name_or_path}: {err}")

    # If all methods failed, raise an error
    raise ValueError(
        f"Could not load tokenizer from {pretrained_model_name_or_path} using tokenizers backend. "
        "No tokenizer.json, tekken.json, vocab.json+merges.txt, vocab.txt, or compatible SentencePiece model found."
    )
620
+
621
+
622
def _from_pretrained_with_sentencepiece(
    tokenizer_class, pretrained_model_name_or_path, inputs, kwargs, spm_file, subclass_message
):
    """
    Load through the SentencePiece backend, recording the backend and model file in `kwargs`.

    `kwargs` is mutated in place: `"backend"`, `"files_loaded"` and, when `spm_file` is not None,
    `"vocab_file"` (the `cached_file` result for `spm_file`) are set before loading. The given
    `tokenizer_class` is used when it is a class deriving from `SentencePieceBackend` (after
    logging `subclass_message`); otherwise `SentencePieceBackend` itself is used.
    """
    kwargs["backend"] = "sentencepiece"
    kwargs["files_loaded"] = [spm_file] if spm_file else []
    # Resolve the SPM file path and pass it as vocab_file
    if spm_file is not None:
        kwargs["vocab_file"] = cached_file(
            pretrained_model_name_or_path,
            spm_file,
            cache_dir=kwargs.get("cache_dir"),
            force_download=kwargs.get("force_download", False),
            proxies=kwargs.get("proxies"),
            token=kwargs.get("token"),
            revision=kwargs.get("revision"),
            local_files_only=kwargs.get("local_files_only", False),
            subfolder=kwargs.get("subfolder", ""),
        )
    # The isinstance guard keeps issubclass from raising TypeError on a non-class object.
    if isinstance(tokenizer_class, type) and issubclass(tokenizer_class, SentencePieceBackend):
        logger.info(subclass_message)
        return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    return SentencePieceBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)


def _from_pretrained_without_auto_kwargs(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
    """Call `tokenizer_class.from_pretrained` without the AutoTokenizer bookkeeping keys; `kwargs` is not mutated."""
    # Filter out AutoTokenizer-specific kwargs that custom tokenizers don't accept
    custom_kwargs = {key: value for key, value in kwargs.items() if key not in ("backend", "files_loaded")}
    custom_kwargs["_from_auto"] = True  # Signal that this is called from AutoTokenizer
    return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **custom_kwargs)


def _try_load_tokenizer_with_fallbacks(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs):
    """
    Try to load a tokenizer with backend selection.

    The backend is taken from `kwargs["backend"]` (removed from `kwargs`, default `"tokenizers"`);
    an unknown value is reported with a warning and replaced by `"tokenizers"`.

    - `"sentencepiece"`: load through `SentencePieceBackend`, or through `tokenizer_class` when it
      derives from it.
    - `"tokenizers"` with a tokenizer class:
        1. A `PreTrainedTokenizer` subclass that is not a backend class is loaded directly.
        2. A class outside the `PythonBackend` and backend hierarchies (e.g. MistralCommonBackend)
           is loaded directly, without the `backend`/`files_loaded` kwargs.
        3. Otherwise `_load_tokenizers_backend` is used. If it raises `ValueError`, fall back to the
           SentencePiece backend when a SentencePiece model file is found, else to a direct
           `tokenizer_class.from_pretrained` call, else re-raise.
    - `"tokenizers"` without a tokenizer class: fall back to `SentencePieceBackend` when a
      SentencePiece model file is found.

    Args:
        tokenizer_class: The tokenizer class to instantiate (can be None).
        pretrained_model_name_or_path: Path or model id.
        inputs: Additional positional arguments for tokenizer init.
        kwargs: Additional keyword arguments, given as a dict (may include a 'backend' entry).
            It is mutated in place (`backend`, `files_loaded`, `vocab_file`).

    Returns:
        An instantiated tokenizer object.

    Raises:
        ValueError: If the requested backend is not installed or no tokenizer could be loaded.
    """
    # Extract the backend parameter - default to "tokenizers" to prioritize tokenizers backend
    backend = kwargs.pop("backend", "tokenizers")

    # Validate backend parameter
    if backend not in ("sentencepiece", "tokenizers"):
        logger.warning(
            f"Invalid backend '{backend}' specified. Valid options are 'tokenizers' or 'sentencepiece'. "
            "Defaulting to 'tokenizers' backend."
        )
        backend = "tokenizers"

    # Route to SentencePiece backend if requested
    if backend == "sentencepiece":
        if SentencePieceBackend is None:
            raise ValueError(
                "SentencePiece backend was requested but sentencepiece is not installed. "
                "Please install it with: pip install sentencepiece"
            )
        logger.info("Loading tokenizer with SentencePiece backend")
        spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
        return _from_pretrained_with_sentencepiece(
            tokenizer_class,
            pretrained_model_name_or_path,
            inputs,
            kwargs,
            spm_file,
            "Loading tokenizer with SentencePiece backend using tokenizer class",
        )

    # From here on the backend is "tokenizers" (the default).
    if tokenizer_class is not None:
        from ...tokenization_python import PreTrainedTokenizer

        # Backend classes that are actually available in this environment
        backend_classes = [bc for bc in (TokenizersBackend, SentencePieceBackend) if bc]

        is_class = isinstance(tokenizer_class, type)
        inherits_from_backend = is_class and any(issubclass(tokenizer_class, bc) for bc in backend_classes)

        # Custom PreTrainedTokenizer subclasses (not backend classes) are edge cases with their own
        # logic (e.g., BioGptTokenizer with Moses tokenization)
        is_custom_pre_trained = (
            is_class
            and issubclass(tokenizer_class, PreTrainedTokenizer)
            and not inherits_from_backend
            and tokenizer_class.__name__ not in ("PythonBackend", "PreTrainedTokenizer")
        )

        # Completely custom tokenizers (not PythonBackend, not a backend class),
        # e.g., MistralCommonBackend which has its own from_pretrained logic
        is_completely_custom = (
            is_class and not issubclass(tokenizer_class, PythonBackend) and not inherits_from_backend
        )

        if is_custom_pre_trained:
            logger.info("Loading tokenizer with custom PreTrainedTokenizer backend (edge case)")
            # Track the backend type for custom tokenizers
            kwargs["backend"] = "custom"
            kwargs["files_loaded"] = []  # Custom tokenizers may load various files
            return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

        if is_completely_custom:
            logger.info("Loading tokenizer with custom tokenizer class (non-PreTrainedTokenizer)")
            return _from_pretrained_without_auto_kwargs(
                tokenizer_class, pretrained_model_name_or_path, inputs, kwargs
            )

        if TokenizersBackend is None:
            raise ValueError(
                "Tokenizers backend is the default but tokenizers library is not installed. "
                "Please install it with: pip install tokenizers"
            )
        logger.info("Loading tokenizer with tokenizers backend")
        try:
            return _load_tokenizers_backend(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs)
        except ValueError as e:
            # If tokenizers backend fails, try falling back to SentencePiece backend if available
            spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
            if spm_file is not None and SentencePieceBackend is not None:
                logger.info(
                    f"Tokenizers backend failed: {e}. "
                    f"Falling back to SentencePieceBackend since {spm_file} file was found."
                )
                return _from_pretrained_with_sentencepiece(
                    tokenizer_class,
                    pretrained_model_name_or_path,
                    inputs,
                    kwargs,
                    spm_file,
                    "Falling back to SentencePiece backend using tokenizer class that inherits from SentencePieceBackend.",
                )
            # If no fallback available, try calling tokenizer class directly as last resort
            if hasattr(tokenizer_class, "from_pretrained"):
                logger.info(
                    f"Tokenizers backend failed: {e}. Trying to load tokenizer directly from tokenizer class."
                )
                return _from_pretrained_without_auto_kwargs(
                    tokenizer_class, pretrained_model_name_or_path, inputs, kwargs
                )
            # Re-raise if no fallback options available
            raise

    # If no tokenizer class but tokenizers backend requested, fall back to SentencePiece if available
    spm_file = _find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs)
    if spm_file is not None and SentencePieceBackend is not None:
        logger.info(
            f"Tokenizers backend was requested but no tokenizer class found. "
            f"Falling back to SentencePieceBackend since {spm_file} file was found."
        )
        return _from_pretrained_with_sentencepiece(
            tokenizer_class,
            pretrained_model_name_or_path,
            inputs,
            kwargs,
            spm_file,
            "Falling back to SentencePiece backend using tokenizer class that inherits from SentencePieceBackend.",
        )

    raise ValueError(
        f"Could not load tokenizer from {pretrained_model_name_or_path}. "
        "No tokenizer class could be determined and no SentencePiece model found."
    )
825
+
826
+
393
827
  def get_tokenizer_config(
394
- pretrained_model_name_or_path: str | os.PathLike[str],
395
- cache_dir: str | os.PathLike[str] | None = None,
828
+ pretrained_model_name_or_path: Union[str, os.PathLike[str]],
829
+ cache_dir: Optional[Union[str, os.PathLike[str]]] = None,
396
830
  force_download: bool = False,
397
- proxies: dict[str, str] | None = None,
398
- token: bool | str | None = None,
399
- revision: str | None = None,
831
+ proxies: Optional[dict[str, str]] = None,
832
+ token: Optional[Union[bool, str]] = None,
833
+ revision: Optional[str] = None,
400
834
  local_files_only: bool = False,
401
835
  subfolder: str = "",
402
836
  **kwargs,
@@ -504,7 +938,7 @@ class AutoTokenizer:
504
938
  @replace_list_option_in_docstrings(TOKENIZER_MAPPING_NAMES)
505
939
  def from_pretrained(
506
940
  cls, pretrained_model_name_or_path, *inputs, **kwargs
507
- ) -> TokenizersBackend | SentencePieceBackend:
941
+ ) -> Union[TokenizersBackend, SentencePieceBackend]:
508
942
  r"""
509
943
  Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.
510
944
 
@@ -582,6 +1016,18 @@ class AutoTokenizer:
582
1016
  >>> # Explicitly use the sentencepiece backend
583
1017
  >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
584
1018
  ```"""
1019
+ use_auth_token = kwargs.pop("use_auth_token", None)
1020
+ if use_auth_token is not None:
1021
+ logger.warning(
1022
+ "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
1023
+ FutureWarning,
1024
+ )
1025
+ if kwargs.get("token") is not None:
1026
+ raise ValueError(
1027
+ "`token` and `use_auth_token` are both specified. Please set only the argument `token`."
1028
+ )
1029
+ kwargs["token"] = use_auth_token
1030
+
585
1031
  config = kwargs.pop("config", None)
586
1032
  kwargs["_from_auto"] = True
587
1033
 
@@ -608,25 +1054,11 @@ class AutoTokenizer:
608
1054
 
609
1055
  return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
610
1056
 
611
- if gguf_file:
612
- gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
613
- config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
614
- config = AutoConfig.for_model(**config_dict)
615
- elif config is None:
616
- try:
617
- config = AutoConfig.from_pretrained(
618
- pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
619
- )
620
- except Exception:
621
- config = PreTrainedConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
622
-
623
- config_model_type = config.model_type
624
-
625
1057
  # Next, let's try to use the tokenizer_config file to get the tokenizer class.
626
1058
  tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
627
- tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
628
-
629
- # Check for auto_map early to handle dynamic tokenizers properly
1059
+ if "_commit_hash" in tokenizer_config:
1060
+ kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
1061
+ config_tokenizer_class = tokenizer_config.get("tokenizer_class")
630
1062
  tokenizer_auto_map = None
631
1063
  if "auto_map" in tokenizer_config:
632
1064
  if isinstance(tokenizer_config["auto_map"], (tuple, list)):
@@ -635,36 +1067,34 @@ class AutoTokenizer:
635
1067
  else:
636
1068
  tokenizer_auto_map = tokenizer_config["auto_map"].get("AutoTokenizer", None)
637
1069
 
638
- # if there is a config, we can check that the tokenizer class != than model class and can thus assume we need to use TokenizersBackend
639
- # Skip this early exit if auto_map is present (custom tokenizer with trust_remote_code)
1070
+ # If that did not work, let's try to use the config.
1071
+ if config_tokenizer_class is None:
1072
+ if not isinstance(config, PreTrainedConfig):
1073
+ if gguf_file:
1074
+ gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **kwargs)
1075
+ config_dict = load_gguf_checkpoint(gguf_path, return_tensors=False)["config"]
1076
+ config = AutoConfig.for_model(**config_dict)
1077
+ else:
1078
+ config = AutoConfig.from_pretrained(
1079
+ pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
1080
+ )
1081
+ config_tokenizer_class = config.tokenizer_class
1082
+ if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:
1083
+ tokenizer_auto_map = config.auto_map["AutoTokenizer"]
1084
+
640
1085
  if (
641
- tokenizer_auto_map is None
642
- and tokenizer_config_class is not None
643
- and config_model_type is not None
644
- and config_model_type != ""
645
- and TOKENIZER_MAPPING_NAMES.get(config_model_type, "").replace("Fast", "")
646
- != tokenizer_config_class.replace("Fast", "")
1086
+ config_tokenizer_class is not None
1087
+ and config_tokenizer_class != "PreTrainedTokenizerFast"
1088
+ and "Fast" in config_tokenizer_class
647
1089
  ):
648
- # new model, but we ignore it unless the model type is the same
649
- try:
650
- return TokenizersBackend.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
651
- except Exception:
652
- return tokenizer_class_from_name(tokenizer_config_class).from_pretrained(
653
- pretrained_model_name_or_path, *inputs, **kwargs
654
- )
655
-
656
- if "_commit_hash" in tokenizer_config:
657
- kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
658
-
659
- if tokenizer_config_class:
660
- tokenizer_config_class = tokenizer_config_class.replace("Fast", "")
1090
+ config_tokenizer_class = config_tokenizer_class[:-4]
661
1091
 
662
1092
  has_remote_code = tokenizer_auto_map is not None
663
1093
  has_local_code = type(config) in TOKENIZER_MAPPING or (
664
- tokenizer_config_class is not None
1094
+ config_tokenizer_class is not None
665
1095
  and (
666
- tokenizer_class_from_name(tokenizer_config_class) is not None
667
- or tokenizer_class_from_name(tokenizer_config_class + "Fast") is not None
1096
+ tokenizer_class_from_name(config_tokenizer_class) is not None
1097
+ or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
668
1098
  )
669
1099
  )
670
1100
  if has_remote_code:
@@ -688,24 +1118,17 @@ class AutoTokenizer:
688
1118
  return tokenizer_class.from_pretrained(
689
1119
  pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
690
1120
  )
691
- elif tokenizer_config_class is not None:
692
- tokenizer_class_candidate = tokenizer_config_class
693
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
694
- if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
695
- tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
696
- if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
697
- tokenizer_class = TokenizersBackend
698
- # Fallback to TokenizersBackend if the class wasn't found
699
- if tokenizer_class is None:
700
- tokenizer_class = TokenizersBackend
1121
+ elif config_tokenizer_class is not None:
1122
+ fast_tokenizer_class = None
1123
+ if fast_tokenizer_class is None:
1124
+ tokenizer_class_candidate = config_tokenizer_class
1125
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
1126
+ if tokenizer_class is None and not tokenizer_class_candidate.endswith("Fast"):
1127
+ tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate + "Fast")
1128
+ else:
1129
+ tokenizer_class = fast_tokenizer_class
701
1130
 
702
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
703
- elif getattr(config, "tokenizer_class", None):
704
- _class = config.tokenizer_class
705
- if "PreTrainedTokenizerFast" not in _class:
706
- _class = _class.replace("Fast", "")
707
- tokenizer_class = tokenizer_class_from_name(_class)
708
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
1131
+ return _try_load_tokenizer_with_fallbacks(tokenizer_class, pretrained_model_name_or_path, inputs, kwargs)
709
1132
 
710
1133
  # Otherwise we have to be creative.
711
1134
  # if model is an encoder decoder, the encoder tokenizer class is used by default
@@ -719,25 +1142,19 @@ class AutoTokenizer:
719
1142
  )
720
1143
  config = config.encoder
721
1144
 
722
- model_type = config_class_to_model_type(type(config).__name__) or getattr(config, "model_type", None)
1145
+ model_type = config_class_to_model_type(type(config).__name__)
723
1146
  if model_type is not None:
724
- tokenizer_class = TOKENIZER_MAPPING.get(type(config), TokenizersBackend)
725
- if tokenizer_class is not None:
726
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
1147
+ tokenizer_class = TOKENIZER_MAPPING[type(config)]
727
1148
 
728
- # Fallback: try tokenizer_class from tokenizer_config.json
729
- tokenizer_config_class = tokenizer_config.get("tokenizer_class", None)
730
- if tokenizer_config_class is not None:
731
- if tokenizer_config_class != "TokenizersBackend" and "Fast" in tokenizer_config_class:
732
- tokenizer_config_class = tokenizer_config_class[:-4]
733
- tokenizer_class = tokenizer_class_from_name(tokenizer_config_class)
734
- if tokenizer_class is None and not tokenizer_config_class.endswith("Fast"):
735
- tokenizer_class = tokenizer_class_from_name(tokenizer_config_class + "Fast")
736
- if tokenizer_class is not None and tokenizer_class.__name__ == "PythonBackend":
737
- tokenizer_class = TokenizersBackend
738
- if tokenizer_class is None:
739
- tokenizer_class = TokenizersBackend
740
- return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
1149
+ if tokenizer_class is not None:
1150
+ return _try_load_tokenizer_with_fallbacks(
1151
+ tokenizer_class, pretrained_model_name_or_path, inputs, kwargs
1152
+ )
1153
+ else:
1154
+ raise ValueError(
1155
+ "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed "
1156
+ "in order to use this tokenizer."
1157
+ )
741
1158
 
742
1159
  raise ValueError(
743
1160
  f"Unrecognized configuration class {config.__class__} to build an AutoTokenizer.\n"