transformers 5.0.0rc2-py3-none-any.whl → 5.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1594)
  1. transformers/__init__.py +11 -37
  2. transformers/activations.py +2 -2
  3. transformers/audio_utils.py +32 -32
  4. transformers/backbone_utils.py +326 -0
  5. transformers/cache_utils.py +26 -126
  6. transformers/cli/chat.py +3 -3
  7. transformers/cli/serve.py +13 -10
  8. transformers/cli/transformers.py +2 -1
  9. transformers/configuration_utils.py +22 -92
  10. transformers/conversion_mapping.py +150 -26
  11. transformers/convert_slow_tokenizer.py +9 -12
  12. transformers/core_model_loading.py +217 -129
  13. transformers/data/processors/glue.py +0 -1
  14. transformers/data/processors/utils.py +0 -1
  15. transformers/data/processors/xnli.py +0 -1
  16. transformers/dependency_versions_check.py +0 -1
  17. transformers/dependency_versions_table.py +10 -11
  18. transformers/distributed/configuration_utils.py +1 -2
  19. transformers/dynamic_module_utils.py +23 -23
  20. transformers/feature_extraction_sequence_utils.py +19 -23
  21. transformers/feature_extraction_utils.py +14 -14
  22. transformers/file_utils.py +0 -2
  23. transformers/generation/candidate_generator.py +2 -4
  24. transformers/generation/configuration_utils.py +54 -39
  25. transformers/generation/continuous_batching/__init__.py +0 -1
  26. transformers/generation/continuous_batching/cache.py +74 -44
  27. transformers/generation/continuous_batching/cache_manager.py +28 -28
  28. transformers/generation/continuous_batching/continuous_api.py +133 -414
  29. transformers/generation/continuous_batching/input_ouputs.py +464 -0
  30. transformers/generation/continuous_batching/requests.py +77 -19
  31. transformers/generation/continuous_batching/scheduler.py +154 -104
  32. transformers/generation/logits_process.py +10 -133
  33. transformers/generation/stopping_criteria.py +1 -2
  34. transformers/generation/streamers.py +0 -1
  35. transformers/generation/utils.py +91 -121
  36. transformers/generation/watermarking.py +2 -3
  37. transformers/hf_argparser.py +9 -13
  38. transformers/hyperparameter_search.py +1 -2
  39. transformers/image_processing_base.py +9 -9
  40. transformers/image_processing_utils.py +11 -15
  41. transformers/image_processing_utils_fast.py +70 -71
  42. transformers/image_transforms.py +73 -42
  43. transformers/image_utils.py +30 -37
  44. transformers/initialization.py +57 -0
  45. transformers/integrations/__init__.py +10 -24
  46. transformers/integrations/accelerate.py +47 -11
  47. transformers/integrations/awq.py +1 -3
  48. transformers/integrations/deepspeed.py +146 -4
  49. transformers/integrations/eetq.py +0 -1
  50. transformers/integrations/executorch.py +2 -6
  51. transformers/integrations/fbgemm_fp8.py +1 -2
  52. transformers/integrations/finegrained_fp8.py +149 -13
  53. transformers/integrations/flash_attention.py +3 -8
  54. transformers/integrations/flex_attention.py +1 -1
  55. transformers/integrations/fp_quant.py +4 -6
  56. transformers/integrations/ggml.py +0 -1
  57. transformers/integrations/hub_kernels.py +18 -7
  58. transformers/integrations/integration_utils.py +2 -3
  59. transformers/integrations/moe.py +226 -106
  60. transformers/integrations/mxfp4.py +52 -40
  61. transformers/integrations/peft.py +488 -176
  62. transformers/integrations/quark.py +2 -4
  63. transformers/integrations/tensor_parallel.py +641 -581
  64. transformers/integrations/torchao.py +4 -6
  65. transformers/loss/loss_lw_detr.py +356 -0
  66. transformers/loss/loss_utils.py +2 -0
  67. transformers/masking_utils.py +199 -59
  68. transformers/model_debugging_utils.py +4 -5
  69. transformers/modelcard.py +14 -192
  70. transformers/modeling_attn_mask_utils.py +19 -19
  71. transformers/modeling_flash_attention_utils.py +28 -29
  72. transformers/modeling_gguf_pytorch_utils.py +5 -5
  73. transformers/modeling_layers.py +21 -22
  74. transformers/modeling_outputs.py +242 -253
  75. transformers/modeling_rope_utils.py +32 -32
  76. transformers/modeling_utils.py +416 -438
  77. transformers/models/__init__.py +10 -0
  78. transformers/models/afmoe/configuration_afmoe.py +40 -33
  79. transformers/models/afmoe/modeling_afmoe.py +38 -41
  80. transformers/models/afmoe/modular_afmoe.py +23 -25
  81. transformers/models/aimv2/configuration_aimv2.py +2 -10
  82. transformers/models/aimv2/modeling_aimv2.py +46 -45
  83. transformers/models/aimv2/modular_aimv2.py +13 -19
  84. transformers/models/albert/configuration_albert.py +8 -2
  85. transformers/models/albert/modeling_albert.py +70 -72
  86. transformers/models/albert/tokenization_albert.py +1 -4
  87. transformers/models/align/configuration_align.py +8 -6
  88. transformers/models/align/modeling_align.py +83 -86
  89. transformers/models/align/processing_align.py +2 -30
  90. transformers/models/altclip/configuration_altclip.py +4 -7
  91. transformers/models/altclip/modeling_altclip.py +106 -103
  92. transformers/models/altclip/processing_altclip.py +2 -15
  93. transformers/models/apertus/__init__.py +0 -1
  94. transformers/models/apertus/configuration_apertus.py +23 -28
  95. transformers/models/apertus/modeling_apertus.py +35 -38
  96. transformers/models/apertus/modular_apertus.py +36 -40
  97. transformers/models/arcee/configuration_arcee.py +25 -30
  98. transformers/models/arcee/modeling_arcee.py +35 -38
  99. transformers/models/arcee/modular_arcee.py +20 -23
  100. transformers/models/aria/configuration_aria.py +31 -44
  101. transformers/models/aria/image_processing_aria.py +25 -27
  102. transformers/models/aria/modeling_aria.py +102 -102
  103. transformers/models/aria/modular_aria.py +111 -124
  104. transformers/models/aria/processing_aria.py +28 -35
  105. transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +0 -1
  106. transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +3 -6
  107. transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +9 -11
  108. transformers/models/audioflamingo3/__init__.py +0 -1
  109. transformers/models/audioflamingo3/configuration_audioflamingo3.py +0 -1
  110. transformers/models/audioflamingo3/modeling_audioflamingo3.py +60 -52
  111. transformers/models/audioflamingo3/modular_audioflamingo3.py +52 -43
  112. transformers/models/audioflamingo3/processing_audioflamingo3.py +6 -8
  113. transformers/models/auto/auto_factory.py +12 -11
  114. transformers/models/auto/configuration_auto.py +48 -5
  115. transformers/models/auto/feature_extraction_auto.py +5 -7
  116. transformers/models/auto/image_processing_auto.py +30 -39
  117. transformers/models/auto/modeling_auto.py +33 -199
  118. transformers/models/auto/processing_auto.py +11 -19
  119. transformers/models/auto/tokenization_auto.py +38 -37
  120. transformers/models/auto/video_processing_auto.py +7 -8
  121. transformers/models/autoformer/configuration_autoformer.py +4 -7
  122. transformers/models/autoformer/modeling_autoformer.py +100 -101
  123. transformers/models/aya_vision/configuration_aya_vision.py +4 -1
  124. transformers/models/aya_vision/modeling_aya_vision.py +64 -99
  125. transformers/models/aya_vision/modular_aya_vision.py +46 -74
  126. transformers/models/aya_vision/processing_aya_vision.py +25 -53
  127. transformers/models/bamba/configuration_bamba.py +46 -39
  128. transformers/models/bamba/modeling_bamba.py +83 -119
  129. transformers/models/bamba/modular_bamba.py +70 -109
  130. transformers/models/bark/configuration_bark.py +6 -8
  131. transformers/models/bark/generation_configuration_bark.py +3 -5
  132. transformers/models/bark/modeling_bark.py +64 -65
  133. transformers/models/bark/processing_bark.py +19 -41
  134. transformers/models/bart/configuration_bart.py +9 -5
  135. transformers/models/bart/modeling_bart.py +124 -129
  136. transformers/models/barthez/tokenization_barthez.py +1 -4
  137. transformers/models/bartpho/tokenization_bartpho.py +6 -7
  138. transformers/models/beit/configuration_beit.py +2 -15
  139. transformers/models/beit/image_processing_beit.py +53 -56
  140. transformers/models/beit/image_processing_beit_fast.py +11 -12
  141. transformers/models/beit/modeling_beit.py +65 -62
  142. transformers/models/bert/configuration_bert.py +12 -2
  143. transformers/models/bert/modeling_bert.py +117 -152
  144. transformers/models/bert/tokenization_bert.py +2 -4
  145. transformers/models/bert/tokenization_bert_legacy.py +3 -5
  146. transformers/models/bert_generation/configuration_bert_generation.py +17 -2
  147. transformers/models/bert_generation/modeling_bert_generation.py +53 -55
  148. transformers/models/bert_generation/tokenization_bert_generation.py +2 -3
  149. transformers/models/bert_japanese/tokenization_bert_japanese.py +5 -6
  150. transformers/models/bertweet/tokenization_bertweet.py +1 -3
  151. transformers/models/big_bird/configuration_big_bird.py +12 -9
  152. transformers/models/big_bird/modeling_big_bird.py +107 -124
  153. transformers/models/big_bird/tokenization_big_bird.py +1 -4
  154. transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -9
  155. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +118 -118
  156. transformers/models/biogpt/configuration_biogpt.py +8 -2
  157. transformers/models/biogpt/modeling_biogpt.py +73 -79
  158. transformers/models/biogpt/modular_biogpt.py +60 -66
  159. transformers/models/biogpt/tokenization_biogpt.py +3 -5
  160. transformers/models/bit/configuration_bit.py +2 -5
  161. transformers/models/bit/image_processing_bit.py +21 -24
  162. transformers/models/bit/image_processing_bit_fast.py +0 -1
  163. transformers/models/bit/modeling_bit.py +15 -16
  164. transformers/models/bitnet/configuration_bitnet.py +23 -28
  165. transformers/models/bitnet/modeling_bitnet.py +34 -38
  166. transformers/models/bitnet/modular_bitnet.py +7 -10
  167. transformers/models/blenderbot/configuration_blenderbot.py +8 -5
  168. transformers/models/blenderbot/modeling_blenderbot.py +68 -99
  169. transformers/models/blenderbot/tokenization_blenderbot.py +0 -1
  170. transformers/models/blenderbot_small/configuration_blenderbot_small.py +8 -5
  171. transformers/models/blenderbot_small/modeling_blenderbot_small.py +70 -72
  172. transformers/models/blenderbot_small/tokenization_blenderbot_small.py +1 -3
  173. transformers/models/blip/configuration_blip.py +9 -10
  174. transformers/models/blip/image_processing_blip.py +17 -20
  175. transformers/models/blip/image_processing_blip_fast.py +0 -1
  176. transformers/models/blip/modeling_blip.py +115 -108
  177. transformers/models/blip/modeling_blip_text.py +63 -65
  178. transformers/models/blip/processing_blip.py +5 -36
  179. transformers/models/blip_2/configuration_blip_2.py +2 -2
  180. transformers/models/blip_2/modeling_blip_2.py +145 -121
  181. transformers/models/blip_2/processing_blip_2.py +8 -38
  182. transformers/models/bloom/configuration_bloom.py +5 -2
  183. transformers/models/bloom/modeling_bloom.py +60 -60
  184. transformers/models/blt/configuration_blt.py +94 -86
  185. transformers/models/blt/modeling_blt.py +93 -90
  186. transformers/models/blt/modular_blt.py +127 -69
  187. transformers/models/bridgetower/configuration_bridgetower.py +7 -2
  188. transformers/models/bridgetower/image_processing_bridgetower.py +34 -35
  189. transformers/models/bridgetower/image_processing_bridgetower_fast.py +13 -14
  190. transformers/models/bridgetower/modeling_bridgetower.py +136 -124
  191. transformers/models/bridgetower/processing_bridgetower.py +2 -16
  192. transformers/models/bros/configuration_bros.py +24 -18
  193. transformers/models/bros/modeling_bros.py +78 -80
  194. transformers/models/bros/processing_bros.py +2 -12
  195. transformers/models/byt5/tokenization_byt5.py +4 -6
  196. transformers/models/camembert/configuration_camembert.py +8 -2
  197. transformers/models/camembert/modeling_camembert.py +97 -99
  198. transformers/models/camembert/modular_camembert.py +51 -54
  199. transformers/models/camembert/tokenization_camembert.py +1 -4
  200. transformers/models/canine/configuration_canine.py +4 -2
  201. transformers/models/canine/modeling_canine.py +73 -75
  202. transformers/models/canine/tokenization_canine.py +0 -1
  203. transformers/models/chameleon/configuration_chameleon.py +29 -34
  204. transformers/models/chameleon/image_processing_chameleon.py +21 -24
  205. transformers/models/chameleon/image_processing_chameleon_fast.py +5 -6
  206. transformers/models/chameleon/modeling_chameleon.py +135 -92
  207. transformers/models/chameleon/processing_chameleon.py +16 -41
  208. transformers/models/chinese_clip/configuration_chinese_clip.py +10 -8
  209. transformers/models/chinese_clip/image_processing_chinese_clip.py +21 -24
  210. transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +0 -1
  211. transformers/models/chinese_clip/modeling_chinese_clip.py +93 -95
  212. transformers/models/chinese_clip/processing_chinese_clip.py +2 -15
  213. transformers/models/clap/configuration_clap.py +4 -9
  214. transformers/models/clap/feature_extraction_clap.py +9 -10
  215. transformers/models/clap/modeling_clap.py +109 -111
  216. transformers/models/clap/processing_clap.py +2 -15
  217. transformers/models/clip/configuration_clip.py +4 -2
  218. transformers/models/clip/image_processing_clip.py +21 -24
  219. transformers/models/clip/image_processing_clip_fast.py +9 -1
  220. transformers/models/clip/modeling_clip.py +70 -68
  221. transformers/models/clip/processing_clip.py +2 -14
  222. transformers/models/clip/tokenization_clip.py +2 -5
  223. transformers/models/clipseg/configuration_clipseg.py +4 -2
  224. transformers/models/clipseg/modeling_clipseg.py +113 -112
  225. transformers/models/clipseg/processing_clipseg.py +19 -42
  226. transformers/models/clvp/configuration_clvp.py +15 -5
  227. transformers/models/clvp/feature_extraction_clvp.py +7 -10
  228. transformers/models/clvp/modeling_clvp.py +138 -145
  229. transformers/models/clvp/number_normalizer.py +1 -2
  230. transformers/models/clvp/processing_clvp.py +3 -20
  231. transformers/models/clvp/tokenization_clvp.py +0 -1
  232. transformers/models/code_llama/tokenization_code_llama.py +3 -6
  233. transformers/models/codegen/configuration_codegen.py +4 -4
  234. transformers/models/codegen/modeling_codegen.py +50 -49
  235. transformers/models/codegen/tokenization_codegen.py +5 -6
  236. transformers/models/cohere/configuration_cohere.py +25 -30
  237. transformers/models/cohere/modeling_cohere.py +39 -42
  238. transformers/models/cohere/modular_cohere.py +27 -31
  239. transformers/models/cohere/tokenization_cohere.py +5 -6
  240. transformers/models/cohere2/configuration_cohere2.py +27 -32
  241. transformers/models/cohere2/modeling_cohere2.py +38 -41
  242. transformers/models/cohere2/modular_cohere2.py +48 -52
  243. transformers/models/cohere2_vision/configuration_cohere2_vision.py +5 -1
  244. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +9 -10
  245. transformers/models/cohere2_vision/modeling_cohere2_vision.py +52 -55
  246. transformers/models/cohere2_vision/modular_cohere2_vision.py +41 -43
  247. transformers/models/cohere2_vision/processing_cohere2_vision.py +6 -36
  248. transformers/models/colpali/configuration_colpali.py +0 -1
  249. transformers/models/colpali/modeling_colpali.py +14 -16
  250. transformers/models/colpali/modular_colpali.py +11 -51
  251. transformers/models/colpali/processing_colpali.py +14 -52
  252. transformers/models/colqwen2/modeling_colqwen2.py +27 -28
  253. transformers/models/colqwen2/modular_colqwen2.py +36 -74
  254. transformers/models/colqwen2/processing_colqwen2.py +16 -52
  255. transformers/models/conditional_detr/configuration_conditional_detr.py +19 -47
  256. transformers/models/conditional_detr/image_processing_conditional_detr.py +67 -70
  257. transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +50 -36
  258. transformers/models/conditional_detr/modeling_conditional_detr.py +851 -1001
  259. transformers/models/conditional_detr/modular_conditional_detr.py +901 -5
  260. transformers/models/convbert/configuration_convbert.py +11 -8
  261. transformers/models/convbert/modeling_convbert.py +85 -87
  262. transformers/models/convbert/tokenization_convbert.py +0 -1
  263. transformers/models/convnext/configuration_convnext.py +2 -5
  264. transformers/models/convnext/image_processing_convnext.py +18 -21
  265. transformers/models/convnext/image_processing_convnext_fast.py +7 -8
  266. transformers/models/convnext/modeling_convnext.py +12 -14
  267. transformers/models/convnextv2/configuration_convnextv2.py +2 -5
  268. transformers/models/convnextv2/modeling_convnextv2.py +12 -14
  269. transformers/models/cpm/tokenization_cpm.py +6 -7
  270. transformers/models/cpm/tokenization_cpm_fast.py +3 -5
  271. transformers/models/cpmant/configuration_cpmant.py +4 -1
  272. transformers/models/cpmant/modeling_cpmant.py +38 -40
  273. transformers/models/cpmant/tokenization_cpmant.py +1 -3
  274. transformers/models/csm/configuration_csm.py +58 -66
  275. transformers/models/csm/generation_csm.py +13 -14
  276. transformers/models/csm/modeling_csm.py +81 -84
  277. transformers/models/csm/modular_csm.py +56 -58
  278. transformers/models/csm/processing_csm.py +25 -68
  279. transformers/models/ctrl/configuration_ctrl.py +16 -1
  280. transformers/models/ctrl/modeling_ctrl.py +51 -66
  281. transformers/models/ctrl/tokenization_ctrl.py +0 -1
  282. transformers/models/cvt/configuration_cvt.py +0 -1
  283. transformers/models/cvt/modeling_cvt.py +13 -15
  284. transformers/models/cwm/__init__.py +0 -1
  285. transformers/models/cwm/configuration_cwm.py +8 -12
  286. transformers/models/cwm/modeling_cwm.py +36 -38
  287. transformers/models/cwm/modular_cwm.py +10 -12
  288. transformers/models/d_fine/configuration_d_fine.py +10 -57
  289. transformers/models/d_fine/modeling_d_fine.py +786 -927
  290. transformers/models/d_fine/modular_d_fine.py +339 -417
  291. transformers/models/dab_detr/configuration_dab_detr.py +22 -49
  292. transformers/models/dab_detr/modeling_dab_detr.py +79 -77
  293. transformers/models/dac/configuration_dac.py +0 -1
  294. transformers/models/dac/feature_extraction_dac.py +6 -9
  295. transformers/models/dac/modeling_dac.py +22 -24
  296. transformers/models/data2vec/configuration_data2vec_audio.py +4 -2
  297. transformers/models/data2vec/configuration_data2vec_text.py +11 -3
  298. transformers/models/data2vec/configuration_data2vec_vision.py +0 -1
  299. transformers/models/data2vec/modeling_data2vec_audio.py +55 -59
  300. transformers/models/data2vec/modeling_data2vec_text.py +97 -99
  301. transformers/models/data2vec/modeling_data2vec_vision.py +45 -44
  302. transformers/models/data2vec/modular_data2vec_audio.py +6 -1
  303. transformers/models/data2vec/modular_data2vec_text.py +51 -54
  304. transformers/models/dbrx/configuration_dbrx.py +29 -22
  305. transformers/models/dbrx/modeling_dbrx.py +45 -48
  306. transformers/models/dbrx/modular_dbrx.py +37 -39
  307. transformers/models/deberta/configuration_deberta.py +6 -1
  308. transformers/models/deberta/modeling_deberta.py +57 -60
  309. transformers/models/deberta/tokenization_deberta.py +2 -5
  310. transformers/models/deberta_v2/configuration_deberta_v2.py +6 -1
  311. transformers/models/deberta_v2/modeling_deberta_v2.py +63 -65
  312. transformers/models/deberta_v2/tokenization_deberta_v2.py +1 -4
  313. transformers/models/decision_transformer/configuration_decision_transformer.py +3 -2
  314. transformers/models/decision_transformer/modeling_decision_transformer.py +51 -53
  315. transformers/models/deepseek_v2/configuration_deepseek_v2.py +41 -47
  316. transformers/models/deepseek_v2/modeling_deepseek_v2.py +39 -41
  317. transformers/models/deepseek_v2/modular_deepseek_v2.py +48 -52
  318. transformers/models/deepseek_v3/configuration_deepseek_v3.py +42 -48
  319. transformers/models/deepseek_v3/modeling_deepseek_v3.py +38 -40
  320. transformers/models/deepseek_v3/modular_deepseek_v3.py +10 -10
  321. transformers/models/deepseek_vl/configuration_deepseek_vl.py +6 -3
  322. transformers/models/deepseek_vl/image_processing_deepseek_vl.py +27 -28
  323. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +12 -11
  324. transformers/models/deepseek_vl/modeling_deepseek_vl.py +48 -43
  325. transformers/models/deepseek_vl/modular_deepseek_vl.py +15 -43
  326. transformers/models/deepseek_vl/processing_deepseek_vl.py +10 -41
  327. transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +7 -5
  328. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +37 -37
  329. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +22 -22
  330. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +100 -56
  331. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +141 -109
  332. transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +12 -44
  333. transformers/models/deformable_detr/configuration_deformable_detr.py +22 -46
  334. transformers/models/deformable_detr/image_processing_deformable_detr.py +59 -61
  335. transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +42 -28
  336. transformers/models/deformable_detr/modeling_deformable_detr.py +454 -652
  337. transformers/models/deformable_detr/modular_deformable_detr.py +1385 -5
  338. transformers/models/deit/configuration_deit.py +0 -1
  339. transformers/models/deit/image_processing_deit.py +18 -21
  340. transformers/models/deit/image_processing_deit_fast.py +0 -1
  341. transformers/models/deit/modeling_deit.py +27 -25
  342. transformers/models/depth_anything/configuration_depth_anything.py +12 -43
  343. transformers/models/depth_anything/modeling_depth_anything.py +10 -11
  344. transformers/models/depth_pro/configuration_depth_pro.py +0 -1
  345. transformers/models/depth_pro/image_processing_depth_pro.py +22 -23
  346. transformers/models/depth_pro/image_processing_depth_pro_fast.py +8 -9
  347. transformers/models/depth_pro/modeling_depth_pro.py +29 -27
  348. transformers/models/detr/configuration_detr.py +18 -50
  349. transformers/models/detr/image_processing_detr.py +64 -66
  350. transformers/models/detr/image_processing_detr_fast.py +33 -34
  351. transformers/models/detr/modeling_detr.py +748 -789
  352. transformers/models/dia/configuration_dia.py +9 -15
  353. transformers/models/dia/feature_extraction_dia.py +6 -9
  354. transformers/models/dia/generation_dia.py +48 -53
  355. transformers/models/dia/modeling_dia.py +68 -71
  356. transformers/models/dia/modular_dia.py +56 -58
  357. transformers/models/dia/processing_dia.py +39 -29
  358. transformers/models/dia/tokenization_dia.py +3 -6
  359. transformers/models/diffllama/configuration_diffllama.py +25 -30
  360. transformers/models/diffllama/modeling_diffllama.py +45 -53
  361. transformers/models/diffllama/modular_diffllama.py +18 -25
  362. transformers/models/dinat/configuration_dinat.py +2 -5
  363. transformers/models/dinat/modeling_dinat.py +47 -48
  364. transformers/models/dinov2/configuration_dinov2.py +2 -5
  365. transformers/models/dinov2/modeling_dinov2.py +20 -21
  366. transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +3 -5
  367. transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +21 -21
  368. transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +11 -14
  369. transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +6 -11
  370. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +5 -9
  371. transformers/models/dinov3_vit/configuration_dinov3_vit.py +7 -12
  372. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +7 -8
  373. transformers/models/dinov3_vit/modeling_dinov3_vit.py +19 -22
  374. transformers/models/dinov3_vit/modular_dinov3_vit.py +16 -19
  375. transformers/models/distilbert/configuration_distilbert.py +8 -2
  376. transformers/models/distilbert/modeling_distilbert.py +47 -49
  377. transformers/models/distilbert/tokenization_distilbert.py +0 -1
  378. transformers/models/doge/__init__.py +0 -1
  379. transformers/models/doge/configuration_doge.py +42 -35
  380. transformers/models/doge/modeling_doge.py +46 -49
  381. transformers/models/doge/modular_doge.py +77 -68
  382. transformers/models/donut/configuration_donut_swin.py +0 -1
  383. transformers/models/donut/image_processing_donut.py +26 -29
  384. transformers/models/donut/image_processing_donut_fast.py +9 -14
  385. transformers/models/donut/modeling_donut_swin.py +44 -46
  386. transformers/models/donut/processing_donut.py +5 -26
  387. transformers/models/dots1/configuration_dots1.py +43 -36
  388. transformers/models/dots1/modeling_dots1.py +35 -38
  389. transformers/models/dots1/modular_dots1.py +0 -1
  390. transformers/models/dpr/configuration_dpr.py +19 -2
  391. transformers/models/dpr/modeling_dpr.py +37 -39
  392. transformers/models/dpr/tokenization_dpr.py +7 -9
  393. transformers/models/dpr/tokenization_dpr_fast.py +7 -9
  394. transformers/models/dpt/configuration_dpt.py +23 -66
  395. transformers/models/dpt/image_processing_dpt.py +65 -66
  396. transformers/models/dpt/image_processing_dpt_fast.py +18 -19
  397. transformers/models/dpt/modeling_dpt.py +38 -36
  398. transformers/models/dpt/modular_dpt.py +14 -15
  399. transformers/models/edgetam/configuration_edgetam.py +1 -2
  400. transformers/models/edgetam/modeling_edgetam.py +87 -89
  401. transformers/models/edgetam/modular_edgetam.py +7 -13
  402. transformers/models/edgetam_video/__init__.py +0 -1
  403. transformers/models/edgetam_video/configuration_edgetam_video.py +0 -1
  404. transformers/models/edgetam_video/modeling_edgetam_video.py +126 -128
  405. transformers/models/edgetam_video/modular_edgetam_video.py +25 -27
  406. transformers/models/efficientloftr/configuration_efficientloftr.py +4 -5
  407. transformers/models/efficientloftr/image_processing_efficientloftr.py +14 -16
  408. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +8 -7
  409. transformers/models/efficientloftr/modeling_efficientloftr.py +46 -38
  410. transformers/models/efficientloftr/modular_efficientloftr.py +1 -3
  411. transformers/models/efficientnet/configuration_efficientnet.py +0 -1
  412. transformers/models/efficientnet/image_processing_efficientnet.py +23 -26
  413. transformers/models/efficientnet/image_processing_efficientnet_fast.py +16 -17
  414. transformers/models/efficientnet/modeling_efficientnet.py +12 -14
  415. transformers/models/electra/configuration_electra.py +13 -3
  416. transformers/models/electra/modeling_electra.py +107 -109
  417. transformers/models/emu3/configuration_emu3.py +17 -17
  418. transformers/models/emu3/image_processing_emu3.py +44 -39
  419. transformers/models/emu3/modeling_emu3.py +143 -109
  420. transformers/models/emu3/modular_emu3.py +109 -73
  421. transformers/models/emu3/processing_emu3.py +18 -43
  422. transformers/models/encodec/configuration_encodec.py +2 -4
  423. transformers/models/encodec/feature_extraction_encodec.py +10 -13
  424. transformers/models/encodec/modeling_encodec.py +25 -29
  425. transformers/models/encoder_decoder/configuration_encoder_decoder.py +12 -2
  426. transformers/models/encoder_decoder/modeling_encoder_decoder.py +37 -43
  427. transformers/models/eomt/configuration_eomt.py +12 -14
  428. transformers/models/eomt/image_processing_eomt.py +53 -55
  429. transformers/models/eomt/image_processing_eomt_fast.py +18 -19
  430. transformers/models/eomt/modeling_eomt.py +19 -21
  431. transformers/models/eomt/modular_eomt.py +28 -30
  432. transformers/models/eomt_dinov3/__init__.py +28 -0
  433. transformers/models/eomt_dinov3/configuration_eomt_dinov3.py +204 -0
  434. transformers/models/eomt_dinov3/modeling_eomt_dinov3.py +1376 -0
  435. transformers/models/eomt_dinov3/modular_eomt_dinov3.py +454 -0
  436. transformers/models/ernie/configuration_ernie.py +24 -3
  437. transformers/models/ernie/modeling_ernie.py +127 -162
  438. transformers/models/ernie/modular_ernie.py +91 -103
  439. transformers/models/ernie4_5/configuration_ernie4_5.py +23 -27
  440. transformers/models/ernie4_5/modeling_ernie4_5.py +35 -37
  441. transformers/models/ernie4_5/modular_ernie4_5.py +1 -3
  442. transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +34 -39
  443. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +40 -42
  444. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +7 -9
  445. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +17 -7
  446. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +34 -35
  447. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +6 -7
  448. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +305 -267
  449. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +163 -142
  450. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +3 -5
  451. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +17 -18
  452. transformers/models/esm/configuration_esm.py +11 -15
  453. transformers/models/esm/modeling_esm.py +35 -37
  454. transformers/models/esm/modeling_esmfold.py +43 -50
  455. transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
  456. transformers/models/esm/openfold_utils/loss.py +1 -2
  457. transformers/models/esm/openfold_utils/protein.py +15 -16
  458. transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
  459. transformers/models/esm/tokenization_esm.py +2 -4
  460. transformers/models/evolla/configuration_evolla.py +50 -40
  461. transformers/models/evolla/modeling_evolla.py +69 -68
  462. transformers/models/evolla/modular_evolla.py +50 -48
  463. transformers/models/evolla/processing_evolla.py +23 -35
  464. transformers/models/exaone4/configuration_exaone4.py +27 -27
  465. transformers/models/exaone4/modeling_exaone4.py +36 -39
  466. transformers/models/exaone4/modular_exaone4.py +51 -50
  467. transformers/models/exaone_moe/__init__.py +27 -0
  468. transformers/models/exaone_moe/configuration_exaone_moe.py +235 -0
  469. transformers/models/exaone_moe/modeling_exaone_moe.py +665 -0
  470. transformers/models/exaone_moe/modular_exaone_moe.py +373 -0
  471. transformers/models/falcon/configuration_falcon.py +31 -26
  472. transformers/models/falcon/modeling_falcon.py +76 -84
  473. transformers/models/falcon_h1/configuration_falcon_h1.py +57 -51
  474. transformers/models/falcon_h1/modeling_falcon_h1.py +74 -109
  475. transformers/models/falcon_h1/modular_falcon_h1.py +68 -100
  476. transformers/models/falcon_mamba/configuration_falcon_mamba.py +5 -2
  477. transformers/models/falcon_mamba/modeling_falcon_mamba.py +64 -73
  478. transformers/models/falcon_mamba/modular_falcon_mamba.py +14 -13
  479. transformers/models/fast_vlm/configuration_fast_vlm.py +10 -0
  480. transformers/models/fast_vlm/modeling_fast_vlm.py +70 -97
  481. transformers/models/fast_vlm/modular_fast_vlm.py +148 -38
  482. transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +2 -6
  483. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +45 -47
  484. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -3
  485. transformers/models/flaubert/configuration_flaubert.py +10 -5
  486. transformers/models/flaubert/modeling_flaubert.py +125 -129
  487. transformers/models/flaubert/tokenization_flaubert.py +3 -5
  488. transformers/models/flava/configuration_flava.py +9 -9
  489. transformers/models/flava/image_processing_flava.py +66 -67
  490. transformers/models/flava/image_processing_flava_fast.py +46 -47
  491. transformers/models/flava/modeling_flava.py +144 -135
  492. transformers/models/flava/processing_flava.py +2 -12
  493. transformers/models/flex_olmo/__init__.py +0 -1
  494. transformers/models/flex_olmo/configuration_flex_olmo.py +34 -39
  495. transformers/models/flex_olmo/modeling_flex_olmo.py +41 -43
  496. transformers/models/flex_olmo/modular_flex_olmo.py +46 -51
  497. transformers/models/florence2/configuration_florence2.py +4 -1
  498. transformers/models/florence2/modeling_florence2.py +96 -72
  499. transformers/models/florence2/modular_florence2.py +100 -107
  500. transformers/models/florence2/processing_florence2.py +18 -47
  501. transformers/models/fnet/configuration_fnet.py +6 -2
  502. transformers/models/fnet/modeling_fnet.py +69 -80
  503. transformers/models/fnet/tokenization_fnet.py +0 -1
  504. transformers/models/focalnet/configuration_focalnet.py +2 -5
  505. transformers/models/focalnet/modeling_focalnet.py +49 -48
  506. transformers/models/fsmt/configuration_fsmt.py +12 -17
  507. transformers/models/fsmt/modeling_fsmt.py +47 -48
  508. transformers/models/fsmt/tokenization_fsmt.py +3 -5
  509. transformers/models/funnel/configuration_funnel.py +8 -1
  510. transformers/models/funnel/modeling_funnel.py +91 -93
  511. transformers/models/funnel/tokenization_funnel.py +2 -5
  512. transformers/models/fuyu/configuration_fuyu.py +28 -34
  513. transformers/models/fuyu/image_processing_fuyu.py +29 -31
  514. transformers/models/fuyu/image_processing_fuyu_fast.py +17 -17
  515. transformers/models/fuyu/modeling_fuyu.py +50 -52
  516. transformers/models/fuyu/processing_fuyu.py +9 -36
  517. transformers/models/gemma/configuration_gemma.py +25 -30
  518. transformers/models/gemma/modeling_gemma.py +36 -38
  519. transformers/models/gemma/modular_gemma.py +33 -36
  520. transformers/models/gemma/tokenization_gemma.py +3 -6
  521. transformers/models/gemma2/configuration_gemma2.py +30 -35
  522. transformers/models/gemma2/modeling_gemma2.py +38 -41
  523. transformers/models/gemma2/modular_gemma2.py +63 -67
  524. transformers/models/gemma3/configuration_gemma3.py +53 -48
  525. transformers/models/gemma3/image_processing_gemma3.py +29 -31
  526. transformers/models/gemma3/image_processing_gemma3_fast.py +11 -12
  527. transformers/models/gemma3/modeling_gemma3.py +123 -122
  528. transformers/models/gemma3/modular_gemma3.py +128 -125
  529. transformers/models/gemma3/processing_gemma3.py +5 -5
  530. transformers/models/gemma3n/configuration_gemma3n.py +42 -30
  531. transformers/models/gemma3n/feature_extraction_gemma3n.py +9 -11
  532. transformers/models/gemma3n/modeling_gemma3n.py +166 -147
  533. transformers/models/gemma3n/modular_gemma3n.py +176 -148
  534. transformers/models/gemma3n/processing_gemma3n.py +12 -26
  535. transformers/models/git/configuration_git.py +5 -8
  536. transformers/models/git/modeling_git.py +115 -127
  537. transformers/models/git/processing_git.py +2 -14
  538. transformers/models/glm/configuration_glm.py +26 -30
  539. transformers/models/glm/modeling_glm.py +36 -39
  540. transformers/models/glm/modular_glm.py +4 -7
  541. transformers/models/glm4/configuration_glm4.py +26 -30
  542. transformers/models/glm4/modeling_glm4.py +39 -41
  543. transformers/models/glm4/modular_glm4.py +8 -10
  544. transformers/models/glm46v/configuration_glm46v.py +4 -1
  545. transformers/models/glm46v/image_processing_glm46v.py +40 -38
  546. transformers/models/glm46v/image_processing_glm46v_fast.py +9 -9
  547. transformers/models/glm46v/modeling_glm46v.py +138 -93
  548. transformers/models/glm46v/modular_glm46v.py +5 -3
  549. transformers/models/glm46v/processing_glm46v.py +7 -41
  550. transformers/models/glm46v/video_processing_glm46v.py +9 -11
  551. transformers/models/glm4_moe/configuration_glm4_moe.py +42 -35
  552. transformers/models/glm4_moe/modeling_glm4_moe.py +36 -39
  553. transformers/models/glm4_moe/modular_glm4_moe.py +43 -36
  554. transformers/models/glm4_moe_lite/__init__.py +28 -0
  555. transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +233 -0
  556. transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +740 -0
  557. transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +302 -0
  558. transformers/models/glm4v/configuration_glm4v.py +25 -24
  559. transformers/models/glm4v/image_processing_glm4v.py +39 -38
  560. transformers/models/glm4v/image_processing_glm4v_fast.py +8 -9
  561. transformers/models/glm4v/modeling_glm4v.py +249 -210
  562. transformers/models/glm4v/modular_glm4v.py +211 -230
  563. transformers/models/glm4v/processing_glm4v.py +7 -41
  564. transformers/models/glm4v/video_processing_glm4v.py +9 -11
  565. transformers/models/glm4v_moe/configuration_glm4v_moe.py +136 -127
  566. transformers/models/glm4v_moe/modeling_glm4v_moe.py +348 -356
  567. transformers/models/glm4v_moe/modular_glm4v_moe.py +76 -174
  568. transformers/models/glm_image/__init__.py +31 -0
  569. transformers/models/glm_image/configuration_glm_image.py +358 -0
  570. transformers/models/glm_image/image_processing_glm_image.py +503 -0
  571. transformers/models/glm_image/image_processing_glm_image_fast.py +294 -0
  572. transformers/models/glm_image/modeling_glm_image.py +1691 -0
  573. transformers/models/glm_image/modular_glm_image.py +1640 -0
  574. transformers/models/glm_image/processing_glm_image.py +265 -0
  575. transformers/models/glm_ocr/__init__.py +28 -0
  576. transformers/models/glm_ocr/configuration_glm_ocr.py +312 -0
  577. transformers/models/glm_ocr/modeling_glm_ocr.py +1633 -0
  578. transformers/models/glm_ocr/modular_glm_ocr.py +428 -0
  579. transformers/models/glmasr/__init__.py +0 -1
  580. transformers/models/glmasr/configuration_glmasr.py +0 -1
  581. transformers/models/glmasr/modeling_glmasr.py +51 -46
  582. transformers/models/glmasr/modular_glmasr.py +39 -29
  583. transformers/models/glmasr/processing_glmasr.py +7 -8
  584. transformers/models/glpn/configuration_glpn.py +0 -1
  585. transformers/models/glpn/image_processing_glpn.py +11 -12
  586. transformers/models/glpn/image_processing_glpn_fast.py +11 -12
  587. transformers/models/glpn/modeling_glpn.py +14 -14
  588. transformers/models/got_ocr2/configuration_got_ocr2.py +10 -13
  589. transformers/models/got_ocr2/image_processing_got_ocr2.py +22 -24
  590. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +9 -10
  591. transformers/models/got_ocr2/modeling_got_ocr2.py +69 -77
  592. transformers/models/got_ocr2/modular_got_ocr2.py +60 -52
  593. transformers/models/got_ocr2/processing_got_ocr2.py +42 -63
  594. transformers/models/gpt2/configuration_gpt2.py +13 -2
  595. transformers/models/gpt2/modeling_gpt2.py +111 -113
  596. transformers/models/gpt2/tokenization_gpt2.py +6 -9
  597. transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +7 -2
  598. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +78 -84
  599. transformers/models/gpt_neo/configuration_gpt_neo.py +9 -2
  600. transformers/models/gpt_neo/modeling_gpt_neo.py +66 -71
  601. transformers/models/gpt_neox/configuration_gpt_neox.py +27 -25
  602. transformers/models/gpt_neox/modeling_gpt_neox.py +74 -76
  603. transformers/models/gpt_neox/modular_gpt_neox.py +68 -70
  604. transformers/models/gpt_neox/tokenization_gpt_neox.py +2 -5
  605. transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +24 -19
  606. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +43 -46
  607. transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +1 -3
  608. transformers/models/gpt_oss/configuration_gpt_oss.py +31 -30
  609. transformers/models/gpt_oss/modeling_gpt_oss.py +80 -114
  610. transformers/models/gpt_oss/modular_gpt_oss.py +62 -97
  611. transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
  612. transformers/models/gptj/configuration_gptj.py +4 -5
  613. transformers/models/gptj/modeling_gptj.py +85 -88
  614. transformers/models/granite/configuration_granite.py +28 -33
  615. transformers/models/granite/modeling_granite.py +43 -45
  616. transformers/models/granite/modular_granite.py +29 -31
  617. transformers/models/granite_speech/configuration_granite_speech.py +0 -1
  618. transformers/models/granite_speech/feature_extraction_granite_speech.py +1 -3
  619. transformers/models/granite_speech/modeling_granite_speech.py +84 -60
  620. transformers/models/granite_speech/processing_granite_speech.py +11 -4
  621. transformers/models/granitemoe/configuration_granitemoe.py +31 -36
  622. transformers/models/granitemoe/modeling_granitemoe.py +39 -41
  623. transformers/models/granitemoe/modular_granitemoe.py +21 -23
  624. transformers/models/granitemoehybrid/__init__.py +0 -1
  625. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +55 -48
  626. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +82 -118
  627. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +57 -65
  628. transformers/models/granitemoeshared/configuration_granitemoeshared.py +33 -37
  629. transformers/models/granitemoeshared/modeling_granitemoeshared.py +52 -56
  630. transformers/models/granitemoeshared/modular_granitemoeshared.py +19 -21
  631. transformers/models/grounding_dino/configuration_grounding_dino.py +10 -46
  632. transformers/models/grounding_dino/image_processing_grounding_dino.py +60 -62
  633. transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +28 -29
  634. transformers/models/grounding_dino/modeling_grounding_dino.py +161 -181
  635. transformers/models/grounding_dino/modular_grounding_dino.py +2 -3
  636. transformers/models/grounding_dino/processing_grounding_dino.py +10 -38
  637. transformers/models/groupvit/configuration_groupvit.py +4 -2
  638. transformers/models/groupvit/modeling_groupvit.py +98 -92
  639. transformers/models/helium/configuration_helium.py +25 -29
  640. transformers/models/helium/modeling_helium.py +37 -40
  641. transformers/models/helium/modular_helium.py +3 -7
  642. transformers/models/herbert/tokenization_herbert.py +4 -6
  643. transformers/models/hgnet_v2/configuration_hgnet_v2.py +2 -5
  644. transformers/models/hgnet_v2/modeling_hgnet_v2.py +12 -14
  645. transformers/models/hgnet_v2/modular_hgnet_v2.py +13 -17
  646. transformers/models/hiera/configuration_hiera.py +2 -5
  647. transformers/models/hiera/modeling_hiera.py +71 -70
  648. transformers/models/hubert/configuration_hubert.py +4 -2
  649. transformers/models/hubert/modeling_hubert.py +42 -41
  650. transformers/models/hubert/modular_hubert.py +8 -11
  651. transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +26 -31
  652. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +58 -37
  653. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +31 -11
  654. transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +31 -36
  655. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +54 -44
  656. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +27 -15
  657. transformers/models/ibert/configuration_ibert.py +4 -2
  658. transformers/models/ibert/modeling_ibert.py +60 -62
  659. transformers/models/ibert/quant_modules.py +0 -1
  660. transformers/models/idefics/configuration_idefics.py +5 -8
  661. transformers/models/idefics/image_processing_idefics.py +13 -15
  662. transformers/models/idefics/modeling_idefics.py +63 -65
  663. transformers/models/idefics/perceiver.py +1 -3
  664. transformers/models/idefics/processing_idefics.py +32 -48
  665. transformers/models/idefics/vision.py +27 -28
  666. transformers/models/idefics2/configuration_idefics2.py +1 -3
  667. transformers/models/idefics2/image_processing_idefics2.py +31 -32
  668. transformers/models/idefics2/image_processing_idefics2_fast.py +8 -8
  669. transformers/models/idefics2/modeling_idefics2.py +126 -106
  670. transformers/models/idefics2/processing_idefics2.py +10 -68
  671. transformers/models/idefics3/configuration_idefics3.py +1 -4
  672. transformers/models/idefics3/image_processing_idefics3.py +42 -43
  673. transformers/models/idefics3/image_processing_idefics3_fast.py +40 -15
  674. transformers/models/idefics3/modeling_idefics3.py +113 -92
  675. transformers/models/idefics3/processing_idefics3.py +15 -69
  676. transformers/models/ijepa/configuration_ijepa.py +0 -1
  677. transformers/models/ijepa/modeling_ijepa.py +13 -14
  678. transformers/models/ijepa/modular_ijepa.py +5 -7
  679. transformers/models/imagegpt/configuration_imagegpt.py +9 -2
  680. transformers/models/imagegpt/image_processing_imagegpt.py +17 -18
  681. transformers/models/imagegpt/image_processing_imagegpt_fast.py +10 -11
  682. transformers/models/imagegpt/modeling_imagegpt.py +65 -62
  683. transformers/models/informer/configuration_informer.py +6 -9
  684. transformers/models/informer/modeling_informer.py +87 -89
  685. transformers/models/informer/modular_informer.py +13 -16
  686. transformers/models/instructblip/configuration_instructblip.py +2 -2
  687. transformers/models/instructblip/modeling_instructblip.py +104 -79
  688. transformers/models/instructblip/processing_instructblip.py +10 -36
  689. transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -2
  690. transformers/models/instructblipvideo/modeling_instructblipvideo.py +108 -105
  691. transformers/models/instructblipvideo/modular_instructblipvideo.py +73 -64
  692. transformers/models/instructblipvideo/processing_instructblipvideo.py +14 -33
  693. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +6 -7
  694. transformers/models/internvl/configuration_internvl.py +5 -1
  695. transformers/models/internvl/modeling_internvl.py +76 -98
  696. transformers/models/internvl/modular_internvl.py +45 -59
  697. transformers/models/internvl/processing_internvl.py +12 -45
  698. transformers/models/internvl/video_processing_internvl.py +10 -11
  699. transformers/models/jais2/configuration_jais2.py +25 -29
  700. transformers/models/jais2/modeling_jais2.py +36 -38
  701. transformers/models/jais2/modular_jais2.py +20 -22
  702. transformers/models/jamba/configuration_jamba.py +5 -8
  703. transformers/models/jamba/modeling_jamba.py +47 -50
  704. transformers/models/jamba/modular_jamba.py +40 -41
  705. transformers/models/janus/configuration_janus.py +0 -1
  706. transformers/models/janus/image_processing_janus.py +37 -39
  707. transformers/models/janus/image_processing_janus_fast.py +20 -21
  708. transformers/models/janus/modeling_janus.py +103 -188
  709. transformers/models/janus/modular_janus.py +122 -83
  710. transformers/models/janus/processing_janus.py +17 -43
  711. transformers/models/jetmoe/configuration_jetmoe.py +26 -27
  712. transformers/models/jetmoe/modeling_jetmoe.py +42 -45
  713. transformers/models/jetmoe/modular_jetmoe.py +33 -36
  714. transformers/models/kosmos2/configuration_kosmos2.py +10 -9
  715. transformers/models/kosmos2/modeling_kosmos2.py +199 -178
  716. transformers/models/kosmos2/processing_kosmos2.py +40 -55
  717. transformers/models/kosmos2_5/__init__.py +0 -1
  718. transformers/models/kosmos2_5/configuration_kosmos2_5.py +8 -9
  719. transformers/models/kosmos2_5/image_processing_kosmos2_5.py +10 -12
  720. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -11
  721. transformers/models/kosmos2_5/modeling_kosmos2_5.py +162 -172
  722. transformers/models/kosmos2_5/processing_kosmos2_5.py +8 -29
  723. transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +31 -28
  724. transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +12 -14
  725. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +103 -106
  726. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +20 -22
  727. transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +2 -8
  728. transformers/models/lasr/configuration_lasr.py +3 -7
  729. transformers/models/lasr/feature_extraction_lasr.py +10 -12
  730. transformers/models/lasr/modeling_lasr.py +21 -24
  731. transformers/models/lasr/modular_lasr.py +11 -13
  732. transformers/models/lasr/processing_lasr.py +12 -6
  733. transformers/models/lasr/tokenization_lasr.py +2 -4
  734. transformers/models/layoutlm/configuration_layoutlm.py +14 -2
  735. transformers/models/layoutlm/modeling_layoutlm.py +70 -72
  736. transformers/models/layoutlmv2/configuration_layoutlmv2.py +14 -17
  737. transformers/models/layoutlmv2/image_processing_layoutlmv2.py +18 -21
  738. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +7 -8
  739. transformers/models/layoutlmv2/modeling_layoutlmv2.py +48 -50
  740. transformers/models/layoutlmv2/processing_layoutlmv2.py +14 -44
  741. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +63 -74
  742. transformers/models/layoutlmv3/configuration_layoutlmv3.py +16 -19
  743. transformers/models/layoutlmv3/image_processing_layoutlmv3.py +24 -26
  744. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +9 -10
  745. transformers/models/layoutlmv3/modeling_layoutlmv3.py +49 -51
  746. transformers/models/layoutlmv3/processing_layoutlmv3.py +14 -46
  747. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +64 -75
  748. transformers/models/layoutxlm/configuration_layoutxlm.py +14 -17
  749. transformers/models/layoutxlm/modular_layoutxlm.py +0 -1
  750. transformers/models/layoutxlm/processing_layoutxlm.py +14 -44
  751. transformers/models/layoutxlm/tokenization_layoutxlm.py +65 -76
  752. transformers/models/led/configuration_led.py +8 -12
  753. transformers/models/led/modeling_led.py +113 -267
  754. transformers/models/levit/configuration_levit.py +0 -1
  755. transformers/models/levit/image_processing_levit.py +19 -21
  756. transformers/models/levit/image_processing_levit_fast.py +4 -5
  757. transformers/models/levit/modeling_levit.py +17 -19
  758. transformers/models/lfm2/configuration_lfm2.py +27 -30
  759. transformers/models/lfm2/modeling_lfm2.py +46 -48
  760. transformers/models/lfm2/modular_lfm2.py +32 -32
  761. transformers/models/lfm2_moe/__init__.py +0 -1
  762. transformers/models/lfm2_moe/configuration_lfm2_moe.py +6 -9
  763. transformers/models/lfm2_moe/modeling_lfm2_moe.py +48 -49
  764. transformers/models/lfm2_moe/modular_lfm2_moe.py +8 -9
  765. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -1
  766. transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +43 -20
  767. transformers/models/lfm2_vl/modeling_lfm2_vl.py +73 -61
  768. transformers/models/lfm2_vl/modular_lfm2_vl.py +66 -54
  769. transformers/models/lfm2_vl/processing_lfm2_vl.py +14 -34
  770. transformers/models/lightglue/image_processing_lightglue.py +16 -15
  771. transformers/models/lightglue/image_processing_lightglue_fast.py +8 -7
  772. transformers/models/lightglue/modeling_lightglue.py +31 -33
  773. transformers/models/lightglue/modular_lightglue.py +31 -31
  774. transformers/models/lighton_ocr/__init__.py +28 -0
  775. transformers/models/lighton_ocr/configuration_lighton_ocr.py +128 -0
  776. transformers/models/lighton_ocr/modeling_lighton_ocr.py +463 -0
  777. transformers/models/lighton_ocr/modular_lighton_ocr.py +404 -0
  778. transformers/models/lighton_ocr/processing_lighton_ocr.py +229 -0
  779. transformers/models/lilt/configuration_lilt.py +6 -2
  780. transformers/models/lilt/modeling_lilt.py +53 -55
  781. transformers/models/llama/configuration_llama.py +26 -31
  782. transformers/models/llama/modeling_llama.py +35 -38
  783. transformers/models/llama/tokenization_llama.py +2 -4
  784. transformers/models/llama4/configuration_llama4.py +87 -69
  785. transformers/models/llama4/image_processing_llama4_fast.py +11 -12
  786. transformers/models/llama4/modeling_llama4.py +116 -115
  787. transformers/models/llama4/processing_llama4.py +33 -57
  788. transformers/models/llava/configuration_llava.py +10 -1
  789. transformers/models/llava/image_processing_llava.py +25 -28
  790. transformers/models/llava/image_processing_llava_fast.py +9 -10
  791. transformers/models/llava/modeling_llava.py +73 -102
  792. transformers/models/llava/processing_llava.py +18 -51
  793. transformers/models/llava_next/configuration_llava_next.py +2 -2
  794. transformers/models/llava_next/image_processing_llava_next.py +43 -45
  795. transformers/models/llava_next/image_processing_llava_next_fast.py +11 -12
  796. transformers/models/llava_next/modeling_llava_next.py +103 -104
  797. transformers/models/llava_next/processing_llava_next.py +18 -47
  798. transformers/models/llava_next_video/configuration_llava_next_video.py +10 -7
  799. transformers/models/llava_next_video/modeling_llava_next_video.py +168 -155
  800. transformers/models/llava_next_video/modular_llava_next_video.py +154 -147
  801. transformers/models/llava_next_video/processing_llava_next_video.py +21 -63
  802. transformers/models/llava_next_video/video_processing_llava_next_video.py +0 -1
  803. transformers/models/llava_onevision/configuration_llava_onevision.py +10 -7
  804. transformers/models/llava_onevision/image_processing_llava_onevision.py +40 -42
  805. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +14 -14
  806. transformers/models/llava_onevision/modeling_llava_onevision.py +170 -166
  807. transformers/models/llava_onevision/modular_llava_onevision.py +156 -152
  808. transformers/models/llava_onevision/processing_llava_onevision.py +21 -53
  809. transformers/models/llava_onevision/video_processing_llava_onevision.py +0 -1
  810. transformers/models/longcat_flash/__init__.py +0 -1
  811. transformers/models/longcat_flash/configuration_longcat_flash.py +39 -45
  812. transformers/models/longcat_flash/modeling_longcat_flash.py +37 -38
  813. transformers/models/longcat_flash/modular_longcat_flash.py +23 -24
  814. transformers/models/longformer/configuration_longformer.py +5 -5
  815. transformers/models/longformer/modeling_longformer.py +99 -101
  816. transformers/models/longt5/configuration_longt5.py +9 -7
  817. transformers/models/longt5/modeling_longt5.py +45 -45
  818. transformers/models/luke/configuration_luke.py +8 -2
  819. transformers/models/luke/modeling_luke.py +179 -181
  820. transformers/models/luke/tokenization_luke.py +99 -105
  821. transformers/{pipelines/deprecated → models/lw_detr}/__init__.py +14 -3
  822. transformers/models/lw_detr/configuration_lw_detr.py +362 -0
  823. transformers/models/lw_detr/modeling_lw_detr.py +1697 -0
  824. transformers/models/lw_detr/modular_lw_detr.py +1609 -0
  825. transformers/models/lxmert/configuration_lxmert.py +16 -1
  826. transformers/models/lxmert/modeling_lxmert.py +63 -74
  827. transformers/models/m2m_100/configuration_m2m_100.py +7 -9
  828. transformers/models/m2m_100/modeling_m2m_100.py +72 -74
  829. transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
  830. transformers/models/mamba/configuration_mamba.py +5 -3
  831. transformers/models/mamba/modeling_mamba.py +61 -70
  832. transformers/models/mamba2/configuration_mamba2.py +5 -8
  833. transformers/models/mamba2/modeling_mamba2.py +66 -79
  834. transformers/models/marian/configuration_marian.py +10 -5
  835. transformers/models/marian/modeling_marian.py +88 -90
  836. transformers/models/marian/tokenization_marian.py +6 -6
  837. transformers/models/markuplm/configuration_markuplm.py +4 -7
  838. transformers/models/markuplm/feature_extraction_markuplm.py +1 -2
  839. transformers/models/markuplm/modeling_markuplm.py +63 -65
  840. transformers/models/markuplm/processing_markuplm.py +31 -38
  841. transformers/models/markuplm/tokenization_markuplm.py +67 -77
  842. transformers/models/mask2former/configuration_mask2former.py +14 -52
  843. transformers/models/mask2former/image_processing_mask2former.py +84 -85
  844. transformers/models/mask2former/image_processing_mask2former_fast.py +36 -36
  845. transformers/models/mask2former/modeling_mask2former.py +108 -104
  846. transformers/models/mask2former/modular_mask2former.py +6 -8
  847. transformers/models/maskformer/configuration_maskformer.py +17 -51
  848. transformers/models/maskformer/configuration_maskformer_swin.py +2 -5
  849. transformers/models/maskformer/image_processing_maskformer.py +84 -85
  850. transformers/models/maskformer/image_processing_maskformer_fast.py +35 -36
  851. transformers/models/maskformer/modeling_maskformer.py +71 -67
  852. transformers/models/maskformer/modeling_maskformer_swin.py +20 -23
  853. transformers/models/mbart/configuration_mbart.py +9 -5
  854. transformers/models/mbart/modeling_mbart.py +120 -119
  855. transformers/models/mbart/tokenization_mbart.py +2 -4
  856. transformers/models/mbart50/tokenization_mbart50.py +3 -5
  857. transformers/models/megatron_bert/configuration_megatron_bert.py +13 -3
  858. transformers/models/megatron_bert/modeling_megatron_bert.py +139 -165
  859. transformers/models/metaclip_2/configuration_metaclip_2.py +4 -1
  860. transformers/models/metaclip_2/modeling_metaclip_2.py +94 -87
  861. transformers/models/metaclip_2/modular_metaclip_2.py +59 -45
  862. transformers/models/mgp_str/configuration_mgp_str.py +0 -1
  863. transformers/models/mgp_str/modeling_mgp_str.py +18 -18
  864. transformers/models/mgp_str/processing_mgp_str.py +3 -20
  865. transformers/models/mgp_str/tokenization_mgp_str.py +1 -3
  866. transformers/models/mimi/configuration_mimi.py +42 -40
  867. transformers/models/mimi/modeling_mimi.py +116 -115
  868. transformers/models/minimax/__init__.py +0 -1
  869. transformers/models/minimax/configuration_minimax.py +40 -47
  870. transformers/models/minimax/modeling_minimax.py +46 -49
  871. transformers/models/minimax/modular_minimax.py +59 -65
  872. transformers/models/minimax_m2/__init__.py +28 -0
  873. transformers/models/minimax_m2/configuration_minimax_m2.py +188 -0
  874. transformers/models/minimax_m2/modeling_minimax_m2.py +704 -0
  875. transformers/models/minimax_m2/modular_minimax_m2.py +346 -0
  876. transformers/models/ministral/configuration_ministral.py +25 -29
  877. transformers/models/ministral/modeling_ministral.py +35 -37
  878. transformers/models/ministral/modular_ministral.py +32 -37
  879. transformers/models/ministral3/configuration_ministral3.py +23 -26
  880. transformers/models/ministral3/modeling_ministral3.py +35 -37
  881. transformers/models/ministral3/modular_ministral3.py +7 -8
  882. transformers/models/mistral/configuration_mistral.py +24 -29
  883. transformers/models/mistral/modeling_mistral.py +35 -37
  884. transformers/models/mistral/modular_mistral.py +14 -15
  885. transformers/models/mistral3/configuration_mistral3.py +4 -1
  886. transformers/models/mistral3/modeling_mistral3.py +79 -82
  887. transformers/models/mistral3/modular_mistral3.py +66 -67
  888. transformers/models/mixtral/configuration_mixtral.py +32 -38
  889. transformers/models/mixtral/modeling_mixtral.py +39 -42
  890. transformers/models/mixtral/modular_mixtral.py +26 -29
  891. transformers/models/mlcd/configuration_mlcd.py +0 -1
  892. transformers/models/mlcd/modeling_mlcd.py +17 -17
  893. transformers/models/mlcd/modular_mlcd.py +16 -16
  894. transformers/models/mllama/configuration_mllama.py +10 -15
  895. transformers/models/mllama/image_processing_mllama.py +23 -25
  896. transformers/models/mllama/image_processing_mllama_fast.py +11 -11
  897. transformers/models/mllama/modeling_mllama.py +100 -103
  898. transformers/models/mllama/processing_mllama.py +6 -55
  899. transformers/models/mluke/tokenization_mluke.py +97 -103
  900. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +10 -46
  901. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +159 -179
  902. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +10 -46
  903. transformers/models/mobilebert/configuration_mobilebert.py +4 -2
  904. transformers/models/mobilebert/modeling_mobilebert.py +78 -88
  905. transformers/models/mobilebert/tokenization_mobilebert.py +0 -1
  906. transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +0 -1
  907. transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +20 -23
  908. transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +0 -1
  909. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +13 -16
  910. transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +0 -1
  911. transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +48 -51
  912. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +14 -15
  913. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +21 -22
  914. transformers/models/mobilevit/configuration_mobilevit.py +0 -1
  915. transformers/models/mobilevit/image_processing_mobilevit.py +41 -44
  916. transformers/models/mobilevit/image_processing_mobilevit_fast.py +12 -13
  917. transformers/models/mobilevit/modeling_mobilevit.py +21 -21
  918. transformers/models/mobilevitv2/configuration_mobilevitv2.py +0 -1
  919. transformers/models/mobilevitv2/modeling_mobilevitv2.py +21 -22
  920. transformers/models/modernbert/configuration_modernbert.py +76 -51
  921. transformers/models/modernbert/modeling_modernbert.py +188 -943
  922. transformers/models/modernbert/modular_modernbert.py +255 -978
  923. transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +50 -44
  924. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +54 -64
  925. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +92 -92
  926. transformers/models/moonshine/configuration_moonshine.py +34 -31
  927. transformers/models/moonshine/modeling_moonshine.py +70 -72
  928. transformers/models/moonshine/modular_moonshine.py +91 -86
  929. transformers/models/moshi/configuration_moshi.py +46 -23
  930. transformers/models/moshi/modeling_moshi.py +134 -142
  931. transformers/models/mpnet/configuration_mpnet.py +6 -2
  932. transformers/models/mpnet/modeling_mpnet.py +55 -57
  933. transformers/models/mpnet/tokenization_mpnet.py +1 -4
  934. transformers/models/mpt/configuration_mpt.py +17 -9
  935. transformers/models/mpt/modeling_mpt.py +58 -60
  936. transformers/models/mra/configuration_mra.py +8 -2
  937. transformers/models/mra/modeling_mra.py +54 -56
  938. transformers/models/mt5/configuration_mt5.py +9 -6
  939. transformers/models/mt5/modeling_mt5.py +80 -85
  940. transformers/models/musicgen/configuration_musicgen.py +12 -8
  941. transformers/models/musicgen/modeling_musicgen.py +114 -116
  942. transformers/models/musicgen/processing_musicgen.py +3 -21
  943. transformers/models/musicgen_melody/configuration_musicgen_melody.py +15 -8
  944. transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +8 -9
  945. transformers/models/musicgen_melody/modeling_musicgen_melody.py +113 -126
  946. transformers/models/musicgen_melody/processing_musicgen_melody.py +3 -22
  947. transformers/models/mvp/configuration_mvp.py +8 -5
  948. transformers/models/mvp/modeling_mvp.py +121 -123
  949. transformers/models/myt5/tokenization_myt5.py +8 -10
  950. transformers/models/nanochat/configuration_nanochat.py +5 -8
  951. transformers/models/nanochat/modeling_nanochat.py +36 -39
  952. transformers/models/nanochat/modular_nanochat.py +16 -18
  953. transformers/models/nemotron/configuration_nemotron.py +25 -30
  954. transformers/models/nemotron/modeling_nemotron.py +53 -66
  955. transformers/models/nllb/tokenization_nllb.py +14 -14
  956. transformers/models/nllb_moe/configuration_nllb_moe.py +7 -10
  957. transformers/models/nllb_moe/modeling_nllb_moe.py +70 -72
  958. transformers/models/nougat/image_processing_nougat.py +29 -32
  959. transformers/models/nougat/image_processing_nougat_fast.py +12 -13
  960. transformers/models/nougat/processing_nougat.py +37 -39
  961. transformers/models/nougat/tokenization_nougat.py +5 -7
  962. transformers/models/nystromformer/configuration_nystromformer.py +8 -2
  963. transformers/models/nystromformer/modeling_nystromformer.py +61 -63
  964. transformers/models/olmo/configuration_olmo.py +23 -28
  965. transformers/models/olmo/modeling_olmo.py +35 -38
  966. transformers/models/olmo/modular_olmo.py +8 -12
  967. transformers/models/olmo2/configuration_olmo2.py +27 -32
  968. transformers/models/olmo2/modeling_olmo2.py +36 -39
  969. transformers/models/olmo2/modular_olmo2.py +36 -38
  970. transformers/models/olmo3/__init__.py +0 -1
  971. transformers/models/olmo3/configuration_olmo3.py +30 -34
  972. transformers/models/olmo3/modeling_olmo3.py +35 -38
  973. transformers/models/olmo3/modular_olmo3.py +44 -47
  974. transformers/models/olmoe/configuration_olmoe.py +29 -33
  975. transformers/models/olmoe/modeling_olmoe.py +41 -43
  976. transformers/models/olmoe/modular_olmoe.py +15 -16
  977. transformers/models/omdet_turbo/configuration_omdet_turbo.py +14 -50
  978. transformers/models/omdet_turbo/modeling_omdet_turbo.py +59 -57
  979. transformers/models/omdet_turbo/processing_omdet_turbo.py +19 -67
  980. transformers/models/oneformer/configuration_oneformer.py +11 -51
  981. transformers/models/oneformer/image_processing_oneformer.py +83 -84
  982. transformers/models/oneformer/image_processing_oneformer_fast.py +41 -42
  983. transformers/models/oneformer/modeling_oneformer.py +137 -133
  984. transformers/models/oneformer/processing_oneformer.py +28 -43
  985. transformers/models/openai/configuration_openai.py +16 -1
  986. transformers/models/openai/modeling_openai.py +50 -51
  987. transformers/models/openai/tokenization_openai.py +2 -5
  988. transformers/models/opt/configuration_opt.py +6 -7
  989. transformers/models/opt/modeling_opt.py +79 -80
  990. transformers/models/ovis2/__init__.py +0 -1
  991. transformers/models/ovis2/configuration_ovis2.py +4 -1
  992. transformers/models/ovis2/image_processing_ovis2.py +22 -24
  993. transformers/models/ovis2/image_processing_ovis2_fast.py +9 -10
  994. transformers/models/ovis2/modeling_ovis2.py +99 -142
  995. transformers/models/ovis2/modular_ovis2.py +82 -45
  996. transformers/models/ovis2/processing_ovis2.py +12 -40
  997. transformers/models/owlv2/configuration_owlv2.py +4 -2
  998. transformers/models/owlv2/image_processing_owlv2.py +20 -21
  999. transformers/models/owlv2/image_processing_owlv2_fast.py +12 -13
  1000. transformers/models/owlv2/modeling_owlv2.py +122 -114
  1001. transformers/models/owlv2/modular_owlv2.py +11 -12
  1002. transformers/models/owlv2/processing_owlv2.py +20 -49
  1003. transformers/models/owlvit/configuration_owlvit.py +4 -2
  1004. transformers/models/owlvit/image_processing_owlvit.py +21 -22
  1005. transformers/models/owlvit/image_processing_owlvit_fast.py +2 -3
  1006. transformers/models/owlvit/modeling_owlvit.py +121 -113
  1007. transformers/models/owlvit/processing_owlvit.py +20 -48
  1008. transformers/models/paddleocr_vl/__init__.py +0 -1
  1009. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +28 -29
  1010. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +34 -35
  1011. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +12 -12
  1012. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +159 -158
  1013. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +148 -119
  1014. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +1 -3
  1015. transformers/models/paligemma/configuration_paligemma.py +4 -1
  1016. transformers/models/paligemma/modeling_paligemma.py +81 -79
  1017. transformers/models/paligemma/processing_paligemma.py +13 -66
  1018. transformers/models/parakeet/configuration_parakeet.py +3 -8
  1019. transformers/models/parakeet/feature_extraction_parakeet.py +10 -12
  1020. transformers/models/parakeet/modeling_parakeet.py +21 -25
  1021. transformers/models/parakeet/modular_parakeet.py +19 -21
  1022. transformers/models/parakeet/processing_parakeet.py +12 -5
  1023. transformers/models/parakeet/tokenization_parakeet.py +2 -4
  1024. transformers/models/patchtsmixer/configuration_patchtsmixer.py +5 -8
  1025. transformers/models/patchtsmixer/modeling_patchtsmixer.py +63 -65
  1026. transformers/models/patchtst/configuration_patchtst.py +6 -9
  1027. transformers/models/patchtst/modeling_patchtst.py +75 -77
  1028. transformers/models/pe_audio/__init__.py +0 -1
  1029. transformers/models/pe_audio/configuration_pe_audio.py +14 -16
  1030. transformers/models/pe_audio/feature_extraction_pe_audio.py +6 -8
  1031. transformers/models/pe_audio/modeling_pe_audio.py +30 -31
  1032. transformers/models/pe_audio/modular_pe_audio.py +17 -18
  1033. transformers/models/pe_audio/processing_pe_audio.py +0 -1
  1034. transformers/models/pe_audio_video/__init__.py +0 -1
  1035. transformers/models/pe_audio_video/configuration_pe_audio_video.py +15 -17
  1036. transformers/models/pe_audio_video/modeling_pe_audio_video.py +64 -65
  1037. transformers/models/pe_audio_video/modular_pe_audio_video.py +56 -57
  1038. transformers/models/pe_audio_video/processing_pe_audio_video.py +0 -1
  1039. transformers/models/pe_video/__init__.py +0 -1
  1040. transformers/models/pe_video/configuration_pe_video.py +14 -16
  1041. transformers/models/pe_video/modeling_pe_video.py +57 -46
  1042. transformers/models/pe_video/modular_pe_video.py +47 -35
  1043. transformers/models/pe_video/video_processing_pe_video.py +2 -4
  1044. transformers/models/pegasus/configuration_pegasus.py +8 -6
  1045. transformers/models/pegasus/modeling_pegasus.py +67 -69
  1046. transformers/models/pegasus/tokenization_pegasus.py +1 -4
  1047. transformers/models/pegasus_x/configuration_pegasus_x.py +5 -4
  1048. transformers/models/pegasus_x/modeling_pegasus_x.py +53 -55
  1049. transformers/models/perceiver/configuration_perceiver.py +0 -1
  1050. transformers/models/perceiver/image_processing_perceiver.py +22 -25
  1051. transformers/models/perceiver/image_processing_perceiver_fast.py +7 -8
  1052. transformers/models/perceiver/modeling_perceiver.py +152 -145
  1053. transformers/models/perceiver/tokenization_perceiver.py +3 -6
  1054. transformers/models/perception_lm/configuration_perception_lm.py +0 -1
  1055. transformers/models/perception_lm/image_processing_perception_lm_fast.py +8 -9
  1056. transformers/models/perception_lm/modeling_perception_lm.py +64 -67
  1057. transformers/models/perception_lm/modular_perception_lm.py +58 -58
  1058. transformers/models/perception_lm/processing_perception_lm.py +13 -47
  1059. transformers/models/perception_lm/video_processing_perception_lm.py +0 -1
  1060. transformers/models/persimmon/configuration_persimmon.py +23 -28
  1061. transformers/models/persimmon/modeling_persimmon.py +44 -47
  1062. transformers/models/phi/configuration_phi.py +27 -28
  1063. transformers/models/phi/modeling_phi.py +39 -41
  1064. transformers/models/phi/modular_phi.py +26 -26
  1065. transformers/models/phi3/configuration_phi3.py +32 -37
  1066. transformers/models/phi3/modeling_phi3.py +37 -40
  1067. transformers/models/phi3/modular_phi3.py +16 -20
  1068. transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +36 -39
  1069. transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +7 -9
  1070. transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +11 -11
  1071. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +100 -117
  1072. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +103 -90
  1073. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +7 -42
  1074. transformers/models/phimoe/configuration_phimoe.py +31 -36
  1075. transformers/models/phimoe/modeling_phimoe.py +50 -77
  1076. transformers/models/phimoe/modular_phimoe.py +12 -8
  1077. transformers/models/phobert/tokenization_phobert.py +4 -6
  1078. transformers/models/pix2struct/configuration_pix2struct.py +12 -10
  1079. transformers/models/pix2struct/image_processing_pix2struct.py +15 -19
  1080. transformers/models/pix2struct/image_processing_pix2struct_fast.py +12 -15
  1081. transformers/models/pix2struct/modeling_pix2struct.py +56 -52
  1082. transformers/models/pix2struct/processing_pix2struct.py +5 -26
  1083. transformers/models/pixio/__init__.py +0 -1
  1084. transformers/models/pixio/configuration_pixio.py +2 -5
  1085. transformers/models/pixio/modeling_pixio.py +16 -17
  1086. transformers/models/pixio/modular_pixio.py +7 -8
  1087. transformers/models/pixtral/configuration_pixtral.py +11 -14
  1088. transformers/models/pixtral/image_processing_pixtral.py +26 -28
  1089. transformers/models/pixtral/image_processing_pixtral_fast.py +10 -11
  1090. transformers/models/pixtral/modeling_pixtral.py +31 -37
  1091. transformers/models/pixtral/processing_pixtral.py +18 -52
  1092. transformers/models/plbart/configuration_plbart.py +8 -6
  1093. transformers/models/plbart/modeling_plbart.py +109 -109
  1094. transformers/models/plbart/modular_plbart.py +31 -33
  1095. transformers/models/plbart/tokenization_plbart.py +4 -5
  1096. transformers/models/poolformer/configuration_poolformer.py +0 -1
  1097. transformers/models/poolformer/image_processing_poolformer.py +21 -24
  1098. transformers/models/poolformer/image_processing_poolformer_fast.py +13 -14
  1099. transformers/models/poolformer/modeling_poolformer.py +10 -12
  1100. transformers/models/pop2piano/configuration_pop2piano.py +7 -7
  1101. transformers/models/pop2piano/feature_extraction_pop2piano.py +6 -9
  1102. transformers/models/pop2piano/modeling_pop2piano.py +24 -24
  1103. transformers/models/pop2piano/processing_pop2piano.py +25 -33
  1104. transformers/models/pop2piano/tokenization_pop2piano.py +15 -23
  1105. transformers/models/pp_doclayout_v3/__init__.py +30 -0
  1106. transformers/models/pp_doclayout_v3/configuration_pp_doclayout_v3.py +277 -0
  1107. transformers/models/pp_doclayout_v3/image_processing_pp_doclayout_v3_fast.py +305 -0
  1108. transformers/models/pp_doclayout_v3/modeling_pp_doclayout_v3.py +2083 -0
  1109. transformers/models/pp_doclayout_v3/modular_pp_doclayout_v3.py +1549 -0
  1110. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +13 -46
  1111. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
  1112. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +20 -21
  1113. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +17 -16
  1114. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +21 -20
  1115. transformers/models/prophetnet/configuration_prophetnet.py +37 -38
  1116. transformers/models/prophetnet/modeling_prophetnet.py +121 -153
  1117. transformers/models/prophetnet/tokenization_prophetnet.py +14 -16
  1118. transformers/models/pvt/configuration_pvt.py +0 -1
  1119. transformers/models/pvt/image_processing_pvt.py +24 -27
  1120. transformers/models/pvt/image_processing_pvt_fast.py +1 -2
  1121. transformers/models/pvt/modeling_pvt.py +19 -21
  1122. transformers/models/pvt_v2/configuration_pvt_v2.py +4 -8
  1123. transformers/models/pvt_v2/modeling_pvt_v2.py +27 -28
  1124. transformers/models/qwen2/configuration_qwen2.py +32 -25
  1125. transformers/models/qwen2/modeling_qwen2.py +35 -37
  1126. transformers/models/qwen2/modular_qwen2.py +14 -15
  1127. transformers/models/qwen2/tokenization_qwen2.py +2 -9
  1128. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +36 -27
  1129. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +241 -214
  1130. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +228 -193
  1131. transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +41 -49
  1132. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +28 -34
  1133. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +188 -145
  1134. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +64 -91
  1135. transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +7 -43
  1136. transformers/models/qwen2_audio/configuration_qwen2_audio.py +0 -1
  1137. transformers/models/qwen2_audio/modeling_qwen2_audio.py +39 -41
  1138. transformers/models/qwen2_audio/processing_qwen2_audio.py +13 -42
  1139. transformers/models/qwen2_moe/configuration_qwen2_moe.py +42 -35
  1140. transformers/models/qwen2_moe/modeling_qwen2_moe.py +40 -43
  1141. transformers/models/qwen2_moe/modular_qwen2_moe.py +10 -13
  1142. transformers/models/qwen2_vl/configuration_qwen2_vl.py +28 -33
  1143. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +38 -40
  1144. transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +12 -15
  1145. transformers/models/qwen2_vl/modeling_qwen2_vl.py +184 -141
  1146. transformers/models/qwen2_vl/processing_qwen2_vl.py +7 -44
  1147. transformers/models/qwen2_vl/video_processing_qwen2_vl.py +38 -18
  1148. transformers/models/qwen3/configuration_qwen3.py +34 -27
  1149. transformers/models/qwen3/modeling_qwen3.py +35 -38
  1150. transformers/models/qwen3/modular_qwen3.py +7 -9
  1151. transformers/models/qwen3_moe/configuration_qwen3_moe.py +45 -35
  1152. transformers/models/qwen3_moe/modeling_qwen3_moe.py +40 -43
  1153. transformers/models/qwen3_moe/modular_qwen3_moe.py +10 -13
  1154. transformers/models/qwen3_next/configuration_qwen3_next.py +47 -38
  1155. transformers/models/qwen3_next/modeling_qwen3_next.py +44 -47
  1156. transformers/models/qwen3_next/modular_qwen3_next.py +37 -38
  1157. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +139 -106
  1158. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +266 -206
  1159. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +228 -181
  1160. transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +40 -48
  1161. transformers/models/qwen3_vl/configuration_qwen3_vl.py +22 -24
  1162. transformers/models/qwen3_vl/modeling_qwen3_vl.py +185 -122
  1163. transformers/models/qwen3_vl/modular_qwen3_vl.py +153 -139
  1164. transformers/models/qwen3_vl/processing_qwen3_vl.py +6 -42
  1165. transformers/models/qwen3_vl/video_processing_qwen3_vl.py +10 -12
  1166. transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +27 -30
  1167. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +249 -178
  1168. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +55 -42
  1169. transformers/models/rag/configuration_rag.py +6 -7
  1170. transformers/models/rag/modeling_rag.py +119 -121
  1171. transformers/models/rag/retrieval_rag.py +3 -5
  1172. transformers/models/rag/tokenization_rag.py +0 -50
  1173. transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +29 -30
  1174. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +35 -39
  1175. transformers/models/reformer/configuration_reformer.py +7 -8
  1176. transformers/models/reformer/modeling_reformer.py +67 -68
  1177. transformers/models/reformer/tokenization_reformer.py +3 -6
  1178. transformers/models/regnet/configuration_regnet.py +0 -1
  1179. transformers/models/regnet/modeling_regnet.py +7 -9
  1180. transformers/models/rembert/configuration_rembert.py +8 -2
  1181. transformers/models/rembert/modeling_rembert.py +108 -132
  1182. transformers/models/rembert/tokenization_rembert.py +1 -4
  1183. transformers/models/resnet/configuration_resnet.py +2 -5
  1184. transformers/models/resnet/modeling_resnet.py +14 -15
  1185. transformers/models/roberta/configuration_roberta.py +11 -3
  1186. transformers/models/roberta/modeling_roberta.py +97 -99
  1187. transformers/models/roberta/modular_roberta.py +55 -58
  1188. transformers/models/roberta/tokenization_roberta.py +2 -5
  1189. transformers/models/roberta/tokenization_roberta_old.py +2 -4
  1190. transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +11 -3
  1191. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +97 -99
  1192. transformers/models/roc_bert/configuration_roc_bert.py +8 -2
  1193. transformers/models/roc_bert/modeling_roc_bert.py +125 -162
  1194. transformers/models/roc_bert/tokenization_roc_bert.py +88 -94
  1195. transformers/models/roformer/configuration_roformer.py +13 -3
  1196. transformers/models/roformer/modeling_roformer.py +79 -95
  1197. transformers/models/roformer/tokenization_roformer.py +3 -6
  1198. transformers/models/roformer/tokenization_utils.py +0 -1
  1199. transformers/models/rt_detr/configuration_rt_detr.py +8 -50
  1200. transformers/models/rt_detr/configuration_rt_detr_resnet.py +2 -5
  1201. transformers/models/rt_detr/image_processing_rt_detr.py +54 -55
  1202. transformers/models/rt_detr/image_processing_rt_detr_fast.py +39 -26
  1203. transformers/models/rt_detr/modeling_rt_detr.py +643 -804
  1204. transformers/models/rt_detr/modeling_rt_detr_resnet.py +4 -7
  1205. transformers/models/rt_detr/modular_rt_detr.py +1522 -20
  1206. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +12 -58
  1207. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +384 -521
  1208. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +27 -70
  1209. transformers/models/rwkv/configuration_rwkv.py +2 -4
  1210. transformers/models/rwkv/modeling_rwkv.py +29 -54
  1211. transformers/models/sam/configuration_sam.py +2 -1
  1212. transformers/models/sam/image_processing_sam.py +59 -60
  1213. transformers/models/sam/image_processing_sam_fast.py +25 -26
  1214. transformers/models/sam/modeling_sam.py +46 -43
  1215. transformers/models/sam/processing_sam.py +39 -27
  1216. transformers/models/sam2/configuration_sam2.py +1 -2
  1217. transformers/models/sam2/image_processing_sam2_fast.py +14 -15
  1218. transformers/models/sam2/modeling_sam2.py +96 -94
  1219. transformers/models/sam2/modular_sam2.py +85 -94
  1220. transformers/models/sam2/processing_sam2.py +31 -47
  1221. transformers/models/sam2_video/configuration_sam2_video.py +0 -1
  1222. transformers/models/sam2_video/modeling_sam2_video.py +114 -116
  1223. transformers/models/sam2_video/modular_sam2_video.py +72 -89
  1224. transformers/models/sam2_video/processing_sam2_video.py +49 -66
  1225. transformers/models/sam2_video/video_processing_sam2_video.py +1 -4
  1226. transformers/models/sam3/configuration_sam3.py +0 -1
  1227. transformers/models/sam3/image_processing_sam3_fast.py +17 -20
  1228. transformers/models/sam3/modeling_sam3.py +94 -100
  1229. transformers/models/sam3/modular_sam3.py +3 -8
  1230. transformers/models/sam3/processing_sam3.py +37 -52
  1231. transformers/models/sam3_tracker/__init__.py +0 -1
  1232. transformers/models/sam3_tracker/configuration_sam3_tracker.py +1 -3
  1233. transformers/models/sam3_tracker/modeling_sam3_tracker.py +79 -80
  1234. transformers/models/sam3_tracker/modular_sam3_tracker.py +0 -2
  1235. transformers/models/sam3_tracker/processing_sam3_tracker.py +31 -48
  1236. transformers/models/sam3_tracker_video/__init__.py +0 -1
  1237. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +0 -1
  1238. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +115 -114
  1239. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +10 -24
  1240. transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +50 -66
  1241. transformers/models/sam3_video/configuration_sam3_video.py +0 -1
  1242. transformers/models/sam3_video/modeling_sam3_video.py +56 -45
  1243. transformers/models/sam3_video/processing_sam3_video.py +25 -45
  1244. transformers/models/sam_hq/__init__.py +1 -1
  1245. transformers/models/sam_hq/configuration_sam_hq.py +2 -1
  1246. transformers/models/sam_hq/modeling_sam_hq.py +52 -50
  1247. transformers/models/sam_hq/modular_sam_hq.py +23 -25
  1248. transformers/models/sam_hq/{processing_samhq.py → processing_sam_hq.py} +41 -29
  1249. transformers/models/seamless_m4t/configuration_seamless_m4t.py +8 -10
  1250. transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +8 -11
  1251. transformers/models/seamless_m4t/modeling_seamless_m4t.py +180 -182
  1252. transformers/models/seamless_m4t/processing_seamless_m4t.py +18 -39
  1253. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +15 -20
  1254. transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +8 -10
  1255. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +193 -195
  1256. transformers/models/seed_oss/configuration_seed_oss.py +30 -34
  1257. transformers/models/seed_oss/modeling_seed_oss.py +34 -36
  1258. transformers/models/seed_oss/modular_seed_oss.py +6 -7
  1259. transformers/models/segformer/configuration_segformer.py +0 -10
  1260. transformers/models/segformer/image_processing_segformer.py +39 -42
  1261. transformers/models/segformer/image_processing_segformer_fast.py +11 -12
  1262. transformers/models/segformer/modeling_segformer.py +28 -28
  1263. transformers/models/segformer/modular_segformer.py +8 -9
  1264. transformers/models/seggpt/configuration_seggpt.py +0 -1
  1265. transformers/models/seggpt/image_processing_seggpt.py +38 -41
  1266. transformers/models/seggpt/modeling_seggpt.py +48 -38
  1267. transformers/models/sew/configuration_sew.py +4 -2
  1268. transformers/models/sew/modeling_sew.py +42 -40
  1269. transformers/models/sew/modular_sew.py +12 -13
  1270. transformers/models/sew_d/configuration_sew_d.py +4 -2
  1271. transformers/models/sew_d/modeling_sew_d.py +32 -31
  1272. transformers/models/shieldgemma2/configuration_shieldgemma2.py +0 -1
  1273. transformers/models/shieldgemma2/modeling_shieldgemma2.py +19 -21
  1274. transformers/models/shieldgemma2/processing_shieldgemma2.py +3 -5
  1275. transformers/models/siglip/configuration_siglip.py +4 -2
  1276. transformers/models/siglip/image_processing_siglip.py +17 -20
  1277. transformers/models/siglip/image_processing_siglip_fast.py +0 -1
  1278. transformers/models/siglip/modeling_siglip.py +65 -110
  1279. transformers/models/siglip/processing_siglip.py +2 -14
  1280. transformers/models/siglip/tokenization_siglip.py +6 -7
  1281. transformers/models/siglip2/__init__.py +1 -0
  1282. transformers/models/siglip2/configuration_siglip2.py +4 -2
  1283. transformers/models/siglip2/image_processing_siglip2.py +15 -16
  1284. transformers/models/siglip2/image_processing_siglip2_fast.py +6 -7
  1285. transformers/models/siglip2/modeling_siglip2.py +89 -130
  1286. transformers/models/siglip2/modular_siglip2.py +95 -48
  1287. transformers/models/siglip2/processing_siglip2.py +2 -14
  1288. transformers/models/siglip2/tokenization_siglip2.py +95 -0
  1289. transformers/models/smollm3/configuration_smollm3.py +29 -32
  1290. transformers/models/smollm3/modeling_smollm3.py +35 -38
  1291. transformers/models/smollm3/modular_smollm3.py +36 -38
  1292. transformers/models/smolvlm/configuration_smolvlm.py +2 -4
  1293. transformers/models/smolvlm/image_processing_smolvlm.py +42 -43
  1294. transformers/models/smolvlm/image_processing_smolvlm_fast.py +41 -15
  1295. transformers/models/smolvlm/modeling_smolvlm.py +124 -96
  1296. transformers/models/smolvlm/modular_smolvlm.py +50 -39
  1297. transformers/models/smolvlm/processing_smolvlm.py +15 -76
  1298. transformers/models/smolvlm/video_processing_smolvlm.py +16 -17
  1299. transformers/models/solar_open/__init__.py +27 -0
  1300. transformers/models/solar_open/configuration_solar_open.py +184 -0
  1301. transformers/models/solar_open/modeling_solar_open.py +642 -0
  1302. transformers/models/solar_open/modular_solar_open.py +224 -0
  1303. transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +0 -1
  1304. transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +26 -27
  1305. transformers/models/speech_to_text/configuration_speech_to_text.py +9 -9
  1306. transformers/models/speech_to_text/feature_extraction_speech_to_text.py +10 -13
  1307. transformers/models/speech_to_text/modeling_speech_to_text.py +55 -57
  1308. transformers/models/speech_to_text/processing_speech_to_text.py +4 -30
  1309. transformers/models/speech_to_text/tokenization_speech_to_text.py +5 -6
  1310. transformers/models/speecht5/configuration_speecht5.py +7 -9
  1311. transformers/models/speecht5/feature_extraction_speecht5.py +16 -37
  1312. transformers/models/speecht5/modeling_speecht5.py +172 -174
  1313. transformers/models/speecht5/number_normalizer.py +0 -1
  1314. transformers/models/speecht5/processing_speecht5.py +3 -37
  1315. transformers/models/speecht5/tokenization_speecht5.py +4 -5
  1316. transformers/models/splinter/configuration_splinter.py +6 -7
  1317. transformers/models/splinter/modeling_splinter.py +62 -59
  1318. transformers/models/splinter/tokenization_splinter.py +2 -4
  1319. transformers/models/squeezebert/configuration_squeezebert.py +14 -2
  1320. transformers/models/squeezebert/modeling_squeezebert.py +60 -62
  1321. transformers/models/squeezebert/tokenization_squeezebert.py +0 -1
  1322. transformers/models/stablelm/configuration_stablelm.py +28 -29
  1323. transformers/models/stablelm/modeling_stablelm.py +44 -47
  1324. transformers/models/starcoder2/configuration_starcoder2.py +30 -27
  1325. transformers/models/starcoder2/modeling_starcoder2.py +38 -41
  1326. transformers/models/starcoder2/modular_starcoder2.py +17 -19
  1327. transformers/models/superglue/configuration_superglue.py +7 -3
  1328. transformers/models/superglue/image_processing_superglue.py +15 -15
  1329. transformers/models/superglue/image_processing_superglue_fast.py +8 -8
  1330. transformers/models/superglue/modeling_superglue.py +41 -37
  1331. transformers/models/superpoint/image_processing_superpoint.py +15 -15
  1332. transformers/models/superpoint/image_processing_superpoint_fast.py +7 -9
  1333. transformers/models/superpoint/modeling_superpoint.py +17 -16
  1334. transformers/models/swiftformer/configuration_swiftformer.py +0 -1
  1335. transformers/models/swiftformer/modeling_swiftformer.py +12 -14
  1336. transformers/models/swin/configuration_swin.py +2 -5
  1337. transformers/models/swin/modeling_swin.py +69 -78
  1338. transformers/models/swin2sr/configuration_swin2sr.py +0 -1
  1339. transformers/models/swin2sr/image_processing_swin2sr.py +10 -13
  1340. transformers/models/swin2sr/image_processing_swin2sr_fast.py +4 -7
  1341. transformers/models/swin2sr/modeling_swin2sr.py +30 -30
  1342. transformers/models/swinv2/configuration_swinv2.py +2 -5
  1343. transformers/models/swinv2/modeling_swinv2.py +65 -74
  1344. transformers/models/switch_transformers/configuration_switch_transformers.py +11 -7
  1345. transformers/models/switch_transformers/modeling_switch_transformers.py +35 -36
  1346. transformers/models/switch_transformers/modular_switch_transformers.py +32 -33
  1347. transformers/models/t5/configuration_t5.py +9 -9
  1348. transformers/models/t5/modeling_t5.py +80 -85
  1349. transformers/models/t5/tokenization_t5.py +1 -3
  1350. transformers/models/t5gemma/configuration_t5gemma.py +43 -59
  1351. transformers/models/t5gemma/modeling_t5gemma.py +105 -108
  1352. transformers/models/t5gemma/modular_t5gemma.py +128 -142
  1353. transformers/models/t5gemma2/configuration_t5gemma2.py +86 -100
  1354. transformers/models/t5gemma2/modeling_t5gemma2.py +234 -194
  1355. transformers/models/t5gemma2/modular_t5gemma2.py +279 -264
  1356. transformers/models/table_transformer/configuration_table_transformer.py +18 -50
  1357. transformers/models/table_transformer/modeling_table_transformer.py +73 -101
  1358. transformers/models/tapas/configuration_tapas.py +12 -2
  1359. transformers/models/tapas/modeling_tapas.py +65 -67
  1360. transformers/models/tapas/tokenization_tapas.py +116 -153
  1361. transformers/models/textnet/configuration_textnet.py +4 -7
  1362. transformers/models/textnet/image_processing_textnet.py +22 -25
  1363. transformers/models/textnet/image_processing_textnet_fast.py +8 -9
  1364. transformers/models/textnet/modeling_textnet.py +28 -28
  1365. transformers/models/time_series_transformer/configuration_time_series_transformer.py +5 -8
  1366. transformers/models/time_series_transformer/modeling_time_series_transformer.py +82 -84
  1367. transformers/models/timesfm/configuration_timesfm.py +0 -1
  1368. transformers/models/timesfm/modeling_timesfm.py +22 -25
  1369. transformers/models/timesfm/modular_timesfm.py +21 -24
  1370. transformers/models/timesformer/configuration_timesformer.py +0 -1
  1371. transformers/models/timesformer/modeling_timesformer.py +13 -16
  1372. transformers/models/timm_backbone/configuration_timm_backbone.py +33 -8
  1373. transformers/models/timm_backbone/modeling_timm_backbone.py +25 -30
  1374. transformers/models/timm_wrapper/configuration_timm_wrapper.py +2 -3
  1375. transformers/models/timm_wrapper/image_processing_timm_wrapper.py +4 -5
  1376. transformers/models/timm_wrapper/modeling_timm_wrapper.py +22 -19
  1377. transformers/models/trocr/configuration_trocr.py +11 -8
  1378. transformers/models/trocr/modeling_trocr.py +42 -42
  1379. transformers/models/trocr/processing_trocr.py +5 -25
  1380. transformers/models/tvp/configuration_tvp.py +10 -36
  1381. transformers/models/tvp/image_processing_tvp.py +50 -52
  1382. transformers/models/tvp/image_processing_tvp_fast.py +15 -15
  1383. transformers/models/tvp/modeling_tvp.py +26 -28
  1384. transformers/models/tvp/processing_tvp.py +2 -14
  1385. transformers/models/udop/configuration_udop.py +16 -8
  1386. transformers/models/udop/modeling_udop.py +73 -72
  1387. transformers/models/udop/processing_udop.py +7 -26
  1388. transformers/models/udop/tokenization_udop.py +80 -93
  1389. transformers/models/umt5/configuration_umt5.py +8 -7
  1390. transformers/models/umt5/modeling_umt5.py +87 -84
  1391. transformers/models/unispeech/configuration_unispeech.py +4 -2
  1392. transformers/models/unispeech/modeling_unispeech.py +54 -53
  1393. transformers/models/unispeech/modular_unispeech.py +20 -22
  1394. transformers/models/unispeech_sat/configuration_unispeech_sat.py +4 -2
  1395. transformers/models/unispeech_sat/modeling_unispeech_sat.py +70 -69
  1396. transformers/models/unispeech_sat/modular_unispeech_sat.py +21 -23
  1397. transformers/models/univnet/feature_extraction_univnet.py +14 -14
  1398. transformers/models/univnet/modeling_univnet.py +7 -8
  1399. transformers/models/upernet/configuration_upernet.py +8 -36
  1400. transformers/models/upernet/modeling_upernet.py +11 -14
  1401. transformers/models/vaultgemma/__init__.py +0 -1
  1402. transformers/models/vaultgemma/configuration_vaultgemma.py +29 -33
  1403. transformers/models/vaultgemma/modeling_vaultgemma.py +38 -40
  1404. transformers/models/vaultgemma/modular_vaultgemma.py +29 -31
  1405. transformers/models/video_llama_3/configuration_video_llama_3.py +4 -0
  1406. transformers/models/video_llama_3/image_processing_video_llama_3.py +40 -40
  1407. transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +12 -14
  1408. transformers/models/video_llama_3/modeling_video_llama_3.py +149 -112
  1409. transformers/models/video_llama_3/modular_video_llama_3.py +152 -150
  1410. transformers/models/video_llama_3/processing_video_llama_3.py +5 -39
  1411. transformers/models/video_llama_3/video_processing_video_llama_3.py +45 -24
  1412. transformers/models/video_llava/configuration_video_llava.py +4 -1
  1413. transformers/models/video_llava/image_processing_video_llava.py +35 -38
  1414. transformers/models/video_llava/modeling_video_llava.py +139 -143
  1415. transformers/models/video_llava/processing_video_llava.py +38 -78
  1416. transformers/models/video_llava/video_processing_video_llava.py +0 -1
  1417. transformers/models/videomae/configuration_videomae.py +0 -1
  1418. transformers/models/videomae/image_processing_videomae.py +31 -34
  1419. transformers/models/videomae/modeling_videomae.py +17 -20
  1420. transformers/models/videomae/video_processing_videomae.py +0 -1
  1421. transformers/models/vilt/configuration_vilt.py +4 -2
  1422. transformers/models/vilt/image_processing_vilt.py +29 -30
  1423. transformers/models/vilt/image_processing_vilt_fast.py +15 -16
  1424. transformers/models/vilt/modeling_vilt.py +103 -90
  1425. transformers/models/vilt/processing_vilt.py +2 -14
  1426. transformers/models/vipllava/configuration_vipllava.py +4 -1
  1427. transformers/models/vipllava/modeling_vipllava.py +92 -67
  1428. transformers/models/vipllava/modular_vipllava.py +78 -54
  1429. transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +0 -1
  1430. transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +28 -27
  1431. transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +0 -1
  1432. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +45 -41
  1433. transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +2 -16
  1434. transformers/models/visual_bert/configuration_visual_bert.py +6 -2
  1435. transformers/models/visual_bert/modeling_visual_bert.py +90 -92
  1436. transformers/models/vit/configuration_vit.py +2 -3
  1437. transformers/models/vit/image_processing_vit.py +19 -22
  1438. transformers/models/vit/image_processing_vit_fast.py +0 -1
  1439. transformers/models/vit/modeling_vit.py +20 -20
  1440. transformers/models/vit_mae/configuration_vit_mae.py +0 -1
  1441. transformers/models/vit_mae/modeling_vit_mae.py +32 -30
  1442. transformers/models/vit_msn/configuration_vit_msn.py +0 -1
  1443. transformers/models/vit_msn/modeling_vit_msn.py +21 -19
  1444. transformers/models/vitdet/configuration_vitdet.py +2 -5
  1445. transformers/models/vitdet/modeling_vitdet.py +14 -17
  1446. transformers/models/vitmatte/configuration_vitmatte.py +7 -39
  1447. transformers/models/vitmatte/image_processing_vitmatte.py +15 -18
  1448. transformers/models/vitmatte/image_processing_vitmatte_fast.py +16 -17
  1449. transformers/models/vitmatte/modeling_vitmatte.py +10 -12
  1450. transformers/models/vitpose/configuration_vitpose.py +7 -47
  1451. transformers/models/vitpose/image_processing_vitpose.py +24 -25
  1452. transformers/models/vitpose/image_processing_vitpose_fast.py +9 -10
  1453. transformers/models/vitpose/modeling_vitpose.py +15 -15
  1454. transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +2 -5
  1455. transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +13 -16
  1456. transformers/models/vits/configuration_vits.py +4 -1
  1457. transformers/models/vits/modeling_vits.py +43 -42
  1458. transformers/models/vits/tokenization_vits.py +3 -4
  1459. transformers/models/vivit/configuration_vivit.py +0 -1
  1460. transformers/models/vivit/image_processing_vivit.py +36 -39
  1461. transformers/models/vivit/modeling_vivit.py +9 -11
  1462. transformers/models/vjepa2/__init__.py +0 -1
  1463. transformers/models/vjepa2/configuration_vjepa2.py +0 -1
  1464. transformers/models/vjepa2/modeling_vjepa2.py +39 -41
  1465. transformers/models/vjepa2/video_processing_vjepa2.py +0 -1
  1466. transformers/models/voxtral/__init__.py +0 -1
  1467. transformers/models/voxtral/configuration_voxtral.py +0 -2
  1468. transformers/models/voxtral/modeling_voxtral.py +41 -48
  1469. transformers/models/voxtral/modular_voxtral.py +35 -38
  1470. transformers/models/voxtral/processing_voxtral.py +25 -48
  1471. transformers/models/wav2vec2/configuration_wav2vec2.py +4 -2
  1472. transformers/models/wav2vec2/feature_extraction_wav2vec2.py +7 -10
  1473. transformers/models/wav2vec2/modeling_wav2vec2.py +74 -126
  1474. transformers/models/wav2vec2/processing_wav2vec2.py +6 -35
  1475. transformers/models/wav2vec2/tokenization_wav2vec2.py +20 -332
  1476. transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +4 -2
  1477. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +49 -52
  1478. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +45 -48
  1479. transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +6 -35
  1480. transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +4 -2
  1481. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +62 -65
  1482. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +15 -18
  1483. transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +16 -17
  1484. transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +36 -55
  1485. transformers/models/wavlm/configuration_wavlm.py +4 -2
  1486. transformers/models/wavlm/modeling_wavlm.py +49 -49
  1487. transformers/models/wavlm/modular_wavlm.py +4 -5
  1488. transformers/models/whisper/configuration_whisper.py +6 -5
  1489. transformers/models/whisper/english_normalizer.py +3 -4
  1490. transformers/models/whisper/feature_extraction_whisper.py +9 -24
  1491. transformers/models/whisper/generation_whisper.py +26 -49
  1492. transformers/models/whisper/modeling_whisper.py +71 -73
  1493. transformers/models/whisper/processing_whisper.py +3 -20
  1494. transformers/models/whisper/tokenization_whisper.py +9 -30
  1495. transformers/models/x_clip/configuration_x_clip.py +4 -2
  1496. transformers/models/x_clip/modeling_x_clip.py +94 -96
  1497. transformers/models/x_clip/processing_x_clip.py +2 -14
  1498. transformers/models/xcodec/configuration_xcodec.py +4 -6
  1499. transformers/models/xcodec/modeling_xcodec.py +15 -17
  1500. transformers/models/xglm/configuration_xglm.py +9 -8
  1501. transformers/models/xglm/modeling_xglm.py +49 -55
  1502. transformers/models/xglm/tokenization_xglm.py +1 -4
  1503. transformers/models/xlm/configuration_xlm.py +10 -8
  1504. transformers/models/xlm/modeling_xlm.py +127 -131
  1505. transformers/models/xlm/tokenization_xlm.py +3 -5
  1506. transformers/models/xlm_roberta/configuration_xlm_roberta.py +11 -3
  1507. transformers/models/xlm_roberta/modeling_xlm_roberta.py +96 -98
  1508. transformers/models/xlm_roberta/modular_xlm_roberta.py +50 -53
  1509. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +1 -4
  1510. transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +10 -2
  1511. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +97 -99
  1512. transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +67 -70
  1513. transformers/models/xlnet/configuration_xlnet.py +3 -12
  1514. transformers/models/xlnet/modeling_xlnet.py +149 -162
  1515. transformers/models/xlnet/tokenization_xlnet.py +1 -4
  1516. transformers/models/xlstm/configuration_xlstm.py +8 -12
  1517. transformers/models/xlstm/modeling_xlstm.py +61 -96
  1518. transformers/models/xmod/configuration_xmod.py +11 -3
  1519. transformers/models/xmod/modeling_xmod.py +111 -116
  1520. transformers/models/yolos/configuration_yolos.py +0 -1
  1521. transformers/models/yolos/image_processing_yolos.py +60 -62
  1522. transformers/models/yolos/image_processing_yolos_fast.py +42 -45
  1523. transformers/models/yolos/modeling_yolos.py +19 -21
  1524. transformers/models/yolos/modular_yolos.py +17 -19
  1525. transformers/models/yoso/configuration_yoso.py +8 -2
  1526. transformers/models/yoso/modeling_yoso.py +60 -62
  1527. transformers/models/youtu/__init__.py +27 -0
  1528. transformers/models/youtu/configuration_youtu.py +194 -0
  1529. transformers/models/youtu/modeling_youtu.py +619 -0
  1530. transformers/models/youtu/modular_youtu.py +254 -0
  1531. transformers/models/zamba/configuration_zamba.py +5 -8
  1532. transformers/models/zamba/modeling_zamba.py +93 -125
  1533. transformers/models/zamba2/configuration_zamba2.py +44 -50
  1534. transformers/models/zamba2/modeling_zamba2.py +137 -165
  1535. transformers/models/zamba2/modular_zamba2.py +79 -74
  1536. transformers/models/zoedepth/configuration_zoedepth.py +17 -41
  1537. transformers/models/zoedepth/image_processing_zoedepth.py +28 -29
  1538. transformers/models/zoedepth/image_processing_zoedepth_fast.py +20 -21
  1539. transformers/models/zoedepth/modeling_zoedepth.py +19 -19
  1540. transformers/pipelines/__init__.py +47 -106
  1541. transformers/pipelines/any_to_any.py +15 -23
  1542. transformers/pipelines/audio_utils.py +1 -2
  1543. transformers/pipelines/automatic_speech_recognition.py +0 -2
  1544. transformers/pipelines/base.py +13 -17
  1545. transformers/pipelines/image_text_to_text.py +1 -2
  1546. transformers/pipelines/question_answering.py +4 -43
  1547. transformers/pipelines/text_classification.py +1 -14
  1548. transformers/pipelines/text_to_audio.py +5 -1
  1549. transformers/pipelines/token_classification.py +1 -22
  1550. transformers/pipelines/video_classification.py +1 -9
  1551. transformers/pipelines/zero_shot_audio_classification.py +0 -1
  1552. transformers/pipelines/zero_shot_classification.py +0 -6
  1553. transformers/pipelines/zero_shot_image_classification.py +0 -7
  1554. transformers/processing_utils.py +128 -137
  1555. transformers/pytorch_utils.py +2 -26
  1556. transformers/quantizers/base.py +10 -0
  1557. transformers/quantizers/quantizer_compressed_tensors.py +7 -5
  1558. transformers/quantizers/quantizer_fbgemm_fp8.py +20 -23
  1559. transformers/quantizers/quantizer_finegrained_fp8.py +14 -20
  1560. transformers/quantizers/quantizer_mxfp4.py +1 -1
  1561. transformers/quantizers/quantizer_quark.py +0 -1
  1562. transformers/quantizers/quantizer_torchao.py +3 -19
  1563. transformers/safetensors_conversion.py +11 -4
  1564. transformers/testing_utils.py +6 -65
  1565. transformers/tokenization_mistral_common.py +563 -903
  1566. transformers/tokenization_python.py +6 -4
  1567. transformers/tokenization_utils_base.py +228 -341
  1568. transformers/tokenization_utils_sentencepiece.py +5 -6
  1569. transformers/tokenization_utils_tokenizers.py +36 -7
  1570. transformers/trainer.py +30 -41
  1571. transformers/trainer_jit_checkpoint.py +1 -2
  1572. transformers/trainer_seq2seq.py +1 -1
  1573. transformers/training_args.py +414 -420
  1574. transformers/utils/__init__.py +1 -4
  1575. transformers/utils/attention_visualizer.py +1 -1
  1576. transformers/utils/auto_docstring.py +567 -18
  1577. transformers/utils/backbone_utils.py +13 -373
  1578. transformers/utils/doc.py +4 -36
  1579. transformers/utils/dummy_pt_objects.py +0 -42
  1580. transformers/utils/generic.py +70 -34
  1581. transformers/utils/import_utils.py +72 -75
  1582. transformers/utils/loading_report.py +135 -107
  1583. transformers/utils/quantization_config.py +8 -31
  1584. transformers/video_processing_utils.py +24 -25
  1585. transformers/video_utils.py +21 -23
  1586. {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/METADATA +120 -239
  1587. transformers-5.1.0.dist-info/RECORD +2092 -0
  1588. {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/WHEEL +1 -1
  1589. transformers/pipelines/deprecated/text2text_generation.py +0 -408
  1590. transformers/pipelines/image_to_text.py +0 -229
  1591. transformers-5.0.0rc2.dist-info/RECORD +0 -2042
  1592. {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/entry_points.txt +0 -0
  1593. {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/licenses/LICENSE +0 -0
  1594. {transformers-5.0.0rc2.dist-info → transformers-5.1.0.dist-info}/top_level.txt +0 -0
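
The per-file diff excerpt below covers the tokenization base module. Most hunks only modernize annotations from `typing.Optional`/`Union` to PEP 604 `X | None` unions; the one behavioral addition shown is that `PreTrainedTokenizerBase.__init__` now folds the deprecated `additional_special_tokens` keyword into `extra_special_tokens` before storing `init_kwargs`. A minimal sketch of that conversion follows, assuming the hunk shown below is the complete rule; the helper name `normalize_tokenizer_kwargs` is hypothetical and not a transformers API.

from typing import Any


# Hypothetical helper mirroring the v5 kwarg conversion shown in the excerpt below;
# not the verbatim transformers implementation.
def normalize_tokenizer_kwargs(kwargs: dict[str, Any]) -> dict[str, Any]:
    # The deprecated `additional_special_tokens` key is folded into
    # `extra_special_tokens` unless the caller already provided the new key.
    if "additional_special_tokens" in kwargs and "extra_special_tokens" not in kwargs:
        kwargs["extra_special_tokens"] = kwargs.pop("additional_special_tokens")
    return kwargs


print(normalize_tokenizer_kwargs({"additional_special_tokens": ["<sep>", "<cls>"]}))
# -> {'extra_special_tokens': ['<sep>', '<cls>']}
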
@@ -1,5 +1,4 @@
1
1
  # base
2
- # coding=utf-8
3
2
  # Copyright 2020 The HuggingFace Inc. team.
4
3
  #
5
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -30,7 +29,7 @@ from collections import OrderedDict, UserDict
30
29
  from collections.abc import Callable, Collection, Mapping, Sequence, Sized
31
30
  from dataclasses import dataclass
32
31
  from pathlib import Path
33
- from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
32
+ from typing import TYPE_CHECKING, Any, NamedTuple, Union
34
33
 
35
34
  import numpy as np
36
35
  from huggingface_hub import create_repo, is_offline_mode, list_repo_files
@@ -219,11 +218,11 @@ class BatchEncoding(UserDict):
219
218
 
220
219
  def __init__(
221
220
  self,
222
- data: Optional[dict[str, Any]] = None,
223
- encoding: Optional[Union[EncodingFast, Sequence[EncodingFast]]] = None,
224
- tensor_type: Union[None, str, TensorType] = None,
221
+ data: dict[str, Any] | None = None,
222
+ encoding: EncodingFast | Sequence[EncodingFast] | None = None,
223
+ tensor_type: None | str | TensorType = None,
225
224
  prepend_batch_axis: bool = False,
226
- n_sequences: Optional[int] = None,
225
+ n_sequences: int | None = None,
227
226
  ):
228
227
  super().__init__(data)
229
228
 
@@ -241,7 +240,7 @@ class BatchEncoding(UserDict):
241
240
  self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
242
241
 
243
242
  @property
244
- def n_sequences(self) -> Optional[int]:
243
+ def n_sequences(self) -> int | None:
245
244
  """
246
245
  `Optional[int]`: The number of sequences used to generate each sample from the batch encoded in this
247
246
  [`BatchEncoding`]. Currently can be one of `None` (unknown), `1` (a single sentence) or `2` (a pair of
@@ -249,7 +248,7 @@ class BatchEncoding(UserDict):
249
248
  """
250
249
  return self._n_sequences
251
250
 
252
- def __getitem__(self, item: Union[int, str]) -> Union[Any, EncodingFast]:
251
+ def __getitem__(self, item: int | str) -> Any | EncodingFast:
253
252
  """
254
253
  If the key is a string, returns the value of the dict associated to `key` ('input_ids', 'attention_mask',
255
254
  etc.).
@@ -299,7 +298,7 @@ class BatchEncoding(UserDict):
299
298
  return self._encodings is not None
300
299
 
301
300
  @property
302
- def encodings(self) -> Optional[list[EncodingFast]]:
301
+ def encodings(self) -> list[EncodingFast] | None:
303
302
  """
304
303
  `Optional[list[tokenizers.Encoding]]`: The list all encodings from the tokenization process. Returns `None` if
305
304
  the input was tokenized through Python (i.e., not a fast) tokenizer.
@@ -324,7 +323,7 @@ class BatchEncoding(UserDict):
324
323
  )
325
324
  return self._encodings[batch_index].tokens
326
325
 
327
- def sequence_ids(self, batch_index: int = 0) -> list[Optional[int]]:
326
+ def sequence_ids(self, batch_index: int = 0) -> list[int | None]:
328
327
  """
329
328
  Return a list mapping the tokens to the id of their original sentences:
330
329
 
@@ -348,7 +347,7 @@ class BatchEncoding(UserDict):
348
347
  )
349
348
  return self._encodings[batch_index].sequence_ids
350
349
 
351
- def word_ids(self, batch_index: int = 0) -> list[Optional[int]]:
350
+ def word_ids(self, batch_index: int = 0) -> list[int | None]:
352
351
  """
353
352
  Return a list mapping the tokens to their actual word in the initial sentence for a fast tokenizer.
354
353
 
@@ -367,7 +366,7 @@ class BatchEncoding(UserDict):
367
366
  )
368
367
  return self._encodings[batch_index].word_ids
369
368
 
370
- def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
369
+ def token_to_sequence(self, batch_or_token_index: int, token_index: int | None = None) -> int:
371
370
  """
372
371
  Get the index of the sequence represented by the given token. In the general use case, this method returns `0`
373
372
  for a single sequence or the first sequence of a pair, and `1` for the second sequence of a pair
@@ -406,7 +405,7 @@ class BatchEncoding(UserDict):
406
405
  token_index = self._seq_len + token_index
407
406
  return self._encodings[batch_index].token_to_sequence(token_index)
408
407
 
409
- def token_to_word(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
408
+ def token_to_word(self, batch_or_token_index: int, token_index: int | None = None) -> int:
410
409
  """
411
410
  Get the index of the word corresponding (i.e. comprising) to an encoded token in a sequence of the batch.
412
411
 
@@ -445,8 +444,8 @@ class BatchEncoding(UserDict):
445
444
  return self._encodings[batch_index].token_to_word(token_index)
446
445
 
447
446
  def word_to_tokens(
448
- self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
449
- ) -> Optional[TokenSpan]:
447
+ self, batch_or_word_index: int, word_index: int | None = None, sequence_index: int = 0
448
+ ) -> TokenSpan | None:
450
449
  """
451
450
  Get the encoded token span corresponding to a word in a sequence of the batch.
452
451
 
@@ -497,7 +496,7 @@ class BatchEncoding(UserDict):
497
496
  span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
498
497
  return TokenSpan(*span) if span is not None else None
499
498
 
500
- def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> Optional[CharSpan]:
499
+ def token_to_chars(self, batch_or_token_index: int, token_index: int | None = None) -> CharSpan | None:
501
500
  """
502
501
  Get the character span corresponding to an encoded token in a sequence of the batch.
503
502
 
@@ -536,9 +535,7 @@ class BatchEncoding(UserDict):
536
535
 
537
536
  return CharSpan(*span_indices) if span_indices is not None else None
538
537
 
539
- def char_to_token(
540
- self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0
541
- ) -> int:
538
+ def char_to_token(self, batch_or_char_index: int, char_index: int | None = None, sequence_index: int = 0) -> int:
542
539
  """
543
540
  Get the index of the token in the encoded output comprising a character in the original string for a sequence
544
541
  of the batch.
@@ -579,7 +576,7 @@ class BatchEncoding(UserDict):
579
576
  return self._encodings[batch_index].char_to_token(char_index, sequence_index)
580
577
 
581
578
  def word_to_chars(
582
- self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
579
+ self, batch_or_word_index: int, word_index: int | None = None, sequence_index: int = 0
583
580
  ) -> CharSpan:
584
581
  """
585
582
  Get the character span in the original string corresponding to a given word in a sequence of the batch.
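
A hedged sketch of the character/token mapping helpers touched above (`token_to_chars`, `char_to_token`, `word_to_chars`), again assuming a fast tokenizer and an illustrative checkpoint:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint
text = "Tokenizers map characters to tokens."
enc = tok(text)

span = enc.token_to_chars(1)        # CharSpan for the first non-special token
print(text[span.start:span.end])
print(enc.char_to_token(0))         # index of the token covering character 0
print(enc.word_to_chars(0))         # CharSpan covering the first word
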
@@ -623,7 +620,7 @@ class BatchEncoding(UserDict):
623
620
  word_index = batch_or_word_index
624
621
  return CharSpan(*(self._encodings[batch_index].word_to_chars(word_index, sequence_index)))
625
622
 
626
- def char_to_word(self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0) -> int:
623
+ def char_to_word(self, batch_or_char_index: int, char_index: int | None = None, sequence_index: int = 0) -> int:
627
624
  """
628
625
  Get the word in the original string corresponding to a character in the original string of a sequence of the
629
626
  batch.
@@ -662,9 +659,7 @@ class BatchEncoding(UserDict):
662
659
  char_index = batch_or_char_index
663
660
  return self._encodings[batch_index].char_to_word(char_index, sequence_index)
664
661
 
665
- def convert_to_tensors(
666
- self, tensor_type: Optional[Union[str, TensorType]] = None, prepend_batch_axis: bool = False
667
- ):
662
+ def convert_to_tensors(self, tensor_type: str | TensorType | None = None, prepend_batch_axis: bool = False):
668
663
  """
669
664
  Convert the inner content to tensors.
670
665
 
@@ -758,7 +753,7 @@ class BatchEncoding(UserDict):
758
753
 
759
754
  return self
760
755
 
761
- def to(self, device: Union[str, torch.device], *, non_blocking: bool = False) -> BatchEncoding:
756
+ def to(self, device: str | torch.device, *, non_blocking: bool = False) -> BatchEncoding:
762
757
  """
763
758
  Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only).
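A short sketch combining `convert_to_tensors` with the keyword-only `non_blocking` flag of `to` (PyTorch assumed installed; the checkpoint name is illustrative):

import torch
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint
enc = tok(["a short example", "a somewhat longer example"], padding=True)

enc = enc.convert_to_tensors("pt")            # converts in place and returns self
if torch.cuda.is_available():
    enc = enc.to("cuda", non_blocking=True)   # `non_blocking` must be passed by keyword
print(enc["input_ids"].shape)
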
764
759
 
@@ -968,7 +963,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
968
963
 
969
964
  vocab_files_names: dict[str, str] = {}
970
965
  pretrained_vocab_files_map: dict[str, dict[str, str]] = {}
971
- _auto_class: Optional[str] = None
966
+ _auto_class: str | None = None
972
967
 
973
968
  # first name has to correspond to main model input name
974
969
  # to make sure `tokenizer.pad(...)` works correctly
@@ -995,14 +990,13 @@ class PreTrainedTokenizerBase(PushToHubMixin):
995
990
  if hasattr(self, key) and callable(getattr(self, key)):
996
991
  raise AttributeError(f"{key} conflicts with the method {key} in {self.__class__.__name__}")
997
992
 
993
+ # V5: Convert deprecated additional_special_tokens to extra_special_tokens before storing init_kwargs
994
+ if "additional_special_tokens" in kwargs and "extra_special_tokens" not in kwargs:
995
+ kwargs["extra_special_tokens"] = kwargs.pop("additional_special_tokens")
996
+
998
997
  self.init_kwargs = copy.deepcopy(kwargs)
999
998
  self.name_or_path = kwargs.pop("name_or_path", "")
1000
999
  self._processor_class = kwargs.pop("processor_class", None)
1001
- # Store additional_special_tokens in init_kwargs before conversion for backward compatibility
1002
- additional_special_tokens_value = kwargs.pop("additional_special_tokens", None)
1003
- if "additional_special_tokens" not in self.init_kwargs:
1004
- self.init_kwargs["additional_special_tokens"] = additional_special_tokens_value
1005
- kwargs.setdefault("extra_special_tokens", additional_special_tokens_value)
1006
1000
 
1007
1001
  self._pad_token_type_id = 0
1008
1002
  self.verbose = kwargs.pop("verbose", False)
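
The constructor above reroutes the deprecated `additional_special_tokens` kwarg onto `extra_special_tokens` before `init_kwargs` is stored. A hedged sketch of how callers might rely on that, assuming the v5 behaviour shown in this hunk and an illustrative checkpoint:

from transformers import AutoTokenizer

# v5 spelling
tok = AutoTokenizer.from_pretrained("bert-base-uncased", extra_special_tokens=["<ctrl>", "<sep2>"])

# legacy spelling, silently mapped onto extra_special_tokens by __init__
legacy = AutoTokenizer.from_pretrained("bert-base-uncased", additional_special_tokens=["<ctrl>"])

print(tok.extra_special_tokens)      # stringified extra tokens stored on the tokenizer
print(tok.extra_special_tokens_ids)  # resolved through convert_tokens_to_ids
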
@@ -1030,21 +1024,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1030
1024
  else:
1031
1025
  raise TypeError(f"Special token {key} has to be either str or AddedToken but got: {type(value)}")
1032
1026
  elif key == "extra_special_tokens":
1033
- # V5: Support extra_special_tokens in __init__
1034
1027
  value = kwargs.pop(key)
1035
1028
  if value is None:
1036
1029
  continue
1037
- # If dict: treat as model specific named special tokens (attributes)
1038
1030
  if isinstance(value, dict):
1039
1031
  self._set_model_specific_special_tokens(special_tokens=value)
1040
- else:
1041
- if not isinstance(value, (list, tuple)) or not all(
1042
- isinstance(t, (str, AddedToken)) for t in value
1043
- ):
1044
- raise TypeError(
1045
- "extra_special_tokens must be a list/tuple of str or AddedToken, or a dict mapping names to tokens"
1046
- )
1032
+ elif isinstance(value, (list, tuple)):
1047
1033
  self._extra_special_tokens = list(value)
1034
+ else:
1035
+ raise TypeError("extra_special_tokens must be a list/tuple of tokens, or a dict mapping names to tokens")
1048
1036
  elif (
1049
1037
  key.endswith("_token")
1050
1038
  and key not in self.SPECIAL_TOKENS_ATTRIBUTES
@@ -1104,7 +1092,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1104
1092
  # ---- Special tokens API (moved from SpecialTokensMixin) ----
1105
1093
  def add_special_tokens(
1106
1094
  self,
1107
- special_tokens_dict: dict[str, Union[str, AddedToken, Sequence[Union[str, AddedToken]]]],
1095
+ special_tokens_dict: dict[str, str | AddedToken | Sequence[str | AddedToken]],
1108
1096
  replace_extra_special_tokens=True,
1109
1097
  ) -> int:
1110
1098
  """
@@ -1168,8 +1156,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1168
1156
  # V5: Allowed keys are SPECIAL_TOKENS_ATTRIBUTES + "extra_special_tokens"
1169
1157
  # Backward compatibility: convert "additional_special_tokens" to "extra_special_tokens"
1170
1158
  special_tokens_dict = dict(special_tokens_dict)
1171
- if "additional_special_tokens" in special_tokens_dict and "extra_special_tokens" not in special_tokens_dict:
1172
- special_tokens_dict["extra_special_tokens"] = special_tokens_dict.pop("additional_special_tokens")
1159
+ if "additional_special_tokens" in special_tokens_dict:
1160
+ special_tokens_dict.setdefault(
1161
+ "extra_special_tokens", special_tokens_dict.pop("additional_special_tokens")
1162
+ )
1173
1163
 
1174
1164
  allowed_keys = set(self.SPECIAL_TOKENS_ATTRIBUTES) | {"extra_special_tokens"}
1175
1165
  tokens_to_add = []
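
A minimal sketch of `add_special_tokens` with the v5 `extra_special_tokens` key; the legacy `additional_special_tokens` key is still accepted via the `setdefault` shim above (checkpoint and token strings are illustrative):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # illustrative checkpoint

num_added = tok.add_special_tokens(
    {
        "pad_token": "<pad>",
        "extra_special_tokens": ["<ctx>", "<endofctx>"],
    }
)
print(num_added)                 # tokens actually added to the vocabulary
print(tok.extra_special_tokens)  # ['<ctx>', '<endofctx>'] if nothing was set before

# Older call sites keep working; the key is rerouted to extra_special_tokens:
tok.add_special_tokens({"additional_special_tokens": ["<legacy>"]})
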
@@ -1208,7 +1198,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1208
1198
  return self.add_tokens(tokens_to_add, special_tokens=True)
1209
1199
 
1210
1200
  def add_tokens(
1211
- self, new_tokens: Union[str, AddedToken, Sequence[Union[str, AddedToken]]], special_tokens: bool = False
1201
+ self, new_tokens: str | AddedToken | Sequence[str | AddedToken], special_tokens: bool = False
1212
1202
  ) -> int:
1213
1203
  """
1214
1204
  # TODO: remove this from here! PreTrainedTokenizerBase should be agnostic of AddedToken.
@@ -1248,7 +1238,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1248
1238
  new_tokens = [new_tokens]
1249
1239
  return self._add_tokens(new_tokens, special_tokens=special_tokens)
1250
1240
 
1251
- def _add_tokens(self, new_tokens: Union[list[str], list[AddedToken]], special_tokens: bool = False) -> int:
1241
+ def _add_tokens(self, new_tokens: list[str] | list[AddedToken], special_tokens: bool = False) -> int:
1252
1242
  raise NotImplementedError
1253
1243
 
1254
1244
  @property
@@ -1256,84 +1246,53 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1256
1246
  return self._pad_token_type_id
1257
1247
 
1258
1248
  def __setattr__(self, key, value):
1259
- key_without_id = key
1260
- key_is_special_id = key.endswith("_id") or key.endswith("_ids")
1261
- if key_is_special_id:
1262
- key_without_id = key[:-3] if not key.endswith("_ids") else key[:-4]
1249
+ # Handle _id/_ids suffix (eg. bos_token_id -> bos_token)
1250
+ key_without_id = key.removesuffix("_ids").removesuffix("_id") if key.endswith(("_id", "_ids")) else key
1251
+
1252
+ # Named special tokens (bos_token, eos_token, etc.)
1253
+ if key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES:
1254
+ if key != key_without_id and value is not None:
1255
+ value = self.convert_ids_to_tokens(value)
1256
+ if value is not None and not isinstance(value, (str, AddedToken)):
1257
+ raise ValueError(f"Cannot set a non-string value as the {key_without_id}")
1258
+ self._special_tokens_map[key_without_id] = value
1259
+ return
1263
1260
 
1264
- # Check if this is a named special token
1265
- if (
1266
- self.__dict__.get("_special_tokens_map", None) is not None
1267
- and key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES
1268
- ):
1269
- if key_is_special_id:
1270
- if value is not None:
1271
- value = self.convert_ids_to_tokens(value)
1272
- key = key_without_id
1273
-
1274
- if not isinstance(value, (str, AddedToken)) and value is not None:
1275
- raise ValueError(f"Cannot set a non-string value as the {key}")
1276
- self._special_tokens_map[key] = value
1277
- # Check if this is extra_special_tokens or extra_special_tokens_ids
1278
- elif self.__dict__.get("_extra_special_tokens", None) is not None and key_without_id == "extra_special_tokens":
1279
- if key_is_special_id:
1280
- if value is not None:
1281
- value = [self.convert_ids_to_tokens(val) for val in value]
1282
- key = key_without_id
1261
+ # Extra special tokens: model-specific special tokens without standard names (eg. <mask_1>)
1262
+ if key_without_id == "extra_special_tokens":
1263
+ if key != key_without_id and value is not None and isinstance(value, (list, tuple)):
1264
+ value = [self.convert_ids_to_tokens(v) for v in value]
1265
+ if not isinstance(value, (list, tuple)) and value is not None:
1266
+ raise ValueError(f"extra_special_tokens must be a list or tuple, got {type(value)}")
1267
+ self._extra_special_tokens = [] if value is None else list(value)
1268
+ return
1283
1269
 
1284
- if key == "extra_special_tokens":
1285
- if value is None:
1286
- self._extra_special_tokens = []
1287
- elif isinstance(value, dict):
1288
- # Dict is treated as model-specific special tokens (such as multimodal tokens)
1289
- self._set_model_specific_special_tokens(special_tokens=value)
1290
- elif isinstance(value, (list, tuple)):
1291
- self._extra_special_tokens = list(value)
1292
- else:
1293
- raise ValueError(f"extra_special_tokens must be a list, tuple, or dict, got {type(value)}")
1294
- else:
1295
- super().__setattr__(key, value)
1270
+ super().__setattr__(key, value)
1296
1271
 
1297
1272
  def __getattr__(self, key):
1298
- key_without_id = key
1299
- key_is_special_id = key.endswith("_id") or key.endswith("_ids")
1300
- if key_is_special_id:
1301
- key_without_id = key[:-3] if not key.endswith("_ids") else key[:-4]
1302
-
1303
- # Check if this is a named special token
1304
- if (
1305
- self.__dict__.get("_special_tokens_map", None) is not None
1306
- and key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES
1307
- ):
1308
- _special_tokens_map = self.__dict__["_special_tokens_map"]
1309
- if not key_is_special_id:
1310
- if _special_tokens_map[key_without_id] is None:
1311
- if self.verbose:
1312
- logger.error(f"Using {key}, but it is not set yet.")
1313
- return None
1314
- value = _special_tokens_map[key_without_id]
1315
- return str(value)
1316
- else:
1317
- attr_as_tokens = getattr(self, key_without_id)
1318
- return self.convert_tokens_to_ids(attr_as_tokens) if attr_as_tokens is not None else None
1319
-
1320
- # Check if this is extra_special_tokens or extra_special_tokens_ids
1321
- elif key_without_id == "extra_special_tokens":
1322
- if self.__dict__.get("_extra_special_tokens", None) is not None:
1323
- if not key_is_special_id:
1324
- return [str(tok) for tok in self.__dict__["_extra_special_tokens"]]
1325
- else:
1326
- # extra_special_tokens_ids
1327
- tokens = self.__dict__["_extra_special_tokens"]
1328
- return self.convert_tokens_to_ids([str(tok) for tok in tokens]) if tokens else []
1273
+ # Handle _id/_ids suffix (e.g. bos_token_id -> bos_token)
1274
+ key_without_id = key.removesuffix("_ids").removesuffix("_id") if key.endswith(("_id", "_ids")) else key
1275
+
1276
+ # Named special tokens (bos_token, eos_token, etc.)
1277
+ if key_without_id in self.SPECIAL_TOKENS_ATTRIBUTES:
1278
+ token_value = self._special_tokens_map.get(key_without_id)
1279
+ if token_value is None:
1280
+ if self.verbose:
1281
+ logger.error(f"Using {key}, but it is not set yet.")
1282
+ return None
1283
+ return self.convert_tokens_to_ids(str(token_value)) if key != key_without_id else str(token_value)
1284
+
1285
+ # Extra special tokens
1286
+ if key_without_id == "extra_special_tokens":
1287
+ tokens = [str(tok) for tok in self._extra_special_tokens]
1288
+ return self.convert_tokens_to_ids(tokens) if key != key_without_id else tokens
1329
1289
 
1330
1290
  if key not in self.__dict__:
1331
1291
  raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
1332
- else:
1333
- return super().__getattr__(key)
1292
+ return super().__getattr__(key)
1334
1293
 
1335
1294
  def get_special_tokens_mask(
1336
- self, token_ids_0: list[int], token_ids_1: Optional[list[int]] = None, already_has_special_tokens: bool = False
1295
+ self, token_ids_0: list[int], token_ids_1: list[int] | None = None, already_has_special_tokens: bool = False
1337
1296
  ) -> list[int]:
1338
1297
  """
1339
1298
  Retrieve sequence ids from a token list that has no special tokens added.
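
The rewritten `__setattr__`/`__getattr__` above route `*_token` and `*_token_id`/`*_ids` attribute access through `_special_tokens_map` and `_extra_special_tokens`. A hedged sketch of the resulting surface behaviour (illustrative checkpoint; exact values depend on the loaded vocabulary):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint

print(tok.cls_token, tok.cls_token_id)  # string form and id form of the same entry

tok.eos_token = "[EOS]"                 # stored in _special_tokens_map (not added to the vocab)
tok.pad_token_id = tok.cls_token_id     # ids are converted back to tokens before being stored

print(tok.extra_special_tokens)         # [] unless extra tokens were configured
print(tok.extra_special_tokens_ids)
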
@@ -1422,7 +1381,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1422
1381
  """
1423
1382
  return self.convert_tokens_to_ids(self.all_special_tokens)
1424
1383
 
1425
- def _set_model_specific_special_tokens(self, special_tokens: dict[str, Union[str, AddedToken]]):
1384
+ def _set_model_specific_special_tokens(self, special_tokens: dict[str, str | AddedToken]):
1426
1385
  """
1427
1386
  Adds new model-specific special tokens (e.g., for multimodal models).
1428
1387
 
@@ -1475,7 +1434,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1475
1434
  """
1476
1435
  raise NotImplementedError()
1477
1436
 
1478
- def convert_tokens_to_ids(self, tokens: Union[str, list[str]]) -> Union[int, list[int]]:
1437
+ def convert_tokens_to_ids(self, tokens: str | list[str]) -> int | list[int]:
1479
1438
  """
1480
1439
  Converts a token string (or a sequence of tokens) into a single integer id (or a sequence of ids), using the
1481
1440
  vocabulary.
@@ -1491,9 +1450,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1491
1450
 
1492
1451
  return [self._convert_token_to_id_with_added_voc(token) for token in tokens]
1493
1452
 
1494
- def convert_ids_to_tokens(
1495
- self, ids: Union[int, list[int]], skip_special_tokens: bool = False
1496
- ) -> Union[str, list[str]]:
1453
+ def convert_ids_to_tokens(self, ids: int | list[int], skip_special_tokens: bool = False) -> str | list[str]:
1497
1454
  """
1498
1455
  Converts a single index or a sequence of indices into a token or a sequence of tokens, using the vocabulary and
1499
1456
  added tokens.
@@ -1512,12 +1469,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1512
1469
  @classmethod
1513
1470
  def from_pretrained(
1514
1471
  cls,
1515
- pretrained_model_name_or_path: Union[str, os.PathLike],
1472
+ pretrained_model_name_or_path: str | os.PathLike,
1516
1473
  *init_inputs,
1517
- cache_dir: Optional[Union[str, os.PathLike]] = None,
1474
+ cache_dir: str | os.PathLike | None = None,
1518
1475
  force_download: bool = False,
1519
1476
  local_files_only: bool = False,
1520
- token: Optional[Union[str, bool]] = None,
1477
+ token: str | bool | None = None,
1521
1478
  revision: str = "main",
1522
1479
  trust_remote_code=False,
1523
1480
  **kwargs,
@@ -1614,6 +1571,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1614
1571
 
1615
1572
  pretrained_model_name_or_path = str(pretrained_model_name_or_path)
1616
1573
  vocab_files = {}
1574
+ additional_files_names = {}
1617
1575
  init_configuration = {}
1618
1576
 
1619
1577
  is_local = os.path.isdir(pretrained_model_name_or_path)
@@ -1655,29 +1613,26 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1655
1613
  # Check for versioned tokenizer files
1656
1614
  if "tokenizer_file" in vocab_files:
1657
1615
  fast_tokenizer_file = FULL_TOKENIZER_FILE
1658
- try:
1659
- resolved_config_file = cached_file(
1660
- pretrained_model_name_or_path,
1661
- TOKENIZER_CONFIG_FILE,
1662
- cache_dir=cache_dir,
1663
- force_download=force_download,
1664
- proxies=proxies,
1665
- token=token,
1666
- revision=revision,
1667
- local_files_only=local_files_only,
1668
- subfolder=subfolder,
1669
- user_agent=user_agent,
1670
- _raise_exceptions_for_missing_entries=False,
1671
- _commit_hash=commit_hash,
1672
- )
1673
- if resolved_config_file is not None:
1674
- with open(resolved_config_file, encoding="utf-8") as reader:
1675
- tokenizer_config = json.load(reader)
1676
- if "fast_tokenizer_files" in tokenizer_config:
1677
- fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
1678
- commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
1679
- except Exception:
1680
- pass
1616
+ resolved_config_file = cached_file(
1617
+ pretrained_model_name_or_path,
1618
+ TOKENIZER_CONFIG_FILE,
1619
+ cache_dir=cache_dir,
1620
+ force_download=force_download,
1621
+ proxies=proxies,
1622
+ token=token,
1623
+ revision=revision,
1624
+ local_files_only=local_files_only,
1625
+ subfolder=subfolder,
1626
+ user_agent=user_agent,
1627
+ _raise_exceptions_for_missing_entries=False,
1628
+ _commit_hash=commit_hash,
1629
+ )
1630
+ if resolved_config_file is not None:
1631
+ with open(resolved_config_file, encoding="utf-8") as reader:
1632
+ tokenizer_config = json.load(reader)
1633
+ if "fast_tokenizer_files" in tokenizer_config:
1634
+ fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
1635
+ commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
1681
1636
  vocab_files["tokenizer_file"] = fast_tokenizer_file
1682
1637
 
1683
1638
  # This block looks for any extra chat template files
@@ -1826,52 +1781,25 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1826
1781
  if isinstance(init_kwargs["auto_map"], (tuple, list)):
1827
1782
  init_kwargs["auto_map"] = {"AutoTokenizer": init_kwargs["auto_map"]}
1828
1783
 
1829
- # Preserve extra_special_tokens from tokenizer_config.json before updating with kwargs
1830
- # extra_special_tokens should be a list (user-defined extra tokens)
1831
- extra_special_tokens_from_config = init_kwargs.get("extra_special_tokens")
1832
- if isinstance(extra_special_tokens_from_config, (list, tuple)):
1833
- extra_special_tokens_from_config = list(extra_special_tokens_from_config)
1834
- else:
1835
- extra_special_tokens_from_config = None
1836
-
1837
1784
  # Update with newly provided kwargs
1838
1785
  init_kwargs.update(kwargs)
1839
1786
 
1840
- # V5: Backward compatibility - convert old "additional_special_tokens" to "extra_special_tokens"
1841
- if "additional_special_tokens" in init_kwargs and "extra_special_tokens" not in init_kwargs:
1842
- init_kwargs["extra_special_tokens"] = init_kwargs.pop("additional_special_tokens")
1843
- # Restore extra_special_tokens from config if kwargs overwrote it or it's missing
1844
- elif extra_special_tokens_from_config is not None:
1845
- if "extra_special_tokens" not in init_kwargs or not isinstance(
1846
- init_kwargs.get("extra_special_tokens"), (list, tuple)
1847
- ):
1848
- init_kwargs["extra_special_tokens"] = extra_special_tokens_from_config
1849
-
1850
- # V5: Get model-specific special tokens from config (saved as individual keys in special_tokens_map)
1851
- # These need to be grouped as extra_special_tokens dict so __init__ can save them to attributes
1852
- if "extra_special_tokens" not in init_kwargs or not isinstance(init_kwargs.get("extra_special_tokens"), dict):
1853
- default_attrs = set(cls.SPECIAL_TOKENS_ATTRIBUTES)
1854
- model_specific_tokens = {
1855
- key: init_kwargs.pop(key)
1856
- for key in list(init_kwargs.keys())
1857
- if key not in default_attrs
1858
- and key.endswith("_token")
1859
- and isinstance(init_kwargs[key], (str, AddedToken))
1860
- }
1861
- if model_specific_tokens:
1862
- # If extra_special_tokens is already a list, we need to preserve it
1863
- if "extra_special_tokens" in init_kwargs and isinstance(
1864
- init_kwargs["extra_special_tokens"], (list, tuple)
1865
- ):
1866
- # Keep the list as is, but also add model-specific tokens as a separate dict
1867
- # Convert to model_specific_special_tokens so __init__ handles it
1868
- init_kwargs["model_specific_special_tokens"] = model_specific_tokens
1869
- else:
1870
- init_kwargs["extra_special_tokens"] = model_specific_tokens
1871
- elif isinstance(init_kwargs.get("extra_special_tokens"), dict):
1872
- # If extra_special_tokens is already a dict, convert it to model_specific_special_tokens
1873
- # so __init__ handles it properly
1874
- init_kwargs["model_specific_special_tokens"] = init_kwargs.pop("extra_special_tokens")
1787
+ # V5: Convert deprecated additional_special_tokens to extra_special_tokens
1788
+ if "additional_special_tokens" in init_kwargs:
1789
+ init_kwargs.setdefault("extra_special_tokens", init_kwargs.pop("additional_special_tokens"))
1790
+
1791
+ # V5: Collect model-specific tokens (custom *_token keys not in standard attributes)
1792
+ default_attrs = set(cls.SPECIAL_TOKENS_ATTRIBUTES)
1793
+ model_specific_tokens = {
1794
+ key: init_kwargs.pop(key)
1795
+ for key in list(init_kwargs.keys())
1796
+ if key not in default_attrs and key.endswith("_token") and isinstance(init_kwargs[key], (str, AddedToken))
1797
+ }
1798
+ # If extra_special_tokens is a dict, merge it into model_specific_tokens
1799
+ if isinstance(init_kwargs.get("extra_special_tokens"), dict):
1800
+ model_specific_tokens.update(init_kwargs.pop("extra_special_tokens"))
1801
+ if model_specific_tokens:
1802
+ init_kwargs["model_specific_special_tokens"] = model_specific_tokens
1875
1803
 
1876
1804
  # Merge resolved_vocab_files arguments in init_kwargs.
1877
1805
  added_tokens_file = resolved_vocab_files.pop("added_tokens_file", None)
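
A rough, standalone re-statement of the collection logic above: custom `*_token` keys and a dict-valued `extra_special_tokens` are folded into one `model_specific_special_tokens` mapping, while a list-valued `extra_special_tokens` is left untouched (all names below are made up for illustration):

def split_model_specific_tokens(init_kwargs: dict, standard_attrs: set) -> dict:
    # pull custom *_token entries out of init_kwargs
    model_specific = {
        key: init_kwargs.pop(key)
        for key in list(init_kwargs)
        if key not in standard_attrs and key.endswith("_token") and isinstance(init_kwargs[key], str)
    }
    # a dict-valued extra_special_tokens is merged into the same mapping
    if isinstance(init_kwargs.get("extra_special_tokens"), dict):
        model_specific.update(init_kwargs.pop("extra_special_tokens"))
    return model_specific

kwargs = {"bos_token": "<s>", "image_token": "<image>", "extra_special_tokens": {"boi_token": "<boi>"}}
print(split_model_specific_tokens(kwargs, {"bos_token", "eos_token"}))
# {'image_token': '<image>', 'boi_token': '<boi>'}; kwargs now only keeps 'bos_token'
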
@@ -1900,82 +1828,45 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1900
1828
  f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
1901
1829
  )
1902
1830
  else:
1903
- # begin legacy: read the added_tokens_file and update kwargs with special_tokens_map if modified
1831
+ # Legacy: read special_tokens_map.json and merge into init_kwargs
1904
1832
  if special_tokens_map_file is not None:
1905
- with open(special_tokens_map_file, encoding="utf-8") as special_tokens_map_handle:
1906
- special_tokens_map = json.load(special_tokens_map_handle)
1907
- # Preserve extra_special_tokens from tokenizer_config.json before processing special_tokens_map
1908
- extra_special_tokens_before_map = init_kwargs.get("extra_special_tokens")
1909
- if isinstance(extra_special_tokens_before_map, (list, tuple)):
1910
- extra_special_tokens_before_map = list(extra_special_tokens_before_map)
1911
- else:
1912
- extra_special_tokens_before_map = None
1913
-
1914
- for key, value in special_tokens_map.items():
1915
- if key in kwargs and kwargs[key]:
1916
- # This value has already been redefined by the kwargs
1917
- # We keep this new value and ignore the one stored in the special_tokens_map_file
1918
- continue
1919
- # V5: Convert dict-format tokens to AddedToken
1920
- if isinstance(value, dict):
1921
- value["special"] = True
1922
- value = AddedToken(**value)
1923
- elif key == "extra_special_tokens":
1924
- # Handle extra_special_tokens from special_tokens_map.json
1925
- if isinstance(value, dict):
1926
- # Dict format for model-specific tokens - keep as is
1927
- init_kwargs[key] = value
1928
- continue
1929
- elif isinstance(value, list):
1930
- # List format - merge with existing if present
1931
- existing = init_kwargs.pop("extra_special_tokens", []) or []
1932
- if not isinstance(existing, (list, tuple)):
1933
- existing = []
1934
- for token in value:
1935
- if isinstance(token, dict):
1936
- token = AddedToken(**token, special=True)
1937
- if token not in existing:
1938
- existing.append(token)
1939
- init_kwargs[key] = existing
1940
- continue
1941
- init_kwargs[key] = value
1942
-
1943
- # Restore extra_special_tokens from tokenizer_config.json if not in special_tokens_map.json
1944
- if (
1945
- "extra_special_tokens" not in special_tokens_map
1946
- and extra_special_tokens_before_map is not None
1947
- ):
1948
- if "extra_special_tokens" not in init_kwargs or not isinstance(
1949
- init_kwargs.get("extra_special_tokens"), (list, tuple)
1950
- ):
1951
- init_kwargs["extra_special_tokens"] = extra_special_tokens_before_map
1952
-
1953
- # Convert extra_special_tokens dict to model_specific_special_tokens if it's a dict
1954
- if isinstance(init_kwargs.get("extra_special_tokens"), dict):
1955
- init_kwargs["model_specific_special_tokens"] = init_kwargs.pop("extra_special_tokens")
1833
+ with open(special_tokens_map_file, encoding="utf-8") as f:
1834
+ special_tokens_map = json.load(f)
1835
+ for key, value in special_tokens_map.items():
1836
+ if key in kwargs and kwargs[key]:
1837
+ continue # User-provided kwargs take precedence
1838
+ if isinstance(value, dict) and key != "extra_special_tokens":
1839
+ value = AddedToken(**value, special=True)
1840
+ elif key == "extra_special_tokens" and isinstance(value, list):
1841
+ # Merge list tokens, converting dicts to AddedToken
1842
+ existing = list(init_kwargs.get("extra_special_tokens") or [])
1843
+ for tok in value:
1844
+ tok = AddedToken(**tok, special=True) if isinstance(tok, dict) else tok
1845
+ if tok not in existing:
1846
+ existing.append(tok)
1847
+ value = existing
1848
+ init_kwargs[key] = value
1849
+ # Convert dict extra_special_tokens to model_specific_special_tokens
1850
+ if isinstance(init_kwargs.get("extra_special_tokens"), dict):
1851
+ init_kwargs.setdefault("model_specific_special_tokens", {}).update(
1852
+ init_kwargs.pop("extra_special_tokens")
1853
+ )
1956
1854
 
1957
1855
  # slow -> slow|fast, legacy: convert the `"added_tokens.json"` file to `added_tokens_decoder`.
1958
1856
  # this is for legacy purpose. We don't add the tokens after init for efficiency.
1959
1857
  if added_tokens_file is not None:
1960
- special_tokens = []
1961
1858
  # V5: Check both named and extra special tokens
1962
- for key in cls.SPECIAL_TOKENS_ATTRIBUTES:
1963
- if key in init_kwargs and init_kwargs[key] is not None:
1964
- special_tokens.append(str(init_kwargs[key]))
1965
-
1966
- # Handle extra_special_tokens
1967
- if "extra_special_tokens" in init_kwargs and init_kwargs["extra_special_tokens"] is not None:
1968
- special_tokens += [str(token) for token in init_kwargs["extra_special_tokens"]]
1859
+ special_tokens = {str(init_kwargs[k]) for k in cls.SPECIAL_TOKENS_ATTRIBUTES if init_kwargs.get(k)}
1860
+ special_tokens.update(str(t) for t in (init_kwargs.get("extra_special_tokens") or []))
1969
1861
 
1970
- with open(added_tokens_file, encoding="utf-8") as added_tokens_handle:
1971
- added_tok_encoder = json.load(added_tokens_handle)
1862
+ with open(added_tokens_file, encoding="utf-8") as f:
1863
+ added_tok_encoder = json.load(f)
1972
1864
  for str_token, index in added_tok_encoder.items():
1973
- # if index not in added_tokens_decoder and str_token not in added_tokens_map:
1974
- special = str_token in special_tokens
1865
+ is_special = str_token in special_tokens
1975
1866
  added_tokens_decoder[index] = AddedToken(
1976
- str_token, rstrip=False, lstrip=False, normalized=not special, special=special
1867
+ str_token, rstrip=False, lstrip=False, normalized=not is_special, special=is_special
1977
1868
  )
1978
- added_tokens_map[str(token)] = added_tokens_decoder[index]
1869
+ added_tokens_map[str_token] = added_tokens_decoder[index]
1979
1870
 
1980
1871
  # allows converting a fast -> slow: add the `tokenizer.json`'s `"added_tokens"` to the slow tokenizer
1981
1872
  # if `tokenizer_config.json` is `None`
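
A self-contained sketch of the legacy `added_tokens.json` path above: entries whose string form appears in the collected special-token set are marked `special` and left unnormalized (the data below is invented for illustration):

from transformers import AddedToken

added_tok_encoder = {"<ctrl>": 30522, "lowercase_me": 30523}  # token -> index, as read from added_tokens.json
special_tokens = {"<ctrl>"}                                    # union of named and extra special tokens

added_tokens_decoder = {}
for str_token, index in added_tok_encoder.items():
    is_special = str_token in special_tokens
    added_tokens_decoder[index] = AddedToken(
        str_token, rstrip=False, lstrip=False, normalized=not is_special, special=is_special
    )
print(added_tokens_decoder)
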
@@ -2032,7 +1923,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2032
1923
  return kwargs
2033
1924
 
2034
1925
  @classmethod
2035
- def convert_added_tokens(cls, obj: Union[AddedToken, Any], save=False, add_type_field=True):
1926
+ def convert_added_tokens(cls, obj: AddedToken | Any, save=False, add_type_field=True):
2036
1927
  if isinstance(obj, dict) and "__type" in obj and obj["__type"] == "AddedToken":
2037
1928
  obj.pop("__type")
2038
1929
  return AddedToken(**obj)
@@ -2052,9 +1943,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2052
1943
 
2053
1944
  def save_pretrained(
2054
1945
  self,
2055
- save_directory: Union[str, os.PathLike],
2056
- legacy_format: Optional[bool] = None,
2057
- filename_prefix: Optional[str] = None,
1946
+ save_directory: str | os.PathLike,
1947
+ legacy_format: bool | None = None,
1948
+ filename_prefix: str | None = None,
2058
1949
  push_to_hub: bool = False,
2059
1950
  **kwargs,
2060
1951
  ) -> tuple[str, ...]:
@@ -2210,10 +2101,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2210
2101
 
2211
2102
  def _save_pretrained(
2212
2103
  self,
2213
- save_directory: Union[str, os.PathLike],
2104
+ save_directory: str | os.PathLike,
2214
2105
  file_names: tuple[str, ...],
2215
- legacy_format: Optional[bool] = None,
2216
- filename_prefix: Optional[str] = None,
2106
+ legacy_format: bool | None = None,
2107
+ filename_prefix: str | None = None,
2217
2108
  ) -> tuple[str, ...]:
2218
2109
  """
2219
2110
  Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens.
@@ -2243,7 +2134,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2243
2134
 
2244
2135
  return file_names + vocab_files + (added_tokens_file,)
2245
2136
 
2246
- def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str, ...]:
2137
+ def save_vocabulary(self, save_directory: str, filename_prefix: str | None = None) -> tuple[str, ...]:
2247
2138
  """
2248
2139
  Save only the vocabulary of the tokenizer (vocabulary + added tokens).
2249
2140
 
@@ -2261,7 +2152,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2261
2152
  """
2262
2153
  raise NotImplementedError
2263
2154
 
2264
- def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
2155
+ def tokenize(self, text: str, pair: str | None = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
2265
2156
  """
2266
2157
  Converts a string into a sequence of tokens, replacing unknown tokens with the `unk_token`.
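A short usage sketch for `tokenize` and the updated `encode` signature (illustrative checkpoint; the keyword arguments shown all appear in the signatures above):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint

print(tok.tokenize("unbelievable"))  # subword pieces, no special tokens by default

ids = tok.encode(
    "first sentence",
    text_pair="second sentence",
    padding="max_length",
    truncation=True,
    max_length=16,
)
print(ids)  # plain list[int]; special tokens included since add_special_tokens defaults to True
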
2267
2158
 
@@ -2293,15 +2184,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2293
2184
  )
2294
2185
  def encode(
2295
2186
  self,
2296
- text: Union[TextInput, PreTokenizedInput, EncodedInput],
2297
- text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
2187
+ text: TextInput | PreTokenizedInput | EncodedInput,
2188
+ text_pair: TextInput | PreTokenizedInput | EncodedInput | None = None,
2298
2189
  add_special_tokens: bool = True,
2299
- padding: Union[bool, str, PaddingStrategy] = False,
2300
- truncation: Union[bool, str, TruncationStrategy, None] = None,
2301
- max_length: Optional[int] = None,
2190
+ padding: bool | str | PaddingStrategy = False,
2191
+ truncation: bool | str | TruncationStrategy | None = None,
2192
+ max_length: int | None = None,
2302
2193
  stride: int = 0,
2303
- padding_side: Optional[str] = None,
2304
- return_tensors: Optional[Union[str, TensorType]] = None,
2194
+ padding_side: str | None = None,
2195
+ return_tensors: str | TensorType | None = None,
2305
2196
  **kwargs,
2306
2197
  ) -> list[int]:
2307
2198
  """
@@ -2319,15 +2210,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2319
2210
  the `tokenize` method) or a list of integers (tokenized string ids using the `convert_tokens_to_ids`
2320
2211
  method).
2321
2212
  """
2322
- padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
2213
+ padding_strategy, truncation_strategy, max_length, kwargs_updated = self._get_padding_truncation_strategies(
2323
2214
  padding=padding,
2324
2215
  truncation=truncation,
2325
2216
  max_length=max_length,
2326
- pad_to_multiple_of=kwargs.get("pad_to_multiple_of"),
2327
- verbose=kwargs.get("verbose", True),
2328
2217
  **kwargs,
2329
2218
  )
2330
2219
 
2220
+ kwargs.update(kwargs_updated)
2221
+
2331
2222
  encoded_inputs = self._encode_plus(
2332
2223
  text,
2333
2224
  text_pair=text_pair,
@@ -2470,29 +2361,27 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2470
2361
  @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
2471
2362
  def __call__(
2472
2363
  self,
2473
- text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput], None] = None,
2474
- text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
2475
- text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput], None] = None,
2476
- text_pair_target: Optional[
2477
- Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
2478
- ] = None,
2364
+ text: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2365
+ text_pair: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2366
+ text_target: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2367
+ text_pair_target: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2479
2368
  add_special_tokens: bool = True,
2480
- padding: Union[bool, str, PaddingStrategy] = False,
2481
- truncation: Union[bool, str, TruncationStrategy, None] = None,
2482
- max_length: Optional[int] = None,
2369
+ padding: bool | str | PaddingStrategy = False,
2370
+ truncation: bool | str | TruncationStrategy | None = None,
2371
+ max_length: int | None = None,
2483
2372
  stride: int = 0,
2484
2373
  is_split_into_words: bool = False,
2485
- pad_to_multiple_of: Optional[int] = None,
2486
- padding_side: Optional[str] = None,
2487
- return_tensors: Optional[Union[str, TensorType]] = None,
2488
- return_token_type_ids: Optional[bool] = None,
2489
- return_attention_mask: Optional[bool] = None,
2374
+ pad_to_multiple_of: int | None = None,
2375
+ padding_side: str | None = None,
2376
+ return_tensors: str | TensorType | None = None,
2377
+ return_token_type_ids: bool | None = None,
2378
+ return_attention_mask: bool | None = None,
2490
2379
  return_overflowing_tokens: bool = False,
2491
2380
  return_special_tokens_mask: bool = False,
2492
2381
  return_offsets_mapping: bool = False,
2493
2382
  return_length: bool = False,
2494
2383
  verbose: bool = True,
2495
- tokenizer_kwargs: Optional[dict[str, Any]] = None,
2384
+ tokenizer_kwargs: dict[str, Any] | None = None,
2496
2385
  **kwargs,
2497
2386
  ) -> BatchEncoding:
2498
2387
  """
@@ -2597,19 +2486,19 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2597
2486
 
2598
2487
  def _encode_plus(
2599
2488
  self,
2600
- text: Union[TextInput, PreTokenizedInput, EncodedInput],
2601
- text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
2489
+ text: TextInput | PreTokenizedInput | EncodedInput,
2490
+ text_pair: TextInput | PreTokenizedInput | EncodedInput | None = None,
2602
2491
  add_special_tokens: bool = True,
2603
2492
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
2604
2493
  truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
2605
- max_length: Optional[int] = None,
2494
+ max_length: int | None = None,
2606
2495
  stride: int = 0,
2607
2496
  is_split_into_words: bool = False,
2608
- pad_to_multiple_of: Optional[int] = None,
2609
- padding_side: Optional[str] = None,
2610
- return_tensors: Optional[Union[str, TensorType]] = None,
2611
- return_token_type_ids: Optional[bool] = None,
2612
- return_attention_mask: Optional[bool] = None,
2497
+ pad_to_multiple_of: int | None = None,
2498
+ padding_side: str | None = None,
2499
+ return_tensors: str | TensorType | None = None,
2500
+ return_token_type_ids: bool | None = None,
2501
+ return_attention_mask: bool | None = None,
2613
2502
  return_overflowing_tokens: bool = False,
2614
2503
  return_special_tokens_mask: bool = False,
2615
2504
  return_offsets_mapping: bool = False,
@@ -2622,19 +2511,17 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2622
2511
 
2623
2512
  def pad(
2624
2513
  self,
2625
- encoded_inputs: Union[
2626
- BatchEncoding,
2627
- list[BatchEncoding],
2628
- dict[str, EncodedInput],
2629
- dict[str, list[EncodedInput]],
2630
- list[dict[str, EncodedInput]],
2631
- ],
2632
- padding: Union[bool, str, PaddingStrategy] = True,
2633
- max_length: Optional[int] = None,
2634
- pad_to_multiple_of: Optional[int] = None,
2635
- padding_side: Optional[str] = None,
2636
- return_attention_mask: Optional[bool] = None,
2637
- return_tensors: Optional[Union[str, TensorType]] = None,
2514
+ encoded_inputs: BatchEncoding
2515
+ | list[BatchEncoding]
2516
+ | dict[str, EncodedInput]
2517
+ | dict[str, list[EncodedInput]]
2518
+ | list[dict[str, EncodedInput]],
2519
+ padding: bool | str | PaddingStrategy = True,
2520
+ max_length: int | None = None,
2521
+ pad_to_multiple_of: int | None = None,
2522
+ padding_side: str | None = None,
2523
+ return_attention_mask: bool | None = None,
2524
+ return_tensors: str | TensorType | None = None,
2638
2525
  verbose: bool = True,
2639
2526
  ) -> BatchEncoding:
2640
2527
  """
@@ -2795,12 +2682,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2795
2682
 
2796
2683
  def _pad(
2797
2684
  self,
2798
- encoded_inputs: Union[dict[str, EncodedInput], BatchEncoding],
2799
- max_length: Optional[int] = None,
2685
+ encoded_inputs: dict[str, EncodedInput] | BatchEncoding,
2686
+ max_length: int | None = None,
2800
2687
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
2801
- pad_to_multiple_of: Optional[int] = None,
2802
- padding_side: Optional[str] = None,
2803
- return_attention_mask: Optional[bool] = None,
2688
+ pad_to_multiple_of: int | None = None,
2689
+ padding_side: str | None = None,
2690
+ return_attention_mask: bool | None = None,
2804
2691
  ) -> dict:
2805
2692
  """
2806
2693
  Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
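
A hedged usage sketch of `pad` over a list of already-encoded examples, matching the updated keyword style (illustrative checkpoint; PyTorch assumed for `return_tensors="pt"`):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # illustrative checkpoint
features = [tok("short"), tok("a noticeably longer example sentence")]

batch = tok.pad(
    features,
    padding=True,           # pad to the longest example in the batch
    pad_to_multiple_of=8,   # optional alignment, e.g. for tensor cores
    return_tensors="pt",
)
print(batch["input_ids"].shape, batch["attention_mask"].shape)
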
@@ -2890,10 +2777,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2890
2777
 
2891
2778
  def decode(
2892
2779
  self,
2893
- token_ids: Union[int, list[int], list[list[int]], np.ndarray, torch.Tensor],
2780
+ token_ids: int | list[int] | list[list[int]] | np.ndarray | torch.Tensor,
2894
2781
  skip_special_tokens: bool = False,
2895
2782
  **kwargs,
2896
- ) -> Union[str, list[str]]:
2783
+ ) -> str | list[str]:
2897
2784
  """
2898
2785
  Converts a sequence of ids into a string, or a list of sequences into a list of strings,
2899
2786
  using the tokenizer and vocabulary with options to remove special tokens and clean up
@@ -2938,9 +2825,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2938
2825
 
2939
2826
  def batch_decode(
2940
2827
  self,
2941
- sequences: Union[list[int], list[list[int]], np.ndarray, torch.Tensor],
2828
+ sequences: list[int] | list[list[int]] | np.ndarray | torch.Tensor,
2942
2829
  skip_special_tokens: bool = False,
2943
- clean_up_tokenization_spaces: Optional[bool] = None,
2830
+ clean_up_tokenization_spaces: bool | None = None,
2944
2831
  **kwargs,
2945
2832
  ) -> list[str]:
2946
2833
  """
@@ -2977,14 +2864,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2977
2864
 
2978
2865
  def _decode(
2979
2866
  self,
2980
- token_ids: Union[int, list[int]],
2867
+ token_ids: int | list[int],
2981
2868
  skip_special_tokens: bool = False,
2982
- clean_up_tokenization_spaces: Optional[bool] = None,
2869
+ clean_up_tokenization_spaces: bool | None = None,
2983
2870
  **kwargs,
2984
2871
  ) -> str:
2985
2872
  raise NotImplementedError
2986
2873
 
2987
- def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: Optional[int], verbose: bool):
2874
+ def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: int | None, verbose: bool):
2988
2875
  """
2989
2876
  Depending on the input and internal state we might trigger a warning about a sequence that is too long for its
2990
2877
  corresponding model
@@ -3026,22 +2913,22 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3026
2913
 
3027
2914
  def apply_chat_template(
3028
2915
  self,
3029
- conversation: Union[list[dict[str, str]], list[list[dict[str, str]]]],
3030
- tools: Optional[list[Union[dict, Callable]]] = None,
3031
- documents: Optional[list[dict[str, str]]] = None,
3032
- chat_template: Optional[str] = None,
2916
+ conversation: list[dict[str, str]] | list[list[dict[str, str]]],
2917
+ tools: list[dict | Callable] | None = None,
2918
+ documents: list[dict[str, str]] | None = None,
2919
+ chat_template: str | None = None,
3033
2920
  add_generation_prompt: bool = False,
3034
2921
  continue_final_message: bool = False,
3035
2922
  tokenize: bool = True,
3036
- padding: Union[bool, str, PaddingStrategy] = False,
2923
+ padding: bool | str | PaddingStrategy = False,
3037
2924
  truncation: bool = False,
3038
- max_length: Optional[int] = None,
3039
- return_tensors: Optional[Union[str, TensorType]] = None,
2925
+ max_length: int | None = None,
2926
+ return_tensors: str | TensorType | None = None,
3040
2927
  return_dict: bool = True,
3041
2928
  return_assistant_tokens_mask: bool = False,
3042
- tokenizer_kwargs: Optional[dict[str, Any]] = None,
2929
+ tokenizer_kwargs: dict[str, Any] | None = None,
3043
2930
  **kwargs,
3044
- ) -> Union[str, list[int], list[str], list[list[int]], BatchEncoding]:
2931
+ ) -> str | list[int] | list[str] | list[list[int]] | BatchEncoding:
3045
2932
  """
3046
2933
  Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token
3047
2934
  ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
@@ -3095,7 +2982,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3095
2982
  values are:
3096
2983
  - `'pt'`: Return PyTorch `torch.Tensor` objects.
3097
2984
  - `'np'`: Return NumPy `np.ndarray` objects.
3098
- return_dict (`bool`, defaults to `False`):
2985
+ return_dict (`bool`, defaults to `True`):
3099
2986
  Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
3100
2987
  tokenizer_kwargs (`dict[str, Any]`, *optional*): Additional kwargs to pass to the tokenizer.
3101
2988
  return_assistant_tokens_mask (`bool`, defaults to `False`):
@@ -3199,7 +3086,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3199
3086
  def encode_message_with_chat_template(
3200
3087
  self,
3201
3088
  message: dict[str, str],
3202
- conversation_history: Optional[list[dict[str, str]]] = None,
3089
+ conversation_history: list[dict[str, str]] | None = None,
3203
3090
  **kwargs,
3204
3091
  ) -> list[int]:
3205
3092
  """
@@ -3256,7 +3143,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3256
3143
  return tokens[i:]
3257
3144
  return tokens[min_len:]
3258
3145
 
3259
- def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[list[dict]] = None) -> str:
3146
+ def get_chat_template(self, chat_template: str | None = None, tools: list[dict] | None = None) -> str:
3260
3147
  """
3261
3148
  Retrieve the chat template string used for tokenizing chat messages. This template is used
3262
3149
  internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
@@ -3312,9 +3199,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3312
3199
 
3313
3200
  def save_chat_templates(
3314
3201
  self,
3315
- save_directory: Union[str, os.PathLike],
3202
+ save_directory: str | os.PathLike,
3316
3203
  tokenizer_config: dict,
3317
- filename_prefix: Optional[str],
3204
+ filename_prefix: str | None,
3318
3205
  save_jinja_files: bool,
3319
3206
  ):
3320
3207
  """
@@ -3461,7 +3348,8 @@ def find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
3461
3348
  ):
3462
3349
  return candidate
3463
3350
  except Exception:
3464
- pass
3351
+ # TODO: tighten to OSError / ProxyError
3352
+ continue
3465
3353
 
3466
3354
  subfolder = kwargs.get("subfolder", "")
3467
3355
  local_files_only = kwargs.get("local_files_only", False)
@@ -3491,8 +3379,9 @@ def find_sentencepiece_model_file(pretrained_model_name_or_path, **kwargs):
3491
3379
  for entry in entries:
3492
3380
  if entry.path.endswith(".model"):
3493
3381
  return entry.path if not subfolder else entry.path.removeprefix(f"{subfolder}/")
3494
- except Exception:
3495
- pass
3382
+ except Exception as e:
3383
+ # TODO: tighten exception class
3384
+ logger.debug(f"Could not list Hub repository files: {e}")
3496
3385
 
3497
3386
  return None
3498
3387
 
@@ -3613,9 +3502,7 @@ def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
3613
3502
  return prepend_scheme
3614
3503
 
3615
3504
 
3616
- def generate_merges(
3617
- vocab, vocab_scores: Optional[dict[str, float]] = None, skip_tokens: Optional[Collection[str]] = None
3618
- ):
3505
+ def generate_merges(vocab, vocab_scores: dict[str, float] | None = None, skip_tokens: Collection[str] | None = None):
3619
3506
  skip_tokens = set(skip_tokens) if skip_tokens is not None else set()
3620
3507
  reverse = vocab_scores is not None
3621
3508
  vocab_scores = dict(vocab_scores) if reverse else vocab