transformers 5.0.0__py3-none-any.whl → 5.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1606)
  1. transformers/__init__.py +36 -55
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +33 -32
  4. transformers/cache_utils.py +139 -32
  5. transformers/cli/chat.py +3 -3
  6. transformers/cli/serve.py +19 -49
  7. transformers/cli/transformers.py +1 -2
  8. transformers/configuration_utils.py +155 -129
  9. transformers/conversion_mapping.py +22 -158
  10. transformers/convert_slow_tokenizer.py +17 -227
  11. transformers/core_model_loading.py +185 -528
  12. transformers/data/data_collator.py +4 -12
  13. transformers/data/processors/glue.py +1 -0
  14. transformers/data/processors/utils.py +1 -0
  15. transformers/data/processors/xnli.py +1 -0
  16. transformers/dependency_versions_check.py +1 -0
  17. transformers/dependency_versions_table.py +7 -5
  18. transformers/distributed/configuration_utils.py +2 -1
  19. transformers/dynamic_module_utils.py +25 -24
  20. transformers/feature_extraction_sequence_utils.py +23 -19
  21. transformers/feature_extraction_utils.py +33 -64
  22. transformers/file_utils.py +1 -0
  23. transformers/generation/__init__.py +1 -11
  24. transformers/generation/candidate_generator.py +33 -80
  25. transformers/generation/configuration_utils.py +133 -189
  26. transformers/generation/continuous_batching/__init__.py +1 -4
  27. transformers/generation/continuous_batching/cache.py +25 -83
  28. transformers/generation/continuous_batching/cache_manager.py +45 -155
  29. transformers/generation/continuous_batching/continuous_api.py +147 -270
  30. transformers/generation/continuous_batching/requests.py +3 -51
  31. transformers/generation/continuous_batching/scheduler.py +105 -160
  32. transformers/generation/logits_process.py +128 -0
  33. transformers/generation/stopping_criteria.py +1 -1
  34. transformers/generation/streamers.py +1 -0
  35. transformers/generation/utils.py +123 -122
  36. transformers/generation/watermarking.py +6 -8
  37. transformers/hf_argparser.py +13 -9
  38. transformers/hyperparameter_search.py +2 -1
  39. transformers/image_processing_base.py +23 -12
  40. transformers/image_processing_utils.py +15 -11
  41. transformers/image_processing_utils_fast.py +75 -85
  42. transformers/image_transforms.py +42 -73
  43. transformers/image_utils.py +32 -30
  44. transformers/initialization.py +0 -37
  45. transformers/integrations/__init__.py +2 -16
  46. transformers/integrations/accelerate.py +113 -58
  47. transformers/integrations/aqlm.py +66 -36
  48. transformers/integrations/awq.py +516 -45
  49. transformers/integrations/bitnet.py +105 -47
  50. transformers/integrations/bitsandbytes.py +202 -91
  51. transformers/integrations/deepspeed.py +4 -161
  52. transformers/integrations/eetq.py +82 -84
  53. transformers/integrations/executorch.py +1 -1
  54. transformers/integrations/fbgemm_fp8.py +145 -190
  55. transformers/integrations/finegrained_fp8.py +215 -249
  56. transformers/integrations/flash_attention.py +3 -3
  57. transformers/integrations/flex_attention.py +1 -1
  58. transformers/integrations/fp_quant.py +0 -90
  59. transformers/integrations/ggml.py +2 -11
  60. transformers/integrations/higgs.py +62 -37
  61. transformers/integrations/hub_kernels.py +8 -65
  62. transformers/integrations/integration_utils.py +3 -47
  63. transformers/integrations/mistral.py +0 -12
  64. transformers/integrations/mxfp4.py +80 -33
  65. transformers/integrations/peft.py +191 -483
  66. transformers/integrations/quanto.py +56 -77
  67. transformers/integrations/spqr.py +90 -42
  68. transformers/integrations/tensor_parallel.py +221 -167
  69. transformers/integrations/torchao.py +43 -35
  70. transformers/integrations/vptq.py +59 -40
  71. transformers/kernels/__init__.py +0 -0
  72. transformers/{models/pe_audio_video/processing_pe_audio_video.py → kernels/falcon_mamba/__init__.py} +3 -12
  73. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +529 -0
  74. transformers/loss/loss_utils.py +0 -2
  75. transformers/masking_utils.py +55 -51
  76. transformers/model_debugging_utils.py +5 -4
  77. transformers/modelcard.py +194 -15
  78. transformers/modeling_attn_mask_utils.py +19 -19
  79. transformers/modeling_flash_attention_utils.py +27 -27
  80. transformers/modeling_gguf_pytorch_utils.py +24 -79
  81. transformers/modeling_layers.py +22 -21
  82. transformers/modeling_outputs.py +253 -242
  83. transformers/modeling_rope_utils.py +117 -138
  84. transformers/modeling_utils.py +739 -850
  85. transformers/models/__init__.py +0 -27
  86. transformers/models/afmoe/configuration_afmoe.py +33 -40
  87. transformers/models/afmoe/modeling_afmoe.py +54 -42
  88. transformers/models/afmoe/modular_afmoe.py +33 -23
  89. transformers/models/aimv2/configuration_aimv2.py +10 -2
  90. transformers/models/aimv2/modeling_aimv2.py +42 -47
  91. transformers/models/aimv2/modular_aimv2.py +19 -17
  92. transformers/models/albert/configuration_albert.py +2 -8
  93. transformers/models/albert/modeling_albert.py +69 -70
  94. transformers/models/albert/tokenization_albert.py +14 -5
  95. transformers/models/align/configuration_align.py +6 -8
  96. transformers/models/align/modeling_align.py +89 -94
  97. transformers/models/align/processing_align.py +30 -2
  98. transformers/models/altclip/configuration_altclip.py +7 -4
  99. transformers/models/altclip/modeling_altclip.py +103 -114
  100. transformers/models/altclip/processing_altclip.py +15 -2
  101. transformers/models/apertus/__init__.py +1 -0
  102. transformers/models/apertus/configuration_apertus.py +28 -23
  103. transformers/models/apertus/modeling_apertus.py +40 -39
  104. transformers/models/apertus/modular_apertus.py +38 -37
  105. transformers/models/arcee/configuration_arcee.py +30 -25
  106. transformers/models/arcee/modeling_arcee.py +39 -36
  107. transformers/models/arcee/modular_arcee.py +23 -20
  108. transformers/models/aria/configuration_aria.py +44 -31
  109. transformers/models/aria/image_processing_aria.py +27 -25
  110. transformers/models/aria/modeling_aria.py +106 -110
  111. transformers/models/aria/modular_aria.py +127 -118
  112. transformers/models/aria/processing_aria.py +35 -28
  113. transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +1 -0
  114. transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py +6 -3
  115. transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +8 -6
  116. transformers/models/audioflamingo3/__init__.py +1 -0
  117. transformers/models/audioflamingo3/configuration_audioflamingo3.py +1 -0
  118. transformers/models/audioflamingo3/modeling_audioflamingo3.py +49 -58
  119. transformers/models/audioflamingo3/modular_audioflamingo3.py +43 -53
  120. transformers/models/audioflamingo3/processing_audioflamingo3.py +30 -33
  121. transformers/models/auto/auto_factory.py +7 -6
  122. transformers/models/auto/configuration_auto.py +5 -66
  123. transformers/models/auto/feature_extraction_auto.py +10 -14
  124. transformers/models/auto/image_processing_auto.py +41 -32
  125. transformers/models/auto/modeling_auto.py +188 -46
  126. transformers/models/auto/processing_auto.py +11 -24
  127. transformers/models/auto/tokenization_auto.py +588 -171
  128. transformers/models/auto/video_processing_auto.py +10 -12
  129. transformers/models/autoformer/configuration_autoformer.py +7 -4
  130. transformers/models/autoformer/modeling_autoformer.py +101 -104
  131. transformers/models/aya_vision/configuration_aya_vision.py +1 -4
  132. transformers/models/aya_vision/modeling_aya_vision.py +102 -71
  133. transformers/models/aya_vision/modular_aya_vision.py +74 -46
  134. transformers/models/aya_vision/processing_aya_vision.py +53 -25
  135. transformers/models/bamba/configuration_bamba.py +39 -34
  136. transformers/models/bamba/modeling_bamba.py +86 -82
  137. transformers/models/bamba/modular_bamba.py +72 -70
  138. transformers/models/bark/configuration_bark.py +8 -6
  139. transformers/models/bark/generation_configuration_bark.py +5 -3
  140. transformers/models/bark/modeling_bark.py +57 -54
  141. transformers/models/bark/processing_bark.py +41 -19
  142. transformers/models/bart/configuration_bart.py +6 -9
  143. transformers/models/bart/modeling_bart.py +126 -135
  144. transformers/models/barthez/tokenization_barthez.py +11 -3
  145. transformers/models/bartpho/tokenization_bartpho.py +7 -6
  146. transformers/models/beit/configuration_beit.py +11 -0
  147. transformers/models/beit/image_processing_beit.py +56 -53
  148. transformers/models/beit/image_processing_beit_fast.py +12 -10
  149. transformers/models/beit/modeling_beit.py +60 -69
  150. transformers/models/bert/configuration_bert.py +2 -12
  151. transformers/models/bert/modeling_bert.py +122 -114
  152. transformers/models/bert/tokenization_bert.py +23 -8
  153. transformers/models/bert/tokenization_bert_legacy.py +5 -3
  154. transformers/models/bert_generation/configuration_bert_generation.py +2 -17
  155. transformers/models/bert_generation/modeling_bert_generation.py +49 -49
  156. transformers/models/bert_generation/tokenization_bert_generation.py +3 -2
  157. transformers/models/bert_japanese/tokenization_bert_japanese.py +6 -5
  158. transformers/models/bertweet/tokenization_bertweet.py +3 -1
  159. transformers/models/big_bird/configuration_big_bird.py +9 -12
  160. transformers/models/big_bird/modeling_big_bird.py +109 -116
  161. transformers/models/big_bird/tokenization_big_bird.py +43 -16
  162. transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py +9 -9
  163. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +117 -130
  164. transformers/models/biogpt/configuration_biogpt.py +2 -8
  165. transformers/models/biogpt/modeling_biogpt.py +76 -72
  166. transformers/models/biogpt/modular_biogpt.py +66 -62
  167. transformers/models/biogpt/tokenization_biogpt.py +5 -3
  168. transformers/models/bit/configuration_bit.py +1 -0
  169. transformers/models/bit/image_processing_bit.py +24 -21
  170. transformers/models/bit/image_processing_bit_fast.py +1 -0
  171. transformers/models/bit/modeling_bit.py +12 -25
  172. transformers/models/bitnet/configuration_bitnet.py +28 -23
  173. transformers/models/bitnet/modeling_bitnet.py +39 -36
  174. transformers/models/bitnet/modular_bitnet.py +6 -4
  175. transformers/models/blenderbot/configuration_blenderbot.py +5 -8
  176. transformers/models/blenderbot/modeling_blenderbot.py +96 -77
  177. transformers/models/blenderbot/tokenization_blenderbot.py +24 -18
  178. transformers/models/blenderbot_small/configuration_blenderbot_small.py +5 -8
  179. transformers/models/blenderbot_small/modeling_blenderbot_small.py +69 -79
  180. transformers/models/blenderbot_small/tokenization_blenderbot_small.py +3 -1
  181. transformers/models/blip/configuration_blip.py +10 -9
  182. transformers/models/blip/image_processing_blip.py +20 -17
  183. transformers/models/blip/image_processing_blip_fast.py +1 -0
  184. transformers/models/blip/modeling_blip.py +108 -117
  185. transformers/models/blip/modeling_blip_text.py +65 -73
  186. transformers/models/blip/processing_blip.py +36 -5
  187. transformers/models/blip_2/configuration_blip_2.py +2 -2
  188. transformers/models/blip_2/modeling_blip_2.py +118 -146
  189. transformers/models/blip_2/processing_blip_2.py +38 -8
  190. transformers/models/bloom/configuration_bloom.py +2 -5
  191. transformers/models/bloom/modeling_bloom.py +104 -77
  192. transformers/models/blt/configuration_blt.py +86 -94
  193. transformers/models/blt/modeling_blt.py +81 -238
  194. transformers/models/blt/modular_blt.py +65 -228
  195. transformers/models/bridgetower/configuration_bridgetower.py +2 -7
  196. transformers/models/bridgetower/image_processing_bridgetower.py +35 -34
  197. transformers/models/bridgetower/image_processing_bridgetower_fast.py +16 -13
  198. transformers/models/bridgetower/modeling_bridgetower.py +119 -141
  199. transformers/models/bridgetower/processing_bridgetower.py +16 -2
  200. transformers/models/bros/configuration_bros.py +18 -24
  201. transformers/models/bros/modeling_bros.py +80 -90
  202. transformers/models/bros/processing_bros.py +12 -2
  203. transformers/models/byt5/tokenization_byt5.py +6 -4
  204. transformers/models/camembert/configuration_camembert.py +2 -8
  205. transformers/models/camembert/modeling_camembert.py +195 -196
  206. transformers/models/camembert/modular_camembert.py +54 -51
  207. transformers/models/camembert/tokenization_camembert.py +13 -6
  208. transformers/models/canine/configuration_canine.py +2 -4
  209. transformers/models/canine/modeling_canine.py +75 -84
  210. transformers/models/canine/tokenization_canine.py +1 -2
  211. transformers/models/chameleon/configuration_chameleon.py +34 -29
  212. transformers/models/chameleon/image_processing_chameleon.py +24 -21
  213. transformers/models/chameleon/image_processing_chameleon_fast.py +6 -5
  214. transformers/models/chameleon/modeling_chameleon.py +93 -142
  215. transformers/models/chameleon/processing_chameleon.py +41 -16
  216. transformers/models/chinese_clip/configuration_chinese_clip.py +8 -10
  217. transformers/models/chinese_clip/image_processing_chinese_clip.py +24 -21
  218. transformers/models/chinese_clip/image_processing_chinese_clip_fast.py +1 -0
  219. transformers/models/chinese_clip/modeling_chinese_clip.py +92 -96
  220. transformers/models/chinese_clip/processing_chinese_clip.py +15 -2
  221. transformers/models/clap/configuration_clap.py +9 -4
  222. transformers/models/clap/feature_extraction_clap.py +12 -11
  223. transformers/models/clap/modeling_clap.py +123 -136
  224. transformers/models/clap/processing_clap.py +15 -2
  225. transformers/models/clip/configuration_clip.py +2 -4
  226. transformers/models/clip/image_processing_clip.py +24 -21
  227. transformers/models/clip/image_processing_clip_fast.py +1 -9
  228. transformers/models/clip/modeling_clip.py +65 -65
  229. transformers/models/clip/processing_clip.py +14 -2
  230. transformers/models/clip/tokenization_clip.py +46 -21
  231. transformers/models/clipseg/configuration_clipseg.py +2 -4
  232. transformers/models/clipseg/modeling_clipseg.py +109 -119
  233. transformers/models/clipseg/processing_clipseg.py +42 -19
  234. transformers/models/clvp/configuration_clvp.py +5 -15
  235. transformers/models/clvp/feature_extraction_clvp.py +10 -7
  236. transformers/models/clvp/modeling_clvp.py +146 -155
  237. transformers/models/clvp/number_normalizer.py +2 -1
  238. transformers/models/clvp/processing_clvp.py +20 -3
  239. transformers/models/clvp/tokenization_clvp.py +64 -1
  240. transformers/models/code_llama/tokenization_code_llama.py +44 -18
  241. transformers/models/codegen/configuration_codegen.py +4 -4
  242. transformers/models/codegen/modeling_codegen.py +53 -63
  243. transformers/models/codegen/tokenization_codegen.py +47 -17
  244. transformers/models/cohere/configuration_cohere.py +30 -25
  245. transformers/models/cohere/modeling_cohere.py +42 -40
  246. transformers/models/cohere/modular_cohere.py +29 -26
  247. transformers/models/cohere/tokenization_cohere.py +46 -15
  248. transformers/models/cohere2/configuration_cohere2.py +32 -31
  249. transformers/models/cohere2/modeling_cohere2.py +44 -42
  250. transformers/models/cohere2/modular_cohere2.py +54 -54
  251. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +14 -13
  252. transformers/models/cohere2_vision/modeling_cohere2_vision.py +58 -59
  253. transformers/models/cohere2_vision/modular_cohere2_vision.py +46 -45
  254. transformers/models/cohere2_vision/processing_cohere2_vision.py +36 -6
  255. transformers/models/colpali/configuration_colpali.py +1 -0
  256. transformers/models/colpali/modeling_colpali.py +16 -14
  257. transformers/models/colpali/modular_colpali.py +51 -11
  258. transformers/models/colpali/processing_colpali.py +52 -14
  259. transformers/models/colqwen2/modeling_colqwen2.py +28 -28
  260. transformers/models/colqwen2/modular_colqwen2.py +74 -37
  261. transformers/models/colqwen2/processing_colqwen2.py +52 -16
  262. transformers/models/conditional_detr/configuration_conditional_detr.py +2 -1
  263. transformers/models/conditional_detr/image_processing_conditional_detr.py +70 -67
  264. transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +36 -36
  265. transformers/models/conditional_detr/modeling_conditional_detr.py +87 -99
  266. transformers/models/conditional_detr/modular_conditional_detr.py +3 -49
  267. transformers/models/convbert/configuration_convbert.py +8 -11
  268. transformers/models/convbert/modeling_convbert.py +87 -94
  269. transformers/models/convbert/tokenization_convbert.py +1 -0
  270. transformers/models/convnext/configuration_convnext.py +1 -0
  271. transformers/models/convnext/image_processing_convnext.py +23 -20
  272. transformers/models/convnext/image_processing_convnext_fast.py +21 -16
  273. transformers/models/convnext/modeling_convnext.py +12 -9
  274. transformers/models/convnextv2/configuration_convnextv2.py +1 -0
  275. transformers/models/convnextv2/modeling_convnextv2.py +12 -9
  276. transformers/models/cpm/tokenization_cpm.py +7 -6
  277. transformers/models/cpm/tokenization_cpm_fast.py +5 -3
  278. transformers/models/cpmant/configuration_cpmant.py +1 -4
  279. transformers/models/cpmant/modeling_cpmant.py +40 -38
  280. transformers/models/cpmant/tokenization_cpmant.py +3 -1
  281. transformers/models/csm/configuration_csm.py +66 -58
  282. transformers/models/csm/generation_csm.py +35 -31
  283. transformers/models/csm/modeling_csm.py +85 -85
  284. transformers/models/csm/modular_csm.py +58 -58
  285. transformers/models/csm/processing_csm.py +68 -25
  286. transformers/models/ctrl/configuration_ctrl.py +1 -16
  287. transformers/models/ctrl/modeling_ctrl.py +44 -54
  288. transformers/models/ctrl/tokenization_ctrl.py +1 -0
  289. transformers/models/cvt/configuration_cvt.py +1 -0
  290. transformers/models/cvt/modeling_cvt.py +16 -20
  291. transformers/models/cwm/__init__.py +1 -0
  292. transformers/models/cwm/configuration_cwm.py +12 -8
  293. transformers/models/cwm/modeling_cwm.py +39 -37
  294. transformers/models/cwm/modular_cwm.py +12 -10
  295. transformers/models/d_fine/configuration_d_fine.py +5 -7
  296. transformers/models/d_fine/modeling_d_fine.py +128 -138
  297. transformers/models/d_fine/modular_d_fine.py +18 -33
  298. transformers/models/dab_detr/configuration_dab_detr.py +3 -6
  299. transformers/models/dab_detr/modeling_dab_detr.py +75 -81
  300. transformers/models/dac/configuration_dac.py +1 -0
  301. transformers/models/dac/feature_extraction_dac.py +9 -6
  302. transformers/models/dac/modeling_dac.py +26 -24
  303. transformers/models/data2vec/configuration_data2vec_audio.py +2 -4
  304. transformers/models/data2vec/configuration_data2vec_text.py +3 -11
  305. transformers/models/data2vec/configuration_data2vec_vision.py +1 -0
  306. transformers/models/data2vec/modeling_data2vec_audio.py +56 -57
  307. transformers/models/data2vec/modeling_data2vec_text.py +93 -98
  308. transformers/models/data2vec/modeling_data2vec_vision.py +45 -49
  309. transformers/models/data2vec/modular_data2vec_audio.py +1 -6
  310. transformers/models/data2vec/modular_data2vec_text.py +54 -58
  311. transformers/models/dbrx/configuration_dbrx.py +22 -36
  312. transformers/models/dbrx/modeling_dbrx.py +45 -42
  313. transformers/models/dbrx/modular_dbrx.py +33 -31
  314. transformers/models/deberta/configuration_deberta.py +1 -6
  315. transformers/models/deberta/modeling_deberta.py +60 -64
  316. transformers/models/deberta/tokenization_deberta.py +21 -9
  317. transformers/models/deberta_v2/configuration_deberta_v2.py +1 -6
  318. transformers/models/deberta_v2/modeling_deberta_v2.py +65 -71
  319. transformers/models/deberta_v2/tokenization_deberta_v2.py +29 -11
  320. transformers/models/decision_transformer/configuration_decision_transformer.py +2 -3
  321. transformers/models/decision_transformer/modeling_decision_transformer.py +56 -60
  322. transformers/models/deepseek_v2/configuration_deepseek_v2.py +44 -39
  323. transformers/models/deepseek_v2/modeling_deepseek_v2.py +43 -43
  324. transformers/models/deepseek_v2/modular_deepseek_v2.py +49 -48
  325. transformers/models/deepseek_v3/configuration_deepseek_v3.py +45 -40
  326. transformers/models/deepseek_v3/modeling_deepseek_v3.py +42 -45
  327. transformers/models/deepseek_v3/modular_deepseek_v3.py +9 -14
  328. transformers/models/deepseek_vl/configuration_deepseek_vl.py +3 -2
  329. transformers/models/deepseek_vl/image_processing_deepseek_vl.py +26 -25
  330. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +10 -10
  331. transformers/models/deepseek_vl/modeling_deepseek_vl.py +48 -57
  332. transformers/models/deepseek_vl/modular_deepseek_vl.py +43 -14
  333. transformers/models/deepseek_vl/processing_deepseek_vl.py +41 -10
  334. transformers/models/deepseek_vl_hybrid/configuration_deepseek_vl_hybrid.py +5 -3
  335. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py +35 -35
  336. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +24 -20
  337. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +61 -109
  338. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +118 -146
  339. transformers/models/deepseek_vl_hybrid/processing_deepseek_vl_hybrid.py +44 -12
  340. transformers/models/deformable_detr/configuration_deformable_detr.py +3 -2
  341. transformers/models/deformable_detr/image_processing_deformable_detr.py +61 -59
  342. transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +28 -28
  343. transformers/models/deformable_detr/modeling_deformable_detr.py +82 -88
  344. transformers/models/deformable_detr/modular_deformable_detr.py +3 -1
  345. transformers/models/deit/configuration_deit.py +1 -0
  346. transformers/models/deit/image_processing_deit.py +21 -18
  347. transformers/models/deit/image_processing_deit_fast.py +1 -0
  348. transformers/models/deit/modeling_deit.py +22 -24
  349. transformers/models/depth_anything/configuration_depth_anything.py +4 -2
  350. transformers/models/depth_anything/modeling_depth_anything.py +10 -10
  351. transformers/models/depth_pro/configuration_depth_pro.py +1 -0
  352. transformers/models/depth_pro/image_processing_depth_pro.py +23 -22
  353. transformers/models/depth_pro/image_processing_depth_pro_fast.py +10 -8
  354. transformers/models/depth_pro/modeling_depth_pro.py +27 -31
  355. transformers/models/detr/configuration_detr.py +2 -1
  356. transformers/models/detr/image_processing_detr.py +66 -64
  357. transformers/models/detr/image_processing_detr_fast.py +34 -33
  358. transformers/models/detr/modeling_detr.py +79 -95
  359. transformers/models/dia/configuration_dia.py +15 -9
  360. transformers/models/dia/feature_extraction_dia.py +9 -6
  361. transformers/models/dia/generation_dia.py +50 -48
  362. transformers/models/dia/modeling_dia.py +69 -78
  363. transformers/models/dia/modular_dia.py +56 -64
  364. transformers/models/dia/processing_dia.py +29 -39
  365. transformers/models/dia/tokenization_dia.py +6 -3
  366. transformers/models/diffllama/configuration_diffllama.py +30 -25
  367. transformers/models/diffllama/modeling_diffllama.py +49 -46
  368. transformers/models/diffllama/modular_diffllama.py +19 -17
  369. transformers/models/dinat/configuration_dinat.py +1 -0
  370. transformers/models/dinat/modeling_dinat.py +44 -47
  371. transformers/models/dinov2/configuration_dinov2.py +1 -0
  372. transformers/models/dinov2/modeling_dinov2.py +15 -15
  373. transformers/models/dinov2_with_registers/configuration_dinov2_with_registers.py +1 -1
  374. transformers/models/dinov2_with_registers/modeling_dinov2_with_registers.py +15 -16
  375. transformers/models/dinov2_with_registers/modular_dinov2_with_registers.py +9 -9
  376. transformers/models/dinov3_convnext/configuration_dinov3_convnext.py +7 -4
  377. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +6 -3
  378. transformers/models/dinov3_vit/configuration_dinov3_vit.py +8 -5
  379. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +9 -7
  380. transformers/models/dinov3_vit/modeling_dinov3_vit.py +18 -19
  381. transformers/models/dinov3_vit/modular_dinov3_vit.py +15 -16
  382. transformers/models/distilbert/configuration_distilbert.py +2 -8
  383. transformers/models/distilbert/modeling_distilbert.py +55 -55
  384. transformers/models/distilbert/tokenization_distilbert.py +1 -13
  385. transformers/models/doge/__init__.py +1 -0
  386. transformers/models/doge/configuration_doge.py +32 -39
  387. transformers/models/doge/modeling_doge.py +49 -45
  388. transformers/models/doge/modular_doge.py +63 -71
  389. transformers/models/donut/configuration_donut_swin.py +1 -0
  390. transformers/models/donut/image_processing_donut.py +29 -26
  391. transformers/models/donut/image_processing_donut_fast.py +15 -9
  392. transformers/models/donut/modeling_donut_swin.py +58 -62
  393. transformers/models/donut/processing_donut.py +26 -5
  394. transformers/models/dots1/configuration_dots1.py +33 -41
  395. transformers/models/dots1/modeling_dots1.py +45 -54
  396. transformers/models/dots1/modular_dots1.py +4 -5
  397. transformers/models/dpr/configuration_dpr.py +2 -19
  398. transformers/models/dpr/modeling_dpr.py +39 -42
  399. transformers/models/dpr/tokenization_dpr.py +9 -19
  400. transformers/models/dpr/tokenization_dpr_fast.py +9 -7
  401. transformers/models/dpt/configuration_dpt.py +2 -1
  402. transformers/models/dpt/image_processing_dpt.py +66 -65
  403. transformers/models/dpt/image_processing_dpt_fast.py +20 -18
  404. transformers/models/dpt/modeling_dpt.py +30 -32
  405. transformers/models/dpt/modular_dpt.py +17 -15
  406. transformers/models/edgetam/configuration_edgetam.py +3 -2
  407. transformers/models/edgetam/modeling_edgetam.py +86 -86
  408. transformers/models/edgetam/modular_edgetam.py +26 -21
  409. transformers/models/edgetam_video/__init__.py +1 -0
  410. transformers/models/edgetam_video/configuration_edgetam_video.py +1 -0
  411. transformers/models/edgetam_video/modeling_edgetam_video.py +158 -169
  412. transformers/models/edgetam_video/modular_edgetam_video.py +37 -30
  413. transformers/models/efficientloftr/configuration_efficientloftr.py +5 -4
  414. transformers/models/efficientloftr/image_processing_efficientloftr.py +16 -14
  415. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +9 -9
  416. transformers/models/efficientloftr/modeling_efficientloftr.py +38 -59
  417. transformers/models/efficientloftr/modular_efficientloftr.py +3 -1
  418. transformers/models/efficientnet/configuration_efficientnet.py +1 -0
  419. transformers/models/efficientnet/image_processing_efficientnet.py +32 -28
  420. transformers/models/efficientnet/image_processing_efficientnet_fast.py +19 -17
  421. transformers/models/efficientnet/modeling_efficientnet.py +15 -19
  422. transformers/models/electra/configuration_electra.py +3 -13
  423. transformers/models/electra/modeling_electra.py +103 -108
  424. transformers/models/emu3/configuration_emu3.py +17 -13
  425. transformers/models/emu3/image_processing_emu3.py +39 -44
  426. transformers/models/emu3/modeling_emu3.py +108 -148
  427. transformers/models/emu3/modular_emu3.py +73 -115
  428. transformers/models/emu3/processing_emu3.py +43 -18
  429. transformers/models/encodec/configuration_encodec.py +4 -2
  430. transformers/models/encodec/feature_extraction_encodec.py +13 -10
  431. transformers/models/encodec/modeling_encodec.py +29 -39
  432. transformers/models/encoder_decoder/configuration_encoder_decoder.py +2 -12
  433. transformers/models/encoder_decoder/modeling_encoder_decoder.py +43 -37
  434. transformers/models/eomt/configuration_eomt.py +1 -0
  435. transformers/models/eomt/image_processing_eomt.py +56 -66
  436. transformers/models/eomt/image_processing_eomt_fast.py +33 -76
  437. transformers/models/eomt/modeling_eomt.py +18 -23
  438. transformers/models/eomt/modular_eomt.py +13 -18
  439. transformers/models/ernie/configuration_ernie.py +3 -24
  440. transformers/models/ernie/modeling_ernie.py +132 -127
  441. transformers/models/ernie/modular_ernie.py +103 -97
  442. transformers/models/ernie4_5/configuration_ernie4_5.py +27 -23
  443. transformers/models/ernie4_5/modeling_ernie4_5.py +38 -36
  444. transformers/models/ernie4_5/modular_ernie4_5.py +4 -3
  445. transformers/models/ernie4_5_moe/configuration_ernie4_5_moe.py +36 -32
  446. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +55 -56
  447. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +46 -18
  448. transformers/models/esm/configuration_esm.py +15 -11
  449. transformers/models/esm/modeling_esm.py +34 -38
  450. transformers/models/esm/modeling_esmfold.py +49 -53
  451. transformers/models/esm/openfold_utils/chunk_utils.py +6 -6
  452. transformers/models/esm/openfold_utils/loss.py +2 -1
  453. transformers/models/esm/openfold_utils/protein.py +16 -15
  454. transformers/models/esm/openfold_utils/tensor_utils.py +6 -6
  455. transformers/models/esm/tokenization_esm.py +4 -2
  456. transformers/models/evolla/configuration_evolla.py +40 -50
  457. transformers/models/evolla/modeling_evolla.py +66 -71
  458. transformers/models/evolla/modular_evolla.py +47 -53
  459. transformers/models/evolla/processing_evolla.py +35 -23
  460. transformers/models/exaone4/configuration_exaone4.py +25 -23
  461. transformers/models/exaone4/modeling_exaone4.py +38 -35
  462. transformers/models/exaone4/modular_exaone4.py +46 -44
  463. transformers/models/falcon/configuration_falcon.py +26 -31
  464. transformers/models/falcon/modeling_falcon.py +80 -82
  465. transformers/models/falcon_h1/configuration_falcon_h1.py +51 -45
  466. transformers/models/falcon_h1/modeling_falcon_h1.py +82 -85
  467. transformers/models/falcon_h1/modular_falcon_h1.py +51 -56
  468. transformers/models/falcon_mamba/configuration_falcon_mamba.py +2 -1
  469. transformers/models/falcon_mamba/modeling_falcon_mamba.py +82 -75
  470. transformers/models/falcon_mamba/modular_falcon_mamba.py +45 -28
  471. transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py +6 -2
  472. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +60 -76
  473. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +3 -2
  474. transformers/models/flaubert/configuration_flaubert.py +5 -10
  475. transformers/models/flaubert/modeling_flaubert.py +143 -145
  476. transformers/models/flaubert/tokenization_flaubert.py +5 -3
  477. transformers/models/flava/configuration_flava.py +6 -5
  478. transformers/models/flava/image_processing_flava.py +67 -66
  479. transformers/models/flava/image_processing_flava_fast.py +49 -46
  480. transformers/models/flava/modeling_flava.py +136 -153
  481. transformers/models/flava/processing_flava.py +12 -2
  482. transformers/models/flex_olmo/__init__.py +1 -0
  483. transformers/models/flex_olmo/configuration_flex_olmo.py +32 -28
  484. transformers/models/flex_olmo/modeling_flex_olmo.py +47 -47
  485. transformers/models/flex_olmo/modular_flex_olmo.py +44 -40
  486. transformers/models/florence2/configuration_florence2.py +1 -0
  487. transformers/models/florence2/modeling_florence2.py +69 -111
  488. transformers/models/florence2/modular_florence2.py +101 -104
  489. transformers/models/florence2/processing_florence2.py +47 -18
  490. transformers/models/fnet/configuration_fnet.py +2 -6
  491. transformers/models/fnet/modeling_fnet.py +80 -83
  492. transformers/models/fnet/tokenization_fnet.py +1 -0
  493. transformers/models/focalnet/configuration_focalnet.py +1 -0
  494. transformers/models/focalnet/modeling_focalnet.py +45 -51
  495. transformers/models/fsmt/configuration_fsmt.py +17 -12
  496. transformers/models/fsmt/modeling_fsmt.py +48 -49
  497. transformers/models/fsmt/tokenization_fsmt.py +5 -3
  498. transformers/models/funnel/configuration_funnel.py +1 -8
  499. transformers/models/funnel/modeling_funnel.py +93 -99
  500. transformers/models/funnel/tokenization_funnel.py +27 -17
  501. transformers/models/fuyu/configuration_fuyu.py +34 -28
  502. transformers/models/fuyu/image_processing_fuyu.py +31 -29
  503. transformers/models/fuyu/image_processing_fuyu_fast.py +17 -17
  504. transformers/models/fuyu/modeling_fuyu.py +53 -53
  505. transformers/models/fuyu/processing_fuyu.py +34 -23
  506. transformers/models/gemma/configuration_gemma.py +30 -25
  507. transformers/models/gemma/modeling_gemma.py +50 -46
  508. transformers/models/gemma/modular_gemma.py +47 -42
  509. transformers/models/gemma/tokenization_gemma.py +30 -10
  510. transformers/models/gemma2/configuration_gemma2.py +35 -30
  511. transformers/models/gemma2/modeling_gemma2.py +42 -39
  512. transformers/models/gemma2/modular_gemma2.py +66 -63
  513. transformers/models/gemma3/configuration_gemma3.py +44 -44
  514. transformers/models/gemma3/image_processing_gemma3.py +31 -29
  515. transformers/models/gemma3/image_processing_gemma3_fast.py +13 -11
  516. transformers/models/gemma3/modeling_gemma3.py +207 -159
  517. transformers/models/gemma3/modular_gemma3.py +204 -153
  518. transformers/models/gemma3/processing_gemma3.py +5 -5
  519. transformers/models/gemma3n/configuration_gemma3n.py +26 -36
  520. transformers/models/gemma3n/feature_extraction_gemma3n.py +11 -9
  521. transformers/models/gemma3n/modeling_gemma3n.py +356 -222
  522. transformers/models/gemma3n/modular_gemma3n.py +207 -230
  523. transformers/models/gemma3n/processing_gemma3n.py +26 -12
  524. transformers/models/git/configuration_git.py +8 -5
  525. transformers/models/git/modeling_git.py +204 -266
  526. transformers/models/git/processing_git.py +14 -2
  527. transformers/models/glm/configuration_glm.py +28 -24
  528. transformers/models/glm/modeling_glm.py +40 -37
  529. transformers/models/glm/modular_glm.py +7 -4
  530. transformers/models/glm4/configuration_glm4.py +28 -24
  531. transformers/models/glm4/modeling_glm4.py +42 -40
  532. transformers/models/glm4/modular_glm4.py +10 -8
  533. transformers/models/glm46v/configuration_glm46v.py +1 -0
  534. transformers/models/glm46v/image_processing_glm46v.py +40 -35
  535. transformers/models/glm46v/image_processing_glm46v_fast.py +9 -9
  536. transformers/models/glm46v/modeling_glm46v.py +90 -137
  537. transformers/models/glm46v/modular_glm46v.py +3 -4
  538. transformers/models/glm46v/processing_glm46v.py +41 -7
  539. transformers/models/glm46v/video_processing_glm46v.py +11 -9
  540. transformers/models/glm4_moe/configuration_glm4_moe.py +32 -40
  541. transformers/models/glm4_moe/modeling_glm4_moe.py +42 -45
  542. transformers/models/glm4_moe/modular_glm4_moe.py +34 -42
  543. transformers/models/glm4v/configuration_glm4v.py +20 -18
  544. transformers/models/glm4v/image_processing_glm4v.py +40 -34
  545. transformers/models/glm4v/image_processing_glm4v_fast.py +9 -8
  546. transformers/models/glm4v/modeling_glm4v.py +205 -254
  547. transformers/models/glm4v/modular_glm4v.py +224 -210
  548. transformers/models/glm4v/processing_glm4v.py +41 -7
  549. transformers/models/glm4v/video_processing_glm4v.py +11 -9
  550. transformers/models/glm4v_moe/configuration_glm4v_moe.py +125 -136
  551. transformers/models/glm4v_moe/modeling_glm4v_moe.py +368 -377
  552. transformers/models/glm4v_moe/modular_glm4v_moe.py +169 -83
  553. transformers/models/glpn/configuration_glpn.py +1 -0
  554. transformers/models/glpn/image_processing_glpn.py +12 -11
  555. transformers/models/glpn/image_processing_glpn_fast.py +13 -11
  556. transformers/models/glpn/modeling_glpn.py +14 -16
  557. transformers/models/got_ocr2/configuration_got_ocr2.py +12 -4
  558. transformers/models/got_ocr2/image_processing_got_ocr2.py +24 -22
  559. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +11 -9
  560. transformers/models/got_ocr2/modeling_got_ocr2.py +80 -77
  561. transformers/models/got_ocr2/modular_got_ocr2.py +51 -54
  562. transformers/models/got_ocr2/processing_got_ocr2.py +63 -42
  563. transformers/models/gpt2/configuration_gpt2.py +2 -13
  564. transformers/models/gpt2/modeling_gpt2.py +115 -120
  565. transformers/models/gpt2/tokenization_gpt2.py +46 -15
  566. transformers/models/gpt_bigcode/configuration_gpt_bigcode.py +2 -5
  567. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +89 -79
  568. transformers/models/gpt_neo/configuration_gpt_neo.py +2 -9
  569. transformers/models/gpt_neo/modeling_gpt_neo.py +67 -83
  570. transformers/models/gpt_neox/configuration_gpt_neox.py +25 -25
  571. transformers/models/gpt_neox/modeling_gpt_neox.py +75 -76
  572. transformers/models/gpt_neox/modular_gpt_neox.py +66 -67
  573. transformers/models/gpt_neox/tokenization_gpt_neox.py +51 -9
  574. transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py +19 -24
  575. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +47 -46
  576. transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py +3 -1
  577. transformers/models/gpt_oss/configuration_gpt_oss.py +28 -46
  578. transformers/models/gpt_oss/modeling_gpt_oss.py +121 -83
  579. transformers/models/gpt_oss/modular_gpt_oss.py +103 -64
  580. transformers/models/gpt_sw3/tokenization_gpt_sw3.py +4 -4
  581. transformers/models/gptj/configuration_gptj.py +4 -4
  582. transformers/models/gptj/modeling_gptj.py +87 -101
  583. transformers/models/granite/configuration_granite.py +33 -28
  584. transformers/models/granite/modeling_granite.py +46 -44
  585. transformers/models/granite/modular_granite.py +31 -29
  586. transformers/models/granite_speech/configuration_granite_speech.py +1 -0
  587. transformers/models/granite_speech/feature_extraction_granite_speech.py +3 -1
  588. transformers/models/granite_speech/modeling_granite_speech.py +52 -82
  589. transformers/models/granite_speech/processing_granite_speech.py +4 -11
  590. transformers/models/granitemoe/configuration_granitemoe.py +36 -31
  591. transformers/models/granitemoe/modeling_granitemoe.py +46 -41
  592. transformers/models/granitemoe/modular_granitemoe.py +27 -22
  593. transformers/models/granitemoehybrid/__init__.py +1 -0
  594. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +47 -46
  595. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +93 -97
  596. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +21 -54
  597. transformers/models/granitemoeshared/configuration_granitemoeshared.py +37 -33
  598. transformers/models/granitemoeshared/modeling_granitemoeshared.py +61 -54
  599. transformers/models/granitemoeshared/modular_granitemoeshared.py +21 -19
  600. transformers/models/grounding_dino/configuration_grounding_dino.py +4 -6
  601. transformers/models/grounding_dino/image_processing_grounding_dino.py +62 -60
  602. transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +29 -28
  603. transformers/models/grounding_dino/modeling_grounding_dino.py +140 -155
  604. transformers/models/grounding_dino/modular_grounding_dino.py +3 -2
  605. transformers/models/grounding_dino/processing_grounding_dino.py +38 -10
  606. transformers/models/groupvit/configuration_groupvit.py +2 -4
  607. transformers/models/groupvit/modeling_groupvit.py +93 -107
  608. transformers/models/helium/configuration_helium.py +29 -25
  609. transformers/models/helium/modeling_helium.py +40 -38
  610. transformers/models/helium/modular_helium.py +7 -3
  611. transformers/models/herbert/tokenization_herbert.py +28 -10
  612. transformers/models/hgnet_v2/configuration_hgnet_v2.py +1 -0
  613. transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -24
  614. transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -24
  615. transformers/models/hiera/configuration_hiera.py +1 -0
  616. transformers/models/hiera/modeling_hiera.py +66 -72
  617. transformers/models/hubert/configuration_hubert.py +2 -4
  618. transformers/models/hubert/modeling_hubert.py +37 -42
  619. transformers/models/hubert/modular_hubert.py +11 -13
  620. transformers/models/hunyuan_v1_dense/configuration_hunyuan_v1_dense.py +31 -26
  621. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +38 -35
  622. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +6 -4
  623. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  624. transformers/models/hunyuan_v1_moe/configuration_hunyuan_v1_moe.py +36 -31
  625. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +42 -47
  626. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +9 -9
  627. transformers/models/ibert/configuration_ibert.py +2 -4
  628. transformers/models/ibert/modeling_ibert.py +62 -82
  629. transformers/models/ibert/quant_modules.py +1 -0
  630. transformers/models/idefics/configuration_idefics.py +8 -5
  631. transformers/models/idefics/image_processing_idefics.py +15 -13
  632. transformers/models/idefics/modeling_idefics.py +82 -75
  633. transformers/models/idefics/perceiver.py +3 -1
  634. transformers/models/idefics/processing_idefics.py +48 -32
  635. transformers/models/idefics/vision.py +25 -24
  636. transformers/models/idefics2/configuration_idefics2.py +3 -1
  637. transformers/models/idefics2/image_processing_idefics2.py +32 -31
  638. transformers/models/idefics2/image_processing_idefics2_fast.py +8 -8
  639. transformers/models/idefics2/modeling_idefics2.py +101 -127
  640. transformers/models/idefics2/processing_idefics2.py +68 -10
  641. transformers/models/idefics3/configuration_idefics3.py +4 -1
  642. transformers/models/idefics3/image_processing_idefics3.py +43 -42
  643. transformers/models/idefics3/image_processing_idefics3_fast.py +15 -40
  644. transformers/models/idefics3/modeling_idefics3.py +90 -115
  645. transformers/models/idefics3/processing_idefics3.py +69 -15
  646. transformers/models/ijepa/configuration_ijepa.py +1 -0
  647. transformers/models/ijepa/modeling_ijepa.py +11 -10
  648. transformers/models/ijepa/modular_ijepa.py +7 -5
  649. transformers/models/imagegpt/configuration_imagegpt.py +2 -9
  650. transformers/models/imagegpt/image_processing_imagegpt.py +18 -17
  651. transformers/models/imagegpt/image_processing_imagegpt_fast.py +16 -11
  652. transformers/models/imagegpt/modeling_imagegpt.py +65 -76
  653. transformers/models/informer/configuration_informer.py +9 -6
  654. transformers/models/informer/modeling_informer.py +86 -88
  655. transformers/models/informer/modular_informer.py +16 -14
  656. transformers/models/instructblip/configuration_instructblip.py +2 -2
  657. transformers/models/instructblip/modeling_instructblip.py +63 -103
  658. transformers/models/instructblip/processing_instructblip.py +36 -10
  659. transformers/models/instructblipvideo/configuration_instructblipvideo.py +2 -2
  660. transformers/models/instructblipvideo/modeling_instructblipvideo.py +139 -157
  661. transformers/models/instructblipvideo/modular_instructblipvideo.py +64 -73
  662. transformers/models/instructblipvideo/processing_instructblipvideo.py +33 -14
  663. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +8 -6
  664. transformers/models/internvl/configuration_internvl.py +1 -0
  665. transformers/models/internvl/modeling_internvl.py +106 -85
  666. transformers/models/internvl/modular_internvl.py +67 -47
  667. transformers/models/internvl/processing_internvl.py +45 -12
  668. transformers/models/internvl/video_processing_internvl.py +12 -10
  669. transformers/models/jamba/configuration_jamba.py +8 -5
  670. transformers/models/jamba/modeling_jamba.py +66 -68
  671. transformers/models/jamba/modular_jamba.py +55 -54
  672. transformers/models/janus/configuration_janus.py +1 -0
  673. transformers/models/janus/image_processing_janus.py +37 -35
  674. transformers/models/janus/image_processing_janus_fast.py +20 -18
  675. transformers/models/janus/modeling_janus.py +191 -115
  676. transformers/models/janus/modular_janus.py +84 -133
  677. transformers/models/janus/processing_janus.py +43 -17
  678. transformers/models/jetmoe/configuration_jetmoe.py +26 -24
  679. transformers/models/jetmoe/modeling_jetmoe.py +46 -43
  680. transformers/models/jetmoe/modular_jetmoe.py +33 -31
  681. transformers/models/kosmos2/configuration_kosmos2.py +9 -10
  682. transformers/models/kosmos2/modeling_kosmos2.py +173 -208
  683. transformers/models/kosmos2/processing_kosmos2.py +55 -40
  684. transformers/models/kosmos2_5/__init__.py +1 -0
  685. transformers/models/kosmos2_5/configuration_kosmos2_5.py +9 -8
  686. transformers/models/kosmos2_5/image_processing_kosmos2_5.py +12 -10
  687. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +13 -4
  688. transformers/models/kosmos2_5/modeling_kosmos2_5.py +118 -132
  689. transformers/models/kosmos2_5/processing_kosmos2_5.py +29 -8
  690. transformers/models/kyutai_speech_to_text/configuration_kyutai_speech_to_text.py +28 -31
  691. transformers/models/kyutai_speech_to_text/feature_extraction_kyutai_speech_to_text.py +14 -12
  692. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +100 -110
  693. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +22 -28
  694. transformers/models/kyutai_speech_to_text/processing_kyutai_speech_to_text.py +8 -2
  695. transformers/models/layoutlm/configuration_layoutlm.py +2 -14
  696. transformers/models/layoutlm/modeling_layoutlm.py +72 -77
  697. transformers/models/layoutlmv2/configuration_layoutlmv2.py +17 -14
  698. transformers/models/layoutlmv2/image_processing_layoutlmv2.py +21 -18
  699. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +9 -7
  700. transformers/models/layoutlmv2/modeling_layoutlmv2.py +50 -64
  701. transformers/models/layoutlmv2/processing_layoutlmv2.py +44 -14
  702. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +126 -73
  703. transformers/models/layoutlmv3/configuration_layoutlmv3.py +19 -16
  704. transformers/models/layoutlmv3/image_processing_layoutlmv3.py +26 -24
  705. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +11 -9
  706. transformers/models/layoutlmv3/modeling_layoutlmv3.py +56 -82
  707. transformers/models/layoutlmv3/processing_layoutlmv3.py +46 -14
  708. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +134 -74
  709. transformers/models/layoutxlm/configuration_layoutxlm.py +17 -14
  710. transformers/models/layoutxlm/modular_layoutxlm.py +1 -0
  711. transformers/models/layoutxlm/processing_layoutxlm.py +44 -14
  712. transformers/models/layoutxlm/tokenization_layoutxlm.py +113 -77
  713. transformers/models/led/configuration_led.py +12 -8
  714. transformers/models/led/modeling_led.py +266 -124
  715. transformers/models/levit/configuration_levit.py +1 -0
  716. transformers/models/levit/image_processing_levit.py +21 -19
  717. transformers/models/levit/image_processing_levit_fast.py +5 -4
  718. transformers/models/levit/modeling_levit.py +19 -38
  719. transformers/models/lfm2/configuration_lfm2.py +30 -27
  720. transformers/models/lfm2/modeling_lfm2.py +50 -47
  721. transformers/models/lfm2/modular_lfm2.py +30 -29
  722. transformers/models/lfm2_moe/__init__.py +1 -0
  723. transformers/models/lfm2_moe/configuration_lfm2_moe.py +9 -6
  724. transformers/models/lfm2_moe/modeling_lfm2_moe.py +53 -61
  725. transformers/models/lfm2_moe/modular_lfm2_moe.py +37 -13
  726. transformers/models/lfm2_vl/configuration_lfm2_vl.py +1 -4
  727. transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +12 -41
  728. transformers/models/lfm2_vl/modeling_lfm2_vl.py +66 -84
  729. transformers/models/lfm2_vl/modular_lfm2_vl.py +56 -70
  730. transformers/models/lfm2_vl/processing_lfm2_vl.py +76 -96
  731. transformers/models/lightglue/image_processing_lightglue.py +15 -16
  732. transformers/models/lightglue/image_processing_lightglue_fast.py +9 -9
  733. transformers/models/lightglue/modeling_lightglue.py +31 -31
  734. transformers/models/lightglue/modular_lightglue.py +28 -29
  735. transformers/models/lilt/configuration_lilt.py +2 -6
  736. transformers/models/lilt/modeling_lilt.py +70 -76
  737. transformers/models/llama/configuration_llama.py +31 -26
  738. transformers/models/llama/modeling_llama.py +39 -36
  739. transformers/models/llama/tokenization_llama.py +44 -14
  740. transformers/models/llama4/configuration_llama4.py +30 -27
  741. transformers/models/llama4/image_processing_llama4_fast.py +14 -12
  742. transformers/models/llama4/modeling_llama4.py +113 -120
  743. transformers/models/llama4/processing_llama4.py +57 -33
  744. transformers/models/llava/configuration_llava.py +1 -10
  745. transformers/models/llava/image_processing_llava.py +28 -25
  746. transformers/models/llava/image_processing_llava_fast.py +11 -9
  747. transformers/models/llava/modeling_llava.py +109 -85
  748. transformers/models/llava/processing_llava.py +51 -18
  749. transformers/models/llava_next/configuration_llava_next.py +2 -2
  750. transformers/models/llava_next/image_processing_llava_next.py +45 -43
  751. transformers/models/llava_next/image_processing_llava_next_fast.py +13 -11
  752. transformers/models/llava_next/modeling_llava_next.py +107 -110
  753. transformers/models/llava_next/processing_llava_next.py +47 -18
  754. transformers/models/llava_next_video/configuration_llava_next_video.py +7 -4
  755. transformers/models/llava_next_video/modeling_llava_next_video.py +158 -175
  756. transformers/models/llava_next_video/modular_llava_next_video.py +150 -155
  757. transformers/models/llava_next_video/processing_llava_next_video.py +63 -21
  758. transformers/models/llava_next_video/video_processing_llava_next_video.py +1 -0
  759. transformers/models/llava_onevision/configuration_llava_onevision.py +7 -4
  760. transformers/models/llava_onevision/image_processing_llava_onevision.py +42 -40
  761. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +15 -14
  762. transformers/models/llava_onevision/modeling_llava_onevision.py +169 -177
  763. transformers/models/llava_onevision/modular_llava_onevision.py +156 -163
  764. transformers/models/llava_onevision/processing_llava_onevision.py +53 -21
  765. transformers/models/llava_onevision/video_processing_llava_onevision.py +1 -0
  766. transformers/models/longcat_flash/__init__.py +1 -0
  767. transformers/models/longcat_flash/configuration_longcat_flash.py +42 -37
  768. transformers/models/longcat_flash/modeling_longcat_flash.py +36 -36
  769. transformers/models/longcat_flash/modular_longcat_flash.py +21 -21
  770. transformers/models/longformer/configuration_longformer.py +5 -5
  771. transformers/models/longformer/modeling_longformer.py +101 -105
  772. transformers/models/longt5/configuration_longt5.py +7 -9
  773. transformers/models/longt5/modeling_longt5.py +49 -49
  774. transformers/models/luke/configuration_luke.py +2 -8
  775. transformers/models/luke/modeling_luke.py +181 -188
  776. transformers/models/luke/tokenization_luke.py +140 -107
  777. transformers/models/lxmert/configuration_lxmert.py +1 -16
  778. transformers/models/lxmert/modeling_lxmert.py +74 -65
  779. transformers/models/m2m_100/configuration_m2m_100.py +9 -7
  780. transformers/models/m2m_100/modeling_m2m_100.py +71 -83
  781. transformers/models/m2m_100/tokenization_m2m_100.py +8 -8
  782. transformers/models/mamba/configuration_mamba.py +2 -1
  783. transformers/models/mamba/modeling_mamba.py +66 -58
  784. transformers/models/mamba2/configuration_mamba2.py +8 -5
  785. transformers/models/mamba2/modeling_mamba2.py +69 -68
  786. transformers/models/marian/configuration_marian.py +5 -10
  787. transformers/models/marian/modeling_marian.py +87 -93
  788. transformers/models/marian/tokenization_marian.py +6 -6
  789. transformers/models/markuplm/configuration_markuplm.py +7 -4
  790. transformers/models/markuplm/feature_extraction_markuplm.py +2 -1
  791. transformers/models/markuplm/modeling_markuplm.py +70 -69
  792. transformers/models/markuplm/processing_markuplm.py +38 -31
  793. transformers/models/markuplm/tokenization_markuplm.py +136 -93
  794. transformers/models/mask2former/configuration_mask2former.py +8 -5
  795. transformers/models/mask2former/image_processing_mask2former.py +85 -84
  796. transformers/models/mask2former/image_processing_mask2former_fast.py +40 -37
  797. transformers/models/mask2former/modeling_mask2former.py +103 -118
  798. transformers/models/mask2former/modular_mask2former.py +8 -6
  799. transformers/models/maskformer/configuration_maskformer.py +9 -6
  800. transformers/models/maskformer/configuration_maskformer_swin.py +1 -0
  801. transformers/models/maskformer/image_processing_maskformer.py +85 -84
  802. transformers/models/maskformer/image_processing_maskformer_fast.py +40 -36
  803. transformers/models/maskformer/modeling_maskformer.py +65 -79
  804. transformers/models/maskformer/modeling_maskformer_swin.py +32 -36
  805. transformers/models/mbart/configuration_mbart.py +4 -9
  806. transformers/models/mbart/modeling_mbart.py +116 -131
  807. transformers/models/mbart/tokenization_mbart.py +54 -11
  808. transformers/models/mbart50/tokenization_mbart50.py +13 -8
  809. transformers/models/megatron_bert/configuration_megatron_bert.py +3 -13
  810. transformers/models/megatron_bert/modeling_megatron_bert.py +150 -148
  811. transformers/models/metaclip_2/configuration_metaclip_2.py +1 -4
  812. transformers/models/metaclip_2/modeling_metaclip_2.py +84 -91
  813. transformers/models/metaclip_2/modular_metaclip_2.py +45 -61
  814. transformers/models/mgp_str/configuration_mgp_str.py +1 -0
  815. transformers/models/mgp_str/modeling_mgp_str.py +18 -20
  816. transformers/models/mgp_str/processing_mgp_str.py +20 -3
  817. transformers/models/mgp_str/tokenization_mgp_str.py +3 -1
  818. transformers/models/mimi/configuration_mimi.py +40 -42
  819. transformers/models/mimi/modeling_mimi.py +113 -142
  820. transformers/models/minimax/__init__.py +1 -0
  821. transformers/models/minimax/configuration_minimax.py +43 -37
  822. transformers/models/minimax/modeling_minimax.py +51 -61
  823. transformers/models/minimax/modular_minimax.py +62 -68
  824. transformers/models/ministral/configuration_ministral.py +29 -25
  825. transformers/models/ministral/modeling_ministral.py +38 -36
  826. transformers/models/ministral/modular_ministral.py +37 -32
  827. transformers/models/ministral3/configuration_ministral3.py +27 -24
  828. transformers/models/ministral3/modeling_ministral3.py +37 -36
  829. transformers/models/ministral3/modular_ministral3.py +5 -4
  830. transformers/models/mistral/configuration_mistral.py +29 -24
  831. transformers/models/mistral/modeling_mistral.py +37 -36
  832. transformers/models/mistral/modular_mistral.py +12 -11
  833. transformers/models/mistral3/configuration_mistral3.py +1 -4
  834. transformers/models/mistral3/modeling_mistral3.py +86 -89
  835. transformers/models/mistral3/modular_mistral3.py +68 -69
  836. transformers/models/mixtral/configuration_mixtral.py +34 -29
  837. transformers/models/mixtral/modeling_mixtral.py +45 -50
  838. transformers/models/mixtral/modular_mixtral.py +31 -32
  839. transformers/models/mlcd/configuration_mlcd.py +1 -0
  840. transformers/models/mlcd/modeling_mlcd.py +14 -20
  841. transformers/models/mlcd/modular_mlcd.py +13 -17
  842. transformers/models/mllama/configuration_mllama.py +15 -10
  843. transformers/models/mllama/image_processing_mllama.py +25 -23
  844. transformers/models/mllama/image_processing_mllama_fast.py +11 -11
  845. transformers/models/mllama/modeling_mllama.py +94 -105
  846. transformers/models/mllama/processing_mllama.py +55 -6
  847. transformers/models/mluke/tokenization_mluke.py +107 -101
  848. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +3 -5
  849. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +140 -155
  850. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +3 -5
  851. transformers/models/mobilebert/configuration_mobilebert.py +2 -4
  852. transformers/models/mobilebert/modeling_mobilebert.py +85 -77
  853. transformers/models/mobilebert/tokenization_mobilebert.py +1 -0
  854. transformers/models/mobilenet_v1/configuration_mobilenet_v1.py +1 -0
  855. transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +23 -20
  856. transformers/models/mobilenet_v1/image_processing_mobilenet_v1_fast.py +1 -0
  857. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +16 -15
  858. transformers/models/mobilenet_v2/configuration_mobilenet_v2.py +1 -0
  859. transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +51 -48
  860. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +15 -13
  861. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +22 -24
  862. transformers/models/mobilevit/configuration_mobilevit.py +1 -0
  863. transformers/models/mobilevit/image_processing_mobilevit.py +49 -46
  864. transformers/models/mobilevit/image_processing_mobilevit_fast.py +14 -12
  865. transformers/models/mobilevit/modeling_mobilevit.py +21 -28
  866. transformers/models/mobilevitv2/configuration_mobilevitv2.py +1 -0
  867. transformers/models/mobilevitv2/modeling_mobilevitv2.py +22 -28
  868. transformers/models/modernbert/configuration_modernbert.py +42 -44
  869. transformers/models/modernbert/modeling_modernbert.py +133 -145
  870. transformers/models/modernbert/modular_modernbert.py +170 -186
  871. transformers/models/modernbert_decoder/configuration_modernbert_decoder.py +40 -40
  872. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +57 -62
  873. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +86 -94
  874. transformers/models/moonshine/configuration_moonshine.py +31 -34
  875. transformers/models/moonshine/modeling_moonshine.py +71 -71
  876. transformers/models/moonshine/modular_moonshine.py +83 -88
  877. transformers/models/moshi/configuration_moshi.py +23 -46
  878. transformers/models/moshi/modeling_moshi.py +187 -157
  879. transformers/models/mpnet/configuration_mpnet.py +2 -6
  880. transformers/models/mpnet/modeling_mpnet.py +57 -62
  881. transformers/models/mpnet/tokenization_mpnet.py +15 -4
  882. transformers/models/mpt/configuration_mpt.py +9 -5
  883. transformers/models/mpt/modeling_mpt.py +60 -60
  884. transformers/models/mra/configuration_mra.py +2 -8
  885. transformers/models/mra/modeling_mra.py +57 -64
  886. transformers/models/mt5/configuration_mt5.py +8 -10
  887. transformers/models/mt5/modeling_mt5.py +95 -87
  888. transformers/models/musicgen/configuration_musicgen.py +8 -12
  889. transformers/models/musicgen/modeling_musicgen.py +122 -118
  890. transformers/models/musicgen/processing_musicgen.py +21 -3
  891. transformers/models/musicgen_melody/configuration_musicgen_melody.py +8 -15
  892. transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py +9 -8
  893. transformers/models/musicgen_melody/modeling_musicgen_melody.py +123 -117
  894. transformers/models/musicgen_melody/processing_musicgen_melody.py +22 -3
  895. transformers/models/mvp/configuration_mvp.py +5 -8
  896. transformers/models/mvp/modeling_mvp.py +123 -135
  897. transformers/models/myt5/tokenization_myt5.py +10 -8
  898. transformers/models/nanochat/configuration_nanochat.py +8 -5
  899. transformers/models/nanochat/modeling_nanochat.py +40 -37
  900. transformers/models/nanochat/modular_nanochat.py +14 -12
  901. transformers/models/nemotron/configuration_nemotron.py +30 -25
  902. transformers/models/nemotron/modeling_nemotron.py +57 -56
  903. transformers/models/nllb/tokenization_nllb.py +28 -12
  904. transformers/models/nllb_moe/configuration_nllb_moe.py +9 -7
  905. transformers/models/nllb_moe/modeling_nllb_moe.py +69 -77
  906. transformers/models/nougat/image_processing_nougat.py +32 -29
  907. transformers/models/nougat/image_processing_nougat_fast.py +14 -12
  908. transformers/models/nougat/processing_nougat.py +39 -37
  909. transformers/models/nougat/tokenization_nougat.py +73 -18
  910. transformers/models/nystromformer/configuration_nystromformer.py +2 -8
  911. transformers/models/nystromformer/modeling_nystromformer.py +63 -74
  912. transformers/models/olmo/configuration_olmo.py +28 -23
  913. transformers/models/olmo/modeling_olmo.py +39 -36
  914. transformers/models/olmo/modular_olmo.py +11 -7
  915. transformers/models/olmo2/configuration_olmo2.py +28 -23
  916. transformers/models/olmo2/modeling_olmo2.py +41 -37
  917. transformers/models/olmo2/modular_olmo2.py +32 -29
  918. transformers/models/olmo3/__init__.py +1 -0
  919. transformers/models/olmo3/configuration_olmo3.py +30 -26
  920. transformers/models/olmo3/modeling_olmo3.py +39 -36
  921. transformers/models/olmo3/modular_olmo3.py +40 -37
  922. transformers/models/olmoe/configuration_olmoe.py +33 -29
  923. transformers/models/olmoe/modeling_olmoe.py +46 -52
  924. transformers/models/olmoe/modular_olmoe.py +15 -16
  925. transformers/models/omdet_turbo/configuration_omdet_turbo.py +4 -2
  926. transformers/models/omdet_turbo/modeling_omdet_turbo.py +47 -53
  927. transformers/models/omdet_turbo/processing_omdet_turbo.py +67 -19
  928. transformers/models/oneformer/configuration_oneformer.py +8 -5
  929. transformers/models/oneformer/image_processing_oneformer.py +84 -83
  930. transformers/models/oneformer/image_processing_oneformer_fast.py +42 -41
  931. transformers/models/oneformer/modeling_oneformer.py +171 -147
  932. transformers/models/oneformer/processing_oneformer.py +43 -28
  933. transformers/models/openai/configuration_openai.py +1 -16
  934. transformers/models/openai/modeling_openai.py +51 -65
  935. transformers/models/openai/tokenization_openai.py +47 -8
  936. transformers/models/opt/configuration_opt.py +7 -6
  937. transformers/models/opt/modeling_opt.py +76 -78
  938. transformers/models/ovis2/__init__.py +1 -0
  939. transformers/models/ovis2/configuration_ovis2.py +1 -0
  940. transformers/models/ovis2/image_processing_ovis2.py +24 -22
  941. transformers/models/ovis2/image_processing_ovis2_fast.py +11 -9
  942. transformers/models/ovis2/modeling_ovis2.py +142 -111
  943. transformers/models/ovis2/modular_ovis2.py +45 -90
  944. transformers/models/ovis2/processing_ovis2.py +40 -12
  945. transformers/models/owlv2/configuration_owlv2.py +2 -4
  946. transformers/models/owlv2/image_processing_owlv2.py +21 -20
  947. transformers/models/owlv2/image_processing_owlv2_fast.py +15 -12
  948. transformers/models/owlv2/modeling_owlv2.py +117 -133
  949. transformers/models/owlv2/modular_owlv2.py +14 -11
  950. transformers/models/owlv2/processing_owlv2.py +49 -20
  951. transformers/models/owlvit/configuration_owlvit.py +2 -4
  952. transformers/models/owlvit/image_processing_owlvit.py +22 -21
  953. transformers/models/owlvit/image_processing_owlvit_fast.py +3 -2
  954. transformers/models/owlvit/modeling_owlvit.py +116 -132
  955. transformers/models/owlvit/processing_owlvit.py +48 -20
  956. transformers/models/paligemma/configuration_paligemma.py +1 -4
  957. transformers/models/paligemma/modeling_paligemma.py +93 -103
  958. transformers/models/paligemma/processing_paligemma.py +66 -13
  959. transformers/models/parakeet/configuration_parakeet.py +14 -7
  960. transformers/models/parakeet/feature_extraction_parakeet.py +12 -10
  961. transformers/models/parakeet/modeling_parakeet.py +28 -32
  962. transformers/models/parakeet/modular_parakeet.py +20 -23
  963. transformers/models/parakeet/processing_parakeet.py +5 -13
  964. transformers/models/parakeet/{tokenization_parakeet.py → tokenization_parakeet_fast.py} +7 -5
  965. transformers/models/patchtsmixer/configuration_patchtsmixer.py +8 -5
  966. transformers/models/patchtsmixer/modeling_patchtsmixer.py +62 -70
  967. transformers/models/patchtst/configuration_patchtst.py +9 -6
  968. transformers/models/patchtst/modeling_patchtst.py +80 -97
  969. transformers/models/pegasus/configuration_pegasus.py +5 -8
  970. transformers/models/pegasus/modeling_pegasus.py +66 -72
  971. transformers/models/pegasus/tokenization_pegasus.py +45 -15
  972. transformers/models/pegasus_x/configuration_pegasus_x.py +4 -5
  973. transformers/models/pegasus_x/modeling_pegasus_x.py +52 -55
  974. transformers/models/perceiver/configuration_perceiver.py +1 -0
  975. transformers/models/perceiver/image_processing_perceiver.py +25 -22
  976. transformers/models/perceiver/image_processing_perceiver_fast.py +9 -7
  977. transformers/models/perceiver/modeling_perceiver.py +146 -165
  978. transformers/models/perceiver/tokenization_perceiver.py +6 -3
  979. transformers/models/perception_lm/configuration_perception_lm.py +1 -0
  980. transformers/models/perception_lm/image_processing_perception_lm_fast.py +10 -8
  981. transformers/models/perception_lm/modeling_perception_lm.py +70 -71
  982. transformers/models/perception_lm/modular_perception_lm.py +61 -65
  983. transformers/models/perception_lm/processing_perception_lm.py +47 -13
  984. transformers/models/perception_lm/video_processing_perception_lm.py +1 -0
  985. transformers/models/persimmon/configuration_persimmon.py +28 -23
  986. transformers/models/persimmon/modeling_persimmon.py +45 -43
  987. transformers/models/phi/configuration_phi.py +28 -23
  988. transformers/models/phi/modeling_phi.py +43 -40
  989. transformers/models/phi/modular_phi.py +24 -23
  990. transformers/models/phi3/configuration_phi3.py +33 -28
  991. transformers/models/phi3/modeling_phi3.py +38 -36
  992. transformers/models/phi3/modular_phi3.py +17 -13
  993. transformers/models/phi4_multimodal/configuration_phi4_multimodal.py +33 -30
  994. transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py +9 -7
  995. transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +11 -11
  996. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +78 -95
  997. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +80 -98
  998. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +44 -7
  999. transformers/models/phimoe/configuration_phimoe.py +36 -31
  1000. transformers/models/phimoe/modeling_phimoe.py +45 -50
  1001. transformers/models/phimoe/modular_phimoe.py +4 -3
  1002. transformers/models/phobert/tokenization_phobert.py +6 -4
  1003. transformers/models/pix2struct/configuration_pix2struct.py +10 -12
  1004. transformers/models/pix2struct/image_processing_pix2struct.py +19 -15
  1005. transformers/models/pix2struct/image_processing_pix2struct_fast.py +15 -12
  1006. transformers/models/pix2struct/modeling_pix2struct.py +52 -58
  1007. transformers/models/pix2struct/processing_pix2struct.py +30 -5
  1008. transformers/models/pixtral/configuration_pixtral.py +14 -11
  1009. transformers/models/pixtral/image_processing_pixtral.py +28 -26
  1010. transformers/models/pixtral/image_processing_pixtral_fast.py +11 -10
  1011. transformers/models/pixtral/modeling_pixtral.py +34 -28
  1012. transformers/models/pixtral/processing_pixtral.py +53 -21
  1013. transformers/models/plbart/configuration_plbart.py +5 -8
  1014. transformers/models/plbart/modeling_plbart.py +106 -119
  1015. transformers/models/plbart/modular_plbart.py +33 -39
  1016. transformers/models/plbart/tokenization_plbart.py +7 -4
  1017. transformers/models/poolformer/configuration_poolformer.py +1 -0
  1018. transformers/models/poolformer/image_processing_poolformer.py +24 -21
  1019. transformers/models/poolformer/image_processing_poolformer_fast.py +15 -13
  1020. transformers/models/poolformer/modeling_poolformer.py +13 -23
  1021. transformers/models/pop2piano/configuration_pop2piano.py +8 -7
  1022. transformers/models/pop2piano/feature_extraction_pop2piano.py +9 -6
  1023. transformers/models/pop2piano/modeling_pop2piano.py +24 -26
  1024. transformers/models/pop2piano/processing_pop2piano.py +33 -25
  1025. transformers/models/pop2piano/tokenization_pop2piano.py +23 -15
  1026. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +3 -3
  1027. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything.py +28 -28
  1028. transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +21 -20
  1029. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +13 -16
  1030. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +13 -16
  1031. transformers/models/prophetnet/configuration_prophetnet.py +38 -37
  1032. transformers/models/prophetnet/modeling_prophetnet.py +131 -114
  1033. transformers/models/prophetnet/tokenization_prophetnet.py +16 -14
  1034. transformers/models/pvt/configuration_pvt.py +1 -0
  1035. transformers/models/pvt/image_processing_pvt.py +27 -24
  1036. transformers/models/pvt/image_processing_pvt_fast.py +2 -1
  1037. transformers/models/pvt/modeling_pvt.py +21 -21
  1038. transformers/models/pvt_v2/configuration_pvt_v2.py +4 -2
  1039. transformers/models/pvt_v2/modeling_pvt_v2.py +25 -28
  1040. transformers/models/qwen2/configuration_qwen2.py +25 -32
  1041. transformers/models/qwen2/modeling_qwen2.py +38 -36
  1042. transformers/models/qwen2/modular_qwen2.py +12 -11
  1043. transformers/models/qwen2/tokenization_qwen2.py +23 -12
  1044. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +26 -32
  1045. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +277 -340
  1046. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +211 -278
  1047. transformers/models/qwen2_5_omni/processing_qwen2_5_omni.py +49 -41
  1048. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +35 -29
  1049. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +148 -203
  1050. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +118 -93
  1051. transformers/models/qwen2_5_vl/processing_qwen2_5_vl.py +43 -7
  1052. transformers/models/qwen2_audio/configuration_qwen2_audio.py +1 -0
  1053. transformers/models/qwen2_audio/modeling_qwen2_audio.py +40 -40
  1054. transformers/models/qwen2_audio/processing_qwen2_audio.py +42 -13
  1055. transformers/models/qwen2_moe/configuration_qwen2_moe.py +35 -42
  1056. transformers/models/qwen2_moe/modeling_qwen2_moe.py +46 -51
  1057. transformers/models/qwen2_moe/modular_qwen2_moe.py +10 -7
  1058. transformers/models/qwen2_vl/configuration_qwen2_vl.py +34 -29
  1059. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +42 -41
  1060. transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +15 -12
  1061. transformers/models/qwen2_vl/modeling_qwen2_vl.py +153 -199
  1062. transformers/models/qwen2_vl/processing_qwen2_vl.py +44 -7
  1063. transformers/models/qwen2_vl/video_processing_qwen2_vl.py +18 -38
  1064. transformers/models/qwen3/configuration_qwen3.py +27 -34
  1065. transformers/models/qwen3/modeling_qwen3.py +39 -36
  1066. transformers/models/qwen3/modular_qwen3.py +6 -4
  1067. transformers/models/qwen3_moe/configuration_qwen3_moe.py +32 -39
  1068. transformers/models/qwen3_moe/modeling_qwen3_moe.py +46 -51
  1069. transformers/models/qwen3_moe/modular_qwen3_moe.py +13 -10
  1070. transformers/models/qwen3_next/configuration_qwen3_next.py +35 -45
  1071. transformers/models/qwen3_next/modeling_qwen3_next.py +51 -47
  1072. transformers/models/qwen3_next/modular_qwen3_next.py +35 -34
  1073. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +101 -135
  1074. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +252 -355
  1075. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +196 -250
  1076. transformers/models/qwen3_omni_moe/processing_qwen3_omni_moe.py +48 -40
  1077. transformers/models/qwen3_vl/configuration_qwen3_vl.py +29 -27
  1078. transformers/models/qwen3_vl/modeling_qwen3_vl.py +155 -233
  1079. transformers/models/qwen3_vl/modular_qwen3_vl.py +179 -206
  1080. transformers/models/qwen3_vl/processing_qwen3_vl.py +42 -6
  1081. transformers/models/qwen3_vl/video_processing_qwen3_vl.py +12 -10
  1082. transformers/models/qwen3_vl_moe/configuration_qwen3_vl_moe.py +30 -23
  1083. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +303 -358
  1084. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +124 -87
  1085. transformers/models/rag/configuration_rag.py +15 -6
  1086. transformers/models/rag/modeling_rag.py +130 -127
  1087. transformers/models/rag/retrieval_rag.py +5 -3
  1088. transformers/models/rag/tokenization_rag.py +50 -0
  1089. transformers/models/recurrent_gemma/configuration_recurrent_gemma.py +30 -29
  1090. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +42 -53
  1091. transformers/models/reformer/configuration_reformer.py +8 -7
  1092. transformers/models/reformer/modeling_reformer.py +69 -80
  1093. transformers/models/reformer/tokenization_reformer.py +31 -11
  1094. transformers/models/regnet/configuration_regnet.py +1 -0
  1095. transformers/models/regnet/modeling_regnet.py +8 -15
  1096. transformers/models/rembert/configuration_rembert.py +2 -8
  1097. transformers/models/rembert/modeling_rembert.py +111 -121
  1098. transformers/models/rembert/tokenization_rembert.py +12 -2
  1099. transformers/models/resnet/configuration_resnet.py +1 -0
  1100. transformers/models/resnet/modeling_resnet.py +13 -27
  1101. transformers/models/roberta/configuration_roberta.py +3 -11
  1102. transformers/models/roberta/modeling_roberta.py +93 -94
  1103. transformers/models/roberta/modular_roberta.py +58 -58
  1104. transformers/models/roberta/tokenization_roberta.py +29 -17
  1105. transformers/models/roberta/tokenization_roberta_old.py +4 -2
  1106. transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py +3 -11
  1107. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +93 -94
  1108. transformers/models/roc_bert/configuration_roc_bert.py +2 -8
  1109. transformers/models/roc_bert/modeling_roc_bert.py +121 -122
  1110. transformers/models/roc_bert/tokenization_roc_bert.py +94 -88
  1111. transformers/models/roformer/configuration_roformer.py +3 -13
  1112. transformers/models/roformer/modeling_roformer.py +81 -85
  1113. transformers/models/roformer/tokenization_roformer.py +412 -74
  1114. transformers/models/roformer/tokenization_roformer_fast.py +160 -0
  1115. transformers/models/roformer/tokenization_utils.py +1 -0
  1116. transformers/models/rt_detr/configuration_rt_detr.py +2 -1
  1117. transformers/models/rt_detr/configuration_rt_detr_resnet.py +1 -0
  1118. transformers/models/rt_detr/image_processing_rt_detr.py +55 -54
  1119. transformers/models/rt_detr/image_processing_rt_detr_fast.py +26 -26
  1120. transformers/models/rt_detr/modeling_rt_detr.py +90 -99
  1121. transformers/models/rt_detr/modeling_rt_detr_resnet.py +6 -13
  1122. transformers/models/rt_detr/modular_rt_detr.py +16 -16
  1123. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +4 -6
  1124. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +90 -101
  1125. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +12 -19
  1126. transformers/models/rwkv/configuration_rwkv.py +4 -2
  1127. transformers/models/rwkv/modeling_rwkv.py +32 -31
  1128. transformers/models/sam/configuration_sam.py +1 -3
  1129. transformers/models/sam/image_processing_sam.py +60 -59
  1130. transformers/models/sam/image_processing_sam_fast.py +27 -25
  1131. transformers/models/sam/modeling_sam.py +41 -47
  1132. transformers/models/sam/processing_sam.py +27 -39
  1133. transformers/models/sam2/configuration_sam2.py +3 -2
  1134. transformers/models/sam2/image_processing_sam2_fast.py +15 -14
  1135. transformers/models/sam2/modeling_sam2.py +90 -96
  1136. transformers/models/sam2/modular_sam2.py +91 -86
  1137. transformers/models/sam2/processing_sam2.py +47 -31
  1138. transformers/models/sam2_video/configuration_sam2_video.py +1 -0
  1139. transformers/models/sam2_video/modeling_sam2_video.py +144 -151
  1140. transformers/models/sam2_video/modular_sam2_video.py +104 -101
  1141. transformers/models/sam2_video/processing_sam2_video.py +66 -49
  1142. transformers/models/sam2_video/video_processing_sam2_video.py +4 -1
  1143. transformers/models/sam3/configuration_sam3.py +2 -21
  1144. transformers/models/sam3/image_processing_sam3_fast.py +20 -17
  1145. transformers/models/sam3/modeling_sam3.py +170 -184
  1146. transformers/models/sam3/modular_sam3.py +8 -3
  1147. transformers/models/sam3/processing_sam3.py +52 -37
  1148. transformers/models/sam3_tracker/__init__.py +1 -0
  1149. transformers/models/sam3_tracker/configuration_sam3_tracker.py +3 -1
  1150. transformers/models/sam3_tracker/modeling_sam3_tracker.py +77 -82
  1151. transformers/models/sam3_tracker/modular_sam3_tracker.py +3 -8
  1152. transformers/models/sam3_tracker/processing_sam3_tracker.py +48 -31
  1153. transformers/models/sam3_tracker_video/__init__.py +1 -0
  1154. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +1 -25
  1155. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +122 -135
  1156. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +26 -35
  1157. transformers/models/sam3_tracker_video/processing_sam3_tracker_video.py +66 -50
  1158. transformers/models/sam3_video/configuration_sam3_video.py +1 -14
  1159. transformers/models/sam3_video/modeling_sam3_video.py +34 -33
  1160. transformers/models/sam3_video/processing_sam3_video.py +46 -26
  1161. transformers/models/sam_hq/__init__.py +1 -1
  1162. transformers/models/sam_hq/configuration_sam_hq.py +1 -3
  1163. transformers/models/sam_hq/modeling_sam_hq.py +69 -74
  1164. transformers/models/sam_hq/modular_sam_hq.py +25 -23
  1165. transformers/models/sam_hq/{processing_sam_hq.py → processing_samhq.py} +29 -41
  1166. transformers/models/seamless_m4t/configuration_seamless_m4t.py +10 -8
  1167. transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py +11 -8
  1168. transformers/models/seamless_m4t/modeling_seamless_m4t.py +194 -212
  1169. transformers/models/seamless_m4t/processing_seamless_m4t.py +39 -18
  1170. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +77 -40
  1171. transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py +10 -8
  1172. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +196 -204
  1173. transformers/models/seed_oss/configuration_seed_oss.py +32 -28
  1174. transformers/models/seed_oss/modeling_seed_oss.py +35 -33
  1175. transformers/models/seed_oss/modular_seed_oss.py +4 -3
  1176. transformers/models/segformer/configuration_segformer.py +10 -0
  1177. transformers/models/segformer/image_processing_segformer.py +42 -39
  1178. transformers/models/segformer/image_processing_segformer_fast.py +12 -10
  1179. transformers/models/segformer/modeling_segformer.py +31 -34
  1180. transformers/models/segformer/modular_segformer.py +10 -8
  1181. transformers/models/seggpt/configuration_seggpt.py +1 -0
  1182. transformers/models/seggpt/image_processing_seggpt.py +41 -38
  1183. transformers/models/seggpt/modeling_seggpt.py +38 -50
  1184. transformers/models/sew/configuration_sew.py +2 -4
  1185. transformers/models/sew/modeling_sew.py +36 -38
  1186. transformers/models/sew/modular_sew.py +13 -13
  1187. transformers/models/sew_d/configuration_sew_d.py +2 -4
  1188. transformers/models/sew_d/modeling_sew_d.py +30 -31
  1189. transformers/models/shieldgemma2/configuration_shieldgemma2.py +1 -0
  1190. transformers/models/shieldgemma2/modeling_shieldgemma2.py +17 -16
  1191. transformers/models/shieldgemma2/processing_shieldgemma2.py +5 -3
  1192. transformers/models/siglip/configuration_siglip.py +2 -4
  1193. transformers/models/siglip/image_processing_siglip.py +20 -17
  1194. transformers/models/siglip/image_processing_siglip_fast.py +1 -0
  1195. transformers/models/siglip/modeling_siglip.py +75 -84
  1196. transformers/models/siglip/processing_siglip.py +14 -2
  1197. transformers/models/siglip/tokenization_siglip.py +7 -6
  1198. transformers/models/siglip2/configuration_siglip2.py +2 -5
  1199. transformers/models/siglip2/image_processing_siglip2.py +16 -15
  1200. transformers/models/siglip2/image_processing_siglip2_fast.py +7 -6
  1201. transformers/models/siglip2/modeling_siglip2.py +129 -143
  1202. transformers/models/siglip2/modular_siglip2.py +46 -47
  1203. transformers/models/siglip2/processing_siglip2.py +14 -2
  1204. transformers/models/smollm3/configuration_smollm3.py +32 -29
  1205. transformers/models/smollm3/modeling_smollm3.py +39 -36
  1206. transformers/models/smollm3/modular_smollm3.py +35 -33
  1207. transformers/models/smolvlm/configuration_smolvlm.py +4 -2
  1208. transformers/models/smolvlm/image_processing_smolvlm.py +43 -42
  1209. transformers/models/smolvlm/image_processing_smolvlm_fast.py +15 -41
  1210. transformers/models/smolvlm/modeling_smolvlm.py +94 -126
  1211. transformers/models/smolvlm/modular_smolvlm.py +39 -50
  1212. transformers/models/smolvlm/processing_smolvlm.py +83 -15
  1213. transformers/models/smolvlm/video_processing_smolvlm.py +18 -16
  1214. transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py +1 -0
  1215. transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +27 -26
  1216. transformers/models/speech_to_text/configuration_speech_to_text.py +9 -9
  1217. transformers/models/speech_to_text/feature_extraction_speech_to_text.py +13 -10
  1218. transformers/models/speech_to_text/modeling_speech_to_text.py +54 -66
  1219. transformers/models/speech_to_text/processing_speech_to_text.py +30 -4
  1220. transformers/models/speech_to_text/tokenization_speech_to_text.py +6 -5
  1221. transformers/models/speecht5/configuration_speecht5.py +9 -7
  1222. transformers/models/speecht5/feature_extraction_speecht5.py +37 -16
  1223. transformers/models/speecht5/modeling_speecht5.py +175 -213
  1224. transformers/models/speecht5/number_normalizer.py +1 -0
  1225. transformers/models/speecht5/processing_speecht5.py +37 -3
  1226. transformers/models/speecht5/tokenization_speecht5.py +5 -4
  1227. transformers/models/splinter/configuration_splinter.py +7 -6
  1228. transformers/models/splinter/modeling_splinter.py +59 -71
  1229. transformers/models/splinter/tokenization_splinter.py +30 -9
  1230. transformers/models/squeezebert/configuration_squeezebert.py +2 -14
  1231. transformers/models/squeezebert/modeling_squeezebert.py +62 -68
  1232. transformers/models/squeezebert/tokenization_squeezebert.py +1 -0
  1233. transformers/models/stablelm/configuration_stablelm.py +29 -24
  1234. transformers/models/stablelm/modeling_stablelm.py +45 -44
  1235. transformers/models/starcoder2/configuration_starcoder2.py +27 -30
  1236. transformers/models/starcoder2/modeling_starcoder2.py +41 -39
  1237. transformers/models/starcoder2/modular_starcoder2.py +16 -14
  1238. transformers/models/superglue/configuration_superglue.py +3 -7
  1239. transformers/models/superglue/image_processing_superglue.py +15 -15
  1240. transformers/models/superglue/image_processing_superglue_fast.py +10 -9
  1241. transformers/models/superglue/modeling_superglue.py +37 -42
  1242. transformers/models/superpoint/image_processing_superpoint.py +15 -15
  1243. transformers/models/superpoint/image_processing_superpoint_fast.py +11 -8
  1244. transformers/models/superpoint/modeling_superpoint.py +16 -18
  1245. transformers/models/swiftformer/configuration_swiftformer.py +1 -0
  1246. transformers/models/swiftformer/modeling_swiftformer.py +14 -18
  1247. transformers/models/swin/configuration_swin.py +1 -0
  1248. transformers/models/swin/modeling_swin.py +86 -86
  1249. transformers/models/swin2sr/configuration_swin2sr.py +1 -0
  1250. transformers/models/swin2sr/image_processing_swin2sr.py +13 -10
  1251. transformers/models/swin2sr/image_processing_swin2sr_fast.py +8 -4
  1252. transformers/models/swin2sr/modeling_swin2sr.py +63 -81
  1253. transformers/models/swinv2/configuration_swinv2.py +1 -0
  1254. transformers/models/swinv2/modeling_swinv2.py +104 -108
  1255. transformers/models/switch_transformers/configuration_switch_transformers.py +7 -11
  1256. transformers/models/switch_transformers/modeling_switch_transformers.py +44 -37
  1257. transformers/models/switch_transformers/modular_switch_transformers.py +41 -34
  1258. transformers/models/t5/configuration_t5.py +8 -14
  1259. transformers/models/t5/modeling_t5.py +92 -88
  1260. transformers/models/t5/tokenization_t5.py +9 -3
  1261. transformers/models/t5gemma/configuration_t5gemma.py +41 -43
  1262. transformers/models/t5gemma/modeling_t5gemma.py +107 -104
  1263. transformers/models/t5gemma/modular_t5gemma.py +120 -124
  1264. transformers/models/t5gemma2/configuration_t5gemma2.py +120 -80
  1265. transformers/models/t5gemma2/modeling_t5gemma2.py +125 -141
  1266. transformers/models/t5gemma2/modular_t5gemma2.py +104 -393
  1267. transformers/models/table_transformer/configuration_table_transformer.py +2 -1
  1268. transformers/models/table_transformer/modeling_table_transformer.py +49 -51
  1269. transformers/models/tapas/configuration_tapas.py +2 -12
  1270. transformers/models/tapas/modeling_tapas.py +67 -68
  1271. transformers/models/tapas/tokenization_tapas.py +153 -115
  1272. transformers/models/textnet/configuration_textnet.py +1 -0
  1273. transformers/models/textnet/image_processing_textnet.py +25 -22
  1274. transformers/models/textnet/image_processing_textnet_fast.py +10 -8
  1275. transformers/models/textnet/modeling_textnet.py +16 -28
  1276. transformers/models/time_series_transformer/configuration_time_series_transformer.py +8 -5
  1277. transformers/models/time_series_transformer/modeling_time_series_transformer.py +81 -83
  1278. transformers/models/timesfm/configuration_timesfm.py +1 -0
  1279. transformers/models/timesfm/modeling_timesfm.py +22 -33
  1280. transformers/models/timesfm/modular_timesfm.py +21 -32
  1281. transformers/models/timesformer/configuration_timesformer.py +1 -0
  1282. transformers/models/timesformer/modeling_timesformer.py +16 -15
  1283. transformers/models/timm_backbone/configuration_timm_backbone.py +1 -0
  1284. transformers/models/timm_backbone/modeling_timm_backbone.py +15 -17
  1285. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -5
  1286. transformers/models/timm_wrapper/image_processing_timm_wrapper.py +5 -4
  1287. transformers/models/timm_wrapper/modeling_timm_wrapper.py +29 -34
  1288. transformers/models/trocr/configuration_trocr.py +8 -11
  1289. transformers/models/trocr/modeling_trocr.py +44 -45
  1290. transformers/models/trocr/processing_trocr.py +25 -5
  1291. transformers/models/tvp/configuration_tvp.py +2 -5
  1292. transformers/models/tvp/image_processing_tvp.py +52 -50
  1293. transformers/models/tvp/image_processing_tvp_fast.py +15 -15
  1294. transformers/models/tvp/modeling_tvp.py +27 -27
  1295. transformers/models/tvp/processing_tvp.py +14 -2
  1296. transformers/models/udop/configuration_udop.py +7 -16
  1297. transformers/models/udop/modeling_udop.py +73 -71
  1298. transformers/models/udop/processing_udop.py +26 -7
  1299. transformers/models/udop/tokenization_udop.py +105 -84
  1300. transformers/models/umt5/configuration_umt5.py +7 -8
  1301. transformers/models/umt5/modeling_umt5.py +90 -94
  1302. transformers/models/unispeech/configuration_unispeech.py +2 -4
  1303. transformers/models/unispeech/modeling_unispeech.py +49 -51
  1304. transformers/models/unispeech/modular_unispeech.py +22 -22
  1305. transformers/models/unispeech_sat/configuration_unispeech_sat.py +2 -4
  1306. transformers/models/unispeech_sat/modeling_unispeech_sat.py +65 -69
  1307. transformers/models/unispeech_sat/modular_unispeech_sat.py +23 -23
  1308. transformers/models/univnet/feature_extraction_univnet.py +14 -14
  1309. transformers/models/univnet/modeling_univnet.py +8 -8
  1310. transformers/models/upernet/configuration_upernet.py +1 -0
  1311. transformers/models/upernet/modeling_upernet.py +13 -11
  1312. transformers/models/vaultgemma/__init__.py +1 -0
  1313. transformers/models/vaultgemma/configuration_vaultgemma.py +33 -29
  1314. transformers/models/vaultgemma/modeling_vaultgemma.py +41 -39
  1315. transformers/models/vaultgemma/modular_vaultgemma.py +31 -29
  1316. transformers/models/video_llama_3/configuration_video_llama_3.py +0 -4
  1317. transformers/models/video_llama_3/image_processing_video_llama_3.py +42 -43
  1318. transformers/models/video_llama_3/image_processing_video_llama_3_fast.py +14 -12
  1319. transformers/models/video_llama_3/modeling_video_llama_3.py +109 -157
  1320. transformers/models/video_llama_3/modular_video_llama_3.py +146 -155
  1321. transformers/models/video_llama_3/processing_video_llama_3.py +39 -5
  1322. transformers/models/video_llama_3/video_processing_video_llama_3.py +23 -42
  1323. transformers/models/video_llava/configuration_video_llava.py +1 -4
  1324. transformers/models/video_llava/image_processing_video_llava.py +38 -35
  1325. transformers/models/video_llava/modeling_video_llava.py +146 -146
  1326. transformers/models/video_llava/processing_video_llava.py +78 -38
  1327. transformers/models/video_llava/video_processing_video_llava.py +1 -0
  1328. transformers/models/videomae/configuration_videomae.py +1 -0
  1329. transformers/models/videomae/image_processing_videomae.py +34 -31
  1330. transformers/models/videomae/modeling_videomae.py +17 -14
  1331. transformers/models/videomae/video_processing_videomae.py +1 -0
  1332. transformers/models/vilt/configuration_vilt.py +4 -6
  1333. transformers/models/vilt/image_processing_vilt.py +30 -29
  1334. transformers/models/vilt/image_processing_vilt_fast.py +16 -15
  1335. transformers/models/vilt/modeling_vilt.py +90 -116
  1336. transformers/models/vilt/processing_vilt.py +14 -2
  1337. transformers/models/vipllava/configuration_vipllava.py +1 -4
  1338. transformers/models/vipllava/modeling_vipllava.py +70 -99
  1339. transformers/models/vipllava/modular_vipllava.py +54 -78
  1340. transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +1 -0
  1341. transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +27 -28
  1342. transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +1 -0
  1343. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +41 -46
  1344. transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +16 -2
  1345. transformers/models/visual_bert/configuration_visual_bert.py +2 -6
  1346. transformers/models/visual_bert/modeling_visual_bert.py +92 -98
  1347. transformers/models/vit/configuration_vit.py +1 -0
  1348. transformers/models/vit/image_processing_vit.py +22 -19
  1349. transformers/models/vit/image_processing_vit_fast.py +1 -0
  1350. transformers/models/vit/modeling_vit.py +17 -17
  1351. transformers/models/vit_mae/configuration_vit_mae.py +1 -0
  1352. transformers/models/vit_mae/modeling_vit_mae.py +27 -29
  1353. transformers/models/vit_msn/configuration_vit_msn.py +1 -0
  1354. transformers/models/vit_msn/modeling_vit_msn.py +16 -18
  1355. transformers/models/vitdet/configuration_vitdet.py +1 -0
  1356. transformers/models/vitdet/modeling_vitdet.py +14 -14
  1357. transformers/models/vitmatte/configuration_vitmatte.py +5 -2
  1358. transformers/models/vitmatte/image_processing_vitmatte.py +18 -15
  1359. transformers/models/vitmatte/image_processing_vitmatte_fast.py +18 -16
  1360. transformers/models/vitmatte/modeling_vitmatte.py +11 -14
  1361. transformers/models/vitpose/configuration_vitpose.py +7 -4
  1362. transformers/models/vitpose/image_processing_vitpose.py +25 -24
  1363. transformers/models/vitpose/image_processing_vitpose_fast.py +11 -9
  1364. transformers/models/vitpose/modeling_vitpose.py +14 -14
  1365. transformers/models/vitpose_backbone/configuration_vitpose_backbone.py +1 -0
  1366. transformers/models/vitpose_backbone/modeling_vitpose_backbone.py +10 -8
  1367. transformers/models/vits/configuration_vits.py +1 -4
  1368. transformers/models/vits/modeling_vits.py +42 -44
  1369. transformers/models/vits/tokenization_vits.py +4 -3
  1370. transformers/models/vivit/configuration_vivit.py +1 -0
  1371. transformers/models/vivit/image_processing_vivit.py +39 -36
  1372. transformers/models/vivit/modeling_vivit.py +8 -6
  1373. transformers/models/vjepa2/__init__.py +1 -0
  1374. transformers/models/vjepa2/configuration_vjepa2.py +1 -0
  1375. transformers/models/vjepa2/modeling_vjepa2.py +32 -31
  1376. transformers/models/vjepa2/video_processing_vjepa2.py +1 -0
  1377. transformers/models/voxtral/__init__.py +1 -0
  1378. transformers/models/voxtral/configuration_voxtral.py +2 -0
  1379. transformers/models/voxtral/modeling_voxtral.py +47 -40
  1380. transformers/models/voxtral/modular_voxtral.py +40 -37
  1381. transformers/models/voxtral/processing_voxtral.py +48 -25
  1382. transformers/models/wav2vec2/configuration_wav2vec2.py +2 -4
  1383. transformers/models/wav2vec2/feature_extraction_wav2vec2.py +10 -7
  1384. transformers/models/wav2vec2/modeling_wav2vec2.py +121 -73
  1385. transformers/models/wav2vec2/processing_wav2vec2.py +35 -6
  1386. transformers/models/wav2vec2/tokenization_wav2vec2.py +332 -20
  1387. transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +2 -4
  1388. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +62 -70
  1389. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +48 -57
  1390. transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +35 -6
  1391. transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +2 -4
  1392. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +77 -90
  1393. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +30 -37
  1394. transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +17 -16
  1395. transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +55 -36
  1396. transformers/models/wavlm/configuration_wavlm.py +2 -4
  1397. transformers/models/wavlm/modeling_wavlm.py +48 -50
  1398. transformers/models/wavlm/modular_wavlm.py +5 -4
  1399. transformers/models/whisper/configuration_whisper.py +5 -6
  1400. transformers/models/whisper/english_normalizer.py +4 -3
  1401. transformers/models/whisper/feature_extraction_whisper.py +24 -9
  1402. transformers/models/whisper/generation_whisper.py +48 -26
  1403. transformers/models/whisper/modeling_whisper.py +73 -79
  1404. transformers/models/whisper/processing_whisper.py +20 -3
  1405. transformers/models/whisper/tokenization_whisper.py +43 -11
  1406. transformers/models/x_clip/configuration_x_clip.py +2 -4
  1407. transformers/models/x_clip/modeling_x_clip.py +93 -96
  1408. transformers/models/x_clip/processing_x_clip.py +14 -2
  1409. transformers/models/xcodec/configuration_xcodec.py +6 -4
  1410. transformers/models/xcodec/modeling_xcodec.py +17 -20
  1411. transformers/models/xglm/configuration_xglm.py +8 -9
  1412. transformers/models/xglm/modeling_xglm.py +55 -60
  1413. transformers/models/xglm/tokenization_xglm.py +11 -3
  1414. transformers/models/xlm/configuration_xlm.py +8 -10
  1415. transformers/models/xlm/modeling_xlm.py +144 -144
  1416. transformers/models/xlm/tokenization_xlm.py +5 -3
  1417. transformers/models/xlm_roberta/configuration_xlm_roberta.py +3 -11
  1418. transformers/models/xlm_roberta/modeling_xlm_roberta.py +194 -195
  1419. transformers/models/xlm_roberta/modular_xlm_roberta.py +53 -50
  1420. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +18 -8
  1421. transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +2 -10
  1422. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +93 -94
  1423. transformers/models/xlm_roberta_xl/modular_xlm_roberta_xl.py +70 -67
  1424. transformers/models/xlnet/configuration_xlnet.py +12 -3
  1425. transformers/models/xlnet/modeling_xlnet.py +163 -152
  1426. transformers/models/xlnet/tokenization_xlnet.py +9 -2
  1427. transformers/models/xlstm/configuration_xlstm.py +12 -8
  1428. transformers/models/xlstm/modeling_xlstm.py +65 -62
  1429. transformers/models/xmod/configuration_xmod.py +3 -11
  1430. transformers/models/xmod/modeling_xmod.py +110 -108
  1431. transformers/models/yolos/configuration_yolos.py +1 -0
  1432. transformers/models/yolos/image_processing_yolos.py +62 -60
  1433. transformers/models/yolos/image_processing_yolos_fast.py +45 -42
  1434. transformers/models/yolos/modeling_yolos.py +16 -16
  1435. transformers/models/yolos/modular_yolos.py +19 -17
  1436. transformers/models/yoso/configuration_yoso.py +2 -8
  1437. transformers/models/yoso/modeling_yoso.py +63 -70
  1438. transformers/models/zamba/configuration_zamba.py +8 -5
  1439. transformers/models/zamba/modeling_zamba.py +78 -81
  1440. transformers/models/zamba2/configuration_zamba2.py +50 -44
  1441. transformers/models/zamba2/modeling_zamba2.py +97 -97
  1442. transformers/models/zamba2/modular_zamba2.py +48 -46
  1443. transformers/models/zoedepth/configuration_zoedepth.py +2 -1
  1444. transformers/models/zoedepth/image_processing_zoedepth.py +29 -28
  1445. transformers/models/zoedepth/image_processing_zoedepth_fast.py +24 -21
  1446. transformers/models/zoedepth/modeling_zoedepth.py +18 -26
  1447. transformers/pipelines/__init__.py +114 -57
  1448. transformers/pipelines/any_to_any.py +22 -14
  1449. transformers/pipelines/audio_utils.py +2 -1
  1450. transformers/pipelines/automatic_speech_recognition.py +12 -20
  1451. transformers/pipelines/base.py +27 -15
  1452. transformers/{models/pe_audio/processing_pe_audio.py → pipelines/deprecated/__init__.py} +3 -10
  1453. transformers/pipelines/deprecated/text2text_generation.py +408 -0
  1454. transformers/pipelines/document_question_answering.py +2 -4
  1455. transformers/pipelines/image_text_to_text.py +1 -0
  1456. transformers/pipelines/image_to_text.py +229 -0
  1457. transformers/pipelines/question_answering.py +44 -5
  1458. transformers/pipelines/text_classification.py +14 -1
  1459. transformers/pipelines/text_generation.py +1 -1
  1460. transformers/pipelines/text_to_audio.py +2 -2
  1461. transformers/pipelines/token_classification.py +22 -1
  1462. transformers/pipelines/video_classification.py +9 -1
  1463. transformers/pipelines/zero_shot_audio_classification.py +1 -0
  1464. transformers/pipelines/zero_shot_classification.py +6 -0
  1465. transformers/pipelines/zero_shot_image_classification.py +7 -0
  1466. transformers/processing_utils.py +145 -230
  1467. transformers/quantizers/auto.py +4 -2
  1468. transformers/quantizers/base.py +173 -53
  1469. transformers/quantizers/quantizer_aqlm.py +23 -2
  1470. transformers/quantizers/quantizer_auto_round.py +12 -2
  1471. transformers/quantizers/quantizer_awq.py +89 -20
  1472. transformers/quantizers/quantizer_bitnet.py +14 -4
  1473. transformers/quantizers/quantizer_bnb_4bit.py +155 -18
  1474. transformers/quantizers/quantizer_bnb_8bit.py +110 -24
  1475. transformers/quantizers/quantizer_compressed_tensors.py +9 -2
  1476. transformers/quantizers/quantizer_eetq.py +74 -16
  1477. transformers/quantizers/quantizer_fbgemm_fp8.py +138 -38
  1478. transformers/quantizers/quantizer_finegrained_fp8.py +113 -26
  1479. transformers/quantizers/quantizer_fp_quant.py +82 -52
  1480. transformers/quantizers/quantizer_gptq.py +28 -8
  1481. transformers/quantizers/quantizer_higgs.py +60 -42
  1482. transformers/quantizers/quantizer_hqq.py +153 -144
  1483. transformers/quantizers/quantizer_mxfp4.py +194 -14
  1484. transformers/quantizers/quantizer_quanto.py +79 -35
  1485. transformers/quantizers/quantizer_quark.py +18 -36
  1486. transformers/quantizers/quantizer_spqr.py +12 -4
  1487. transformers/quantizers/quantizer_torchao.py +325 -50
  1488. transformers/quantizers/quantizer_vptq.py +27 -4
  1489. transformers/quantizers/quantizers_utils.py +0 -20
  1490. transformers/safetensors_conversion.py +3 -9
  1491. transformers/testing_utils.py +82 -326
  1492. transformers/tokenization_mistral_common.py +903 -568
  1493. transformers/tokenization_utils_base.py +340 -220
  1494. transformers/tokenization_utils_sentencepiece.py +6 -5
  1495. transformers/tokenization_utils_tokenizers.py +113 -226
  1496. transformers/trainer.py +53 -60
  1497. transformers/trainer_callback.py +0 -8
  1498. transformers/trainer_seq2seq.py +1 -5
  1499. transformers/trainer_utils.py +1 -1
  1500. transformers/training_args.py +41 -77
  1501. transformers/utils/__init__.py +4 -8
  1502. transformers/utils/attention_visualizer.py +5 -5
  1503. transformers/utils/auto_docstring.py +37 -599
  1504. transformers/utils/doc.py +36 -4
  1505. transformers/utils/dummy_pt_objects.py +42 -0
  1506. transformers/utils/generic.py +28 -111
  1507. transformers/utils/hub.py +15 -5
  1508. transformers/utils/import_utils.py +32 -165
  1509. transformers/utils/kernel_config.py +19 -74
  1510. transformers/utils/loading_report.py +15 -25
  1511. transformers/utils/quantization_config.py +241 -72
  1512. transformers/video_processing_utils.py +39 -41
  1513. transformers/video_utils.py +22 -18
  1514. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/METADATA +236 -284
  1515. transformers-5.0.0rc0.dist-info/RECORD +1987 -0
  1516. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/WHEEL +1 -1
  1517. transformers/integrations/moe.py +0 -360
  1518. transformers/integrations/quark.py +0 -53
  1519. transformers/loss/loss_lw_detr.py +0 -356
  1520. transformers/models/ernie4_5_vl_moe/__init__.py +0 -31
  1521. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +0 -340
  1522. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +0 -455
  1523. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +0 -231
  1524. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +0 -1936
  1525. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +0 -1925
  1526. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +0 -249
  1527. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +0 -593
  1528. transformers/models/fast_vlm/__init__.py +0 -27
  1529. transformers/models/fast_vlm/configuration_fast_vlm.py +0 -137
  1530. transformers/models/fast_vlm/modeling_fast_vlm.py +0 -432
  1531. transformers/models/fast_vlm/modular_fast_vlm.py +0 -373
  1532. transformers/models/glm4_moe_lite/__init__.py +0 -28
  1533. transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py +0 -233
  1534. transformers/models/glm4_moe_lite/modeling_glm4_moe_lite.py +0 -740
  1535. transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py +0 -302
  1536. transformers/models/glm_image/__init__.py +0 -31
  1537. transformers/models/glm_image/configuration_glm_image.py +0 -351
  1538. transformers/models/glm_image/image_processing_glm_image.py +0 -503
  1539. transformers/models/glm_image/image_processing_glm_image_fast.py +0 -294
  1540. transformers/models/glm_image/modeling_glm_image.py +0 -1642
  1541. transformers/models/glm_image/modular_glm_image.py +0 -1531
  1542. transformers/models/glm_image/processing_glm_image.py +0 -217
  1543. transformers/models/glmasr/__init__.py +0 -29
  1544. transformers/models/glmasr/configuration_glmasr.py +0 -196
  1545. transformers/models/glmasr/modeling_glmasr.py +0 -517
  1546. transformers/models/glmasr/modular_glmasr.py +0 -443
  1547. transformers/models/glmasr/processing_glmasr.py +0 -331
  1548. transformers/models/jais2/__init__.py +0 -27
  1549. transformers/models/jais2/configuration_jais2.py +0 -148
  1550. transformers/models/jais2/modeling_jais2.py +0 -484
  1551. transformers/models/jais2/modular_jais2.py +0 -194
  1552. transformers/models/lasr/__init__.py +0 -29
  1553. transformers/models/lasr/configuration_lasr.py +0 -244
  1554. transformers/models/lasr/feature_extraction_lasr.py +0 -275
  1555. transformers/models/lasr/modeling_lasr.py +0 -727
  1556. transformers/models/lasr/modular_lasr.py +0 -574
  1557. transformers/models/lasr/processing_lasr.py +0 -100
  1558. transformers/models/lasr/tokenization_lasr.py +0 -184
  1559. transformers/models/lighton_ocr/__init__.py +0 -28
  1560. transformers/models/lighton_ocr/configuration_lighton_ocr.py +0 -128
  1561. transformers/models/lighton_ocr/modeling_lighton_ocr.py +0 -463
  1562. transformers/models/lighton_ocr/modular_lighton_ocr.py +0 -404
  1563. transformers/models/lighton_ocr/processing_lighton_ocr.py +0 -229
  1564. transformers/models/lw_detr/__init__.py +0 -27
  1565. transformers/models/lw_detr/configuration_lw_detr.py +0 -374
  1566. transformers/models/lw_detr/modeling_lw_detr.py +0 -1702
  1567. transformers/models/lw_detr/modular_lw_detr.py +0 -1615
  1568. transformers/models/minimax_m2/__init__.py +0 -28
  1569. transformers/models/minimax_m2/configuration_minimax_m2.py +0 -188
  1570. transformers/models/minimax_m2/modeling_minimax_m2.py +0 -704
  1571. transformers/models/minimax_m2/modular_minimax_m2.py +0 -346
  1572. transformers/models/paddleocr_vl/__init__.py +0 -31
  1573. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +0 -335
  1574. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +0 -503
  1575. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +0 -209
  1576. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +0 -1683
  1577. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +0 -1380
  1578. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +0 -133
  1579. transformers/models/pe_audio/__init__.py +0 -29
  1580. transformers/models/pe_audio/configuration_pe_audio.py +0 -204
  1581. transformers/models/pe_audio/feature_extraction_pe_audio.py +0 -160
  1582. transformers/models/pe_audio/modeling_pe_audio.py +0 -819
  1583. transformers/models/pe_audio/modular_pe_audio.py +0 -298
  1584. transformers/models/pe_audio_video/__init__.py +0 -28
  1585. transformers/models/pe_audio_video/configuration_pe_audio_video.py +0 -223
  1586. transformers/models/pe_audio_video/modeling_pe_audio_video.py +0 -971
  1587. transformers/models/pe_audio_video/modular_pe_audio_video.py +0 -763
  1588. transformers/models/pe_video/__init__.py +0 -29
  1589. transformers/models/pe_video/configuration_pe_video.py +0 -209
  1590. transformers/models/pe_video/modeling_pe_video.py +0 -647
  1591. transformers/models/pe_video/modular_pe_video.py +0 -231
  1592. transformers/models/pe_video/processing_pe_video.py +0 -10
  1593. transformers/models/pe_video/video_processing_pe_video.py +0 -64
  1594. transformers/models/pixio/__init__.py +0 -29
  1595. transformers/models/pixio/configuration_pixio.py +0 -150
  1596. transformers/models/pixio/modeling_pixio.py +0 -507
  1597. transformers/models/pixio/modular_pixio.py +0 -403
  1598. transformers/models/solar_open/__init__.py +0 -27
  1599. transformers/models/solar_open/configuration_solar_open.py +0 -184
  1600. transformers/models/solar_open/modeling_solar_open.py +0 -642
  1601. transformers/models/solar_open/modular_solar_open.py +0 -224
  1602. transformers/trainer_jit_checkpoint.py +0 -125
  1603. transformers-5.0.0.dist-info/RECORD +0 -2068
  1604. {transformers-5.0.0.dist-info/licenses → transformers-5.0.0rc0.dist-info}/LICENSE +0 -0
  1605. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/entry_points.txt +0 -0
  1606. {transformers-5.0.0.dist-info → transformers-5.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  # base
2
+ # coding=utf-8
2
3
  # Copyright 2020 The HuggingFace Inc. team.
3
4
  #
4
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,21 +19,19 @@ fronting encoding methods) Special token mixing (host the special tokens logic)
18
19
  of output with special method for the Fast tokenizers)
19
20
  """
20
21
 
21
- from __future__ import annotations
22
-
23
22
  import copy
24
23
  import json
25
24
  import os
26
25
  import re
27
26
  import warnings
28
27
  from collections import OrderedDict, UserDict
29
- from collections.abc import Callable, Collection, Mapping, Sequence, Sized
28
+ from collections.abc import Callable, Mapping, Sequence, Sized
30
29
  from dataclasses import dataclass
31
30
  from pathlib import Path
32
- from typing import TYPE_CHECKING, Any, NamedTuple, Union
31
+ from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
33
32
 
34
33
  import numpy as np
35
- from huggingface_hub import create_repo, is_offline_mode, list_repo_files
34
+ from huggingface_hub import create_repo, list_repo_files
36
35
  from packaging import version
37
36
 
38
37
  from . import __version__
@@ -50,6 +49,7 @@ from .utils import (
50
49
  extract_commit_hash,
51
50
  is_mlx_available,
52
51
  is_numpy_array,
52
+ is_offline_mode,
53
53
  is_protobuf_available,
54
54
  is_tokenizers_available,
55
55
  is_torch_available,
@@ -60,7 +60,6 @@ from .utils import (
60
60
  requires_backends,
61
61
  to_py_obj,
62
62
  )
63
- from .utils.chat_parsing_utils import recursive_parse
64
63
  from .utils.chat_template_utils import render_jinja_template
65
64
  from .utils.import_utils import PROTOBUF_IMPORT_ERROR
66
65
 
@@ -218,11 +217,11 @@ class BatchEncoding(UserDict):
218
217
 
219
218
  def __init__(
220
219
  self,
221
- data: dict[str, Any] | None = None,
222
- encoding: EncodingFast | Sequence[EncodingFast] | None = None,
223
- tensor_type: None | str | TensorType = None,
220
+ data: Optional[dict[str, Any]] = None,
221
+ encoding: Optional[Union[EncodingFast, Sequence[EncodingFast]]] = None,
222
+ tensor_type: Union[None, str, TensorType] = None,
224
223
  prepend_batch_axis: bool = False,
225
- n_sequences: int | None = None,
224
+ n_sequences: Optional[int] = None,
226
225
  ):
227
226
  super().__init__(data)
228
227
 
@@ -240,7 +239,7 @@ class BatchEncoding(UserDict):
240
239
  self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
241
240
 
242
241
  @property
243
- def n_sequences(self) -> int | None:
242
+ def n_sequences(self) -> Optional[int]:
244
243
  """
245
244
  `Optional[int]`: The number of sequences used to generate each sample from the batch encoded in this
246
245
  [`BatchEncoding`]. Currently can be one of `None` (unknown), `1` (a single sentence) or `2` (a pair of
@@ -248,7 +247,7 @@ class BatchEncoding(UserDict):
248
247
  """
249
248
  return self._n_sequences
250
249
 
251
- def __getitem__(self, item: int | str) -> Any | EncodingFast:
250
+ def __getitem__(self, item: Union[int, str]) -> Union[Any, EncodingFast]:
252
251
  """
253
252
  If the key is a string, returns the value of the dict associated to `key` ('input_ids', 'attention_mask',
254
253
  etc.).
@@ -298,7 +297,7 @@ class BatchEncoding(UserDict):
298
297
  return self._encodings is not None
299
298
 
300
299
  @property
301
- def encodings(self) -> list[EncodingFast] | None:
300
+ def encodings(self) -> Optional[list[EncodingFast]]:
302
301
  """
303
302
  `Optional[list[tokenizers.Encoding]]`: The list all encodings from the tokenization process. Returns `None` if
304
303
  the input was tokenized through Python (i.e., not a fast) tokenizer.
@@ -323,7 +322,7 @@ class BatchEncoding(UserDict):
323
322
  )
324
323
  return self._encodings[batch_index].tokens
325
324
 
326
- def sequence_ids(self, batch_index: int = 0) -> list[int | None]:
325
+ def sequence_ids(self, batch_index: int = 0) -> list[Optional[int]]:
327
326
  """
328
327
  Return a list mapping the tokens to the id of their original sentences:
329
328
 
@@ -347,7 +346,7 @@ class BatchEncoding(UserDict):
347
346
  )
348
347
  return self._encodings[batch_index].sequence_ids
349
348
 
350
- def word_ids(self, batch_index: int = 0) -> list[int | None]:
349
+ def word_ids(self, batch_index: int = 0) -> list[Optional[int]]:
351
350
  """
352
351
  Return a list mapping the tokens to their actual word in the initial sentence for a fast tokenizer.
353
352
 
@@ -366,7 +365,7 @@ class BatchEncoding(UserDict):
366
365
  )
367
366
  return self._encodings[batch_index].word_ids
368
367
 
369
- def token_to_sequence(self, batch_or_token_index: int, token_index: int | None = None) -> int:
368
+ def token_to_sequence(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
370
369
  """
371
370
  Get the index of the sequence represented by the given token. In the general use case, this method returns `0`
372
371
  for a single sequence or the first sequence of a pair, and `1` for the second sequence of a pair
@@ -405,7 +404,7 @@ class BatchEncoding(UserDict):
405
404
  token_index = self._seq_len + token_index
406
405
  return self._encodings[batch_index].token_to_sequence(token_index)
407
406
 
408
- def token_to_word(self, batch_or_token_index: int, token_index: int | None = None) -> int:
407
+ def token_to_word(self, batch_or_token_index: int, token_index: Optional[int] = None) -> int:
409
408
  """
410
409
  Get the index of the word corresponding (i.e. comprising) to an encoded token in a sequence of the batch.
411
410
 
@@ -444,8 +443,8 @@ class BatchEncoding(UserDict):
444
443
  return self._encodings[batch_index].token_to_word(token_index)
445
444
 
446
445
  def word_to_tokens(
447
- self, batch_or_word_index: int, word_index: int | None = None, sequence_index: int = 0
448
- ) -> TokenSpan | None:
446
+ self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
447
+ ) -> Optional[TokenSpan]:
449
448
  """
450
449
  Get the encoded token span corresponding to a word in a sequence of the batch.
451
450
 
@@ -496,7 +495,7 @@ class BatchEncoding(UserDict):
496
495
  span = self._encodings[batch_index].word_to_tokens(word_index, sequence_index)
497
496
  return TokenSpan(*span) if span is not None else None
498
497
 
499
- def token_to_chars(self, batch_or_token_index: int, token_index: int | None = None) -> CharSpan | None:
498
+ def token_to_chars(self, batch_or_token_index: int, token_index: Optional[int] = None) -> Optional[CharSpan]:
500
499
  """
501
500
  Get the character span corresponding to an encoded token in a sequence of the batch.
502
501
 
@@ -535,7 +534,9 @@ class BatchEncoding(UserDict):
535
534
 
536
535
  return CharSpan(*span_indices) if span_indices is not None else None
537
536
 
538
- def char_to_token(self, batch_or_char_index: int, char_index: int | None = None, sequence_index: int = 0) -> int:
537
+ def char_to_token(
538
+ self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0
539
+ ) -> int:
539
540
  """
540
541
  Get the index of the token in the encoded output comprising a character in the original string for a sequence
541
542
  of the batch.
@@ -576,7 +577,7 @@ class BatchEncoding(UserDict):
576
577
  return self._encodings[batch_index].char_to_token(char_index, sequence_index)
577
578
 
578
579
  def word_to_chars(
579
- self, batch_or_word_index: int, word_index: int | None = None, sequence_index: int = 0
580
+ self, batch_or_word_index: int, word_index: Optional[int] = None, sequence_index: int = 0
580
581
  ) -> CharSpan:
581
582
  """
582
583
  Get the character span in the original string corresponding to given word in a sequence of the batch.
@@ -620,7 +621,7 @@ class BatchEncoding(UserDict):
620
621
  word_index = batch_or_word_index
621
622
  return CharSpan(*(self._encodings[batch_index].word_to_chars(word_index, sequence_index)))
622
623
 
623
- def char_to_word(self, batch_or_char_index: int, char_index: int | None = None, sequence_index: int = 0) -> int:
624
+ def char_to_word(self, batch_or_char_index: int, char_index: Optional[int] = None, sequence_index: int = 0) -> int:
624
625
  """
625
626
  Get the word in the original string corresponding to a character in the original string of a sequence of the
626
627
  batch.
@@ -659,7 +660,9 @@ class BatchEncoding(UserDict):
659
660
  char_index = batch_or_char_index
660
661
  return self._encodings[batch_index].char_to_word(char_index, sequence_index)
661
662
 
662
- def convert_to_tensors(self, tensor_type: str | TensorType | None = None, prepend_batch_axis: bool = False):
663
+ def convert_to_tensors(
664
+ self, tensor_type: Optional[Union[str, TensorType]] = None, prepend_batch_axis: bool = False
665
+ ):
663
666
  """
664
667
  Convert the inner content to tensors.
665
668
 
@@ -753,7 +756,7 @@ class BatchEncoding(UserDict):
753
756
 
754
757
  return self
755
758
 
756
- def to(self, device: str | torch.device, *, non_blocking: bool = False) -> BatchEncoding:
759
+ def to(self, device: Union[str, "torch.device"], *, non_blocking: bool = False) -> "BatchEncoding":
757
760
  """
758
761
  Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only).
759
762
 
@@ -963,11 +966,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
963
966
 
964
967
  vocab_files_names: dict[str, str] = {}
965
968
  pretrained_vocab_files_map: dict[str, dict[str, str]] = {}
966
- _auto_class: str | None = None
969
+ _auto_class: Optional[str] = None
967
970
 
968
971
  # first name has to correspond to main model input name
969
972
  # to make sure `tokenizer.pad(...)` works correctly
970
- model_input_names: list[str] = ["input_ids", "attention_mask"]
973
+ model_input_names: list[str] = ["input_ids", "token_type_ids", "attention_mask"]
971
974
  padding_side: str = "right"
972
975
  truncation_side: str = "right"
973
976
  slow_tokenizer_class = None
@@ -1099,7 +1102,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1099
1102
  # ---- Special tokens API (moved from SpecialTokensMixin) ----
1100
1103
  def add_special_tokens(
1101
1104
  self,
1102
- special_tokens_dict: dict[str, str | AddedToken | Sequence[str | AddedToken]],
1105
+ special_tokens_dict: dict[str, Union[str, AddedToken, Sequence[Union[str, AddedToken]]]],
1103
1106
  replace_extra_special_tokens=True,
1104
1107
  ) -> int:
1105
1108
  """
@@ -1203,7 +1206,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1203
1206
  return self.add_tokens(tokens_to_add, special_tokens=True)
1204
1207
 
1205
1208
  def add_tokens(
1206
- self, new_tokens: str | AddedToken | Sequence[str | AddedToken], special_tokens: bool = False
1209
+ self, new_tokens: Union[str, AddedToken, Sequence[Union[str, AddedToken]]], special_tokens: bool = False
1207
1210
  ) -> int:
1208
1211
  """
1209
1212
  #TODO remove this from here! PreTrainedTOkeniuzerBase should be agnostic of AddedToken.
@@ -1243,7 +1246,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1243
1246
  new_tokens = [new_tokens]
1244
1247
  return self._add_tokens(new_tokens, special_tokens=special_tokens)
1245
1248
 
1246
- def _add_tokens(self, new_tokens: list[str] | list[AddedToken], special_tokens: bool = False) -> int:
1249
+ def _add_tokens(self, new_tokens: Union[list[str], list[AddedToken]], special_tokens: bool = False) -> int:
1247
1250
  raise NotImplementedError
1248
1251
 
1249
1252
  @property
@@ -1328,7 +1331,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1328
1331
  return super().__getattr__(key)
1329
1332
 
1330
1333
  def get_special_tokens_mask(
1331
- self, token_ids_0: list[int], token_ids_1: list[int] | None = None, already_has_special_tokens: bool = False
1334
+ self, token_ids_0: list[int], token_ids_1: Optional[list[int]] = None, already_has_special_tokens: bool = False
1332
1335
  ) -> list[int]:
1333
1336
  """
1334
1337
  Retrieve sequence ids from a token list that has no special tokens added.
@@ -1417,7 +1420,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1417
1420
  """
1418
1421
  return self.convert_tokens_to_ids(self.all_special_tokens)
1419
1422
 
1420
- def _set_model_specific_special_tokens(self, special_tokens: dict[str, str | AddedToken]):
1423
+ def _set_model_specific_special_tokens(self, special_tokens: dict[str, Union[str, AddedToken]]):
1421
1424
  """
1422
1425
  Adds new model-specific special tokens (e.g., for multimodal models).
1423
1426
 
@@ -1470,7 +1473,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1470
1473
  """
1471
1474
  raise NotImplementedError()
1472
1475
 
1473
- def convert_tokens_to_ids(self, tokens: str | list[str]) -> int | list[int]:
1476
+ def convert_tokens_to_ids(self, tokens: Union[str, list[str]]) -> Union[int, list[int]]:
1474
1477
  """
1475
1478
  Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
1476
1479
  vocabulary.
@@ -1486,7 +1489,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1486
1489
 
1487
1490
  return [self._convert_token_to_id_with_added_voc(token) for token in tokens]
1488
1491
 
1489
- def convert_ids_to_tokens(self, ids: int | list[int], skip_special_tokens: bool = False) -> str | list[str]:
1492
+ def convert_ids_to_tokens(
1493
+ self, ids: Union[int, list[int]], skip_special_tokens: bool = False
1494
+ ) -> Union[str, list[str]]:
1490
1495
  """
1491
1496
  Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
1492
1497
  added tokens.
@@ -1505,12 +1510,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1505
1510
  @classmethod
1506
1511
  def from_pretrained(
1507
1512
  cls,
1508
- pretrained_model_name_or_path: str | os.PathLike,
1513
+ pretrained_model_name_or_path: Union[str, os.PathLike],
1509
1514
  *init_inputs,
1510
- cache_dir: str | os.PathLike | None = None,
1515
+ cache_dir: Optional[Union[str, os.PathLike]] = None,
1511
1516
  force_download: bool = False,
1512
1517
  local_files_only: bool = False,
1513
- token: str | bool | None = None,
1518
+ token: Optional[Union[str, bool]] = None,
1514
1519
  revision: str = "main",
1515
1520
  trust_remote_code=False,
1516
1521
  **kwargs,
@@ -1607,7 +1612,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1607
1612
 
1608
1613
  pretrained_model_name_or_path = str(pretrained_model_name_or_path)
1609
1614
  vocab_files = {}
1610
- additional_files_names = {}
1611
1615
  init_configuration = {}
1612
1616
 
1613
1617
  is_local = os.path.isdir(pretrained_model_name_or_path)
@@ -1625,9 +1629,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1625
1629
  f"Calling {cls.__name__}.from_pretrained() with the path to a single file or url is not "
1626
1630
  "supported for this tokenizer. Use a model identifier or the path to a directory instead."
1627
1631
  )
1628
- file_id = "vocab_file"
1629
- if pretrained_model_name_or_path.endswith("tokenizer.json"):
1630
- file_id = "tokenizer_file"
1632
+ # Use first vocab file that's not tokenizer_file
1633
+ file_id = list(cls.vocab_files_names.keys())[0]
1634
+ if file_id == "tokenizer_file" and vocab_files_count > 1:
1635
+ file_id = [k for k in cls.vocab_files_names.keys() if k != "tokenizer_file"][0]
1636
+
1631
1637
  vocab_files[file_id] = pretrained_model_name_or_path
1632
1638
  single_file_id = file_id
1633
1639
  else:
@@ -1645,10 +1651,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1645
1651
  }
1646
1652
 
1647
1653
  vocab_files = {**cls.vocab_files_names, **additional_files_names}
1648
-
1649
- # Check for versioned tokenizer files
1650
1654
  if "tokenizer_file" in vocab_files:
1655
+ # Try to get the tokenizer config to see if there are versioned tokenizer files.
1651
1656
  fast_tokenizer_file = FULL_TOKENIZER_FILE
1657
+
1652
1658
  try:
1653
1659
  resolved_config_file = cached_file(
1654
1660
  pretrained_model_name_or_path,
@@ -1664,33 +1670,43 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1664
1670
  _raise_exceptions_for_missing_entries=False,
1665
1671
  _commit_hash=commit_hash,
1666
1672
  )
1667
- if resolved_config_file is not None:
1668
- with open(resolved_config_file, encoding="utf-8") as reader:
1669
- tokenizer_config = json.load(reader)
1670
- if "fast_tokenizer_files" in tokenizer_config:
1671
- fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
1672
- commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
1673
+ except OSError:
1674
+ # Re-raise any error raised by cached_file in order to get a helpful error message
1675
+ raise
1673
1676
  except Exception:
1674
- pass
1677
+ # For any other exception, we throw a generic error.
1678
+ raise OSError(
1679
+ f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
1680
+ "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
1681
+ f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
1682
+ f"containing all relevant files for a {cls.__name__} tokenizer."
1683
+ )
1684
+
1685
+ commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
1686
+ if resolved_config_file is not None:
1687
+ with open(resolved_config_file, encoding="utf-8") as reader:
1688
+ tokenizer_config = json.load(reader)
1689
+ if "fast_tokenizer_files" in tokenizer_config:
1690
+ fast_tokenizer_file = get_fast_tokenizer_file(tokenizer_config["fast_tokenizer_files"])
1675
1691
  vocab_files["tokenizer_file"] = fast_tokenizer_file
1676
1692
 
1677
- # This block looks for any extra chat template files
1678
- if is_local:
1679
- template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
1680
- if template_dir.is_dir():
1681
- for template_file in template_dir.glob("*.jinja"):
1682
- template_name = template_file.name.removesuffix(".jinja")
1683
- vocab_files[f"chat_template_{template_name}"] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
1684
- else:
1685
- for template in list_repo_templates(
1686
- pretrained_model_name_or_path,
1687
- local_files_only=local_files_only,
1688
- revision=revision,
1689
- cache_dir=cache_dir,
1690
- token=token,
1691
- ):
1692
- template = template.removesuffix(".jinja")
1693
- vocab_files[f"chat_template_{template}"] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
1693
+ # This block looks for any extra chat template files
1694
+ if is_local:
1695
+ template_dir = Path(pretrained_model_name_or_path, CHAT_TEMPLATE_DIR)
1696
+ if template_dir.is_dir():
1697
+ for template_file in template_dir.glob("*.jinja"):
1698
+ template_name = template_file.name.removesuffix(".jinja")
1699
+ vocab_files[f"chat_template_{template_name}"] = f"{CHAT_TEMPLATE_DIR}/{template_file.name}"
1700
+ else:
1701
+ for template in list_repo_templates(
1702
+ pretrained_model_name_or_path,
1703
+ local_files_only=local_files_only,
1704
+ revision=revision,
1705
+ cache_dir=cache_dir,
1706
+ token=token,
1707
+ ):
1708
+ template = template.removesuffix(".jinja")
1709
+ vocab_files[f"chat_template_{template}"] = f"{CHAT_TEMPLATE_DIR}/{template}.jinja"
1694
1710
 
1695
1711
  remote_files = []
1696
1712
  if not is_local and not local_files_only:
@@ -1748,6 +1764,11 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1748
1764
  if file_id not in resolved_vocab_files:
1749
1765
  continue
1750
1766
 
1767
+ if is_local:
1768
+ logger.info(f"loading file {file_path}")
1769
+ else:
1770
+ logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
1771
+
1751
1772
  return cls._from_pretrained(
1752
1773
  resolved_vocab_files,
1753
1774
  pretrained_model_name_or_path,
@@ -1777,6 +1798,29 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1777
1798
  trust_remote_code=False,
1778
1799
  **kwargs,
1779
1800
  ):
1801
+ # We instantiate fast tokenizers based on a slow tokenizer if we don't have access to the tokenizer.json
1802
+ # file or if `from_slow` is set to True.
1803
+ from_slow = kwargs.get("from_slow", False)
1804
+ gguf_file = kwargs.get("gguf_file")
1805
+ has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
1806
+
1807
+ # If one passes a GGUF file path to `gguf_file` there is no need for this check as the tokenizer will be
1808
+ # loaded directly from the GGUF file.
1809
+ if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None and not gguf_file:
1810
+ slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
1811
+ copy.deepcopy(resolved_vocab_files),
1812
+ pretrained_model_name_or_path,
1813
+ copy.deepcopy(init_configuration),
1814
+ *init_inputs,
1815
+ token=token,
1816
+ cache_dir=cache_dir,
1817
+ local_files_only=local_files_only,
1818
+ _commit_hash=_commit_hash,
1819
+ **(copy.deepcopy(kwargs)),
1820
+ )
1821
+ else:
1822
+ slow_tokenizer = None
1823
+
1780
1824
  # Prepare tokenizer initialization kwargs
1781
1825
  # Did we saved some inputs and kwargs to reload ?
1782
1826
  tokenizer_config_file = resolved_vocab_files.pop("tokenizer_config_file", None)
@@ -1785,16 +1829,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1785
1829
  init_kwargs = json.load(tokenizer_config_handle)
1786
1830
  # used in the past to check if the tokenizer class matches the class in the repo
1787
1831
  init_kwargs.pop("tokenizer_class", None)
1832
+ if not has_tokenizer_file:
1833
+ init_kwargs.get("tokenizer_file", None)
1788
1834
  saved_init_inputs = init_kwargs.pop("init_inputs", ())
1789
1835
  if not init_inputs:
1790
1836
  init_inputs = saved_init_inputs
1791
1837
  else:
1792
1838
  init_kwargs = init_configuration
1793
1839
 
1794
- if resolved_vocab_files.get("tokenizer_file", None) is not None:
1795
- init_kwargs.pop("add_bos_token", None)
1796
- init_kwargs.pop("add_eos_token", None)
1797
-
1798
1840
  # If independent chat template file(s) exist, they take priority over template entries in the tokenizer config
1799
1841
  chat_templates = {}
1800
1842
  chat_template_file = resolved_vocab_files.pop("chat_template_file", None)
@@ -1875,6 +1917,8 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1875
1917
  init_kwargs[args_name] = file_path
1876
1918
  tokenizer_file = resolved_vocab_files.get("tokenizer_file", None)
1877
1919
 
1920
+ if slow_tokenizer is not None:
1921
+ init_kwargs["__slow_tokenizer"] = slow_tokenizer
1878
1922
  init_kwargs["name_or_path"] = pretrained_model_name_or_path
1879
1923
  init_kwargs["is_local"] = _is_local
1880
1924
 
@@ -1993,12 +2037,28 @@ class PreTrainedTokenizerBase(PushToHubMixin):
1993
2037
  if key in init_kwargs and added_tokens_map != {} and init_kwargs[key] is not None:
1994
2038
  init_kwargs[key] = added_tokens_map.get(str(init_kwargs[key]), init_kwargs[key])
1995
2039
 
1996
- # From pretrained with the legacy fixes
1997
- # for `tokenizers` based tokenizer, we actually want to have vocab and merges pre-extracted from whatever inputs
1998
- # for `none` (PythonBackend) based tokenizer, we also want the vocab file / merge files not extracted.
1999
- # for `sentencepiece` based tokenizer, we pass the sentencepiece model file directly.
2000
- init_kwargs = cls.convert_to_native_format(**init_kwargs)
2040
+ # Track which files were loaded (if not already set by AutoTokenizer)
2041
+ if "files_loaded" not in init_kwargs:
2042
+ files_loaded = []
2043
+ # Check which files this tokenizer class actually uses based on vocab_files_names
2044
+ tokenizer_needs_files = set(cls.vocab_files_names.keys()) if hasattr(cls, "vocab_files_names") else set()
2045
+
2046
+ # If tokenizer_file is in the class's vocab_files_names and exists, prioritize it (TokenizersBackend)
2047
+ if "tokenizer_file" in tokenizer_needs_files and resolved_vocab_files.get("tokenizer_file"):
2048
+ files_loaded.append(os.path.basename(resolved_vocab_files["tokenizer_file"]))
2049
+ else:
2050
+ # Otherwise, add the actual vocab files that were used by this tokenizer class
2051
+ for file_key, file_path in resolved_vocab_files.items():
2052
+ if (
2053
+ file_path
2054
+ and file_key not in ["tokenizer_config_file", "special_tokens_map_file", "added_tokens_file"]
2055
+ and file_key in tokenizer_needs_files
2056
+ ):
2057
+ # Extract just the filename from the path
2058
+ files_loaded.append(os.path.basename(file_path))
2059
+ init_kwargs["files_loaded"] = files_loaded
2001
2060
 
2061
+ # Instantiate the tokenizer.
2002
2062
  try:
2003
2063
  tokenizer = cls(*init_inputs, **init_kwargs)
2004
2064
  except import_protobuf_decode_error():
@@ -2019,14 +2079,120 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2019
2079
  "Unable to load vocabulary from file. "
2020
2080
  "Please check that the provided vocabulary is accessible and not corrupted."
2021
2081
  )
2022
- return tokenizer
2023
2082
 
2024
- @classmethod
2025
- def convert_to_native_format(cls, **kwargs):
2026
- return kwargs
2083
+ # If tokenizer_file exists and tokenizer has a TokenizersBackend, replace the blank tokenizer with tokenizer.json
2084
+ if tokenizer_file is not None and hasattr(tokenizer, "_tokenizer"):
2085
+ from tokenizers import Tokenizer as TokenizerFast
2086
+
2087
+ tokenizer._tokenizer = TokenizerFast.from_file(tokenizer_file)
2088
+ # Re-run post-initialization if the tokenizer has it
2089
+ if hasattr(tokenizer, "_post_init"):
2090
+ tokenizer._post_init()
2091
+ # If only SPM exists, try to get vocab and merges and init to load a tokenizers-backend
2092
+ else:
2093
+ spm_filename = find_sentencepiece_model_file(
2094
+ pretrained_model_name_or_path,
2095
+ revision=kwargs.get("revision"),
2096
+ token=kwargs.get("token"),
2097
+ cache_dir=kwargs.get("cache_dir"),
2098
+ local_files_only=kwargs.get("local_files_only", False),
2099
+ subfolder=kwargs.get("subfolder", ""),
2100
+ )
2101
+ if spm_filename is not None:
2102
+ try:
2103
+ resolved_spm = cached_file(
2104
+ pretrained_model_name_or_path,
2105
+ spm_filename,
2106
+ cache_dir=kwargs.get("cache_dir"),
2107
+ force_download=kwargs.get("force_download", False),
2108
+ proxies=kwargs.get("proxies"),
2109
+ token=kwargs.get("token"),
2110
+ revision=kwargs.get("revision"),
2111
+ local_files_only=kwargs.get("local_files_only", False),
2112
+ subfolder=kwargs.get("subfolder", ""),
2113
+ )
2114
+ except Exception:
2115
+ resolved_spm = None
2116
+ if resolved_spm is not None:
2117
+ try:
2118
+ # Mirror AutoTokenizer fallback: extract vocab/merges from SentencePiece
2119
+ import inspect as _inspect
2120
+
2121
+ from .tokenization_utils_sentencepiece import SentencePieceExtractor
2122
+
2123
+ class_sig = _inspect.signature(getattr(cls, "__init__", cls))
2124
+ vocab_ids, vocab_scores, merges = SentencePieceExtractor(resolved_spm).extract()
2125
+ files_loaded = [spm_filename]
2126
+ init_kwargs["backend"] = "tokenizers"
2127
+ init_kwargs["files_loaded"] = files_loaded
2128
+ # If tokenizer needs merges too (BPE), pass both; unigram models only need vocab
2129
+ if "merges" in class_sig.parameters:
2130
+ return cls.from_pretrained(
2131
+ pretrained_model_name_or_path,
2132
+ *init_inputs,
2133
+ vocab=vocab_scores,
2134
+ merges=merges,
2135
+ **init_kwargs,
2136
+ )
2137
+ elif "vocab" in class_sig.parameters:
2138
+ return cls.from_pretrained(
2139
+ pretrained_model_name_or_path,
2140
+ *init_inputs,
2141
+ vocab=vocab_scores,
2142
+ **init_kwargs,
2143
+ )
2144
+ except Exception as e:
2145
+ logger.warning(
2146
+ f"Could not extract vocab/merges from the SentencePiece model to initialize a Tokenizers backend: {e}. We are falling back so we are falling back to the standard loading method."
2147
+ )
2148
+ pass
2149
+ # Fallback to vocab.json + merges.txt (BPE) or just vocab.json (WordLevel/WordPiece)
2150
+ vocab, merges, files_loaded = load_vocab_and_merges(
2151
+ pretrained_model_name_or_path,
2152
+ cache_dir=kwargs.get("cache_dir"),
2153
+ force_download=kwargs.get("force_download", False),
2154
+ proxies=kwargs.get("proxies"),
2155
+ token=kwargs.get("token"),
2156
+ revision=kwargs.get("revision"),
2157
+ local_files_only=kwargs.get("local_files_only", False),
2158
+ subfolder=kwargs.get("subfolder", ""),
2159
+ )
2160
+
2161
+ if vocab is not None:
2162
+ try:
2163
+ import inspect as _inspect
2164
+
2165
+ class_sig = _inspect.signature(getattr(cls, "__init__", cls))
2166
+ init_kwargs["backend"] = "tokenizers"
2167
+ init_kwargs["files_loaded"] = files_loaded
2168
+
2169
+ if merges is not None and "merges" in class_sig.parameters:
2170
+ return cls.from_pretrained(
2171
+ pretrained_model_name_or_path,
2172
+ *init_inputs,
2173
+ vocab=vocab,
2174
+ merges=merges,
2175
+ **init_kwargs,
2176
+ )
2177
+ elif "vocab" in class_sig.parameters:
2178
+ return cls.from_pretrained(
2179
+ pretrained_model_name_or_path,
2180
+ *init_inputs,
2181
+ vocab=vocab,
2182
+ **init_kwargs,
2183
+ )
2184
+ except Exception:
2185
+ pass
2186
+ if added_tokens_decoder != {} and max(list(added_tokens_decoder.keys())[-1], 0) > tokenizer.vocab_size:
2187
+ logger.info(
2188
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are"
2189
+ " fine-tuned or trained."
2190
+ )
2191
+
2192
+ return tokenizer
2027
2193
 
2028
2194
  @classmethod
2029
- def convert_added_tokens(cls, obj: AddedToken | Any, save=False, add_type_field=True):
2195
+ def convert_added_tokens(cls, obj: Union[AddedToken, Any], save=False, add_type_field=True):
2030
2196
  if isinstance(obj, dict) and "__type" in obj and obj["__type"] == "AddedToken":
2031
2197
  obj.pop("__type")
2032
2198
  return AddedToken(**obj)
@@ -2046,9 +2212,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2046
2212
 
2047
2213
  def save_pretrained(
2048
2214
  self,
2049
- save_directory: str | os.PathLike,
2050
- legacy_format: bool | None = None,
2051
- filename_prefix: str | None = None,
2215
+ save_directory: Union[str, os.PathLike],
2216
+ legacy_format: Optional[bool] = None,
2217
+ filename_prefix: Optional[str] = None,
2052
2218
  push_to_hub: bool = False,
2053
2219
  **kwargs,
2054
2220
  ) -> tuple[str, ...]:
@@ -2105,13 +2271,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2105
2271
  )
2106
2272
 
2107
2273
  tokenizer_config = copy.deepcopy(self.init_kwargs)
2108
- tokenizer_config.pop("add_bos_token", None)
2109
- tokenizer_config.pop("add_eos_token", None)
2110
2274
 
2111
2275
  # Let's save the init kwargs
2112
2276
  target_keys = set(self.init_kwargs.keys())
2113
- target_keys.discard("add_bos_token")
2114
- target_keys.discard("add_eos_token")
2115
2277
  # Let's save the special tokens map (only the strings)
2116
2278
  target_keys.update(["model_max_length"])
2117
2279
 
@@ -2146,10 +2308,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2146
2308
  # Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained
2147
2309
  tokenizer_class = self.__class__.__name__
2148
2310
 
2149
- # tokenizers backend don't need to save added_tokens_decoder and additional_special_tokens
2311
+ # tokenizers backend don't need to save added_tokens_decoder
2150
2312
  if any(base.__name__ == "TokenizersBackend" for base in self.__class__.__mro__):
2151
2313
  tokenizer_config.pop("added_tokens_decoder", None)
2152
- tokenizer_config.pop("additional_special_tokens", None)
2153
2314
 
2154
2315
  # Remove the Fast at the end if we can save the slow tokenizer
2155
2316
  if tokenizer_class.endswith("Fast") and getattr(self, "can_save_slow_tokenizer", False):
@@ -2204,10 +2365,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2204
2365
 
2205
2366
  def _save_pretrained(
2206
2367
  self,
2207
- save_directory: str | os.PathLike,
2368
+ save_directory: Union[str, os.PathLike],
2208
2369
  file_names: tuple[str, ...],
2209
- legacy_format: bool | None = None,
2210
- filename_prefix: str | None = None,
2370
+ legacy_format: Optional[bool] = None,
2371
+ filename_prefix: Optional[str] = None,
2211
2372
  ) -> tuple[str, ...]:
2212
2373
  """
2213
2374
  Save a tokenizer using the slow-tokenizer/legacy format: vocabulary + added tokens.
@@ -2237,7 +2398,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2237
2398
 
2238
2399
  return file_names + vocab_files + (added_tokens_file,)
2239
2400
 
2240
- def save_vocabulary(self, save_directory: str, filename_prefix: str | None = None) -> tuple[str, ...]:
2401
+ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> tuple[str, ...]:
2241
2402
  """
2242
2403
  Save only the vocabulary of the tokenizer (vocabulary + added tokens).
2243
2404
 
@@ -2255,7 +2416,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2255
2416
  """
2256
2417
  raise NotImplementedError
2257
2418
 
2258
- def tokenize(self, text: str, pair: str | None = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
2419
+ def tokenize(self, text: str, pair: Optional[str] = None, add_special_tokens: bool = False, **kwargs) -> list[str]:
2259
2420
  """
2260
2421
  Converts a string into a sequence of tokens, replacing unknown tokens with the `unk_token`.
2261
2422
 
@@ -2287,15 +2448,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2287
2448
  )
2288
2449
  def encode(
2289
2450
  self,
2290
- text: TextInput | PreTokenizedInput | EncodedInput,
2291
- text_pair: TextInput | PreTokenizedInput | EncodedInput | None = None,
2451
+ text: Union[TextInput, PreTokenizedInput, EncodedInput],
2452
+ text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
2292
2453
  add_special_tokens: bool = True,
2293
- padding: bool | str | PaddingStrategy = False,
2294
- truncation: bool | str | TruncationStrategy | None = None,
2295
- max_length: int | None = None,
2454
+ padding: Union[bool, str, PaddingStrategy] = False,
2455
+ truncation: Union[bool, str, TruncationStrategy, None] = None,
2456
+ max_length: Optional[int] = None,
2296
2457
  stride: int = 0,
2297
- padding_side: str | None = None,
2298
- return_tensors: str | TensorType | None = None,
2458
+ padding_side: Optional[str] = None,
2459
+ return_tensors: Optional[Union[str, TensorType]] = None,
2299
2460
  **kwargs,
2300
2461
  ) -> list[int]:
2301
2462
  """
@@ -2313,15 +2474,15 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2313
2474
  the `tokenize` method) or a list of integers (tokenized string ids using the `convert_tokens_to_ids`
2314
2475
  method).
2315
2476
  """
2316
- padding_strategy, truncation_strategy, max_length, kwargs_updated = self._get_padding_truncation_strategies(
2477
+ padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
2317
2478
  padding=padding,
2318
2479
  truncation=truncation,
2319
2480
  max_length=max_length,
2481
+ pad_to_multiple_of=kwargs.get("pad_to_multiple_of"),
2482
+ verbose=kwargs.get("verbose", True),
2320
2483
  **kwargs,
2321
2484
  )
2322
2485
 
2323
- kwargs.update(kwargs_updated)
2324
-
2325
2486
  encoded_inputs = self._encode_plus(
2326
2487
  text,
2327
2488
  text_pair=text_pair,
@@ -2464,27 +2625,29 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2464
2625
  @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
2465
2626
  def __call__(
2466
2627
  self,
2467
- text: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2468
- text_pair: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2469
- text_target: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2470
- text_pair_target: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
2628
+ text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput], None] = None,
2629
+ text_pair: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
2630
+ text_target: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput], None] = None,
2631
+ text_pair_target: Optional[
2632
+ Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]
2633
+ ] = None,
2471
2634
  add_special_tokens: bool = True,
2472
- padding: bool | str | PaddingStrategy = False,
2473
- truncation: bool | str | TruncationStrategy | None = None,
2474
- max_length: int | None = None,
2635
+ padding: Union[bool, str, PaddingStrategy] = False,
2636
+ truncation: Union[bool, str, TruncationStrategy, None] = None,
2637
+ max_length: Optional[int] = None,
2475
2638
  stride: int = 0,
2476
2639
  is_split_into_words: bool = False,
2477
- pad_to_multiple_of: int | None = None,
2478
- padding_side: str | None = None,
2479
- return_tensors: str | TensorType | None = None,
2480
- return_token_type_ids: bool | None = None,
2481
- return_attention_mask: bool | None = None,
2640
+ pad_to_multiple_of: Optional[int] = None,
2641
+ padding_side: Optional[str] = None,
2642
+ return_tensors: Optional[Union[str, TensorType]] = None,
2643
+ return_token_type_ids: Optional[bool] = None,
2644
+ return_attention_mask: Optional[bool] = None,
2482
2645
  return_overflowing_tokens: bool = False,
2483
2646
  return_special_tokens_mask: bool = False,
2484
2647
  return_offsets_mapping: bool = False,
2485
2648
  return_length: bool = False,
2486
2649
  verbose: bool = True,
2487
- tokenizer_kwargs: dict[str, Any] | None = None,
2650
+ tokenizer_kwargs: Optional[dict[str, Any]] = None,
2488
2651
  **kwargs,
2489
2652
  ) -> BatchEncoding:
2490
2653
  """
@@ -2589,19 +2752,19 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2589
2752
 
2590
2753
  def _encode_plus(
2591
2754
  self,
2592
- text: TextInput | PreTokenizedInput | EncodedInput,
2593
- text_pair: TextInput | PreTokenizedInput | EncodedInput | None = None,
2755
+ text: Union[TextInput, PreTokenizedInput, EncodedInput],
2756
+ text_pair: Optional[Union[TextInput, PreTokenizedInput, EncodedInput]] = None,
2594
2757
  add_special_tokens: bool = True,
2595
2758
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
2596
2759
  truncation_strategy: TruncationStrategy = TruncationStrategy.DO_NOT_TRUNCATE,
2597
- max_length: int | None = None,
2760
+ max_length: Optional[int] = None,
2598
2761
  stride: int = 0,
2599
2762
  is_split_into_words: bool = False,
2600
- pad_to_multiple_of: int | None = None,
2601
- padding_side: str | None = None,
2602
- return_tensors: str | TensorType | None = None,
2603
- return_token_type_ids: bool | None = None,
2604
- return_attention_mask: bool | None = None,
2763
+ pad_to_multiple_of: Optional[int] = None,
2764
+ padding_side: Optional[str] = None,
2765
+ return_tensors: Optional[Union[str, TensorType]] = None,
2766
+ return_token_type_ids: Optional[bool] = None,
2767
+ return_attention_mask: Optional[bool] = None,
2605
2768
  return_overflowing_tokens: bool = False,
2606
2769
  return_special_tokens_mask: bool = False,
2607
2770
  return_offsets_mapping: bool = False,
@@ -2614,17 +2777,19 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2614
2777
 
2615
2778
  def pad(
2616
2779
  self,
2617
- encoded_inputs: BatchEncoding
2618
- | list[BatchEncoding]
2619
- | dict[str, EncodedInput]
2620
- | dict[str, list[EncodedInput]]
2621
- | list[dict[str, EncodedInput]],
2622
- padding: bool | str | PaddingStrategy = True,
2623
- max_length: int | None = None,
2624
- pad_to_multiple_of: int | None = None,
2625
- padding_side: str | None = None,
2626
- return_attention_mask: bool | None = None,
2627
- return_tensors: str | TensorType | None = None,
2780
+ encoded_inputs: Union[
2781
+ BatchEncoding,
2782
+ list[BatchEncoding],
2783
+ dict[str, EncodedInput],
2784
+ dict[str, list[EncodedInput]],
2785
+ list[dict[str, EncodedInput]],
2786
+ ],
2787
+ padding: Union[bool, str, PaddingStrategy] = True,
2788
+ max_length: Optional[int] = None,
2789
+ pad_to_multiple_of: Optional[int] = None,
2790
+ padding_side: Optional[str] = None,
2791
+ return_attention_mask: Optional[bool] = None,
2792
+ return_tensors: Optional[Union[str, TensorType]] = None,
2628
2793
  verbose: bool = True,
2629
2794
  ) -> BatchEncoding:
2630
2795
  """
@@ -2785,12 +2950,12 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2785
2950
 
2786
2951
  def _pad(
2787
2952
  self,
2788
- encoded_inputs: dict[str, EncodedInput] | BatchEncoding,
2789
- max_length: int | None = None,
2953
+ encoded_inputs: Union[dict[str, EncodedInput], BatchEncoding],
2954
+ max_length: Optional[int] = None,
2790
2955
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
2791
- pad_to_multiple_of: int | None = None,
2792
- padding_side: str | None = None,
2793
- return_attention_mask: bool | None = None,
2956
+ pad_to_multiple_of: Optional[int] = None,
2957
+ padding_side: Optional[str] = None,
2958
+ return_attention_mask: Optional[bool] = None,
2794
2959
  ) -> dict:
2795
2960
  """
2796
2961
  Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -2880,10 +3045,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2880
3045
 
2881
3046
  def decode(
2882
3047
  self,
2883
- token_ids: int | list[int] | list[list[int]] | np.ndarray | torch.Tensor,
3048
+ token_ids: Union[int, list[int], list[list[int]], np.ndarray, "torch.Tensor"],
2884
3049
  skip_special_tokens: bool = False,
2885
3050
  **kwargs,
2886
- ) -> str | list[str]:
3051
+ ) -> Union[str, list[str]]:
2887
3052
  """
2888
3053
  Converts a sequence of ids into a string, or a list of sequences into a list of strings,
2889
3054
  using the tokenizer and vocabulary with options to remove special tokens and clean up
@@ -2928,9 +3093,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2928
3093
 
2929
3094
  def batch_decode(
2930
3095
  self,
2931
- sequences: list[int] | list[list[int]] | np.ndarray | torch.Tensor,
3096
+ sequences: Union[list[int], list[list[int]], np.ndarray, "torch.Tensor"],
2932
3097
  skip_special_tokens: bool = False,
2933
- clean_up_tokenization_spaces: bool | None = None,
3098
+ clean_up_tokenization_spaces: Optional[bool] = None,
2934
3099
  **kwargs,
2935
3100
  ) -> list[str]:
2936
3101
  """
@@ -2967,14 +3132,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
2967
3132
 
2968
3133
  def _decode(
2969
3134
  self,
2970
- token_ids: int | list[int],
3135
+ token_ids: Union[int, list[int]],
2971
3136
  skip_special_tokens: bool = False,
2972
- clean_up_tokenization_spaces: bool | None = None,
3137
+ clean_up_tokenization_spaces: Optional[bool] = None,
2973
3138
  **kwargs,
2974
3139
  ) -> str:
2975
3140
  raise NotImplementedError
2976
3141
 
2977
- def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: int | None, verbose: bool):
3142
+ def _eventual_warn_about_too_long_sequence(self, ids: list[int], max_length: Optional[int], verbose: bool):
2978
3143
  """
2979
3144
  Depending on the input and internal state we might trigger a warning about a sequence that is too long for its
2980
3145
  corresponding model
@@ -3016,22 +3181,22 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3016
3181
 
3017
3182
  def apply_chat_template(
3018
3183
  self,
3019
- conversation: list[dict[str, str]] | list[list[dict[str, str]]],
3020
- tools: list[dict | Callable] | None = None,
3021
- documents: list[dict[str, str]] | None = None,
3022
- chat_template: str | None = None,
3184
+ conversation: Union[list[dict[str, str]], list[list[dict[str, str]]]],
3185
+ tools: Optional[list[Union[dict, Callable]]] = None,
3186
+ documents: Optional[list[dict[str, str]]] = None,
3187
+ chat_template: Optional[str] = None,
3023
3188
  add_generation_prompt: bool = False,
3024
3189
  continue_final_message: bool = False,
3025
3190
  tokenize: bool = True,
3026
- padding: bool | str | PaddingStrategy = False,
3191
+ padding: Union[bool, str, PaddingStrategy] = False,
3027
3192
  truncation: bool = False,
3028
- max_length: int | None = None,
3029
- return_tensors: str | TensorType | None = None,
3030
- return_dict: bool = True,
3193
+ max_length: Optional[int] = None,
3194
+ return_tensors: Optional[Union[str, TensorType]] = None,
3195
+ return_dict: bool = False,
3031
3196
  return_assistant_tokens_mask: bool = False,
3032
- tokenizer_kwargs: dict[str, Any] | None = None,
3197
+ tokenizer_kwargs: Optional[dict[str, Any]] = None,
3033
3198
  **kwargs,
3034
- ) -> str | list[int] | list[str] | list[list[int]] | BatchEncoding:
3199
+ ) -> Union[str, list[int], list[str], list[list[int]], BatchEncoding]:
3035
3200
  """
3036
3201
  Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token
3037
3202
  ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
@@ -3085,7 +3250,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3085
3250
  values are:
3086
3251
  - `'pt'`: Return PyTorch `torch.Tensor` objects.
3087
3252
  - `'np'`: Return NumPy `np.ndarray` objects.
3088
- return_dict (`bool`, defaults to `True`):
3253
+ return_dict (`bool`, defaults to `False`):
3089
3254
  Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
3090
3255
  tokenizer_kwargs (`dict[str: Any]`, *optional*): Additional kwargs to pass to the tokenizer.
3091
3256
  return_assistant_tokens_mask (`bool`, defaults to `False`):
@@ -3100,11 +3265,14 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3100
3265
  set, will return a dict of tokenizer outputs instead.
3101
3266
  """
3102
3267
 
3103
- if not tokenize:
3104
- return_dict = False # dicts are only returned by the tokenizer anyway
3268
+ if return_dict and not tokenize:
3269
+ raise ValueError(
3270
+ "`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
3271
+ "of tokenizer outputs to return."
3272
+ )
3105
3273
 
3106
- if return_assistant_tokens_mask and not (return_dict and tokenize):
3107
- raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`")
3274
+ if return_assistant_tokens_mask and not return_dict:
3275
+ raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`")
3108
3276
 
3109
3277
  if tokenizer_kwargs is None:
3110
3278
  tokenizer_kwargs = {}
@@ -3189,7 +3357,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3189
3357
  def encode_message_with_chat_template(
3190
3358
  self,
3191
3359
  message: dict[str, str],
3192
- conversation_history: list[dict[str, str]] | None = None,
3360
+ conversation_history: Optional[list[dict[str, str]]] = None,
3193
3361
  **kwargs,
3194
3362
  ) -> list[int]:
3195
3363
  """
@@ -3219,17 +3387,13 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3219
3387
  )
3220
3388
 
3221
3389
  if conversation_history is None or len(conversation_history) == 0:
3222
- return self.apply_chat_template(
3223
- [message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
3224
- )
3390
+ return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs)
3225
3391
 
3226
3392
  conversation = conversation_history + [message]
3227
- tokens = self.apply_chat_template(
3228
- conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
3229
- )
3393
+ tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs)
3230
3394
 
3231
3395
  prefix_tokens = self.apply_chat_template(
3232
- conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
3396
+ conversation_history, add_generation_prompt=False, tokenize=True, **kwargs
3233
3397
  )
3234
3398
  # It's possible that the prefix tokens are not a prefix of the full list of tokens.
3235
3399
  # For example, if the prefix is `<s>User: Hi` and the full conversation is `<s>User: Hi</s><s>Assistant: Hello`.
@@ -3246,7 +3410,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3246
3410
  return tokens[i:]
3247
3411
  return tokens[min_len:]
3248
3412
 
3249
- def get_chat_template(self, chat_template: str | None = None, tools: list[dict] | None = None) -> str:
3413
+ def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[list[dict]] = None) -> str:
3250
3414
  """
3251
3415
  Retrieve the chat template string used for tokenizing chat messages. This template is used
3252
3416
  internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
@@ -3302,9 +3466,9 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3302
3466
 
3303
3467
  def save_chat_templates(
3304
3468
  self,
3305
- save_directory: str | os.PathLike,
3469
+ save_directory: Union[str, os.PathLike],
3306
3470
  tokenizer_config: dict,
3307
- filename_prefix: str | None,
3471
+ filename_prefix: Optional[str],
3308
3472
  save_jinja_files: bool,
3309
3473
  ):
3310
3474
  """
@@ -3355,45 +3519,6 @@ class PreTrainedTokenizerBase(PushToHubMixin):
3355
3519
  tokenizer_config["chat_template"] = self.chat_template
3356
3520
  return tokenizer_config, saved_raw_chat_template_files
3357
3521
 
3358
- def parse_response(
3359
- self,
3360
- response: str | list[str | int | list[int]] | np.ndarray | torch.Tensor,
3361
- schema: list | dict | None = None,
3362
- ):
3363
- """
3364
- Converts an output string created by generating text from a model into a parsed message dictionary.
3365
- This method is intended for use with chat models, and will read the tokenizer's `response_schema` attribute to
3366
- control parsing, although this can be overridden by passing a `response_schema` argument directly.
3367
-
3368
- This method is currently **highly experimental** and the schema specification is likely to change in future!
3369
- We recommend not building production code on top of it just yet.
3370
-
3371
- Args:
3372
- response (`str`):
3373
- The output string generated by the model. This can be either a decoded string or list of strings,
3374
- or token IDs as a list/array.
3375
- schema (`Union[list, dict]`, *optional*):
3376
- A response schema that indicates the expected output format and how parsing should be performed.
3377
- If not provided, the tokenizer's `response_schema` attribute will be used.
3378
- """
3379
- batched = (
3380
- (isinstance(response, list) and not isinstance(response[0], int))
3381
- or getattr(response, "ndim", 0) > 1 # For torch/numpy tensors
3382
- )
3383
-
3384
- if schema is None:
3385
- if getattr(self, "response_schema", None) is None:
3386
- raise AttributeError("This tokenizer does not have a `response_schema` for parsing chat responses!")
3387
- schema = self.response_schema
3388
- if batched:
3389
- if not (isinstance(response, list) and isinstance(response[0], str)):
3390
- response = self.batch_decode(response)
3391
- return [recursive_parse(single_response, schema) for single_response in response]
3392
- else:
3393
- if not isinstance(response, str):
3394
- response = self.decode(response)
3395
- return recursive_parse(response, schema)
3396
-
3397
3522
 
3398
3523
  def get_fast_tokenizer_file(tokenization_files: list[str]) -> str:
3399
3524
  """
@@ -3603,20 +3728,15 @@ def _get_prepend_scheme(add_prefix_space: bool, original_tokenizer) -> str:
3603
3728
  return prepend_scheme
3604
3729
 
3605
3730
 
3606
- def generate_merges(vocab, vocab_scores: dict[str, float] | None = None, skip_tokens: Collection[str] | None = None):
3607
- skip_tokens = set(skip_tokens) if skip_tokens is not None else set()
3731
+ def generate_merges(vocab, vocab_scores: Optional[dict[str, float]] = None):
3608
3732
  reverse = vocab_scores is not None
3609
3733
  vocab_scores = dict(vocab_scores) if reverse else vocab
3610
3734
 
3611
3735
  merges = []
3612
3736
  for merge, piece_score in vocab_scores.items():
3613
- if merge in skip_tokens:
3614
- continue
3615
3737
  local = []
3616
3738
  for index in range(1, len(merge)):
3617
3739
  piece_l, piece_r = merge[:index], merge[index:]
3618
- if piece_l in skip_tokens or piece_r in skip_tokens:
3619
- continue
3620
3740
  if piece_l in vocab and piece_r in vocab:
3621
3741
  local.append((piece_l, piece_r, piece_score))
3622
3742
  local = sorted(local, key=lambda x: (vocab[x[0]], vocab[x[1]]))