transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,186 @@
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+ # This file was automatically generated from src/transformers/models/lasr/modular_lasr.py.
+ # Do NOT edit this file manually as any edits will be overwritten by the generation of
+ # the file from the modular. If any change should be done, please apply the change to the
+ # modular_lasr.py file directly. One of our CI enforces this.
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
+ # coding=utf-8
+ # Copyright 2025 The HuggingFace Inc. team and Google LLC. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import itertools
+ import re
+ from typing import Optional, Union
+
+ from tokenizers import Tokenizer, decoders, pre_tokenizers, processors
+ from tokenizers.models import Unigram
+
+ from ...tokenization_utils_tokenizers import TokenizersBackend
+
+
+ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer.json"}
+
+
+ class LasrTokenizer(TokenizersBackend):
+     """
+     Construct a LASR tokenizer (backed by HuggingFace's *tokenizers* library). Based on
+     [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models).
+
+     This tokenizer inherits from [`TokenizersBackend`] which contains most of the main methods. Users should
+     refer to this superclass for more information regarding those methods.
+
+     Args:
+         vocab_file (`str`, *optional*):
+             [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
+             contains the vocabulary necessary to instantiate a tokenizer.
+         eos_token (`str`, *optional*, defaults to `"</s>"`):
+             The end of sequence token.
+
+             <Tip>
+
+             When building a sequence using special tokens, this is not the token that is used for the end of sequence.
+             The token used is the `sep_token`.
+
+             </Tip>
+
+         unk_token (`str`, *optional*, defaults to `"<unk>"`):
+             The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
+             token instead.
+         pad_token (`str`, *optional*, defaults to `"<pad>"`):
+             The token used for padding, for example when batching sequences of different lengths.
+         extra_ids (`int`, *optional*, defaults to 100):
+             Add a number of extra ids added to the vocabulary for use as sentinels. These tokens are accessible as
+             "<extra_id_{%d}>" where "{%d}" is a number between 0 and extra_ids-1. These tokens can be retrieved by
+             calling get_sentinel_tokens method and token ids can be by calling get_sentinel_token_ids method
+         additional_special_tokens (`list[str]`, *optional*):
+             Additional special tokens used by the tokenizer.
+         vocab (`str`, `dict` or `list`, *optional*):
+             Custom vocabulary dict. If not provided, a minimal vocabulary is created using the special tokens.
+     """
+
+     vocab_files_names = VOCAB_FILES_NAMES
+     model_input_names = ["input_ids", "attention_mask"]
+     model = Unigram
+
+     def __init__(
+         self,
+         eos_token="</s>",
+         unk_token="<unk>",
+         pad_token="<pad>",
+         extra_ids=100,
+         additional_special_tokens=None,
+         vocab=None,
+         vocab_file=None,
+         **kwargs,
+     ):
+         self._extra_ids = extra_ids
+
+         # Handle extra_ids and additional_special_tokens
+         if additional_special_tokens is not None:
+             extra_tokens = [x for x in additional_special_tokens if "<extra_id_" in str(x)]
+             if len(extra_tokens) < 1:
+                 additional_special_tokens += [f"<extra_id_{i}>" for i in range(extra_ids)]
+             elif extra_ids > 0 and extra_ids != len(extra_tokens):
+                 raise ValueError(
+                     f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are"
+                     " provided to LasrTokenizer. In this case the additional_special_tokens must include the extra_ids"
+                     " tokens"
+                 )
+         else:
+             extra_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
+             additional_special_tokens = extra_tokens
+
+         # LASR vocab structure: <pad>=0, </s>=1, <unk>=2, then regular vocab, then extra_ids in reverse
+         if vocab is not None:
+             self._vocab_scores = vocab
+         else:
+             self._vocab_scores = [
+                 (str(pad_token), 0.0),
+                 (str(eos_token), 0.0),
+                 (str(unk_token), 0.0),
+                 ("▁", -2.0),  # Space token
+             ]
+             for i in range(extra_ids - 1, -1, -1):
+                 self._vocab_scores.append((f"<extra_id_{i}>", 0.0))
+         self._tokenizer = Tokenizer(
+             Unigram(
+                 self._vocab_scores,
+                 unk_id=3,
+                 byte_fallback=False,
+             )
+         )
+
+         self._tokenizer.normalizer = None
+
+         self._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
+             [
+                 pre_tokenizers.WhitespaceSplit(),
+                 pre_tokenizers.Metaspace(replacement="▁", prepend_scheme="always", split=True),
+             ]
+         )
+
+         self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme="always", split=True)
+
+         super().__init__(
+             eos_token=eos_token,
+             unk_token=unk_token,
+             pad_token=pad_token,
+             extra_ids=extra_ids,
+             additional_special_tokens=additional_special_tokens,
+             **kwargs,
+         )
+
+         self._tokenizer.post_processor = processors.TemplateProcessing(
+             single=["$A", "</s>"],
+             pair=["$A", "</s>", "$B", "</s>"],
+             special_tokens=[
+                 ("</s>", self.eos_token_id),
+             ],
+         )
+
+     def get_sentinel_tokens(self):
+         """Get the list of sentinel tokens (extra_id tokens) from additional_special_tokens."""
+         return list(
+             set(filter(lambda x: bool(re.search(r"<extra_id_\d+>", x)) is not None, self.additional_special_tokens))
+         )
+
+     def get_sentinel_token_ids(self):
+         """Get the token IDs for sentinel tokens."""
+         return [self.convert_tokens_to_ids(token) for token in self.get_sentinel_tokens()]
+
+     def _decode(
+         self,
+         token_ids: Union[int, list[int]],
+         skip_special_tokens: bool = False,
+         clean_up_tokenization_spaces: Optional[bool] = None,
+         group_tokens: bool = True,
+         **kwargs,
+     ) -> str:
+         if isinstance(token_ids, int):
+             token_ids = [token_ids]
+         if group_tokens:
+             token_ids = [token_group[0] for token_group in itertools.groupby(token_ids)]
+
+         # for CTC we filter out the blank token, which is the pad token
+         token_ids = [token for token in token_ids if token != self.pad_token_id]
+
+         return super()._decode(
+             token_ids=token_ids,
+             skip_special_tokens=skip_special_tokens,
+             clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+             **kwargs,
+         )
+
+
+ __all__ = ["LasrTokenizer"]
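Note: the `_decode` override above collapses runs of repeated ids and drops the pad (blank) id before delegating to the parent class, i.e. standard CTC post-processing. A minimal standalone sketch of that collapse step, in plain Python with an assumed blank/pad id of 0:

    import itertools

    def ctc_collapse(token_ids, blank_id=0):
        # keep the first id of each run of repeats (itertools.groupby yields (key, group) pairs),
        # then drop the blank/pad id -- mirrors the pre-processing in LasrTokenizer._decode
        deduped = [group[0] for group in itertools.groupby(token_ids)]
        return [token for token in deduped if token != blank_id]

    print(ctc_collapse([0, 7, 7, 0, 7, 8, 8, 0]))  # -> [7, 7, 8]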
@@ -465,6 +465,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, BaseModelOutputWithPooling]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -600,6 +601,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, MaskedLMOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -716,6 +718,7 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, SequenceClassifierOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -850,6 +853,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, TokenClassifierOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -963,6 +967,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, QuestionAnsweringModelOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
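Note: the LayoutLM hunks above (and the LayoutLMv2/v3 hunks that follow) only add a trailing `**kwargs` to the `forward` signatures, so unrecognized keyword arguments are absorbed instead of raising `TypeError`. A toy illustration of the pattern, not the actual LayoutLM code:

    class Toy:
        # before: forward(self, return_dict=None) rejects unknown keyword arguments
        # after: the trailing **kwargs swallows them silently
        def forward(self, return_dict=None, **kwargs):
            return {"return_dict": return_dict, "ignored": sorted(kwargs)}

    print(Toy().forward(return_dict=True, output_attentions=False))
    # -> {'return_dict': True, 'ignored': ['output_attentions']}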
@@ -701,6 +701,7 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, BaseModelOutputWithPooling]:
          r"""
          bbox (`torch.LongTensor` of shape `((batch_size, sequence_length), 4)`, *optional*):
@@ -858,6 +859,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, SequenceClassifierOutput]:
          r"""
          input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
@@ -1061,6 +1063,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, TokenClassifierOutput]:
          r"""
          input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
@@ -1212,6 +1215,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, QuestionAnsweringModelOutput]:
          r"""
          input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
@@ -159,22 +159,11 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
      """
  
      vocab_files_names = VOCAB_FILES_NAMES
-     slow_tokenizer_class = None
- 
-     @staticmethod
-     def _load_vocab_from_file(vocab_file):
-         """Load vocab from a BERT-style vocab file (one token per line)."""
-         vocab = {}
-         with open(vocab_file, "r", encoding="utf-8") as reader:
-             for index, line in enumerate(reader):
-                 token = line.rstrip("\n")
-                 vocab[token] = index
-         return vocab
+     model = models.WordPiece
  
      def __init__(
          self,
-         vocab=None,
-         vocab_file=None,
+         vocab: Optional[Union[str, dict[str, int]]] = None,
          do_lower_case=True,
         unk_token="[UNK]",
          sep_token="[SEP]",
@@ -190,21 +179,12 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
          strip_accents=None,
          **kwargs,
      ):
-         self.vocab_file = vocab_file
          self.do_lower_case = do_lower_case
  
-         # Build vocab for WordPiece
          if vocab is not None:
-             if isinstance(vocab, dict):
-                 _vocab = vocab
-             else:
-                 raise ValueError("vocab must be a dict mapping tokens to ids")
-         elif vocab_file is not None:
-             # Load vocab from file (BERT format: one token per line)
-             _vocab = self._load_vocab_from_file(vocab_file)
+             self._vocab = vocab
          else:
-             # Initialize with at least the special tokens for WordPiece
-             _vocab = {
+             self._vocab = {
                  str(pad_token): 0,
                  str(unk_token): 1,
                  str(cls_token): 2,
@@ -212,10 +192,7 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
                  str(mask_token): 4,
              }
  
-         # Initialize WordPiece tokenizer
-         self._tokenizer = Tokenizer(models.WordPiece(vocab=_vocab, unk_token=str(unk_token)))
- 
-         # Set normalizer
+         self._tokenizer = Tokenizer(models.WordPiece(vocab=self._vocab, unk_token=str(unk_token)))
          self._tokenizer.normalizer = normalizers.BertNormalizer(
              clean_text=True,
              handle_chinese_chars=tokenize_chinese_chars,
@@ -223,27 +200,9 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
              lowercase=do_lower_case,
          )
  
-         # Set pre_tokenizer
          self._tokenizer.pre_tokenizer = pre_tokenizers.BertPreTokenizer()
- 
-         # Set decoder
          self._tokenizer.decoder = decoders.WordPiece(prefix="##")
- 
-         # Set post_processor (will be set after super().__init__ when we have token IDs)
-         # Temporarily set to None, will be configured after parent init
-         self._tokenizer.post_processor = None
- 
-         tokenizer_object = self._tokenizer
- 
-         # additional properties
-         self.cls_token_box = cls_token_box
-         self.sep_token_box = sep_token_box
-         self.pad_token_box = pad_token_box
-         self.pad_token_label = pad_token_label
-         self.only_label_first_subword = only_label_first_subword
- 
          super().__init__(
-             tokenizer_object=tokenizer_object,
              do_lower_case=do_lower_case,
              unk_token=unk_token,
              sep_token=sep_token,
@@ -260,6 +219,11 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
              **kwargs,
          )
  
+         self.cls_token_box = cls_token_box
+         self.sep_token_box = sep_token_box
+         self.pad_token_box = pad_token_box
+         self.pad_token_label = pad_token_label
+ 
          # Now set post_processor with actual token IDs
          cls = str(self.cls_token)
          sep = str(self.sep_token)
@@ -275,13 +239,6 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
              ],
          )
  
-         # additional properties
-         self.cls_token_box = cls_token_box
-         self.sep_token_box = sep_token_box
-         self.pad_token_box = pad_token_box
-         self.pad_token_label = pad_token_label
-         self.only_label_first_subword = only_label_first_subword
- 
      @add_end_docstrings(LAYOUTLMV2_ENCODE_KWARGS_DOCSTRING, LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
      def __call__(
          self,
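Note: after this refactor the LayoutLMv2 tokenizer builds its backend in memory from a plain vocab dict (WordPiece model, BERT normalizer/pre-tokenizer, `##` decoder). A small sketch of that wiring using only the `tokenizers` library and a made-up toy vocab (token ids are illustrative):

    from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers

    vocab = {"[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "hello": 5, "world": 6}
    tok = Tokenizer(models.WordPiece(vocab=vocab, unk_token="[UNK]"))
    tok.normalizer = normalizers.BertNormalizer(
        clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True
    )
    tok.pre_tokenizer = pre_tokenizers.BertPreTokenizer()
    tok.decoder = decoders.WordPiece(prefix="##")
    print(tok.encode("Hello world").tokens)  # -> ['hello', 'world']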
@@ -657,6 +657,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
+         **kwargs,
      ) -> Union[tuple, BaseModelOutput]:
          r"""
          input_ids (`torch.LongTensor` of shape `(batch_size, token_sequence_length)`):
@@ -897,6 +898,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
          pixel_values: Optional[torch.LongTensor] = None,
+         **kwargs,
      ) -> Union[tuple, TokenClassifierOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -997,6 +999,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
          return_dict: Optional[bool] = None,
          bbox: Optional[torch.LongTensor] = None,
          pixel_values: Optional[torch.LongTensor] = None,
+         **kwargs,
      ) -> Union[tuple, QuestionAnsweringModelOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -1115,6 +1118,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
          return_dict: Optional[bool] = None,
          bbox: Optional[torch.LongTensor] = None,
          pixel_values: Optional[torch.LongTensor] = None,
+         **kwargs,
      ) -> Union[tuple, SequenceClassifierOutput]:
          r"""
          bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -14,7 +14,6 @@
  # limitations under the License.
  """Tokenization class for LayoutLMv3. Same as LayoutLMv2, but RoBERTa-like BPE tokenization instead of WordPiece."""
  
- import json
  from typing import Optional, Union
  
  from tokenizers import Tokenizer, decoders, models, pre_tokenizers, processors
@@ -159,15 +158,16 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
              CrossEntropyLoss.
          only_label_first_subword (`bool`, *optional*, defaults to `True`):
              Whether or not to only label the first subword, in case word labels are provided.
-         vocab (`dict`, *optional*):
-             Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file when using from_pretrained.
-         merges (`list`, *optional*):
-             Custom merges list. If not provided, merges are loaded from merges_file when using from_pretrained.
+         vocab (`str` or `dict[str, int]`, *optional*):
+             Custom vocabulary dictionary. If not provided, vocabulary is loaded from `vocab_file` when using
+             `from_pretrained`.
+         merges (`str` or `list[str]`, *optional*):
+             Custom merges list. If not provided, merges are loaded from `merges_file` when using `from_pretrained`.
      """
  
      vocab_files_names = VOCAB_FILES_NAMES
      model_input_names = ["input_ids", "attention_mask", "bbox"]
-     slow_tokenizer_class = None
+     model = models.BPE
  
      def __init__(
          self,
@@ -185,69 +185,26 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
          pad_token_box=[0, 0, 0, 0],
          pad_token_label=-100,
          only_label_first_subword=True,
-         vocab: Optional[dict] = None,
-         merges: Optional[list] = None,
-         vocab_file: Optional[str] = None,
-         merges_file: Optional[str] = None,
+         vocab: Optional[Union[str, dict[str, int]]] = None,
+         merges: Optional[Union[str, list[str]]] = None,
          **kwargs,
      ):
          self.add_prefix_space = add_prefix_space
- 
-         # Build vocab and merges for BPE
-         # Priority: 1) vocab/merges dicts/lists, 2) vocab_file/merges_file paths, 3) empty
-         if vocab is not None:
-             _vocab = vocab
-         elif vocab_file is not None:
-             with open(vocab_file, encoding="utf-8") as f:
-                 _vocab = json.load(f)
-         else:
-             _vocab = {}
- 
-         if merges is not None:
-             _merges = merges
-         elif merges_file is not None:
-             _merges = []
-             with open(merges_file, encoding="utf-8") as f:
-                 for line in f:
-                     line = line.strip()
-                     if line and not line.startswith("#"):
-                         _merges.append(tuple(line.split()))
-         else:
-             _merges = []
- 
-         # Initialize BPE tokenizer
+         self._vocab = vocab or {}
+         self._merges = merges or []
          self._tokenizer = Tokenizer(
              models.BPE(
-                 vocab=_vocab,
-                 merges=_merges,
+                 vocab=self._vocab,
+                 merges=self._merges,
                  dropout=None,
                  continuing_subword_prefix="",
                  end_of_word_suffix="",
                  fuse_unk=False,
             )
          )
- 
-         # Set pre_tokenizer (ByteLevel)
          self._tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=add_prefix_space)
- 
-         # Set decoder
          self._tokenizer.decoder = decoders.ByteLevel()
- 
-         # Set post_processor (will be set after super().__init__ when we have token IDs)
-         # Temporarily set to None, will be configured after parent init
-         self._tokenizer.post_processor = None
- 
-         tokenizer_object = self._tokenizer
- 
-         # additional properties
-         self.cls_token_box = cls_token_box
-         self.sep_token_box = sep_token_box
-         self.pad_token_box = pad_token_box
-         self.pad_token_label = pad_token_label
-         self.only_label_first_subword = only_label_first_subword
- 
          super().__init__(
-             tokenizer_object=tokenizer_object,
              errors=errors,
              bos_token=bos_token,
              eos_token=eos_token,
@@ -277,18 +234,12 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
              add_prefix_space=add_prefix_space,
              trim_offsets=True,
          )
- 
-         # additional properties
          self.cls_token_box = cls_token_box
          self.sep_token_box = sep_token_box
          self.pad_token_box = pad_token_box
          self.pad_token_label = pad_token_label
          self.only_label_first_subword = only_label_first_subword
  
-         # Call _post_init for tokenizers created directly (not from_pretrained)
-         # For from_pretrained, this will be called again after loading the tokenizer from file
-         self._post_init()
- 
      @add_end_docstrings(LAYOUTLMV3_ENCODE_KWARGS_DOCSTRING, LAYOUTLMV3_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
      def __call__(
          self,
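Note: LayoutLMv3 now likewise feeds an in-memory vocab/merges pair straight into a byte-level BPE model. A sketch of that construction with tiny illustrative data (not the real RoBERTa-style vocab or merges):

    from tokenizers import Tokenizer, decoders, models, pre_tokenizers

    vocab = {"<s>": 0, "</s>": 1, "<unk>": 2, "Ġ": 3, "h": 4, "i": 5, "hi": 6, "Ġhi": 7}
    merges = [("h", "i"), ("Ġ", "hi")]
    tok = Tokenizer(
        models.BPE(vocab=vocab, merges=merges, dropout=None,
                   continuing_subword_prefix="", end_of_word_suffix="", fuse_unk=False)
    )
    tok.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=True)
    tok.decoder = decoders.ByteLevel()
    print(tok.encode("hi hi").tokens)  # -> ['Ġhi', 'Ġhi']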
@@ -150,8 +150,8 @@ class LayoutXLMTokenizer(TokenizersBackend):
      refer to this superclass for more information regarding those methods.
  
      Args:
-         vocab (`list[tuple[str, float]]`, *optional*):
-             Vocabulary for the tokenizer as a list of (token, score) tuples.
+         vocab (`str`, `dict` or `list`, *optional*):
+             Vocabulary for the tokenizer as a path, a dictionary or a list of `(token, score)` tuples.
          bos_token (`str`, *optional*, defaults to `"<s>"`):
              The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
  
@@ -206,12 +206,11 @@ class LayoutXLMTokenizer(TokenizersBackend):
  
      vocab_files_names = VOCAB_FILES_NAMES
      model_input_names = ["input_ids", "attention_mask"]
-     slow_tokenizer_class = None
+     model = Unigram
  
      def __init__(
          self,
-         vocab_file=None,
-         vocab=None,
+         vocab: Optional[Union[str, list]] = None,
          bos_token="<s>",
          eos_token="</s>",
          sep_token="</s>",
@@ -229,17 +228,10 @@ class LayoutXLMTokenizer(TokenizersBackend):
      ):
          # Mask token behave like a normal word, i.e. include the space before it
          mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
- 
          self.add_prefix_space = add_prefix_space
  
-         # Build vocab from list of tuples if provided, else use default
-         # Handle both list of tuples (when creating) and dict (when loading)
          if vocab is not None:
-             if isinstance(vocab, dict):
-                 # Convert dict to list of tuples
-                 self._vocab = [(token, score) for token, score in vocab.items()]
-             else:
-                 self._vocab = vocab
+             self._vocab = vocab
          else:
              self._vocab = [
                  ("<s>", 0.0),
@@ -250,10 +242,7 @@ class LayoutXLMTokenizer(TokenizersBackend):
          if mask_token not in [v[0] for v in self._vocab]:
              self._vocab.append((str(mask_token), 0.0))
  
-         # Create the Unigram tokenizer
          self._tokenizer = Tokenizer(Unigram(self._vocab, unk_id=3, byte_fallback=False))
- 
-         # Set up normalizer (strip right, replace multiple spaces)
          self._tokenizer.normalizer = normalizers.Sequence(
              [
                  normalizers.Strip(left=False, right=True),
@@ -261,30 +250,11 @@ class LayoutXLMTokenizer(TokenizersBackend):
              ]
          )
  
-         # Set up pre_tokenizer (Metaspace)
          prepend_scheme = _get_prepend_scheme(add_prefix_space, self)
          self._tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
  
-         # Set up decoder
          self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
  
-         # Set up post_processor for XLM-RoBERTa style
-         # Get token IDs
-         cls_token_id = self._get_token_id(str(cls_token))
-         sep_token_id = self._get_token_id(str(sep_token))
- 
-         self._tokenizer.post_processor = processors.TemplateProcessing(
-             single="<s> $A </s>",
-             pair="<s> $A </s> </s> $B </s>",
-             special_tokens=[
-                 ("<s>", cls_token_id),
-                 ("</s>", sep_token_id),
-             ],
-         )
- 
-         tokenizer_object = self._tokenizer
- 
-         # additional properties
          self.cls_token_box = cls_token_box
          self.sep_token_box = sep_token_box
          self.pad_token_box = pad_token_box
@@ -292,7 +262,6 @@ class LayoutXLMTokenizer(TokenizersBackend):
          self.only_label_first_subword = only_label_first_subword
  
          super().__init__(
-             tokenizer_object=tokenizer_object,
              bos_token=bos_token,
              eos_token=eos_token,
              sep_token=sep_token,
@@ -300,7 +269,6 @@ class LayoutXLMTokenizer(TokenizersBackend):
              unk_token=unk_token,
              pad_token=pad_token,
              mask_token=mask_token,
-             vocab_file=vocab_file,
              vocab=vocab,
              add_prefix_space=add_prefix_space,
              cls_token_box=cls_token_box,
@@ -311,7 +279,14 @@ class LayoutXLMTokenizer(TokenizersBackend):
              **kwargs,
          )
  
-         self.vocab_file = vocab_file
+         self._tokenizer.post_processor = processors.TemplateProcessing(
+             single=f"{str(self.cls_token)}:0 $A:0 {str(self.sep_token)}:0",
+             pair=f"{str(self.cls_token)}:0 $A:0 {str(self.sep_token)}:0 {str(self.sep_token)}:0 $B:0 {str(self.sep_token)}:0",
+             special_tokens=[
+                 (str(self.cls_token), self.cls_token_id),
+                 (str(self.sep_token), self.sep_token_id),
+             ],
+         )
  
      def _get_token_id(self, token: str) -> int:
          """Helper to get token ID from vocab."""