transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (835)
  1. transformers/__init__.py +49 -3
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/cli/serve.py +47 -17
  6. transformers/configuration_utils.py +114 -70
  7. transformers/conversion_mapping.py +83 -7
  8. transformers/convert_slow_tokenizer.py +225 -10
  9. transformers/core_model_loading.py +374 -147
  10. transformers/data/data_collator.py +12 -4
  11. transformers/dependency_versions_table.py +2 -3
  12. transformers/dynamic_module_utils.py +1 -2
  13. transformers/feature_extraction_utils.py +55 -24
  14. transformers/file_utils.py +0 -1
  15. transformers/generation/__init__.py +11 -1
  16. transformers/generation/candidate_generator.py +79 -31
  17. transformers/generation/configuration_utils.py +165 -124
  18. transformers/generation/continuous_batching/__init__.py +4 -0
  19. transformers/generation/continuous_batching/cache.py +47 -18
  20. transformers/generation/continuous_batching/cache_manager.py +131 -34
  21. transformers/generation/continuous_batching/continuous_api.py +228 -136
  22. transformers/generation/continuous_batching/requests.py +28 -1
  23. transformers/generation/continuous_batching/scheduler.py +11 -4
  24. transformers/generation/stopping_criteria.py +1 -1
  25. transformers/generation/utils.py +108 -110
  26. transformers/generation/watermarking.py +8 -5
  27. transformers/image_processing_base.py +3 -14
  28. transformers/image_processing_utils_fast.py +15 -4
  29. transformers/initialization.py +37 -0
  30. transformers/integrations/__init__.py +16 -2
  31. transformers/integrations/accelerate.py +58 -113
  32. transformers/integrations/aqlm.py +36 -66
  33. transformers/integrations/awq.py +46 -515
  34. transformers/integrations/bitnet.py +47 -105
  35. transformers/integrations/bitsandbytes.py +91 -202
  36. transformers/integrations/deepspeed.py +18 -2
  37. transformers/integrations/eetq.py +84 -81
  38. transformers/integrations/fbgemm_fp8.py +191 -145
  39. transformers/integrations/finegrained_fp8.py +241 -208
  40. transformers/integrations/flash_attention.py +2 -2
  41. transformers/integrations/fp_quant.py +92 -0
  42. transformers/integrations/ggml.py +11 -1
  43. transformers/integrations/higgs.py +37 -62
  44. transformers/integrations/hub_kernels.py +65 -8
  45. transformers/integrations/integration_utils.py +45 -0
  46. transformers/integrations/mistral.py +12 -0
  47. transformers/integrations/moe.py +240 -0
  48. transformers/integrations/mxfp4.py +28 -74
  49. transformers/integrations/peft.py +12 -29
  50. transformers/integrations/quanto.py +77 -56
  51. transformers/integrations/quark.py +55 -0
  52. transformers/integrations/spqr.py +42 -90
  53. transformers/integrations/tensor_parallel.py +167 -221
  54. transformers/integrations/torchao.py +32 -38
  55. transformers/integrations/vptq.py +40 -59
  56. transformers/modelcard.py +1 -2
  57. transformers/modeling_gguf_pytorch_utils.py +74 -19
  58. transformers/modeling_rope_utils.py +107 -86
  59. transformers/modeling_utils.py +611 -527
  60. transformers/models/__init__.py +22 -0
  61. transformers/models/afmoe/modeling_afmoe.py +10 -19
  62. transformers/models/afmoe/modular_afmoe.py +5 -13
  63. transformers/models/aimv2/modeling_aimv2.py +4 -0
  64. transformers/models/aimv2/modular_aimv2.py +4 -0
  65. transformers/models/albert/modeling_albert.py +3 -0
  66. transformers/models/albert/tokenization_albert.py +6 -12
  67. transformers/models/align/modeling_align.py +14 -6
  68. transformers/models/altclip/modeling_altclip.py +11 -3
  69. transformers/models/apertus/modeling_apertus.py +8 -6
  70. transformers/models/apertus/modular_apertus.py +4 -1
  71. transformers/models/arcee/modeling_arcee.py +5 -5
  72. transformers/models/aria/modeling_aria.py +12 -8
  73. transformers/models/aria/modular_aria.py +7 -3
  74. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  75. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  76. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  77. transformers/models/auto/auto_factory.py +1 -1
  78. transformers/models/auto/configuration_auto.py +38 -0
  79. transformers/models/auto/feature_extraction_auto.py +9 -3
  80. transformers/models/auto/image_processing_auto.py +5 -2
  81. transformers/models/auto/modeling_auto.py +37 -0
  82. transformers/models/auto/processing_auto.py +22 -10
  83. transformers/models/auto/tokenization_auto.py +147 -566
  84. transformers/models/auto/video_processing_auto.py +5 -2
  85. transformers/models/autoformer/modeling_autoformer.py +4 -0
  86. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  87. transformers/models/bamba/modeling_bamba.py +21 -21
  88. transformers/models/bamba/modular_bamba.py +17 -16
  89. transformers/models/bark/modeling_bark.py +11 -0
  90. transformers/models/bart/configuration_bart.py +0 -1
  91. transformers/models/bart/modeling_bart.py +14 -0
  92. transformers/models/barthez/tokenization_barthez.py +5 -10
  93. transformers/models/beit/image_processing_beit_fast.py +0 -1
  94. transformers/models/beit/modeling_beit.py +6 -1
  95. transformers/models/bert/modeling_bert.py +3 -0
  96. transformers/models/bert/tokenization_bert.py +8 -21
  97. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  98. transformers/models/big_bird/modeling_big_bird.py +9 -0
  99. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  100. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
  101. transformers/models/biogpt/modeling_biogpt.py +2 -0
  102. transformers/models/biogpt/modular_biogpt.py +2 -0
  103. transformers/models/bit/modeling_bit.py +16 -3
  104. transformers/models/bitnet/modeling_bitnet.py +5 -5
  105. transformers/models/blenderbot/modeling_blenderbot.py +12 -0
  106. transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
  107. transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
  108. transformers/models/blip/modeling_blip.py +2 -0
  109. transformers/models/blip/modeling_blip_text.py +10 -0
  110. transformers/models/blip_2/modeling_blip_2.py +4 -1
  111. transformers/models/bloom/modeling_bloom.py +17 -44
  112. transformers/models/blt/modeling_blt.py +164 -4
  113. transformers/models/blt/modular_blt.py +170 -5
  114. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  115. transformers/models/bridgetower/modeling_bridgetower.py +11 -1
  116. transformers/models/bros/modeling_bros.py +12 -0
  117. transformers/models/camembert/modeling_camembert.py +109 -106
  118. transformers/models/camembert/tokenization_camembert.py +8 -12
  119. transformers/models/canine/modeling_canine.py +11 -0
  120. transformers/models/canine/tokenization_canine.py +2 -0
  121. transformers/models/chameleon/modeling_chameleon.py +11 -5
  122. transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
  123. transformers/models/clap/feature_extraction_clap.py +2 -2
  124. transformers/models/clap/modeling_clap.py +30 -15
  125. transformers/models/clip/modeling_clip.py +2 -0
  126. transformers/models/clip/tokenization_clip.py +22 -44
  127. transformers/models/clipseg/modeling_clipseg.py +9 -0
  128. transformers/models/clvp/modeling_clvp.py +19 -3
  129. transformers/models/clvp/tokenization_clvp.py +1 -63
  130. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  131. transformers/models/codegen/modeling_codegen.py +13 -4
  132. transformers/models/codegen/tokenization_codegen.py +14 -43
  133. transformers/models/cohere/modeling_cohere.py +5 -4
  134. transformers/models/cohere/modular_cohere.py +2 -1
  135. transformers/models/cohere/tokenization_cohere.py +12 -42
  136. transformers/models/cohere2/modeling_cohere2.py +8 -7
  137. transformers/models/cohere2/modular_cohere2.py +5 -5
  138. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
  139. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  140. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  141. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  142. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  143. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  144. transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
  145. transformers/models/convbert/modeling_convbert.py +9 -0
  146. transformers/models/convnext/image_processing_convnext.py +2 -2
  147. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  148. transformers/models/convnext/modeling_convnext.py +2 -4
  149. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  150. transformers/models/csm/generation_csm.py +19 -22
  151. transformers/models/csm/modeling_csm.py +7 -4
  152. transformers/models/csm/modular_csm.py +2 -0
  153. transformers/models/ctrl/modeling_ctrl.py +15 -2
  154. transformers/models/cvt/modeling_cvt.py +7 -1
  155. transformers/models/cwm/modeling_cwm.py +5 -5
  156. transformers/models/d_fine/configuration_d_fine.py +3 -4
  157. transformers/models/d_fine/modeling_d_fine.py +48 -39
  158. transformers/models/d_fine/modular_d_fine.py +16 -4
  159. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  160. transformers/models/dab_detr/modeling_dab_detr.py +5 -1
  161. transformers/models/dac/modeling_dac.py +6 -6
  162. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  163. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  164. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  165. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  166. transformers/models/dbrx/configuration_dbrx.py +9 -1
  167. transformers/models/dbrx/modeling_dbrx.py +3 -3
  168. transformers/models/deberta/modeling_deberta.py +7 -0
  169. transformers/models/deberta/tokenization_deberta.py +11 -20
  170. transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
  171. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  172. transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
  173. transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
  174. transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
  175. transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
  176. transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
  177. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  178. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  179. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  180. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  181. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  182. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  183. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  184. transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
  185. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  186. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  187. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  188. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  189. transformers/models/detr/configuration_detr.py +1 -1
  190. transformers/models/detr/modeling_detr.py +13 -1
  191. transformers/models/dia/generation_dia.py +3 -10
  192. transformers/models/dia/modeling_dia.py +16 -4
  193. transformers/models/dia/modular_dia.py +11 -1
  194. transformers/models/dia/processing_dia.py +1 -1
  195. transformers/models/diffllama/modeling_diffllama.py +5 -5
  196. transformers/models/diffllama/modular_diffllama.py +2 -2
  197. transformers/models/dinat/modeling_dinat.py +3 -0
  198. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  199. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  200. transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
  201. transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
  202. transformers/models/distilbert/modeling_distilbert.py +11 -9
  203. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  204. transformers/models/doge/modeling_doge.py +3 -4
  205. transformers/models/doge/modular_doge.py +0 -1
  206. transformers/models/donut/image_processing_donut_fast.py +0 -1
  207. transformers/models/donut/modeling_donut_swin.py +18 -12
  208. transformers/models/dots1/modeling_dots1.py +23 -11
  209. transformers/models/dots1/modular_dots1.py +5 -3
  210. transformers/models/dpr/modeling_dpr.py +5 -0
  211. transformers/models/dpr/tokenization_dpr.py +12 -0
  212. transformers/models/dpt/configuration_dpt.py +1 -1
  213. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  214. transformers/models/dpt/modular_dpt.py +1 -2
  215. transformers/models/edgetam/configuration_edgetam.py +1 -1
  216. transformers/models/edgetam/modeling_edgetam.py +6 -3
  217. transformers/models/edgetam/modular_edgetam.py +15 -14
  218. transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
  219. transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
  220. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  221. transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
  222. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  223. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  224. transformers/models/efficientnet/modeling_efficientnet.py +7 -1
  225. transformers/models/electra/modeling_electra.py +7 -0
  226. transformers/models/emu3/modeling_emu3.py +12 -6
  227. transformers/models/emu3/modular_emu3.py +7 -1
  228. transformers/models/encodec/modeling_encodec.py +14 -0
  229. transformers/models/eomt/image_processing_eomt.py +13 -1
  230. transformers/models/eomt/image_processing_eomt_fast.py +60 -16
  231. transformers/models/eomt/modeling_eomt.py +7 -0
  232. transformers/models/eomt/modular_eomt.py +7 -0
  233. transformers/models/ernie/modeling_ernie.py +6 -0
  234. transformers/models/ernie/modular_ernie.py +6 -0
  235. transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
  236. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  237. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
  238. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
  239. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  240. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  241. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  242. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  243. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  244. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  245. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  246. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  247. transformers/models/esm/modeling_esm.py +6 -0
  248. transformers/models/esm/modeling_esmfold.py +11 -5
  249. transformers/models/evolla/modeling_evolla.py +13 -5
  250. transformers/models/evolla/modular_evolla.py +8 -0
  251. transformers/models/exaone4/modeling_exaone4.py +3 -3
  252. transformers/models/exaone4/modular_exaone4.py +0 -1
  253. transformers/models/falcon/modeling_falcon.py +9 -4
  254. transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
  255. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  256. transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
  257. transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
  258. transformers/models/fast_vlm/__init__.py +27 -0
  259. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  260. transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
  261. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  262. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
  263. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  264. transformers/models/flaubert/modeling_flaubert.py +21 -15
  265. transformers/models/flava/image_processing_flava_fast.py +0 -2
  266. transformers/models/flava/modeling_flava.py +10 -2
  267. transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
  268. transformers/models/florence2/modeling_florence2.py +22 -4
  269. transformers/models/florence2/modular_florence2.py +15 -1
  270. transformers/models/fnet/modeling_fnet.py +14 -0
  271. transformers/models/focalnet/modeling_focalnet.py +4 -0
  272. transformers/models/fsmt/modeling_fsmt.py +2 -0
  273. transformers/models/funnel/modeling_funnel.py +8 -0
  274. transformers/models/funnel/tokenization_funnel.py +17 -24
  275. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  276. transformers/models/fuyu/modeling_fuyu.py +3 -1
  277. transformers/models/fuyu/processing_fuyu.py +19 -3
  278. transformers/models/gemma/modeling_gemma.py +14 -16
  279. transformers/models/gemma/modular_gemma.py +9 -11
  280. transformers/models/gemma/tokenization_gemma.py +10 -27
  281. transformers/models/gemma2/modeling_gemma2.py +5 -5
  282. transformers/models/gemma2/modular_gemma2.py +3 -2
  283. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  284. transformers/models/gemma3/modeling_gemma3.py +42 -91
  285. transformers/models/gemma3/modular_gemma3.py +38 -87
  286. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  287. transformers/models/gemma3n/modeling_gemma3n.py +65 -218
  288. transformers/models/gemma3n/modular_gemma3n.py +68 -68
  289. transformers/models/git/modeling_git.py +183 -126
  290. transformers/models/glm/modeling_glm.py +5 -5
  291. transformers/models/glm4/modeling_glm4.py +5 -5
  292. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  293. transformers/models/glm46v/modeling_glm46v.py +3 -1
  294. transformers/models/glm46v/modular_glm46v.py +3 -0
  295. transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
  296. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  297. transformers/models/glm4v/configuration_glm4v.py +3 -1
  298. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  299. transformers/models/glm4v/modeling_glm4v.py +18 -8
  300. transformers/models/glm4v/modular_glm4v.py +17 -7
  301. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  302. transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
  303. transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
  304. transformers/models/glmasr/__init__.py +30 -0
  305. transformers/models/glmasr/configuration_glmasr.py +197 -0
  306. transformers/models/glmasr/modeling_glmasr.py +512 -0
  307. transformers/models/glmasr/modular_glmasr.py +433 -0
  308. transformers/models/glmasr/processing_glmasr.py +332 -0
  309. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  310. transformers/models/glpn/modeling_glpn.py +2 -0
  311. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  312. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  313. transformers/models/gpt2/modeling_gpt2.py +13 -6
  314. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  315. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
  316. transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
  317. transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
  318. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  319. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  320. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
  321. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  322. transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
  323. transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
  324. transformers/models/gptj/modeling_gptj.py +18 -6
  325. transformers/models/granite/modeling_granite.py +5 -5
  326. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  327. transformers/models/granitemoe/modeling_granitemoe.py +6 -9
  328. transformers/models/granitemoe/modular_granitemoe.py +1 -4
  329. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  330. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
  331. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  332. transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
  333. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  334. transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
  335. transformers/models/groupvit/modeling_groupvit.py +9 -1
  336. transformers/models/helium/modeling_helium.py +5 -4
  337. transformers/models/herbert/tokenization_herbert.py +9 -25
  338. transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
  339. transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
  340. transformers/models/hiera/modeling_hiera.py +4 -0
  341. transformers/models/hubert/modeling_hubert.py +7 -0
  342. transformers/models/hubert/modular_hubert.py +5 -0
  343. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
  344. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  345. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  346. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
  347. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  348. transformers/models/ibert/modeling_ibert.py +22 -0
  349. transformers/models/idefics/modeling_idefics.py +15 -21
  350. transformers/models/idefics2/modeling_idefics2.py +7 -1
  351. transformers/models/idefics3/modeling_idefics3.py +5 -1
  352. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  353. transformers/models/imagegpt/modeling_imagegpt.py +11 -3
  354. transformers/models/informer/modeling_informer.py +4 -0
  355. transformers/models/informer/modular_informer.py +1 -0
  356. transformers/models/instructblip/modeling_instructblip.py +2 -0
  357. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  358. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  359. transformers/models/internvl/modeling_internvl.py +13 -12
  360. transformers/models/internvl/modular_internvl.py +7 -13
  361. transformers/models/internvl/video_processing_internvl.py +0 -1
  362. transformers/models/jais2/__init__.py +27 -0
  363. transformers/models/jais2/configuration_jais2.py +152 -0
  364. transformers/models/jais2/modeling_jais2.py +486 -0
  365. transformers/models/jais2/modular_jais2.py +196 -0
  366. transformers/models/jamba/modeling_jamba.py +25 -20
  367. transformers/models/jamba/modular_jamba.py +17 -17
  368. transformers/models/janus/image_processing_janus_fast.py +0 -1
  369. transformers/models/janus/modeling_janus.py +16 -7
  370. transformers/models/janus/modular_janus.py +17 -7
  371. transformers/models/jetmoe/modeling_jetmoe.py +4 -4
  372. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  373. transformers/models/kosmos2/modeling_kosmos2.py +15 -2
  374. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  375. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  376. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
  377. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  378. transformers/models/lasr/__init__.py +29 -0
  379. transformers/models/lasr/configuration_lasr.py +248 -0
  380. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  381. transformers/models/lasr/modeling_lasr.py +730 -0
  382. transformers/models/lasr/modular_lasr.py +576 -0
  383. transformers/models/lasr/processing_lasr.py +94 -0
  384. transformers/models/lasr/tokenization_lasr.py +186 -0
  385. transformers/models/layoutlm/modeling_layoutlm.py +10 -3
  386. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  387. transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
  388. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
  389. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  390. transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
  391. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  392. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  393. transformers/models/led/modeling_led.py +12 -0
  394. transformers/models/levit/modeling_levit.py +21 -0
  395. transformers/models/lfm2/modeling_lfm2.py +5 -6
  396. transformers/models/lfm2/modular_lfm2.py +0 -1
  397. transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
  398. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  399. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  400. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  401. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  402. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  403. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  404. transformers/models/lightglue/modeling_lightglue.py +3 -1
  405. transformers/models/lightglue/modular_lightglue.py +1 -0
  406. transformers/models/lilt/modeling_lilt.py +23 -15
  407. transformers/models/llama/modeling_llama.py +5 -5
  408. transformers/models/llama/tokenization_llama.py +15 -43
  409. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  410. transformers/models/llama4/modeling_llama4.py +11 -6
  411. transformers/models/llava/image_processing_llava_fast.py +0 -1
  412. transformers/models/llava/modeling_llava.py +12 -7
  413. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  414. transformers/models/llava_next/modeling_llava_next.py +7 -3
  415. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  416. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  417. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  418. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  419. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  420. transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
  421. transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
  422. transformers/models/longformer/modeling_longformer.py +6 -0
  423. transformers/models/longt5/modeling_longt5.py +4 -4
  424. transformers/models/luke/modeling_luke.py +9 -0
  425. transformers/models/luke/tokenization_luke.py +11 -38
  426. transformers/models/lxmert/modeling_lxmert.py +2 -0
  427. transformers/models/m2m_100/modeling_m2m_100.py +14 -0
  428. transformers/models/mamba/modeling_mamba.py +16 -23
  429. transformers/models/mamba2/modeling_mamba2.py +24 -23
  430. transformers/models/marian/configuration_marian.py +1 -1
  431. transformers/models/marian/modeling_marian.py +8 -0
  432. transformers/models/markuplm/modeling_markuplm.py +9 -8
  433. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  434. transformers/models/mask2former/configuration_mask2former.py +3 -3
  435. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  436. transformers/models/mask2former/modeling_mask2former.py +11 -0
  437. transformers/models/maskformer/configuration_maskformer.py +3 -3
  438. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  439. transformers/models/maskformer/modeling_maskformer.py +11 -1
  440. transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
  441. transformers/models/mbart/configuration_mbart.py +1 -0
  442. transformers/models/mbart/modeling_mbart.py +14 -0
  443. transformers/models/mbart/tokenization_mbart.py +11 -52
  444. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  445. transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
  446. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  447. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  448. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  449. transformers/models/mimi/modeling_mimi.py +28 -5
  450. transformers/models/minimax/modeling_minimax.py +19 -6
  451. transformers/models/minimax/modular_minimax.py +12 -1
  452. transformers/models/ministral/modeling_ministral.py +5 -5
  453. transformers/models/ministral3/configuration_ministral3.py +1 -1
  454. transformers/models/ministral3/modeling_ministral3.py +5 -4
  455. transformers/models/mistral/modeling_mistral.py +5 -4
  456. transformers/models/mistral3/modeling_mistral3.py +10 -4
  457. transformers/models/mistral3/modular_mistral3.py +3 -1
  458. transformers/models/mixtral/modeling_mixtral.py +15 -7
  459. transformers/models/mixtral/modular_mixtral.py +6 -2
  460. transformers/models/mlcd/modeling_mlcd.py +6 -0
  461. transformers/models/mlcd/modular_mlcd.py +4 -0
  462. transformers/models/mllama/modeling_mllama.py +15 -4
  463. transformers/models/mluke/tokenization_mluke.py +6 -6
  464. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  465. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
  466. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  467. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  468. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  469. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  470. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  471. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  472. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  473. transformers/models/mobilevit/modeling_mobilevit.py +7 -0
  474. transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
  475. transformers/models/modernbert/modeling_modernbert.py +16 -2
  476. transformers/models/modernbert/modular_modernbert.py +14 -1
  477. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
  478. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
  479. transformers/models/moonshine/modeling_moonshine.py +5 -3
  480. transformers/models/moshi/modeling_moshi.py +26 -53
  481. transformers/models/mpnet/modeling_mpnet.py +7 -0
  482. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  483. transformers/models/mpt/modeling_mpt.py +2 -0
  484. transformers/models/mra/modeling_mra.py +10 -1
  485. transformers/models/mt5/configuration_mt5.py +2 -3
  486. transformers/models/mt5/modeling_mt5.py +7 -10
  487. transformers/models/musicgen/modeling_musicgen.py +7 -9
  488. transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
  489. transformers/models/mvp/modeling_mvp.py +14 -0
  490. transformers/models/nanochat/modeling_nanochat.py +5 -5
  491. transformers/models/nemotron/modeling_nemotron.py +7 -5
  492. transformers/models/nllb/tokenization_nllb.py +8 -22
  493. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  494. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  495. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  496. transformers/models/nougat/tokenization_nougat.py +15 -68
  497. transformers/models/nystromformer/modeling_nystromformer.py +13 -0
  498. transformers/models/olmo/modeling_olmo.py +5 -5
  499. transformers/models/olmo/modular_olmo.py +2 -2
  500. transformers/models/olmo2/modeling_olmo2.py +5 -6
  501. transformers/models/olmo2/modular_olmo2.py +0 -1
  502. transformers/models/olmo3/modeling_olmo3.py +5 -5
  503. transformers/models/olmoe/modeling_olmoe.py +15 -7
  504. transformers/models/olmoe/modular_olmoe.py +4 -2
  505. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  506. transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
  507. transformers/models/oneformer/configuration_oneformer.py +3 -3
  508. transformers/models/oneformer/modeling_oneformer.py +11 -39
  509. transformers/models/openai/modeling_openai.py +15 -0
  510. transformers/models/openai/tokenization_openai.py +10 -46
  511. transformers/models/opt/modeling_opt.py +2 -0
  512. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  513. transformers/models/ovis2/modeling_ovis2.py +15 -3
  514. transformers/models/ovis2/modular_ovis2.py +8 -0
  515. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  516. transformers/models/owlv2/modeling_owlv2.py +11 -3
  517. transformers/models/owlv2/modular_owlv2.py +0 -2
  518. transformers/models/owlvit/modeling_owlvit.py +11 -3
  519. transformers/models/paddleocr_vl/__init__.py +32 -0
  520. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  521. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
  522. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  523. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
  524. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
  525. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  526. transformers/models/paligemma/modeling_paligemma.py +25 -17
  527. transformers/models/parakeet/configuration_parakeet.py +4 -6
  528. transformers/models/parakeet/modeling_parakeet.py +14 -6
  529. transformers/models/parakeet/modular_parakeet.py +7 -2
  530. transformers/models/parakeet/processing_parakeet.py +1 -0
  531. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  532. transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
  533. transformers/models/patchtst/modeling_patchtst.py +25 -6
  534. transformers/models/pe_audio/__init__.py +30 -0
  535. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  536. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  537. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  538. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  539. transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
  540. transformers/models/pe_audio_video/__init__.py +29 -0
  541. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  542. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  543. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  544. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  545. transformers/models/pe_video/__init__.py +30 -0
  546. transformers/models/pe_video/configuration_pe_video.py +211 -0
  547. transformers/models/pe_video/modeling_pe_video.py +636 -0
  548. transformers/models/pe_video/modular_pe_video.py +219 -0
  549. transformers/models/pe_video/processing_pe_video.py +10 -0
  550. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  551. transformers/models/pegasus/configuration_pegasus.py +1 -0
  552. transformers/models/pegasus/modeling_pegasus.py +8 -0
  553. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  554. transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
  555. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  556. transformers/models/perceiver/modeling_perceiver.py +13 -1
  557. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  558. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  559. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  560. transformers/models/persimmon/modeling_persimmon.py +3 -2
  561. transformers/models/phi/modeling_phi.py +5 -6
  562. transformers/models/phi/modular_phi.py +0 -1
  563. transformers/models/phi3/modeling_phi3.py +3 -2
  564. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
  565. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
  566. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  567. transformers/models/phimoe/modeling_phimoe.py +15 -7
  568. transformers/models/phimoe/modular_phimoe.py +3 -3
  569. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  570. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  571. transformers/models/pixio/__init__.py +30 -0
  572. transformers/models/pixio/configuration_pixio.py +151 -0
  573. transformers/models/pixio/modeling_pixio.py +507 -0
  574. transformers/models/pixio/modular_pixio.py +404 -0
  575. transformers/models/pixtral/modeling_pixtral.py +3 -2
  576. transformers/models/pixtral/processing_pixtral.py +3 -1
  577. transformers/models/plbart/configuration_plbart.py +1 -0
  578. transformers/models/plbart/modeling_plbart.py +13 -0
  579. transformers/models/plbart/modular_plbart.py +8 -0
  580. transformers/models/plbart/tokenization_plbart.py +0 -2
  581. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  582. transformers/models/poolformer/modeling_poolformer.py +13 -1
  583. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  584. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  585. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  586. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  587. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  588. transformers/models/prophetnet/modeling_prophetnet.py +5 -1
  589. transformers/models/pvt/modeling_pvt.py +2 -0
  590. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  591. transformers/models/qwen2/modeling_qwen2.py +5 -5
  592. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  593. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  594. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
  595. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
  596. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  597. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
  598. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
  599. transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
  600. transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
  601. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  602. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  603. transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
  604. transformers/models/qwen3/modeling_qwen3.py +5 -5
  605. transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
  606. transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
  607. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  608. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
  609. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
  610. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  611. transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
  612. transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
  613. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  614. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
  615. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
  616. transformers/models/rag/configuration_rag.py +0 -8
  617. transformers/models/rag/modeling_rag.py +8 -9
  618. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
  619. transformers/models/reformer/modeling_reformer.py +13 -1
  620. transformers/models/reformer/tokenization_reformer.py +11 -28
  621. transformers/models/regnet/modeling_regnet.py +10 -1
  622. transformers/models/rembert/modeling_rembert.py +13 -1
  623. transformers/models/rembert/tokenization_rembert.py +3 -10
  624. transformers/models/resnet/modeling_resnet.py +19 -5
  625. transformers/models/roberta/modeling_roberta.py +3 -0
  626. transformers/models/roberta/modular_roberta.py +3 -0
  627. transformers/models/roberta/tokenization_roberta.py +18 -27
  628. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  629. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  630. transformers/models/roformer/modeling_roformer.py +6 -0
  631. transformers/models/roformer/tokenization_roformer.py +77 -412
  632. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  633. transformers/models/rt_detr/modeling_rt_detr.py +6 -0
  634. transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
  635. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  636. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
  637. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  638. transformers/models/rwkv/modeling_rwkv.py +2 -1
  639. transformers/models/sam/configuration_sam.py +1 -0
  640. transformers/models/sam/image_processing_sam_fast.py +0 -1
  641. transformers/models/sam/modeling_sam.py +4 -1
  642. transformers/models/sam2/configuration_sam2.py +1 -1
  643. transformers/models/sam2/modeling_sam2.py +7 -3
  644. transformers/models/sam2/modular_sam2.py +7 -3
  645. transformers/models/sam2_video/modeling_sam2_video.py +52 -43
  646. transformers/models/sam2_video/modular_sam2_video.py +32 -18
  647. transformers/models/sam3/configuration_sam3.py +21 -1
  648. transformers/models/sam3/modeling_sam3.py +100 -80
  649. transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
  650. transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
  651. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  652. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
  653. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  654. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  655. transformers/models/sam3_video/modeling_sam3_video.py +4 -3
  656. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  657. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  658. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  659. transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
  660. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  661. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
  662. transformers/models/seed_oss/modeling_seed_oss.py +3 -3
  663. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  664. transformers/models/segformer/modeling_segformer.py +6 -3
  665. transformers/models/segformer/modular_segformer.py +0 -1
  666. transformers/models/seggpt/modeling_seggpt.py +2 -0
  667. transformers/models/sew/modeling_sew.py +3 -0
  668. transformers/models/sew/modular_sew.py +1 -0
  669. transformers/models/sew_d/modeling_sew_d.py +3 -0
  670. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  671. transformers/models/siglip/modeling_siglip.py +24 -2
  672. transformers/models/siglip2/modeling_siglip2.py +67 -41
  673. transformers/models/siglip2/modular_siglip2.py +4 -0
  674. transformers/models/smollm3/modeling_smollm3.py +5 -5
  675. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  676. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  677. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  678. transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
  679. transformers/models/speecht5/modeling_speecht5.py +41 -1
  680. transformers/models/splinter/modeling_splinter.py +12 -3
  681. transformers/models/splinter/tokenization_splinter.py +9 -28
  682. transformers/models/squeezebert/modeling_squeezebert.py +8 -0
  683. transformers/models/stablelm/modeling_stablelm.py +4 -2
  684. transformers/models/starcoder2/modeling_starcoder2.py +5 -4
  685. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  686. transformers/models/superglue/modeling_superglue.py +1 -0
  687. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  688. transformers/models/superpoint/modeling_superpoint.py +1 -0
  689. transformers/models/swiftformer/modeling_swiftformer.py +6 -0
  690. transformers/models/swin/modeling_swin.py +20 -12
  691. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  692. transformers/models/swin2sr/modeling_swin2sr.py +51 -33
  693. transformers/models/swinv2/modeling_swinv2.py +45 -33
  694. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  695. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  696. transformers/models/t5/configuration_t5.py +7 -1
  697. transformers/models/t5/modeling_t5.py +8 -7
  698. transformers/models/t5/tokenization_t5.py +4 -8
  699. transformers/models/t5gemma/modeling_t5gemma.py +6 -6
  700. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  701. transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
  702. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  703. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  704. transformers/models/table_transformer/modeling_table_transformer.py +5 -1
  705. transformers/models/tapas/modeling_tapas.py +3 -0
  706. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  707. transformers/models/textnet/modeling_textnet.py +11 -2
  708. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  709. transformers/models/timesfm/modeling_timesfm.py +14 -0
  710. transformers/models/timesfm/modular_timesfm.py +14 -0
  711. transformers/models/timesformer/modeling_timesformer.py +2 -0
  712. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  713. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  714. transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
  715. transformers/models/trocr/modeling_trocr.py +3 -2
  716. transformers/models/tvp/configuration_tvp.py +5 -1
  717. transformers/models/tvp/modeling_tvp.py +6 -4
  718. transformers/models/udop/configuration_udop.py +1 -0
  719. transformers/models/udop/modeling_udop.py +7 -7
  720. transformers/models/udop/tokenization_udop.py +5 -13
  721. transformers/models/umt5/configuration_umt5.py +2 -2
  722. transformers/models/umt5/modeling_umt5.py +7 -6
  723. transformers/models/unispeech/modeling_unispeech.py +4 -0
  724. transformers/models/unispeech/modular_unispeech.py +2 -0
  725. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  726. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  727. transformers/models/univnet/modeling_univnet.py +1 -0
  728. transformers/models/upernet/modeling_upernet.py +1 -0
  729. transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
  730. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  731. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  732. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  733. transformers/models/video_llava/modeling_video_llava.py +7 -3
  734. transformers/models/vilt/configuration_vilt.py +2 -2
  735. transformers/models/vilt/modeling_vilt.py +13 -0
  736. transformers/models/vipllava/modeling_vipllava.py +7 -3
  737. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  738. transformers/models/visual_bert/modeling_visual_bert.py +8 -0
  739. transformers/models/vitdet/modeling_vitdet.py +2 -0
  740. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  741. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  742. transformers/models/vitmatte/modeling_vitmatte.py +5 -0
  743. transformers/models/vitpose/configuration_vitpose.py +1 -1
  744. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  745. transformers/models/vits/modeling_vits.py +1 -0
  746. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  747. transformers/models/voxtral/modeling_voxtral.py +2 -2
  748. transformers/models/voxtral/modular_voxtral.py +2 -2
  749. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  750. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
  751. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
  752. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
  753. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  754. transformers/models/wavlm/modeling_wavlm.py +5 -0
  755. transformers/models/whisper/generation_whisper.py +1 -0
  756. transformers/models/whisper/modeling_whisper.py +11 -3
  757. transformers/models/whisper/tokenization_whisper.py +4 -15
  758. transformers/models/x_clip/modeling_x_clip.py +5 -0
  759. transformers/models/xcodec/modeling_xcodec.py +5 -0
  760. transformers/models/xglm/modeling_xglm.py +11 -0
  761. transformers/models/xglm/tokenization_xglm.py +4 -9
  762. transformers/models/xlm/modeling_xlm.py +18 -14
  763. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  764. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  765. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  766. transformers/models/xlnet/modeling_xlnet.py +3 -1
  767. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  768. transformers/models/xmod/modeling_xmod.py +3 -0
  769. transformers/models/yoso/modeling_yoso.py +10 -1
  770. transformers/models/zamba/modeling_zamba.py +4 -1
  771. transformers/models/zamba2/modeling_zamba2.py +7 -4
  772. transformers/models/zamba2/modular_zamba2.py +1 -1
  773. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  774. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  775. transformers/models/zoedepth/modeling_zoedepth.py +8 -0
  776. transformers/pipelines/__init__.py +11 -9
  777. transformers/pipelines/automatic_speech_recognition.py +20 -12
  778. transformers/pipelines/base.py +2 -10
  779. transformers/pipelines/document_question_answering.py +4 -2
  780. transformers/pipelines/question_answering.py +1 -1
  781. transformers/pipelines/text_generation.py +1 -1
  782. transformers/pipelines/text_to_audio.py +2 -2
  783. transformers/processing_utils.py +133 -50
  784. transformers/quantizers/auto.py +2 -4
  785. transformers/quantizers/base.py +44 -174
  786. transformers/quantizers/quantizer_aqlm.py +2 -23
  787. transformers/quantizers/quantizer_auto_round.py +2 -12
  788. transformers/quantizers/quantizer_awq.py +20 -89
  789. transformers/quantizers/quantizer_bitnet.py +4 -14
  790. transformers/quantizers/quantizer_bnb_4bit.py +18 -155
  791. transformers/quantizers/quantizer_bnb_8bit.py +24 -110
  792. transformers/quantizers/quantizer_compressed_tensors.py +2 -9
  793. transformers/quantizers/quantizer_eetq.py +16 -74
  794. transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
  795. transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
  796. transformers/quantizers/quantizer_fp_quant.py +52 -82
  797. transformers/quantizers/quantizer_gptq.py +8 -28
  798. transformers/quantizers/quantizer_higgs.py +42 -60
  799. transformers/quantizers/quantizer_hqq.py +144 -153
  800. transformers/quantizers/quantizer_mxfp4.py +14 -194
  801. transformers/quantizers/quantizer_quanto.py +35 -79
  802. transformers/quantizers/quantizer_quark.py +36 -17
  803. transformers/quantizers/quantizer_spqr.py +4 -12
  804. transformers/quantizers/quantizer_torchao.py +50 -325
  805. transformers/quantizers/quantizer_vptq.py +4 -27
  806. transformers/quantizers/quantizers_utils.py +20 -0
  807. transformers/testing_utils.py +324 -47
  808. transformers/tokenization_mistral_common.py +7 -2
  809. transformers/tokenization_utils_base.py +116 -224
  810. transformers/tokenization_utils_tokenizers.py +190 -106
  811. transformers/trainer.py +51 -32
  812. transformers/trainer_callback.py +8 -0
  813. transformers/trainer_jit_checkpoint.py +126 -0
  814. transformers/trainer_seq2seq.py +4 -0
  815. transformers/trainer_utils.py +1 -1
  816. transformers/training_args.py +74 -38
  817. transformers/utils/__init__.py +7 -4
  818. transformers/utils/attention_visualizer.py +4 -4
  819. transformers/utils/auto_docstring.py +35 -25
  820. transformers/utils/generic.py +47 -1
  821. transformers/utils/hub.py +5 -15
  822. transformers/utils/import_utils.py +112 -25
  823. transformers/utils/kernel_config.py +74 -19
  824. transformers/utils/loading_report.py +19 -10
  825. transformers/utils/quantization_config.py +78 -245
  826. transformers/video_processing_utils.py +17 -14
  827. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
  828. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
  829. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
  830. transformers/kernels/__init__.py +0 -0
  831. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  832. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  833. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  834. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
  835. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,186 @@
1
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
2
+ # This file was automatically generated from src/transformers/models/lasr/modular_lasr.py.
3
+ # Do NOT edit this file manually as any edits will be overwritten by the generation of
4
+ # the file from the modular. If any change should be done, please apply the change to the
5
+ # modular_lasr.py file directly. One of our CI enforces this.
6
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
7
+ # coding=utf-8
8
+ # Copyright 2025 The HuggingFace Inc. team and Google LLC. All rights reserved.
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing, software
17
+ # distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+
22
+ import itertools
23
+ import re
24
+ from typing import Optional, Union
25
+
26
+ from tokenizers import Tokenizer, decoders, pre_tokenizers, processors
27
+ from tokenizers.models import Unigram
28
+
29
+ from ...tokenization_utils_tokenizers import TokenizersBackend
30
+
31
+
32
+ VOCAB_FILES_NAMES = {"vocab_file": "spiece.model", "tokenizer_file": "tokenizer.json"}
33
+
34
+
35
+ class LasrTokenizer(TokenizersBackend):
36
+ """
37
+ Construct a LASR tokenizer (backed by HuggingFace's *tokenizers* library). Based on
38
+ [Unigram](https://huggingface.co/docs/tokenizers/python/latest/components.html?highlight=unigram#models).
39
+
40
+ This tokenizer inherits from [`TokenizersBackend`] which contains most of the main methods. Users should
41
+ refer to this superclass for more information regarding those methods.
42
+
43
+ Args:
44
+ vocab_file (`str`, *optional*):
45
+ [SentencePiece](https://github.com/google/sentencepiece) file (generally has a *.spm* extension) that
46
+ contains the vocabulary necessary to instantiate a tokenizer.
47
+ eos_token (`str`, *optional*, defaults to `"</s>"`):
48
+ The end of sequence token.
49
+
50
+ <Tip>
51
+
52
+ When building a sequence using special tokens, this is not the token that is used for the end of sequence.
53
+ The token used is the `sep_token`.
54
+
55
+ </Tip>
56
+
57
+ unk_token (`str`, *optional*, defaults to `"<unk>"`):
58
+ The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
59
+ token instead.
60
+ pad_token (`str`, *optional*, defaults to `"<pad>"`):
61
+ The token used for padding, for example when batching sequences of different lengths.
62
+ extra_ids (`int`, *optional*, defaults to 100):
63
+ Number of extra ids added to the vocabulary for use as sentinels. These tokens are accessible as
64
+ "<extra_id_{%d}>" where "{%d}" is a number between 0 and extra_ids-1. These tokens can be retrieved by
65
+ calling the `get_sentinel_tokens` method, and their token ids can be retrieved by calling the `get_sentinel_token_ids` method.
66
+ additional_special_tokens (`list[str]`, *optional*):
67
+ Additional special tokens used by the tokenizer.
68
+ vocab (`str`, `dict` or `list`, *optional*):
69
+ Custom vocabulary dict. If not provided, a minimal vocabulary is created using the special tokens.
70
+ """
71
+
72
+ vocab_files_names = VOCAB_FILES_NAMES
73
+ model_input_names = ["input_ids", "attention_mask"]
74
+ model = Unigram
75
+
76
+ def __init__(
77
+ self,
78
+ eos_token="</s>",
79
+ unk_token="<unk>",
80
+ pad_token="<pad>",
81
+ extra_ids=100,
82
+ additional_special_tokens=None,
83
+ vocab=None,
84
+ vocab_file=None,
85
+ **kwargs,
86
+ ):
87
+ self._extra_ids = extra_ids
88
+
89
+ # Handle extra_ids and additional_special_tokens
90
+ if additional_special_tokens is not None:
91
+ extra_tokens = [x for x in additional_special_tokens if "<extra_id_" in str(x)]
92
+ if len(extra_tokens) < 1:
93
+ additional_special_tokens += [f"<extra_id_{i}>" for i in range(extra_ids)]
94
+ elif extra_ids > 0 and extra_ids != len(extra_tokens):
95
+ raise ValueError(
96
+ f"Both extra_ids ({extra_ids}) and additional_special_tokens ({additional_special_tokens}) are"
97
+ " provided to LasrTokenizer. In this case the additional_special_tokens must include the extra_ids"
98
+ " tokens"
99
+ )
100
+ else:
101
+ extra_tokens = [f"<extra_id_{i}>" for i in range(extra_ids)]
102
+ additional_special_tokens = extra_tokens
103
+
104
+ # LASR vocab structure: <pad>=0, </s>=1, <unk>=2, then regular vocab, then extra_ids in reverse
105
+ if vocab is not None:
106
+ self._vocab_scores = vocab
107
+ else:
108
+ self._vocab_scores = [
109
+ (str(pad_token), 0.0),
110
+ (str(eos_token), 0.0),
111
+ (str(unk_token), 0.0),
112
+ ("▁", -2.0), # Space token
113
+ ]
114
+ for i in range(extra_ids - 1, -1, -1):
115
+ self._vocab_scores.append((f"<extra_id_{i}>", 0.0))
116
+ self._tokenizer = Tokenizer(
117
+ Unigram(
118
+ self._vocab_scores,
119
+ unk_id=3,
120
+ byte_fallback=False,
121
+ )
122
+ )
123
+
124
+ self._tokenizer.normalizer = None
125
+
126
+ self._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
127
+ [
128
+ pre_tokenizers.WhitespaceSplit(),
129
+ pre_tokenizers.Metaspace(replacement="▁", prepend_scheme="always", split=True),
130
+ ]
131
+ )
132
+
133
+ self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme="always", split=True)
134
+
135
+ super().__init__(
136
+ eos_token=eos_token,
137
+ unk_token=unk_token,
138
+ pad_token=pad_token,
139
+ extra_ids=extra_ids,
140
+ additional_special_tokens=additional_special_tokens,
141
+ **kwargs,
142
+ )
143
+
144
+ self._tokenizer.post_processor = processors.TemplateProcessing(
145
+ single=["$A", "</s>"],
146
+ pair=["$A", "</s>", "$B", "</s>"],
147
+ special_tokens=[
148
+ ("</s>", self.eos_token_id),
149
+ ],
150
+ )
151
+
152
+ def get_sentinel_tokens(self):
153
+ """Get the list of sentinel tokens (extra_id tokens) from additional_special_tokens."""
154
+ return list(
155
+ set(filter(lambda x: bool(re.search(r"<extra_id_\d+>", x)), self.additional_special_tokens))
156
+ )
157
+
158
+ def get_sentinel_token_ids(self):
159
+ """Get the token IDs for sentinel tokens."""
160
+ return [self.convert_tokens_to_ids(token) for token in self.get_sentinel_tokens()]
161
+
162
+ def _decode(
163
+ self,
164
+ token_ids: Union[int, list[int]],
165
+ skip_special_tokens: bool = False,
166
+ clean_up_tokenization_spaces: Optional[bool] = None,
167
+ group_tokens: bool = True,
168
+ **kwargs,
169
+ ) -> str:
170
+ if isinstance(token_ids, int):
171
+ token_ids = [token_ids]
172
+ if group_tokens:
173
+ token_ids = [token_group[0] for token_group in itertools.groupby(token_ids)]
174
+
175
+ # for CTC we filter out the blank token, which is the pad token
176
+ token_ids = [token for token in token_ids if token != self.pad_token_id]
177
+
178
+ return super()._decode(
179
+ token_ids=token_ids,
180
+ skip_special_tokens=skip_special_tokens,
181
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
182
+ **kwargs,
183
+ )
184
+
185
+
186
+ __all__ = ["LasrTokenizer"]
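
The `_decode` override above applies the usual CTC post-processing before delegating to the backend: consecutive duplicate ids are collapsed with `itertools.groupby`, and the pad id (used as the CTC blank) is dropped. A minimal sketch of that collapse, using made-up ids and assuming the pad/blank id is 0:

```python
import itertools

# Hypothetical CTC output ids; 0 plays the role of the pad/blank token here.
token_ids = [0, 5, 5, 0, 0, 7, 7, 7, 0, 5]
pad_token_id = 0

# Collapse consecutive duplicates (groupby yields one (key, run) pair per run of equal ids) ...
collapsed = [group[0] for group in itertools.groupby(token_ids)]
# ... then drop the blank/pad id, mirroring the filtering done in `_decode`.
cleaned = [tok for tok in collapsed if tok != pad_token_id]
print(cleaned)  # [5, 7, 5]
```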
@@ -337,9 +337,9 @@ class LayoutLMEncoder(nn.Module):
337
337
  all_hidden_states = all_hidden_states + (hidden_states,)
338
338
 
339
339
  layer_outputs = layer_module(
340
- hidden_states=hidden_states,
341
- attention_mask=attention_mask,
342
- output_attentions=output_attentions,
340
+ hidden_states,
341
+ attention_mask,
342
+ output_attentions,
343
343
  **kwargs,
344
344
  )
345
345
 
@@ -431,6 +431,8 @@ class LayoutLMPreTrainedModel(PreTrainedModel):
431
431
  super()._init_weights(module)
432
432
  if isinstance(module, LayoutLMLMPredictionHead):
433
433
  init.zeros_(module.bias)
434
+ elif isinstance(module, LayoutLMEmbeddings):
435
+ init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
434
436
 
435
437
 
436
438
  @auto_docstring
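
The new `_init_weights` branch above fills the `position_ids` buffer with consecutive indices at initialization time instead of relying on values stored in the checkpoint. A minimal sketch of that pattern, with a made-up buffer size:

```python
import torch
from torch import nn

class ToyEmbeddings(nn.Module):
    """Toy module with a position_ids buffer like the embeddings above (size is illustrative)."""
    def __init__(self, max_position_embeddings: int = 512):
        super().__init__()
        self.register_buffer("position_ids", torch.empty(1, max_position_embeddings, dtype=torch.long))

module = ToyEmbeddings()
with torch.no_grad():
    # Same pattern as init.copy_(module.position_ids, torch.arange(...).expand((1, -1)))
    module.position_ids.copy_(torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
print(module.position_ids[0, :5])  # tensor([0, 1, 2, 3, 4])
```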
@@ -465,6 +467,7 @@ class LayoutLMModel(LayoutLMPreTrainedModel):
465
467
  output_attentions: Optional[bool] = None,
466
468
  output_hidden_states: Optional[bool] = None,
467
469
  return_dict: Optional[bool] = None,
470
+ **kwargs,
468
471
  ) -> Union[tuple, BaseModelOutputWithPooling]:
469
472
  r"""
470
473
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -600,6 +603,7 @@ class LayoutLMForMaskedLM(LayoutLMPreTrainedModel):
600
603
  output_attentions: Optional[bool] = None,
601
604
  output_hidden_states: Optional[bool] = None,
602
605
  return_dict: Optional[bool] = None,
606
+ **kwargs,
603
607
  ) -> Union[tuple, MaskedLMOutput]:
604
608
  r"""
605
609
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -716,6 +720,7 @@ class LayoutLMForSequenceClassification(LayoutLMPreTrainedModel):
716
720
  output_attentions: Optional[bool] = None,
717
721
  output_hidden_states: Optional[bool] = None,
718
722
  return_dict: Optional[bool] = None,
723
+ **kwargs,
719
724
  ) -> Union[tuple, SequenceClassifierOutput]:
720
725
  r"""
721
726
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -850,6 +855,7 @@ class LayoutLMForTokenClassification(LayoutLMPreTrainedModel):
850
855
  output_attentions: Optional[bool] = None,
851
856
  output_hidden_states: Optional[bool] = None,
852
857
  return_dict: Optional[bool] = None,
858
+ **kwargs,
853
859
  ) -> Union[tuple, TokenClassifierOutput]:
854
860
  r"""
855
861
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -963,6 +969,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
963
969
  output_attentions: Optional[bool] = None,
964
970
  output_hidden_states: Optional[bool] = None,
965
971
  return_dict: Optional[bool] = None,
972
+ **kwargs,
966
973
  ) -> Union[tuple, QuestionAnsweringModelOutput]:
967
974
  r"""
968
975
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -101,7 +101,6 @@ class LayoutLMv2ImageProcessorFast(BaseImageProcessorFast):
101
101
  processed_images_grouped[shape] = stacked_images
102
102
 
103
103
  processed_images = reorder_images(processed_images_grouped, grouped_images_index)
104
- processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
105
104
 
106
105
  data = BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
107
106
 
@@ -467,9 +467,21 @@ class LayoutLMv2PreTrainedModel(PreTrainedModel):
467
467
  if self.config.fast_qkv:
468
468
  init.zeros_(module.q_bias)
469
469
  init.zeros_(module.v_bias)
470
+ elif isinstance(module, LayoutLMv2Embeddings):
471
+ init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
472
+ elif isinstance(module, LayoutLMv2VisualBackbone):
473
+ num_channels = len(module.cfg.MODEL.PIXEL_MEAN)
474
+ init.copy_(module.pixel_mean, torch.Tensor(module.cfg.MODEL.PIXEL_MEAN).view(num_channels, 1, 1))
475
+ init.copy_(module.pixel_std, torch.Tensor(module.cfg.MODEL.PIXEL_STD).view(num_channels, 1, 1))
470
476
  elif isinstance(module, LayoutLMv2Model):
471
477
  if hasattr(module, "visual_segment_embedding"):
472
478
  init.normal_(module.visual_segment_embedding, mean=0.0, std=self.config.initializer_range)
479
+ # We check the existence of each one since detectron2 seems to do weird things
480
+ elif isinstance(module, detectron2.layers.FrozenBatchNorm2d):
481
+ init.ones_(module.weight)
482
+ init.zeros_(module.bias)
483
+ init.zeros_(module.running_mean)
484
+ init.constant_(module.running_var, 1.0 - module.eps)
473
485
 
474
486
 
475
487
  def my_convert_sync_batchnorm(module, process_group=None):
@@ -701,6 +713,7 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
701
713
  output_attentions: Optional[bool] = None,
702
714
  output_hidden_states: Optional[bool] = None,
703
715
  return_dict: Optional[bool] = None,
716
+ **kwargs,
704
717
  ) -> Union[tuple, BaseModelOutputWithPooling]:
705
718
  r"""
706
719
  bbox (`torch.LongTensor` of shape `((batch_size, sequence_length), 4)`, *optional*):
@@ -858,6 +871,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
858
871
  output_attentions: Optional[bool] = None,
859
872
  output_hidden_states: Optional[bool] = None,
860
873
  return_dict: Optional[bool] = None,
874
+ **kwargs,
861
875
  ) -> Union[tuple, SequenceClassifierOutput]:
862
876
  r"""
863
877
  input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
@@ -1061,6 +1075,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
1061
1075
  output_attentions: Optional[bool] = None,
1062
1076
  output_hidden_states: Optional[bool] = None,
1063
1077
  return_dict: Optional[bool] = None,
1078
+ **kwargs,
1064
1079
  ) -> Union[tuple, TokenClassifierOutput]:
1065
1080
  r"""
1066
1081
  input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
@@ -1212,6 +1227,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
1212
1227
  output_attentions: Optional[bool] = None,
1213
1228
  output_hidden_states: Optional[bool] = None,
1214
1229
  return_dict: Optional[bool] = None,
1230
+ **kwargs,
1215
1231
  ) -> Union[tuple, QuestionAnsweringModelOutput]:
1216
1232
  r"""
1217
1233
  input_ids (`torch.LongTensor` of shape `batch_size, sequence_length`):
@@ -159,22 +159,12 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
159
159
  """
160
160
 
161
161
  vocab_files_names = VOCAB_FILES_NAMES
162
- slow_tokenizer_class = None
163
-
164
- @staticmethod
165
- def _load_vocab_from_file(vocab_file):
166
- """Load vocab from a BERT-style vocab file (one token per line)."""
167
- vocab = {}
168
- with open(vocab_file, "r", encoding="utf-8") as reader:
169
- for index, line in enumerate(reader):
170
- token = line.rstrip("\n")
171
- vocab[token] = index
172
- return vocab
162
+ model = models.WordPiece
163
+ model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
173
164
 
174
165
  def __init__(
175
166
  self,
176
- vocab=None,
177
- vocab_file=None,
167
+ vocab: Optional[Union[str, dict[str, int]]] = None,
178
168
  do_lower_case=True,
179
169
  unk_token="[UNK]",
180
170
  sep_token="[SEP]",
@@ -190,21 +180,12 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
190
180
  strip_accents=None,
191
181
  **kwargs,
192
182
  ):
193
- self.vocab_file = vocab_file
194
183
  self.do_lower_case = do_lower_case
195
184
 
196
- # Build vocab for WordPiece
197
185
  if vocab is not None:
198
- if isinstance(vocab, dict):
199
- _vocab = vocab
200
- else:
201
- raise ValueError("vocab must be a dict mapping tokens to ids")
202
- elif vocab_file is not None:
203
- # Load vocab from file (BERT format: one token per line)
204
- _vocab = self._load_vocab_from_file(vocab_file)
186
+ self._vocab = vocab
205
187
  else:
206
- # Initialize with at least the special tokens for WordPiece
207
- _vocab = {
188
+ self._vocab = {
208
189
  str(pad_token): 0,
209
190
  str(unk_token): 1,
210
191
  str(cls_token): 2,
@@ -212,10 +193,7 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
212
193
  str(mask_token): 4,
213
194
  }
214
195
 
215
- # Initialize WordPiece tokenizer
216
- self._tokenizer = Tokenizer(models.WordPiece(vocab=_vocab, unk_token=str(unk_token)))
217
-
218
- # Set normalizer
196
+ self._tokenizer = Tokenizer(models.WordPiece(vocab=self._vocab, unk_token=str(unk_token)))
219
197
  self._tokenizer.normalizer = normalizers.BertNormalizer(
220
198
  clean_text=True,
221
199
  handle_chinese_chars=tokenize_chinese_chars,
@@ -223,27 +201,9 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
223
201
  lowercase=do_lower_case,
224
202
  )
225
203
 
226
- # Set pre_tokenizer
227
204
  self._tokenizer.pre_tokenizer = pre_tokenizers.BertPreTokenizer()
228
-
229
- # Set decoder
230
205
  self._tokenizer.decoder = decoders.WordPiece(prefix="##")
231
-
232
- # Set post_processor (will be set after super().__init__ when we have token IDs)
233
- # Temporarily set to None, will be configured after parent init
234
- self._tokenizer.post_processor = None
235
-
236
- tokenizer_object = self._tokenizer
237
-
238
- # additional properties
239
- self.cls_token_box = cls_token_box
240
- self.sep_token_box = sep_token_box
241
- self.pad_token_box = pad_token_box
242
- self.pad_token_label = pad_token_label
243
- self.only_label_first_subword = only_label_first_subword
244
-
245
206
  super().__init__(
246
- tokenizer_object=tokenizer_object,
247
207
  do_lower_case=do_lower_case,
248
208
  unk_token=unk_token,
249
209
  sep_token=sep_token,
@@ -260,6 +220,11 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
260
220
  **kwargs,
261
221
  )
262
222
 
223
+ self.cls_token_box = cls_token_box
224
+ self.sep_token_box = sep_token_box
225
+ self.pad_token_box = pad_token_box
226
+ self.pad_token_label = pad_token_label
227
+
263
228
  # Now set post_processor with actual token IDs
264
229
  cls = str(self.cls_token)
265
230
  sep = str(self.sep_token)
@@ -275,13 +240,6 @@ class LayoutLMv2Tokenizer(TokenizersBackend):
275
240
  ],
276
241
  )
277
242
 
278
- # additional properties
279
- self.cls_token_box = cls_token_box
280
- self.sep_token_box = sep_token_box
281
- self.pad_token_box = pad_token_box
282
- self.pad_token_label = pad_token_label
283
- self.only_label_first_subword = only_label_first_subword
284
-
285
243
  @add_end_docstrings(LAYOUTLMV2_ENCODE_KWARGS_DOCSTRING, LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
286
244
  def __call__(
287
245
  self,
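
The rewritten constructor above wires the `tokenizers` WordPiece backend directly (BertNormalizer, BertPreTokenizer, WordPiece decoder) instead of going through a separate slow-tokenizer path. A minimal sketch of that wiring with a toy vocabulary (ids below are illustrative, not from any checkpoint):

```python
from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers

# Toy vocab for illustration only; real checkpoints supply the full WordPiece vocab.
vocab = {"[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "hello": 5, "!": 6}

tok = Tokenizer(models.WordPiece(vocab=vocab, unk_token="[UNK]"))
tok.normalizer = normalizers.BertNormalizer(
    clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True
)
tok.pre_tokenizer = pre_tokenizers.BertPreTokenizer()
tok.decoder = decoders.WordPiece(prefix="##")

print(tok.encode("Hello!").tokens)  # ['hello', '!'] with this toy vocab
```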
@@ -115,7 +115,6 @@ class LayoutLMv3ImageProcessorFast(BaseImageProcessorFast):
115
115
  processed_images_grouped[shape] = stacked_images
116
116
 
117
117
  processed_images = reorder_images(processed_images_grouped, grouped_images_index)
118
- processed_images = torch.stack(processed_images, dim=0) if return_tensors else processed_images
119
118
 
120
119
  data = BatchFeature(data={"pixel_values": processed_images}, tensor_type=return_tensors)
121
120
 
@@ -212,6 +212,10 @@ class LayoutLMv3PreTrainedModel(PreTrainedModel):
212
212
  if self.config.visual_embed:
213
213
  init.zeros_(module.cls_token)
214
214
  init.zeros_(module.pos_embed)
215
+ if hasattr(module, "visual_bbox"):
216
+ init.copy_(module.visual_bbox, module.create_visual_bbox(image_size=(module.size, module.size)))
217
+ elif isinstance(module, LayoutLMv3TextEmbeddings):
218
+ init.copy_(module.position_ids, torch.arange(module.position_ids.shape[-1]).expand((1, -1)))
215
219
 
216
220
 
217
221
  class LayoutLMv3SelfAttention(nn.Module):
@@ -576,16 +580,18 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
576
580
  # when the input_size is larger in fine-tuning, we will interpolate the position embeddings in forward
577
581
  self.patch_embed = LayoutLMv3PatchEmbeddings(config)
578
582
 
579
- size = int(config.input_size / config.patch_size)
583
+ self.size = int(config.input_size / config.patch_size)
580
584
  self.cls_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size))
581
- self.pos_embed = nn.Parameter(torch.zeros(1, size * size + 1, config.hidden_size))
585
+ self.pos_embed = nn.Parameter(torch.zeros(1, self.size * self.size + 1, config.hidden_size))
582
586
  self.pos_drop = nn.Dropout(p=0.0)
583
587
 
584
588
  self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
585
589
  self.dropout = nn.Dropout(config.hidden_dropout_prob)
586
590
 
587
591
  if self.config.has_relative_attention_bias or self.config.has_spatial_attention_bias:
588
- self.init_visual_bbox(image_size=(size, size))
592
+ self.register_buffer(
593
+ "visual_bbox", self.create_visual_bbox(image_size=(self.size, self.size)), persistent=False
594
+ )
589
595
 
590
596
  self.norm = nn.LayerNorm(config.hidden_size, eps=1e-6)
591
597
 
@@ -599,7 +605,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
599
605
  def set_input_embeddings(self, value):
600
606
  self.embeddings.word_embeddings = value
601
607
 
602
- def init_visual_bbox(self, image_size=(14, 14), max_len=1000):
608
+ def create_visual_bbox(self, image_size=(14, 14), max_len=1000):
603
609
  """
604
610
  Create the bounding boxes for the visual (patch) tokens.
605
611
  """
@@ -620,7 +626,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
620
626
  ).view(-1, 4)
621
627
 
622
628
  cls_token_box = torch.tensor([[0 + 1, 0 + 1, max_len - 1, max_len - 1]])
623
- self.visual_bbox = torch.cat([cls_token_box, visual_bbox], dim=0)
629
+ return torch.cat([cls_token_box, visual_bbox], dim=0)
624
630
 
625
631
  def calculate_visual_bbox(self, device, dtype, batch_size):
626
632
  visual_bbox = self.visual_bbox.repeat(batch_size, 1, 1)
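
With this change, `visual_bbox` is produced by `create_visual_bbox` and registered as a non-persistent buffer, so it follows the module across devices without being written to (or expected in) checkpoints. A minimal sketch of that pattern, with placeholder box values rather than the real grid computation:

```python
import torch
from torch import nn

class VisualBoxHolder(nn.Module):
    """Toy module mirroring the non-persistent buffer pattern above; box values are placeholders."""
    def __init__(self, size: int = 14, max_len: int = 1000):
        super().__init__()
        self.size = size
        self.register_buffer("visual_bbox", self.create_visual_bbox((size, size), max_len), persistent=False)

    def create_visual_bbox(self, image_size=(14, 14), max_len=1000):
        # One 4-coordinate box per visual patch plus one for the CLS token (values faked here).
        num_patches = image_size[0] * image_size[1]
        return torch.zeros(num_patches + 1, 4, dtype=torch.long)

holder = VisualBoxHolder()
print(holder.visual_bbox.shape)               # torch.Size([197, 4])
print("visual_bbox" in holder.state_dict())   # False: persistent=False keeps it out of checkpoints
```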
@@ -657,6 +663,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
657
663
  output_attentions: Optional[bool] = None,
658
664
  output_hidden_states: Optional[bool] = None,
659
665
  return_dict: Optional[bool] = None,
666
+ **kwargs,
660
667
  ) -> Union[tuple, BaseModelOutput]:
661
668
  r"""
662
669
  input_ids (`torch.LongTensor` of shape `(batch_size, token_sequence_length)`):
@@ -883,6 +890,12 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
883
890
 
884
891
  self.post_init()
885
892
 
893
+ def get_input_embeddings(self):
894
+ return self.layoutlmv3.get_input_embeddings()
895
+
896
+ def set_input_embeddings(self, value):
897
+ self.layoutlmv3.set_input_embeddings(value)
898
+
886
899
  @auto_docstring
887
900
  def forward(
888
901
  self,
@@ -897,6 +910,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
897
910
  output_hidden_states: Optional[bool] = None,
898
911
  return_dict: Optional[bool] = None,
899
912
  pixel_values: Optional[torch.LongTensor] = None,
913
+ **kwargs,
900
914
  ) -> Union[tuple, TokenClassifierOutput]:
901
915
  r"""
902
916
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -982,6 +996,12 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
982
996
 
983
997
  self.post_init()
984
998
 
999
+ def get_input_embeddings(self):
1000
+ return self.layoutlmv3.get_input_embeddings()
1001
+
1002
+ def set_input_embeddings(self, value):
1003
+ self.layoutlmv3.set_input_embeddings(value)
1004
+
985
1005
  @auto_docstring
986
1006
  def forward(
987
1007
  self,
@@ -997,6 +1017,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
997
1017
  return_dict: Optional[bool] = None,
998
1018
  bbox: Optional[torch.LongTensor] = None,
999
1019
  pixel_values: Optional[torch.LongTensor] = None,
1020
+ **kwargs,
1000
1021
  ) -> Union[tuple, QuestionAnsweringModelOutput]:
1001
1022
  r"""
1002
1023
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -1101,6 +1122,12 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
1101
1122
 
1102
1123
  self.post_init()
1103
1124
 
1125
+ def get_input_embeddings(self):
1126
+ return self.layoutlmv3.get_input_embeddings()
1127
+
1128
+ def set_input_embeddings(self, value):
1129
+ self.layoutlmv3.set_input_embeddings(value)
1130
+
1104
1131
  @auto_docstring
1105
1132
  def forward(
1106
1133
  self,
@@ -1115,6 +1142,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
1115
1142
  return_dict: Optional[bool] = None,
1116
1143
  bbox: Optional[torch.LongTensor] = None,
1117
1144
  pixel_values: Optional[torch.LongTensor] = None,
1145
+ **kwargs,
1118
1146
  ) -> Union[tuple, SequenceClassifierOutput]:
1119
1147
  r"""
1120
1148
  bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
@@ -14,7 +14,6 @@
14
14
  # limitations under the License.
15
15
  """Tokenization class for LayoutLMv3. Same as LayoutLMv2, but RoBERTa-like BPE tokenization instead of WordPiece."""
16
16
 
17
- import json
18
17
  from typing import Optional, Union
19
18
 
20
19
  from tokenizers import Tokenizer, decoders, models, pre_tokenizers, processors
@@ -159,15 +158,16 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
159
158
  CrossEntropyLoss.
160
159
  only_label_first_subword (`bool`, *optional*, defaults to `True`):
161
160
  Whether or not to only label the first subword, in case word labels are provided.
162
- vocab (`dict`, *optional*):
163
- Custom vocabulary dictionary. If not provided, vocabulary is loaded from vocab_file when using from_pretrained.
164
- merges (`list`, *optional*):
165
- Custom merges list. If not provided, merges are loaded from merges_file when using from_pretrained.
161
+ vocab (`str` or `dict[str, int]`, *optional*):
162
+ Custom vocabulary dictionary. If not provided, vocabulary is loaded from `vocab_file` when using
163
+ `from_pretrained`.
164
+ merges (`str` or `list[str]`, *optional*):
165
+ Custom merges list. If not provided, merges are loaded from `merges_file` when using `from_pretrained`.
166
166
  """
167
167
 
168
168
  vocab_files_names = VOCAB_FILES_NAMES
169
169
  model_input_names = ["input_ids", "attention_mask", "bbox"]
170
- slow_tokenizer_class = None
170
+ model = models.BPE
171
171
 
172
172
  def __init__(
173
173
  self,
@@ -185,69 +185,26 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
185
185
  pad_token_box=[0, 0, 0, 0],
186
186
  pad_token_label=-100,
187
187
  only_label_first_subword=True,
188
- vocab: Optional[dict] = None,
189
- merges: Optional[list] = None,
190
- vocab_file: Optional[str] = None,
191
- merges_file: Optional[str] = None,
188
+ vocab: Optional[Union[str, dict[str, int]]] = None,
189
+ merges: Optional[Union[str, list[str]]] = None,
192
190
  **kwargs,
193
191
  ):
194
192
  self.add_prefix_space = add_prefix_space
195
-
196
- # Build vocab and merges for BPE
197
- # Priority: 1) vocab/merges dicts/lists, 2) vocab_file/merges_file paths, 3) empty
198
- if vocab is not None:
199
- _vocab = vocab
200
- elif vocab_file is not None:
201
- with open(vocab_file, encoding="utf-8") as f:
202
- _vocab = json.load(f)
203
- else:
204
- _vocab = {}
205
-
206
- if merges is not None:
207
- _merges = merges
208
- elif merges_file is not None:
209
- _merges = []
210
- with open(merges_file, encoding="utf-8") as f:
211
- for line in f:
212
- line = line.strip()
213
- if line and not line.startswith("#"):
214
- _merges.append(tuple(line.split()))
215
- else:
216
- _merges = []
217
-
218
- # Initialize BPE tokenizer
193
+ self._vocab = vocab or {}
194
+ self._merges = merges or []
219
195
  self._tokenizer = Tokenizer(
220
196
  models.BPE(
221
- vocab=_vocab,
222
- merges=_merges,
197
+ vocab=self._vocab,
198
+ merges=self._merges,
223
199
  dropout=None,
224
200
  continuing_subword_prefix="",
225
201
  end_of_word_suffix="",
226
202
  fuse_unk=False,
227
203
  )
228
204
  )
229
-
230
- # Set pre_tokenizer (ByteLevel)
231
205
  self._tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=add_prefix_space)
232
-
233
- # Set decoder
234
206
  self._tokenizer.decoder = decoders.ByteLevel()
235
-
236
- # Set post_processor (will be set after super().__init__ when we have token IDs)
237
- # Temporarily set to None, will be configured after parent init
238
- self._tokenizer.post_processor = None
239
-
240
- tokenizer_object = self._tokenizer
241
-
242
- # additional properties
243
- self.cls_token_box = cls_token_box
244
- self.sep_token_box = sep_token_box
245
- self.pad_token_box = pad_token_box
246
- self.pad_token_label = pad_token_label
247
- self.only_label_first_subword = only_label_first_subword
248
-
249
207
  super().__init__(
250
- tokenizer_object=tokenizer_object,
251
208
  errors=errors,
252
209
  bos_token=bos_token,
253
210
  eos_token=eos_token,
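
The simplified constructor above builds the ByteLevel BPE backend inline from `vocab`/`merges`. A minimal sketch of the same wiring with a toy vocabulary and merge list (values are made up; real ones come from the checkpoint files):

```python
from tokenizers import Tokenizer, decoders, models, pre_tokenizers

# Toy vocab/merges for illustration only.
vocab = {"Ġ": 0, "h": 1, "e": 2, "l": 3, "o": 4, "he": 5, "ll": 6, "hell": 7}
merges = [("h", "e"), ("l", "l"), ("he", "ll")]

tok = Tokenizer(
    models.BPE(vocab=vocab, merges=merges, dropout=None,
               continuing_subword_prefix="", end_of_word_suffix="", fuse_unk=False)
)
tok.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=True)
tok.decoder = decoders.ByteLevel()

enc = tok.encode("hello")
print(enc.tokens)           # ['Ġ', 'hell', 'o'] with these toy merges
print(tok.decode(enc.ids))  # ' hello' (the ByteLevel decoder maps 'Ġ' back to a space)
```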
@@ -277,18 +234,12 @@ class LayoutLMv3Tokenizer(TokenizersBackend):
277
234
  add_prefix_space=add_prefix_space,
278
235
  trim_offsets=True,
279
236
  )
280
-
281
- # additional properties
282
237
  self.cls_token_box = cls_token_box
283
238
  self.sep_token_box = sep_token_box
284
239
  self.pad_token_box = pad_token_box
285
240
  self.pad_token_label = pad_token_label
286
241
  self.only_label_first_subword = only_label_first_subword
287
242
 
288
- # Call _post_init for tokenizers created directly (not from_pretrained)
289
- # For from_pretrained, this will be called again after loading the tokenizer from file
290
- self._post_init()
291
-
292
243
  @add_end_docstrings(LAYOUTLMV3_ENCODE_KWARGS_DOCSTRING, LAYOUTLMV3_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
293
244
  def __call__(
294
245
  self,