transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0

transformers/models/unispeech_sat/modeling_unispeech_sat.py

@@ -1006,6 +1006,7 @@ class UniSpeechSatModel(UniSpeechSatPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, UniSpeechSatBaseModelOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -1120,6 +1121,7 @@ class UniSpeechSatForPreTraining(UniSpeechSatPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, UniSpeechSatForPreTrainingOutput]:
         r"""
         Example:

@@ -1251,6 +1253,7 @@ class UniSpeechSatForCTC(UniSpeechSatPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):

@@ -1362,6 +1365,7 @@ class UniSpeechSatForSequenceClassification(UniSpeechSatPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -1465,6 +1469,7 @@ class UniSpeechSatForAudioFrameClassification(UniSpeechSatPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -1636,6 +1641,7 @@ class UniSpeechSatForXVector(UniSpeechSatPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, XVectorOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
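
The hunks above, and nearly all of those below, make one mechanical change: each forward signature gains a trailing `**kwargs`. A minimal sketch of what that enables, with illustrative names only (the real call sites thread attention and output kwargs through generic wrappers):

    from typing import Optional

    class WithoutKwargs:
        def forward(self, input_values, return_dict: Optional[bool] = None):
            return input_values

    class WithKwargs:
        def forward(self, input_values, return_dict: Optional[bool] = None, **kwargs):
            # Extra keyword arguments are absorbed (or forwarded) instead of
            # raising TypeError at generic call sites.
            return input_values

    WithKwargs().forward([0.0], output_attentions=False)  # accepted
    # WithoutKwargs().forward([0.0], output_attentions=False)  # TypeError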

transformers/models/unispeech_sat/modular_unispeech_sat.py

@@ -255,6 +255,7 @@ class UniSpeechSatModel(UniSpeechSatPreTrainedModel, Wav2Vec2Model):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, UniSpeechSatBaseModelOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -369,6 +370,7 @@ class UniSpeechSatForPreTraining(UniSpeechSatPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, UniSpeechSatForPreTrainingOutput]:
         r"""
         Example:

transformers/models/univnet/modeling_univnet.py

@@ -476,6 +476,7 @@ class UnivNetModel(PreTrainedModel):
         padding_mask: Optional[torch.FloatTensor] = None,
         generator: Optional[torch.Generator] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.FloatTensor], UnivNetModelOutput]:
         r"""
         noise_sequence (`torch.FloatTensor`, *optional*):

transformers/models/upernet/modeling_upernet.py

@@ -301,6 +301,7 @@ class UperNetForSemanticSegmentation(UperNetPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SemanticSegmenterOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):

transformers/models/vaultgemma/modeling_vaultgemma.py

@@ -29,7 +29,7 @@ from ... import initialization as init
 from ...activations import ACT2FN
 from ...cache_utils import Cache, DynamicCache
 from ...generation import GenerationMixin
-from ...integrations import use_kernel_func_from_hub
+from ...integrations import use_kernel_func_from_hub, use_kernelized_func
 from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
 from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import GradientCheckpointingLayer

@@ -38,7 +38,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import check_model_inputs
+from ...utils.generic import check_model_inputs, maybe_autocast
 from .configuration_vaultgemma import VaultGemmaConfig


@@ -160,6 +160,7 @@ def eager_attention_forward(
     return attn_output, attn_weights


+@use_kernelized_func(apply_rotary_pos_emb)
 class VaultGemmaAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

@@ -186,7 +187,6 @@ class VaultGemmaAttention(nn.Module):
         self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
-        self.rotary_fn = apply_rotary_pos_emb
         self.attn_logit_softcapping = self.config.attn_logit_softcapping
         self.sliding_window = config.sliding_window if self.layer_type == "sliding_attention" else None
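
The two VaultGemma hunks above swap the per-instance assignment `self.rotary_fn = apply_rotary_pos_emb` for a class decorator. The decorator's internals are not shown in this diff; a plausible reading, sketched as an assumption rather than the library's actual code, is that it attaches a (possibly hub-kernelized) rotary function at class level so a faster kernel can be substituted in one place:

    # Hypothetical sketch; the real use_kernelized_func in transformers.integrations
    # may resolve a kernel from the hub and differs in detail.
    def use_kernelized_func(func):
        def decorate(cls):
            # Supplies what `self.rotary_fn = apply_rotary_pos_emb` used to set by hand.
            cls.rotary_fn = staticmethod(func)
            return cls
        return decorate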

@@ -336,7 +336,7 @@ class VaultGemmaRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
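
The rotary-embedding hunk above replaces `torch.autocast` with `maybe_autocast` from `transformers.utils.generic`, still disabling autocast to force float32 for the frequency computation. Only the call site is visible here; a minimal sketch with the same interface, assuming the helper degrades to a no-op when autocast cannot be entered for the device:

    import contextlib

    import torch

    # Assumed behavior, inferred from the call site above; the real helper may
    # handle more cases (e.g. meta devices or older torch versions).
    def maybe_autocast(device_type: str, enabled: bool = True):
        try:
            return torch.autocast(device_type=device_type, enabled=enabled)
        except RuntimeError:
            return contextlib.nullcontext()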

transformers/models/vilt/modeling_vilt.py

@@ -556,6 +556,7 @@ class ViltModel(ViltPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[BaseModelOutputWithPooling, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):

@@ -708,6 +709,7 @@ class ViltForMaskedLM(ViltPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[MaskedLMOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):

@@ -875,6 +877,7 @@ class ViltForQuestionAnswering(ViltPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[SequenceClassifierOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):

@@ -979,6 +982,7 @@ class ViltForImageAndTextRetrieval(ViltPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[SequenceClassifierOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):

@@ -1082,6 +1086,7 @@ class ViltForImagesAndTextClassification(ViltPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[ViltForImagesAndTextClassificationOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):

@@ -1210,6 +1215,7 @@ class ViltForTokenClassification(ViltPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[TokenClassifierOutput, tuple[torch.FloatTensor]]:
         r"""
         image_embeds (`torch.FloatTensor` of shape `(batch_size, num_patches, hidden_size)`, *optional*):

transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py

@@ -184,6 +184,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], CLIPOutput]:
         r"""
         return_loss (`bool`, *optional*):

transformers/models/visual_bert/modeling_visual_bert.py

@@ -550,6 +550,7 @@ class VisualBertModel(VisualBertPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPooling]:
         r"""
         visual_embeds (`torch.FloatTensor` of shape `(batch_size, visual_seq_length, visual_embedding_dim)`, *optional*):

@@ -735,6 +736,7 @@ class VisualBertForPreTraining(VisualBertPreTrainedModel):
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
         sentence_image_labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], VisualBertForPreTrainingOutput]:
         r"""
         visual_embeds (`torch.FloatTensor` of shape `(batch_size, visual_seq_length, visual_embedding_dim)`, *optional*):

@@ -877,6 +879,7 @@ class VisualBertForMultipleChoice(VisualBertPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], MultipleChoiceModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):

@@ -1063,6 +1066,7 @@ class VisualBertForQuestionAnswering(VisualBertPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], SequenceClassifierOutput]:
         r"""
         visual_embeds (`torch.FloatTensor` of shape `(batch_size, visual_seq_length, visual_embedding_dim)`, *optional*):

@@ -1199,6 +1203,7 @@ class VisualBertForVisualReasoning(VisualBertPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], SequenceClassifierOutput]:
         r"""
         visual_embeds (`torch.FloatTensor` of shape `(batch_size, visual_seq_length, visual_embedding_dim)`, *optional*):

@@ -1372,6 +1377,7 @@ class VisualBertForRegionToPhraseAlignment(VisualBertPreTrainedModel):
         return_dict: Optional[bool] = None,
         region_to_phrase_position: Optional[torch.LongTensor] = None,
         labels: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], SequenceClassifierOutput]:
         r"""
         visual_embeds (`torch.FloatTensor` of shape `(batch_size, visual_seq_length, visual_embedding_dim)`, *optional*):

transformers/models/vitdet/modeling_vitdet.py

@@ -630,6 +630,7 @@ class VitDetModel(VitDetPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutput]:
         r"""
         Examples:

@@ -706,6 +707,7 @@ class VitDetBackbone(VitDetPreTrainedModel, BackboneMixin):
         output_hidden_states: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> BackboneOutput:
         r"""
         Examples:

transformers/models/vitmatte/modeling_vitmatte.py

@@ -234,6 +234,7 @@ class VitMatteForImageMatting(VitMattePreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ):
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):

transformers/models/vits/modeling_vits.py

@@ -1275,6 +1275,7 @@ class VitsModel(VitsPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.FloatTensor] = None,
+        **kwargs,
     ) -> Union[tuple[Any], VitsModelOutput]:
         r"""
         speaker_id (`int`, *optional*):

transformers/models/vjepa2/modeling_vjepa2.py

@@ -1088,6 +1088,7 @@ class VJEPA2ForVideoClassification(VJEPA2PreTrainedModel):
         labels: Optional[torch.Tensor] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, ImageClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

transformers/models/wav2vec2/modeling_wav2vec2.py

@@ -1340,6 +1340,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Wav2Vec2BaseModelOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -1453,6 +1454,7 @@ class Wav2Vec2ForPreTraining(Wav2Vec2PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Wav2Vec2ForPreTrainingOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -1628,6 +1630,7 @@ class Wav2Vec2ForMaskedLM(Wav2Vec2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, MaskedLMOutput]:
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -1729,6 +1732,7 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):

@@ -1840,6 +1844,7 @@ class Wav2Vec2ForSequenceClassification(Wav2Vec2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -1943,6 +1948,7 @@ class Wav2Vec2ForAudioFrameClassification(Wav2Vec2PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -2114,6 +2120,7 @@ class Wav2Vec2ForXVector(Wav2Vec2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, XVectorOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py

@@ -994,6 +994,7 @@ class Wav2Vec2BertModel(Wav2Vec2BertPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Wav2Vec2BertBaseModelOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -1086,6 +1087,7 @@ class Wav2Vec2BertForCTC(Wav2Vec2BertPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):

@@ -1192,6 +1194,7 @@ class Wav2Vec2BertForSequenceClassification(Wav2Vec2BertPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

@@ -1282,6 +1285,7 @@ class Wav2Vec2BertForAudioFrameClassification(Wav2Vec2BertPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

@@ -1440,6 +1444,7 @@ class Wav2Vec2BertForXVector(Wav2Vec2BertPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, XVectorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py

@@ -702,6 +702,7 @@ class Wav2Vec2BertModel(Wav2Vec2Model, Wav2Vec2BertPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Wav2Vec2BertBaseModelOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -768,6 +769,7 @@ class Wav2Vec2BertForCTC(Wav2Vec2ConformerForCTC):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):

@@ -856,6 +858,7 @@ class Wav2Vec2BertForSequenceClassification(Wav2Vec2ForSequenceClassification):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

@@ -926,6 +929,7 @@ class Wav2Vec2BertForAudioFrameClassification(Wav2Vec2ConformerForAudioFrameClassification):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

@@ -987,6 +991,7 @@ class Wav2Vec2BertForXVector(Wav2Vec2ConformerForXVector):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, XVectorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py

@@ -1142,6 +1142,7 @@ class Wav2Vec2ConformerModel(Wav2Vec2ConformerPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Wav2Vec2ConformerBaseModelOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -1255,6 +1256,7 @@ class Wav2Vec2ConformerForPreTraining(Wav2Vec2ConformerPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Wav2Vec2ConformerForPreTrainingOutput]:
         r"""
         mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):

@@ -1459,6 +1461,7 @@ class Wav2Vec2ConformerForCTC(Wav2Vec2ConformerPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, CausalLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):

@@ -1570,6 +1573,7 @@ class Wav2Vec2ConformerForSequenceClassification(Wav2Vec2ConformerPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -1673,6 +1677,7 @@ class Wav2Vec2ConformerForAudioFrameClassification(Wav2Vec2ConformerPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):

@@ -1844,6 +1849,7 @@ class Wav2Vec2ConformerForXVector(Wav2Vec2ConformerPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         labels: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Union[tuple, XVectorOutput]:
         r"""
         input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
@@ -1047,6 +1047,7 @@ class WavLMModel(WavLMPreTrainedModel):
  output_attentions: Optional[bool] = None,
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
+ **kwargs,
  ) -> Union[tuple, WavLMBaseModelOutput]:
  r"""
  mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1180,6 +1181,7 @@ class WavLMForCTC(WavLMPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  labels: Optional[torch.Tensor] = None,
+ **kwargs,
  ) -> Union[tuple, CausalLMOutput]:
  r"""
  labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
@@ -1291,6 +1293,7 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  labels: Optional[torch.Tensor] = None,
+ **kwargs,
  ) -> Union[tuple, SequenceClassifierOutput]:
  r"""
  input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
@@ -1394,6 +1397,7 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
  output_attentions: Optional[bool] = None,
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
+ **kwargs,
  ) -> Union[tuple, TokenClassifierOutput]:
  r"""
  input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
@@ -1565,6 +1569,7 @@ class WavLMForXVector(WavLMPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  labels: Optional[torch.Tensor] = None,
+ **kwargs,
  ) -> Union[tuple, XVectorOutput]:
  r"""
  input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
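A quick runnable check that the new variadic landed on the WavLM heads (class inspection only; no weights are downloaded):

import inspect

from transformers import WavLMForCTC, WavLMForSequenceClassification

for cls in (WavLMForCTC, WavLMForSequenceClassification):
    params = inspect.signature(cls.forward).parameters
    # VAR_KEYWORD is the **kwargs slot added in this release.
    assert params["kwargs"].kind is inspect.Parameter.VAR_KEYWORD, cls.__name__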
@@ -608,6 +608,7 @@ class WhisperEncoder(WhisperPreTrainedModel):
  output_attentions=None,
  output_hidden_states=None,
  return_dict=None,
+ **kwargs,
  ):
  r"""
  Args:
@@ -734,6 +735,7 @@ class WhisperDecoder(WhisperPreTrainedModel):
  output_hidden_states=None,
  return_dict=None,
  cache_position=None,
+ **kwargs,
  ):
  r"""
  Args:
@@ -982,6 +984,7 @@ class WhisperModel(WhisperPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  cache_position: Optional[torch.LongTensor] = None,
+ **kwargs,
  ) -> Union[tuple[torch.Tensor], Seq2SeqModelOutput]:
  r"""
  decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
@@ -1129,6 +1132,7 @@ class WhisperForConditionalGeneration(WhisperGenerationMixin, WhisperPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  cache_position: Optional[torch.LongTensor] = None,
+ **kwargs,
  ) -> Union[tuple[torch.Tensor], Seq2SeqLMOutput]:
  r"""
  decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
@@ -1299,6 +1303,7 @@ class WhisperForCausalLM(WhisperPreTrainedModel, GenerationMixin):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  cache_position: Optional[torch.LongTensor] = None,
+ **kwargs,
  ) -> Union[tuple, CausalLMOutputWithCrossAttentions]:
  r"""
  encoder_outputs (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
@@ -1422,6 +1427,7 @@ class WhisperForAudioClassification(WhisperPreTrainedModel):
  output_attentions: Optional[bool] = None,
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
+ **kwargs,
  ) -> Union[tuple[torch.Tensor], SequenceClassifierOutput]:
  r"""
  labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
@@ -19,7 +19,7 @@ import os
  import re
  import warnings
  from functools import lru_cache
- from typing import Optional
+ from typing import Optional, Union

  import numpy as np
  from tokenizers import AddedToken, Tokenizer, decoders, pre_tokenizers, processors
@@ -204,10 +204,11 @@ class WhisperTokenizer(TokenizersBackend):

  vocab_files_names = VOCAB_FILES_NAMES
  model_input_names = ["input_ids", "attention_mask"]
+ model = BPE

  def __init__(
      self,
-     vocab=None,
+     vocab: Optional[Union[str, dict[str, int]]] = None,
      merges=None,
      normalizer_file=None,
      unk_token="<|endoftext|>",
@@ -253,7 +254,6 @@
  self._tokenizer.decoder = decoders.ByteLevel()

  super().__init__(
-     tokenizer_object=self._tokenizer,
      unk_token=unk_token,
      bos_token=bos_token,
      eos_token=eos_token,
@@ -276,18 +276,7 @@
  self.language = language
  self.task = task
  self.predict_timestamps = predict_timestamps
-
- self._post_init()
-
- def _post_init(self):
-     """Post-initialization hook to set up prefix tokens after the tokenizer is fully loaded."""
-     super()._post_init()
-     # Set up prefix tokens if language or task is specified (may be set from config in from_pretrained)
-     if hasattr(self, "language") and hasattr(self, "task") and hasattr(self, "predict_timestamps"):
-         if self.language is not None or self.task is not None:
-             self.set_prefix_tokens(
-                 language=self.language, task=self.task, predict_timestamps=self.predict_timestamps
-             )
+ self.set_prefix_tokens()

  # Copied from transformers.models.whisper.tokenization_whisper.WhisperTokenizer._decode_with_timestamps
  def _decode_with_timestamps(
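With `_post_init` removed, `__init__` now calls `set_prefix_tokens()` unconditionally, and later language/task switches go through the same public method as before. A usage sketch (checkpoint id is illustrative; `from_pretrained` downloads tokenizer files):

from transformers import WhisperTokenizer

tok = WhisperTokenizer.from_pretrained(
    "openai/whisper-tiny", language="english", task="transcribe"
)

# Re-target the decoder prompt later without rebuilding the tokenizer:
tok.set_prefix_tokens(language="french", task="translate", predict_timestamps=True)
print(tok.prefix_tokens)  # ids for <|startoftranscript|><|fr|><|translate|>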
@@ -737,6 +737,7 @@ class XCLIPTextModel(XCLIPPreTrainedModel):
  output_attentions: Optional[bool] = None,
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
+ **kwargs,
  ) -> Union[tuple, BaseModelOutputWithPooling]:
  r"""
  Examples:
@@ -927,6 +928,7 @@ class XCLIPVisionModel(XCLIPPreTrainedModel):
  output_attentions: Optional[bool] = None,
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
+ **kwargs,
  ) -> Union[tuple, BaseModelOutputWithPooling]:
  r"""
  Examples:
@@ -1340,6 +1342,7 @@ class XCLIPModel(XCLIPPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  interpolate_pos_encoding: bool = False,
  return_dict: Optional[bool] = None,
+ **kwargs,
  ) -> Union[tuple, XCLIPOutput]:
  r"""
  return_loss (`bool`, *optional*):
@@ -407,6 +407,7 @@ class XGLMModel(XGLMPreTrainedModel):
  output_hidden_states: Optional[bool] = None,
  return_dict: Optional[bool] = None,
  cache_position: Optional[torch.Tensor] = None,
+ **kwargs,
  ) -> Union[tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]:
  r"""
  encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*):