transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0

transformers/models/xglm/tokenization_xglm.py

@@ -14,7 +14,7 @@
 # limitations under the License.
 """Tokenization classes for XGLM."""
 
-from typing import Optional
+from typing import Optional, Union
 
 from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import Unigram
@@ -50,7 +50,7 @@ class XGLMTokenizer(TokenizersBackend):
             The unknown token.
         pad_token (`str`, *optional*, defaults to `"<pad>"`):
             The token used for padding.
-        vocab (`dict`, *optional*):
+        vocab (`str`, `dict` or `list`, *optional*):
             Custom vocabulary dictionary. If not provided, a minimal vocabulary is created.
         merges (`list[tuple[str, str]]`, *optional*):
             Custom merge rules for BPE. If not provided, merges are generated from the vocabulary.
@@ -60,18 +60,17 @@ class XGLMTokenizer(TokenizersBackend):
 
     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask"]
-    slow_tokenizer_class = None
+    model = Unigram
 
     def __init__(
         self,
+        vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
         bos_token: str = "<s>",
         eos_token: str = "</s>",
         sep_token: str = "</s>",
         cls_token: str = "<s>",
         unk_token: str = "<unk>",
         pad_token: str = "<pad>",
-        vocab: Optional[dict] = None,
-        merges: Optional[list[tuple[str, str]]] = None,
         add_prefix_space: bool = True,
         **kwargs,
     ):
@@ -106,11 +105,7 @@ class XGLMTokenizer(TokenizersBackend):
         prepend_scheme = "always" if add_prefix_space else "never"
         self._tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
         self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
-
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
             bos_token=bos_token,
             eos_token=eos_token,
             sep_token=sep_token,
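
The rewritten tokenizer builds its backend directly from a `tokenizers` Unigram model plus a Metaspace pre-tokenizer and decoder, as the hunks above show. A minimal sketch of that wiring using only the `tokenizers` library (the toy vocabulary and log-probability scores below are made up and are not a real XGLM vocabulary):

    from tokenizers import Tokenizer, decoders, pre_tokenizers
    from tokenizers.models import Unigram

    # Toy (piece, log-prob) vocabulary; index 3 plays the role of <unk>.
    vocab = [("<s>", 0.0), ("<pad>", 0.0), ("</s>", 0.0), ("<unk>", 0.0), ("▁hello", -3.0), ("▁world", -3.5)]
    tok = Tokenizer(Unigram(vocab, unk_id=3))
    tok.pre_tokenizer = pre_tokenizers.Metaspace(replacement="▁", prepend_scheme="always")
    tok.decoder = decoders.Metaspace(replacement="▁", prepend_scheme="always")
    print(tok.encode("hello world").tokens)  # ['▁hello', '▁world']
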

transformers/models/xlm/modeling_xlm.py

@@ -1082,6 +1082,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1190,6 +1191,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, QuestionAnsweringModelOutput]:
         r"""
         langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1291,6 +1293,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, XLMForQuestionAnsweringOutput]:
         r"""
         langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1406,6 +1409,7 @@ class XLMForTokenClassification(XLMPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1491,6 +1495,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MultipleChoiceModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):

transformers/models/xlm_roberta/tokenization_xlm_roberta.py

@@ -14,7 +14,7 @@
 # limitations under the License
 """Tokenization classes for XLM-RoBERTa model (Tokenizers backend)."""
 
-from typing import Optional
+from typing import Optional, Union
 
 from tokenizers import Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import Unigram
@@ -47,16 +47,17 @@ class XLMRobertaTokenizer(TokenizersBackend):
         pad_token (`str`, optional, defaults to `"<pad>"`): The padding token.
         mask_token (`str`, optional, defaults to `"<mask>"`): The mask token.
         add_prefix_space (`bool`, optional, defaults to `True`): Whether to add an initial space.
-        vocab (`dict`, optional): Custom vocabulary dictionary.
-        merges (`list`, optional): Custom merges list.
+        vocab (`str`, `dict` or `list`, optional): Custom vocabulary dictionary.
     """
 
     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask"]
-    slow_tokenizer_class = None
+    model = Unigram
 
     def __init__(
         self,
+        vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
+        add_prefix_space: bool = True,
         bos_token: str = "<s>",
         eos_token: str = "</s>",
         sep_token: str = "</s>",
@@ -64,9 +65,6 @@ class XLMRobertaTokenizer(TokenizersBackend):
         unk_token: str = "<unk>",
         pad_token: str = "<pad>",
         mask_token: str = "<mask>",
-        add_prefix_space: bool = True,
-        vocab: Optional[dict] = None,
-        vocab_file: Optional[str] = None,
         **kwargs,
     ):
         self.add_prefix_space = add_prefix_space
@@ -99,11 +97,7 @@ class XLMRobertaTokenizer(TokenizersBackend):
             ]
         )
         self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
-
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
             bos_token=bos_token,
             eos_token=eos_token,
             sep_token=sep_token,
@@ -116,14 +110,13 @@ class XLMRobertaTokenizer(TokenizersBackend):
         )
 
         self._tokenizer.post_processor = processors.TemplateProcessing(
-            single=["$A", "</s>"],
-            pair=["$A", "</s>", "$B", "</s>"],
+            single=[str(bos_token), "$A", str(eos_token)],
+            pair=[str(bos_token), "$A", str(eos_token), "$B", str(eos_token)],
             special_tokens=[
-                ("</s>", self.eos_token_id),
+                (str(bos_token), self.bos_token_id),
+                (str(eos_token), self.eos_token_id),
             ],
         )
 
-        self.vocab_file = vocab_file
-
 
 __all__ = ["XLMRobertaTokenizer"]

transformers/models/xlnet/tokenization_xlnet.py

@@ -14,7 +14,7 @@
 # limitations under the License.
 """Tokenization classes for XLNet model."""
 
-from typing import Optional
+from typing import Optional, Union
 
 from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
 from tokenizers.models import Unigram
@@ -98,10 +98,11 @@ class XLNetTokenizer(TokenizersBackend):
 
     vocab_files_names = VOCAB_FILES_NAMES
     padding_side = "left"
+    model = Unigram
 
     def __init__(
         self,
-        vocab: Optional[list] = None,
+        vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
         unk_id: int = 0,
         do_lower_case=False,
         remove_space=True,
@@ -159,13 +160,8 @@ class XLNetTokenizer(TokenizersBackend):
         self.do_lower_case = do_lower_case
         self.remove_space = remove_space
         self.keep_accents = keep_accents
-
         mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
-
-        tokenizer_object = self._tokenizer
-
         super().__init__(
-            tokenizer_object=tokenizer_object,
             unk_id=unk_id,
             do_lower_case=do_lower_case,
             remove_space=remove_space,

transformers/models/yoso/modeling_yoso.py

@@ -642,6 +642,7 @@ class YosoModel(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithCrossAttentions]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -734,6 +735,7 @@ class YosoForMaskedLM(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MaskedLMOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -823,6 +825,7 @@ class YosoForSequenceClassification(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
@@ -904,6 +907,7 @@ class YosoForMultipleChoice(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, MultipleChoiceModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
@@ -1009,6 +1013,7 @@ class YosoForTokenClassification(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
@@ -1085,6 +1090,7 @@ class YosoForQuestionAnswering(YosoPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, QuestionAnsweringModelOutput]:
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 

transformers/models/zamba/modeling_zamba.py

@@ -870,6 +870,7 @@ class ZambaModel(ZambaPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1192,6 +1193,7 @@ class ZambaForSequenceClassification(ZambaPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutputWithPast]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):

transformers/models/zamba2/modeling_zamba2.py

@@ -41,6 +41,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import auto_docstring, logging
+from ...utils.generic import maybe_autocast
 from ...utils.import_utils import is_causal_conv1d_available, is_mamba_ssm_available
 from .configuration_zamba2 import Zamba2Config
 
@@ -263,7 +264,7 @@ class Zamba2RotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()
 
         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -424,7 +425,6 @@ class Zamba2Attention(nn.Module):
         attention_mask: Optional[torch.Tensor] = None,
         past_key_values: Optional[Zamba2HybridDynamicCache] = None,
         position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
-        position_ids: Optional[torch.Tensor] = None,
         **kwargs: Unpack[FlashAttentionKwargs],
     ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
@@ -1294,6 +1294,7 @@ class Zamba2Model(Zamba2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1638,6 +1639,7 @@ class Zamba2ForSequenceClassification(Zamba2PreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, SequenceClassifierOutputWithPast]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
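
`maybe_autocast` is imported from `transformers.utils.generic` (see `utils/generic.py` in the file list above) and used here as a drop-in replacement for `torch.autocast` when forcing float32 in the rotary embedding. Its actual implementation is not part of this hunk; the following is a purely hypothetical sketch of the kind of guard such a wrapper could provide, not the real helper:

    import contextlib
    import torch

    # Hypothetical illustration only -- not the actual transformers.utils.generic.maybe_autocast.
    def maybe_autocast(device_type: str, enabled: bool = True, **kwargs):
        try:
            return torch.autocast(device_type=device_type, enabled=enabled, **kwargs)
        except RuntimeError:
            # Fall back to a no-op context on backends where autocast is unavailable.
            return contextlib.nullcontext()
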

transformers/models/zamba2/modular_zamba2.py

@@ -232,7 +232,6 @@ class Zamba2Attention(ZambaAttention):
         attention_mask: Optional[torch.Tensor] = None,
         past_key_values: Optional[Zamba2HybridDynamicCache] = None,
         position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
-        position_ids: Optional[torch.Tensor] = None,
         **kwargs: Unpack[FlashAttentionKwargs],
     ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
@@ -993,6 +992,7 @@ class Zamba2Model(ZambaModel, Zamba2PreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, BaseModelOutputWithPast]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (

transformers/models/zoedepth/modeling_zoedepth.py

@@ -1251,6 +1251,7 @@ class ZoeDepthForDepthEstimation(ZoeDepthPreTrainedModel):
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple[torch.Tensor], DepthEstimatorOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
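
The `**kwargs` additions in the XLM, Yoso, Zamba, Zamba2 and ZoeDepth hunks above all follow the same pattern: these `forward` methods now accept extra keyword arguments instead of raising `TypeError` for them. A quick way to verify the widened signature, assuming the rc1 code shown above:

    import inspect
    from transformers import ZoeDepthForDepthEstimation

    sig = inspect.signature(ZoeDepthForDepthEstimation.forward)
    # True once the forward signature ends with **kwargs, as in the hunk above
    print(any(p.kind is inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()))
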

transformers/pipelines/__init__.py

@@ -18,7 +18,7 @@ import warnings
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Optional, Union
 
-from huggingface_hub import model_info
+from huggingface_hub import is_offline_mode, model_info
 
 from ..configuration_utils import PreTrainedConfig
 from ..dynamic_module_utils import get_class_from_dynamic_module
@@ -38,7 +38,6 @@ from ..utils import (
     extract_commit_hash,
     find_adapter_config_file,
     is_kenlm_available,
-    is_offline_mode,
     is_peft_available,
     is_pyctcdecode_available,
     is_torch_available,
@@ -278,7 +277,7 @@ SUPPORTED_TASKS = {
     "image-to-text": {
         "impl": ImageToTextPipeline,
         "pt": (AutoModelForImageTextToText,) if is_torch_available() else (),
-        "default": {"model": ("ydshieh/vit-gpt2-coco-en", "5bebf1e")},
+        "default": {"model": ("ydshieh/vit-gpt2-coco-en", "e460201")},
         "type": "multimodal",
     },
     "image-text-to-text": {

transformers/pipelines/base.py

@@ -950,20 +950,13 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
             pipe_information["output_modalities"] = self.model.output_modalities
         return f"{self.__class__.__name__}: {pipe_information}"
 
-    def save_pretrained(
-        self,
-        save_directory: str | os.PathLike,
-        safe_serialization: bool = True,
-        **kwargs: Any,
-    ):
+    def save_pretrained(self, save_directory: str | os.PathLike, **kwargs: Any):
         """
         Save the pipeline's model and tokenizer.
 
         Args:
             save_directory (`str` or `os.PathLike`):
                 A path to the directory where to saved. It will be created if it doesn't exist.
-            safe_serialization (`str`):
-                Whether to save the model using `safetensors` or PyTorch serialization.
             kwargs (`dict[str, Any]`, *optional*):
                 Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
@@ -992,7 +985,6 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
         # Save the pipeline custom code
         custom_object_save(self, save_directory)
 
-        kwargs["safe_serialization"] = safe_serialization
         self.model.save_pretrained(save_directory, **kwargs)
 
         if self.tokenizer is not None:
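
Callers that previously relied on the explicit `safe_serialization` argument can still pass it: since the remaining `**kwargs` are forwarded unchanged to `self.model.save_pretrained`, the flag simply travels through. A usage sketch (the model id is only an example):

    from transformers import pipeline

    pipe = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
    # safe_serialization is now forwarded via **kwargs to model.save_pretrained
    pipe.save_pretrained("./my_pipeline", safe_serialization=True)
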

transformers/pipelines/document_question_answering.py

@@ -146,7 +146,9 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"):
+        if self.tokenizer is not None and not (
+            self.tokenizer.__class__.__name__.endswith("Fast") or self.tokenizer.backend == "tokenizers"
+        ):
             raise ValueError(
                 "`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
                 f"(`{self.tokenizer.__class__.__name__}`) is provided."

transformers/pipelines/text_generation.py

@@ -486,7 +486,7 @@ class TextGenerationPipeline(Pipeline):
            ]
        else:
            # When we're not starting from a prefill, the output is a new assistant message
-            if self.tokenizer.response_schema:
+            if getattr(self.tokenizer, "response_schema", False):
                assistant_message = self.tokenizer.parse_response(all_text)
            else:
                # If there's no schema, then we have to assume it's all content

transformers/processing_utils.py

@@ -28,7 +28,7 @@ from typing import Annotated, Any, Literal, Optional, TypedDict, TypeVar, Union
 
 import numpy as np
 import typing_extensions
-from huggingface_hub import create_repo
+from huggingface_hub import create_repo, is_offline_mode
 from huggingface_hub.dataclasses import validate_typed_dict
 from huggingface_hub.errors import EntryNotFoundError
 
@@ -54,7 +54,6 @@ from .utils import (
     cached_file,
     copy_func,
     direct_transformers_import,
-    is_offline_mode,
     is_torch_available,
     list_repo_templates,
     logging,
@@ -696,14 +695,10 @@ class ProcessorMixin(PushToHubMixin):
         # extra attributes to be kept
         attrs_to_save += ["auto_map"]
 
-        if "tokenizer" in output:
-            del output["tokenizer"]
-        if "qformer_tokenizer" in output:
-            del output["qformer_tokenizer"]
-        if "protein_tokenizer" in output:
-            del output["protein_tokenizer"]
-        if "char_tokenizer" in output:
-            del output["char_tokenizer"]
+        for attribute in self.__class__.get_attributes():
+            if "tokenizer" in attribute and attribute in output:
+                del output[attribute]
+
         if "chat_template" in output:
             del output["chat_template"]
 
@@ -1465,7 +1460,24 @@ class ProcessorMixin(PushToHubMixin):
         # get args from processor init signature
         sub_processors = cls.get_attributes()
         for sub_processor_type in sub_processors:
-            if sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING:
+            if "FuyuProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
+                from .tokenization_utils_tokenizers import TokenizersBackend
+
+                tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
+                if "token_type_ids" in tokenizer.model_input_names:
+                    tokenizer.model_input_names.remove("token_type_ids")
+                args.append(tokenizer)
+            elif "PixtralProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
+                from tokenizers import pre_tokenizers
+
+                from .models.llama import LlamaTokenizer
+
+                tokenizer = LlamaTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
+                tokenizer._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
+                    [pre_tokenizers.ByteLevel(False), tokenizer._tokenizer.pre_tokenizer]
+                )
+                args.append(tokenizer)
+            elif sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING:
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
                 sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
                 args.append(sub_processor)
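
In the first `ProcessorMixin` hunk, the hard-coded `tokenizer`/`qformer_tokenizer`/`protein_tokenizer`/`char_tokenizer` checks are replaced by a loop over `get_attributes()`, so any attribute whose name contains `tokenizer` is stripped from the serialized processor state. A small stand-alone sketch of that filter, using made-up attribute names as stand-ins for what `get_attributes()` might return:

    attributes = ["image_processor", "tokenizer", "qformer_tokenizer", "char_tokenizer"]
    output = {name: f"<{name} state>" for name in attributes}

    for attribute in attributes:
        if "tokenizer" in attribute and attribute in output:
            del output[attribute]

    print(output)  # {'image_processor': '<image_processor state>'}
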