transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (835)
  1. transformers/__init__.py +49 -3
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/cli/serve.py +47 -17
  6. transformers/configuration_utils.py +114 -70
  7. transformers/conversion_mapping.py +83 -7
  8. transformers/convert_slow_tokenizer.py +225 -10
  9. transformers/core_model_loading.py +374 -147
  10. transformers/data/data_collator.py +12 -4
  11. transformers/dependency_versions_table.py +2 -3
  12. transformers/dynamic_module_utils.py +1 -2
  13. transformers/feature_extraction_utils.py +55 -24
  14. transformers/file_utils.py +0 -1
  15. transformers/generation/__init__.py +11 -1
  16. transformers/generation/candidate_generator.py +79 -31
  17. transformers/generation/configuration_utils.py +165 -124
  18. transformers/generation/continuous_batching/__init__.py +4 -0
  19. transformers/generation/continuous_batching/cache.py +47 -18
  20. transformers/generation/continuous_batching/cache_manager.py +131 -34
  21. transformers/generation/continuous_batching/continuous_api.py +228 -136
  22. transformers/generation/continuous_batching/requests.py +28 -1
  23. transformers/generation/continuous_batching/scheduler.py +11 -4
  24. transformers/generation/stopping_criteria.py +1 -1
  25. transformers/generation/utils.py +108 -110
  26. transformers/generation/watermarking.py +8 -5
  27. transformers/image_processing_base.py +3 -14
  28. transformers/image_processing_utils_fast.py +15 -4
  29. transformers/initialization.py +37 -0
  30. transformers/integrations/__init__.py +16 -2
  31. transformers/integrations/accelerate.py +58 -113
  32. transformers/integrations/aqlm.py +36 -66
  33. transformers/integrations/awq.py +46 -515
  34. transformers/integrations/bitnet.py +47 -105
  35. transformers/integrations/bitsandbytes.py +91 -202
  36. transformers/integrations/deepspeed.py +18 -2
  37. transformers/integrations/eetq.py +84 -81
  38. transformers/integrations/fbgemm_fp8.py +191 -145
  39. transformers/integrations/finegrained_fp8.py +241 -208
  40. transformers/integrations/flash_attention.py +2 -2
  41. transformers/integrations/fp_quant.py +92 -0
  42. transformers/integrations/ggml.py +11 -1
  43. transformers/integrations/higgs.py +37 -62
  44. transformers/integrations/hub_kernels.py +65 -8
  45. transformers/integrations/integration_utils.py +45 -0
  46. transformers/integrations/mistral.py +12 -0
  47. transformers/integrations/moe.py +240 -0
  48. transformers/integrations/mxfp4.py +28 -74
  49. transformers/integrations/peft.py +12 -29
  50. transformers/integrations/quanto.py +77 -56
  51. transformers/integrations/quark.py +55 -0
  52. transformers/integrations/spqr.py +42 -90
  53. transformers/integrations/tensor_parallel.py +167 -221
  54. transformers/integrations/torchao.py +32 -38
  55. transformers/integrations/vptq.py +40 -59
  56. transformers/modelcard.py +1 -2
  57. transformers/modeling_gguf_pytorch_utils.py +74 -19
  58. transformers/modeling_rope_utils.py +107 -86
  59. transformers/modeling_utils.py +611 -527
  60. transformers/models/__init__.py +22 -0
  61. transformers/models/afmoe/modeling_afmoe.py +10 -19
  62. transformers/models/afmoe/modular_afmoe.py +5 -13
  63. transformers/models/aimv2/modeling_aimv2.py +4 -0
  64. transformers/models/aimv2/modular_aimv2.py +4 -0
  65. transformers/models/albert/modeling_albert.py +3 -0
  66. transformers/models/albert/tokenization_albert.py +6 -12
  67. transformers/models/align/modeling_align.py +14 -6
  68. transformers/models/altclip/modeling_altclip.py +11 -3
  69. transformers/models/apertus/modeling_apertus.py +8 -6
  70. transformers/models/apertus/modular_apertus.py +4 -1
  71. transformers/models/arcee/modeling_arcee.py +5 -5
  72. transformers/models/aria/modeling_aria.py +12 -8
  73. transformers/models/aria/modular_aria.py +7 -3
  74. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  75. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  76. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  77. transformers/models/auto/auto_factory.py +1 -1
  78. transformers/models/auto/configuration_auto.py +38 -0
  79. transformers/models/auto/feature_extraction_auto.py +9 -3
  80. transformers/models/auto/image_processing_auto.py +5 -2
  81. transformers/models/auto/modeling_auto.py +37 -0
  82. transformers/models/auto/processing_auto.py +22 -10
  83. transformers/models/auto/tokenization_auto.py +147 -566
  84. transformers/models/auto/video_processing_auto.py +5 -2
  85. transformers/models/autoformer/modeling_autoformer.py +4 -0
  86. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  87. transformers/models/bamba/modeling_bamba.py +21 -21
  88. transformers/models/bamba/modular_bamba.py +17 -16
  89. transformers/models/bark/modeling_bark.py +11 -0
  90. transformers/models/bart/configuration_bart.py +0 -1
  91. transformers/models/bart/modeling_bart.py +14 -0
  92. transformers/models/barthez/tokenization_barthez.py +5 -10
  93. transformers/models/beit/image_processing_beit_fast.py +0 -1
  94. transformers/models/beit/modeling_beit.py +6 -1
  95. transformers/models/bert/modeling_bert.py +3 -0
  96. transformers/models/bert/tokenization_bert.py +8 -21
  97. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  98. transformers/models/big_bird/modeling_big_bird.py +9 -0
  99. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  100. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
  101. transformers/models/biogpt/modeling_biogpt.py +2 -0
  102. transformers/models/biogpt/modular_biogpt.py +2 -0
  103. transformers/models/bit/modeling_bit.py +16 -3
  104. transformers/models/bitnet/modeling_bitnet.py +5 -5
  105. transformers/models/blenderbot/modeling_blenderbot.py +12 -0
  106. transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
  107. transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
  108. transformers/models/blip/modeling_blip.py +2 -0
  109. transformers/models/blip/modeling_blip_text.py +10 -0
  110. transformers/models/blip_2/modeling_blip_2.py +4 -1
  111. transformers/models/bloom/modeling_bloom.py +17 -44
  112. transformers/models/blt/modeling_blt.py +164 -4
  113. transformers/models/blt/modular_blt.py +170 -5
  114. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  115. transformers/models/bridgetower/modeling_bridgetower.py +11 -1
  116. transformers/models/bros/modeling_bros.py +12 -0
  117. transformers/models/camembert/modeling_camembert.py +109 -106
  118. transformers/models/camembert/tokenization_camembert.py +8 -12
  119. transformers/models/canine/modeling_canine.py +11 -0
  120. transformers/models/canine/tokenization_canine.py +2 -0
  121. transformers/models/chameleon/modeling_chameleon.py +11 -5
  122. transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
  123. transformers/models/clap/feature_extraction_clap.py +2 -2
  124. transformers/models/clap/modeling_clap.py +30 -15
  125. transformers/models/clip/modeling_clip.py +2 -0
  126. transformers/models/clip/tokenization_clip.py +22 -44
  127. transformers/models/clipseg/modeling_clipseg.py +9 -0
  128. transformers/models/clvp/modeling_clvp.py +19 -3
  129. transformers/models/clvp/tokenization_clvp.py +1 -63
  130. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  131. transformers/models/codegen/modeling_codegen.py +13 -4
  132. transformers/models/codegen/tokenization_codegen.py +14 -43
  133. transformers/models/cohere/modeling_cohere.py +5 -4
  134. transformers/models/cohere/modular_cohere.py +2 -1
  135. transformers/models/cohere/tokenization_cohere.py +12 -42
  136. transformers/models/cohere2/modeling_cohere2.py +8 -7
  137. transformers/models/cohere2/modular_cohere2.py +5 -5
  138. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
  139. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  140. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  141. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  142. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  143. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  144. transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
  145. transformers/models/convbert/modeling_convbert.py +9 -0
  146. transformers/models/convnext/image_processing_convnext.py +2 -2
  147. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  148. transformers/models/convnext/modeling_convnext.py +2 -4
  149. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  150. transformers/models/csm/generation_csm.py +19 -22
  151. transformers/models/csm/modeling_csm.py +7 -4
  152. transformers/models/csm/modular_csm.py +2 -0
  153. transformers/models/ctrl/modeling_ctrl.py +15 -2
  154. transformers/models/cvt/modeling_cvt.py +7 -1
  155. transformers/models/cwm/modeling_cwm.py +5 -5
  156. transformers/models/d_fine/configuration_d_fine.py +3 -4
  157. transformers/models/d_fine/modeling_d_fine.py +48 -39
  158. transformers/models/d_fine/modular_d_fine.py +16 -4
  159. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  160. transformers/models/dab_detr/modeling_dab_detr.py +5 -1
  161. transformers/models/dac/modeling_dac.py +6 -6
  162. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  163. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  164. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  165. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  166. transformers/models/dbrx/configuration_dbrx.py +9 -1
  167. transformers/models/dbrx/modeling_dbrx.py +3 -3
  168. transformers/models/deberta/modeling_deberta.py +7 -0
  169. transformers/models/deberta/tokenization_deberta.py +11 -20
  170. transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
  171. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  172. transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
  173. transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
  174. transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
  175. transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
  176. transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
  177. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  178. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  179. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  180. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  181. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  182. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  183. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  184. transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
  185. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  186. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  187. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  188. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  189. transformers/models/detr/configuration_detr.py +1 -1
  190. transformers/models/detr/modeling_detr.py +13 -1
  191. transformers/models/dia/generation_dia.py +3 -10
  192. transformers/models/dia/modeling_dia.py +16 -4
  193. transformers/models/dia/modular_dia.py +11 -1
  194. transformers/models/dia/processing_dia.py +1 -1
  195. transformers/models/diffllama/modeling_diffllama.py +5 -5
  196. transformers/models/diffllama/modular_diffllama.py +2 -2
  197. transformers/models/dinat/modeling_dinat.py +3 -0
  198. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  199. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  200. transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
  201. transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
  202. transformers/models/distilbert/modeling_distilbert.py +11 -9
  203. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  204. transformers/models/doge/modeling_doge.py +3 -4
  205. transformers/models/doge/modular_doge.py +0 -1
  206. transformers/models/donut/image_processing_donut_fast.py +0 -1
  207. transformers/models/donut/modeling_donut_swin.py +18 -12
  208. transformers/models/dots1/modeling_dots1.py +23 -11
  209. transformers/models/dots1/modular_dots1.py +5 -3
  210. transformers/models/dpr/modeling_dpr.py +5 -0
  211. transformers/models/dpr/tokenization_dpr.py +12 -0
  212. transformers/models/dpt/configuration_dpt.py +1 -1
  213. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  214. transformers/models/dpt/modular_dpt.py +1 -2
  215. transformers/models/edgetam/configuration_edgetam.py +1 -1
  216. transformers/models/edgetam/modeling_edgetam.py +6 -3
  217. transformers/models/edgetam/modular_edgetam.py +15 -14
  218. transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
  219. transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
  220. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  221. transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
  222. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  223. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  224. transformers/models/efficientnet/modeling_efficientnet.py +7 -1
  225. transformers/models/electra/modeling_electra.py +7 -0
  226. transformers/models/emu3/modeling_emu3.py +12 -6
  227. transformers/models/emu3/modular_emu3.py +7 -1
  228. transformers/models/encodec/modeling_encodec.py +14 -0
  229. transformers/models/eomt/image_processing_eomt.py +13 -1
  230. transformers/models/eomt/image_processing_eomt_fast.py +60 -16
  231. transformers/models/eomt/modeling_eomt.py +7 -0
  232. transformers/models/eomt/modular_eomt.py +7 -0
  233. transformers/models/ernie/modeling_ernie.py +6 -0
  234. transformers/models/ernie/modular_ernie.py +6 -0
  235. transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
  236. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  237. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
  238. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
  239. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  240. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  241. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  242. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  243. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  244. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  245. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  246. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  247. transformers/models/esm/modeling_esm.py +6 -0
  248. transformers/models/esm/modeling_esmfold.py +11 -5
  249. transformers/models/evolla/modeling_evolla.py +13 -5
  250. transformers/models/evolla/modular_evolla.py +8 -0
  251. transformers/models/exaone4/modeling_exaone4.py +3 -3
  252. transformers/models/exaone4/modular_exaone4.py +0 -1
  253. transformers/models/falcon/modeling_falcon.py +9 -4
  254. transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
  255. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  256. transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
  257. transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
  258. transformers/models/fast_vlm/__init__.py +27 -0
  259. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  260. transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
  261. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  262. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
  263. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  264. transformers/models/flaubert/modeling_flaubert.py +21 -15
  265. transformers/models/flava/image_processing_flava_fast.py +0 -2
  266. transformers/models/flava/modeling_flava.py +10 -2
  267. transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
  268. transformers/models/florence2/modeling_florence2.py +22 -4
  269. transformers/models/florence2/modular_florence2.py +15 -1
  270. transformers/models/fnet/modeling_fnet.py +14 -0
  271. transformers/models/focalnet/modeling_focalnet.py +4 -0
  272. transformers/models/fsmt/modeling_fsmt.py +2 -0
  273. transformers/models/funnel/modeling_funnel.py +8 -0
  274. transformers/models/funnel/tokenization_funnel.py +17 -24
  275. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  276. transformers/models/fuyu/modeling_fuyu.py +3 -1
  277. transformers/models/fuyu/processing_fuyu.py +19 -3
  278. transformers/models/gemma/modeling_gemma.py +14 -16
  279. transformers/models/gemma/modular_gemma.py +9 -11
  280. transformers/models/gemma/tokenization_gemma.py +10 -27
  281. transformers/models/gemma2/modeling_gemma2.py +5 -5
  282. transformers/models/gemma2/modular_gemma2.py +3 -2
  283. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  284. transformers/models/gemma3/modeling_gemma3.py +42 -91
  285. transformers/models/gemma3/modular_gemma3.py +38 -87
  286. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  287. transformers/models/gemma3n/modeling_gemma3n.py +65 -218
  288. transformers/models/gemma3n/modular_gemma3n.py +68 -68
  289. transformers/models/git/modeling_git.py +183 -126
  290. transformers/models/glm/modeling_glm.py +5 -5
  291. transformers/models/glm4/modeling_glm4.py +5 -5
  292. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  293. transformers/models/glm46v/modeling_glm46v.py +3 -1
  294. transformers/models/glm46v/modular_glm46v.py +3 -0
  295. transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
  296. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  297. transformers/models/glm4v/configuration_glm4v.py +3 -1
  298. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  299. transformers/models/glm4v/modeling_glm4v.py +18 -8
  300. transformers/models/glm4v/modular_glm4v.py +17 -7
  301. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  302. transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
  303. transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
  304. transformers/models/glmasr/__init__.py +30 -0
  305. transformers/models/glmasr/configuration_glmasr.py +197 -0
  306. transformers/models/glmasr/modeling_glmasr.py +512 -0
  307. transformers/models/glmasr/modular_glmasr.py +433 -0
  308. transformers/models/glmasr/processing_glmasr.py +332 -0
  309. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  310. transformers/models/glpn/modeling_glpn.py +2 -0
  311. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  312. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  313. transformers/models/gpt2/modeling_gpt2.py +13 -6
  314. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  315. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
  316. transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
  317. transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
  318. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  319. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  320. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
  321. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  322. transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
  323. transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
  324. transformers/models/gptj/modeling_gptj.py +18 -6
  325. transformers/models/granite/modeling_granite.py +5 -5
  326. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  327. transformers/models/granitemoe/modeling_granitemoe.py +6 -9
  328. transformers/models/granitemoe/modular_granitemoe.py +1 -4
  329. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  330. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
  331. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  332. transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
  333. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  334. transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
  335. transformers/models/groupvit/modeling_groupvit.py +9 -1
  336. transformers/models/helium/modeling_helium.py +5 -4
  337. transformers/models/herbert/tokenization_herbert.py +9 -25
  338. transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
  339. transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
  340. transformers/models/hiera/modeling_hiera.py +4 -0
  341. transformers/models/hubert/modeling_hubert.py +7 -0
  342. transformers/models/hubert/modular_hubert.py +5 -0
  343. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
  344. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  345. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  346. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
  347. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  348. transformers/models/ibert/modeling_ibert.py +22 -0
  349. transformers/models/idefics/modeling_idefics.py +15 -21
  350. transformers/models/idefics2/modeling_idefics2.py +7 -1
  351. transformers/models/idefics3/modeling_idefics3.py +5 -1
  352. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  353. transformers/models/imagegpt/modeling_imagegpt.py +11 -3
  354. transformers/models/informer/modeling_informer.py +4 -0
  355. transformers/models/informer/modular_informer.py +1 -0
  356. transformers/models/instructblip/modeling_instructblip.py +2 -0
  357. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  358. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  359. transformers/models/internvl/modeling_internvl.py +13 -12
  360. transformers/models/internvl/modular_internvl.py +7 -13
  361. transformers/models/internvl/video_processing_internvl.py +0 -1
  362. transformers/models/jais2/__init__.py +27 -0
  363. transformers/models/jais2/configuration_jais2.py +152 -0
  364. transformers/models/jais2/modeling_jais2.py +486 -0
  365. transformers/models/jais2/modular_jais2.py +196 -0
  366. transformers/models/jamba/modeling_jamba.py +25 -20
  367. transformers/models/jamba/modular_jamba.py +17 -17
  368. transformers/models/janus/image_processing_janus_fast.py +0 -1
  369. transformers/models/janus/modeling_janus.py +16 -7
  370. transformers/models/janus/modular_janus.py +17 -7
  371. transformers/models/jetmoe/modeling_jetmoe.py +4 -4
  372. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  373. transformers/models/kosmos2/modeling_kosmos2.py +15 -2
  374. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  375. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  376. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
  377. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  378. transformers/models/lasr/__init__.py +29 -0
  379. transformers/models/lasr/configuration_lasr.py +248 -0
  380. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  381. transformers/models/lasr/modeling_lasr.py +730 -0
  382. transformers/models/lasr/modular_lasr.py +576 -0
  383. transformers/models/lasr/processing_lasr.py +94 -0
  384. transformers/models/lasr/tokenization_lasr.py +186 -0
  385. transformers/models/layoutlm/modeling_layoutlm.py +10 -3
  386. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  387. transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
  388. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
  389. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  390. transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
  391. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  392. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  393. transformers/models/led/modeling_led.py +12 -0
  394. transformers/models/levit/modeling_levit.py +21 -0
  395. transformers/models/lfm2/modeling_lfm2.py +5 -6
  396. transformers/models/lfm2/modular_lfm2.py +0 -1
  397. transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
  398. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  399. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  400. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  401. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  402. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  403. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  404. transformers/models/lightglue/modeling_lightglue.py +3 -1
  405. transformers/models/lightglue/modular_lightglue.py +1 -0
  406. transformers/models/lilt/modeling_lilt.py +23 -15
  407. transformers/models/llama/modeling_llama.py +5 -5
  408. transformers/models/llama/tokenization_llama.py +15 -43
  409. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  410. transformers/models/llama4/modeling_llama4.py +11 -6
  411. transformers/models/llava/image_processing_llava_fast.py +0 -1
  412. transformers/models/llava/modeling_llava.py +12 -7
  413. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  414. transformers/models/llava_next/modeling_llava_next.py +7 -3
  415. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  416. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  417. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  418. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  419. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  420. transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
  421. transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
  422. transformers/models/longformer/modeling_longformer.py +6 -0
  423. transformers/models/longt5/modeling_longt5.py +4 -4
  424. transformers/models/luke/modeling_luke.py +9 -0
  425. transformers/models/luke/tokenization_luke.py +11 -38
  426. transformers/models/lxmert/modeling_lxmert.py +2 -0
  427. transformers/models/m2m_100/modeling_m2m_100.py +14 -0
  428. transformers/models/mamba/modeling_mamba.py +16 -23
  429. transformers/models/mamba2/modeling_mamba2.py +24 -23
  430. transformers/models/marian/configuration_marian.py +1 -1
  431. transformers/models/marian/modeling_marian.py +8 -0
  432. transformers/models/markuplm/modeling_markuplm.py +9 -8
  433. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  434. transformers/models/mask2former/configuration_mask2former.py +3 -3
  435. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  436. transformers/models/mask2former/modeling_mask2former.py +11 -0
  437. transformers/models/maskformer/configuration_maskformer.py +3 -3
  438. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  439. transformers/models/maskformer/modeling_maskformer.py +11 -1
  440. transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
  441. transformers/models/mbart/configuration_mbart.py +1 -0
  442. transformers/models/mbart/modeling_mbart.py +14 -0
  443. transformers/models/mbart/tokenization_mbart.py +11 -52
  444. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  445. transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
  446. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  447. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  448. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  449. transformers/models/mimi/modeling_mimi.py +28 -5
  450. transformers/models/minimax/modeling_minimax.py +19 -6
  451. transformers/models/minimax/modular_minimax.py +12 -1
  452. transformers/models/ministral/modeling_ministral.py +5 -5
  453. transformers/models/ministral3/configuration_ministral3.py +1 -1
  454. transformers/models/ministral3/modeling_ministral3.py +5 -4
  455. transformers/models/mistral/modeling_mistral.py +5 -4
  456. transformers/models/mistral3/modeling_mistral3.py +10 -4
  457. transformers/models/mistral3/modular_mistral3.py +3 -1
  458. transformers/models/mixtral/modeling_mixtral.py +15 -7
  459. transformers/models/mixtral/modular_mixtral.py +6 -2
  460. transformers/models/mlcd/modeling_mlcd.py +6 -0
  461. transformers/models/mlcd/modular_mlcd.py +4 -0
  462. transformers/models/mllama/modeling_mllama.py +15 -4
  463. transformers/models/mluke/tokenization_mluke.py +6 -6
  464. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  465. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
  466. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  467. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  468. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  469. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  470. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  471. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  472. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  473. transformers/models/mobilevit/modeling_mobilevit.py +7 -0
  474. transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
  475. transformers/models/modernbert/modeling_modernbert.py +16 -2
  476. transformers/models/modernbert/modular_modernbert.py +14 -1
  477. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
  478. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
  479. transformers/models/moonshine/modeling_moonshine.py +5 -3
  480. transformers/models/moshi/modeling_moshi.py +26 -53
  481. transformers/models/mpnet/modeling_mpnet.py +7 -0
  482. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  483. transformers/models/mpt/modeling_mpt.py +2 -0
  484. transformers/models/mra/modeling_mra.py +10 -1
  485. transformers/models/mt5/configuration_mt5.py +2 -3
  486. transformers/models/mt5/modeling_mt5.py +7 -10
  487. transformers/models/musicgen/modeling_musicgen.py +7 -9
  488. transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
  489. transformers/models/mvp/modeling_mvp.py +14 -0
  490. transformers/models/nanochat/modeling_nanochat.py +5 -5
  491. transformers/models/nemotron/modeling_nemotron.py +7 -5
  492. transformers/models/nllb/tokenization_nllb.py +8 -22
  493. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  494. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  495. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  496. transformers/models/nougat/tokenization_nougat.py +15 -68
  497. transformers/models/nystromformer/modeling_nystromformer.py +13 -0
  498. transformers/models/olmo/modeling_olmo.py +5 -5
  499. transformers/models/olmo/modular_olmo.py +2 -2
  500. transformers/models/olmo2/modeling_olmo2.py +5 -6
  501. transformers/models/olmo2/modular_olmo2.py +0 -1
  502. transformers/models/olmo3/modeling_olmo3.py +5 -5
  503. transformers/models/olmoe/modeling_olmoe.py +15 -7
  504. transformers/models/olmoe/modular_olmoe.py +4 -2
  505. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  506. transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
  507. transformers/models/oneformer/configuration_oneformer.py +3 -3
  508. transformers/models/oneformer/modeling_oneformer.py +11 -39
  509. transformers/models/openai/modeling_openai.py +15 -0
  510. transformers/models/openai/tokenization_openai.py +10 -46
  511. transformers/models/opt/modeling_opt.py +2 -0
  512. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  513. transformers/models/ovis2/modeling_ovis2.py +15 -3
  514. transformers/models/ovis2/modular_ovis2.py +8 -0
  515. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  516. transformers/models/owlv2/modeling_owlv2.py +11 -3
  517. transformers/models/owlv2/modular_owlv2.py +0 -2
  518. transformers/models/owlvit/modeling_owlvit.py +11 -3
  519. transformers/models/paddleocr_vl/__init__.py +32 -0
  520. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  521. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
  522. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  523. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
  524. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
  525. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  526. transformers/models/paligemma/modeling_paligemma.py +25 -17
  527. transformers/models/parakeet/configuration_parakeet.py +4 -6
  528. transformers/models/parakeet/modeling_parakeet.py +14 -6
  529. transformers/models/parakeet/modular_parakeet.py +7 -2
  530. transformers/models/parakeet/processing_parakeet.py +1 -0
  531. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  532. transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
  533. transformers/models/patchtst/modeling_patchtst.py +25 -6
  534. transformers/models/pe_audio/__init__.py +30 -0
  535. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  536. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  537. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  538. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  539. transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
  540. transformers/models/pe_audio_video/__init__.py +29 -0
  541. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  542. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  543. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  544. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  545. transformers/models/pe_video/__init__.py +30 -0
  546. transformers/models/pe_video/configuration_pe_video.py +211 -0
  547. transformers/models/pe_video/modeling_pe_video.py +636 -0
  548. transformers/models/pe_video/modular_pe_video.py +219 -0
  549. transformers/models/pe_video/processing_pe_video.py +10 -0
  550. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  551. transformers/models/pegasus/configuration_pegasus.py +1 -0
  552. transformers/models/pegasus/modeling_pegasus.py +8 -0
  553. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  554. transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
  555. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  556. transformers/models/perceiver/modeling_perceiver.py +13 -1
  557. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  558. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  559. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  560. transformers/models/persimmon/modeling_persimmon.py +3 -2
  561. transformers/models/phi/modeling_phi.py +5 -6
  562. transformers/models/phi/modular_phi.py +0 -1
  563. transformers/models/phi3/modeling_phi3.py +3 -2
  564. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
  565. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
  566. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  567. transformers/models/phimoe/modeling_phimoe.py +15 -7
  568. transformers/models/phimoe/modular_phimoe.py +3 -3
  569. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  570. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  571. transformers/models/pixio/__init__.py +30 -0
  572. transformers/models/pixio/configuration_pixio.py +151 -0
  573. transformers/models/pixio/modeling_pixio.py +507 -0
  574. transformers/models/pixio/modular_pixio.py +404 -0
  575. transformers/models/pixtral/modeling_pixtral.py +3 -2
  576. transformers/models/pixtral/processing_pixtral.py +3 -1
  577. transformers/models/plbart/configuration_plbart.py +1 -0
  578. transformers/models/plbart/modeling_plbart.py +13 -0
  579. transformers/models/plbart/modular_plbart.py +8 -0
  580. transformers/models/plbart/tokenization_plbart.py +0 -2
  581. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  582. transformers/models/poolformer/modeling_poolformer.py +13 -1
  583. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  584. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  585. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  586. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  587. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  588. transformers/models/prophetnet/modeling_prophetnet.py +5 -1
  589. transformers/models/pvt/modeling_pvt.py +2 -0
  590. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  591. transformers/models/qwen2/modeling_qwen2.py +5 -5
  592. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  593. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  594. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
  595. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
  596. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  597. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
  598. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
  599. transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
  600. transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
  601. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  602. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  603. transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
  604. transformers/models/qwen3/modeling_qwen3.py +5 -5
  605. transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
  606. transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
  607. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  608. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
  609. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
  610. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  611. transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
  612. transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
  613. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  614. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
  615. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
  616. transformers/models/rag/configuration_rag.py +0 -8
  617. transformers/models/rag/modeling_rag.py +8 -9
  618. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
  619. transformers/models/reformer/modeling_reformer.py +13 -1
  620. transformers/models/reformer/tokenization_reformer.py +11 -28
  621. transformers/models/regnet/modeling_regnet.py +10 -1
  622. transformers/models/rembert/modeling_rembert.py +13 -1
  623. transformers/models/rembert/tokenization_rembert.py +3 -10
  624. transformers/models/resnet/modeling_resnet.py +19 -5
  625. transformers/models/roberta/modeling_roberta.py +3 -0
  626. transformers/models/roberta/modular_roberta.py +3 -0
  627. transformers/models/roberta/tokenization_roberta.py +18 -27
  628. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  629. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  630. transformers/models/roformer/modeling_roformer.py +6 -0
  631. transformers/models/roformer/tokenization_roformer.py +77 -412
  632. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  633. transformers/models/rt_detr/modeling_rt_detr.py +6 -0
  634. transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
  635. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  636. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
  637. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  638. transformers/models/rwkv/modeling_rwkv.py +2 -1
  639. transformers/models/sam/configuration_sam.py +1 -0
  640. transformers/models/sam/image_processing_sam_fast.py +0 -1
  641. transformers/models/sam/modeling_sam.py +4 -1
  642. transformers/models/sam2/configuration_sam2.py +1 -1
  643. transformers/models/sam2/modeling_sam2.py +7 -3
  644. transformers/models/sam2/modular_sam2.py +7 -3
  645. transformers/models/sam2_video/modeling_sam2_video.py +52 -43
  646. transformers/models/sam2_video/modular_sam2_video.py +32 -18
  647. transformers/models/sam3/configuration_sam3.py +21 -1
  648. transformers/models/sam3/modeling_sam3.py +100 -80
  649. transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
  650. transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
  651. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  652. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
  653. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  654. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  655. transformers/models/sam3_video/modeling_sam3_video.py +4 -3
  656. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  657. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  658. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  659. transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
  660. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  661. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
  662. transformers/models/seed_oss/modeling_seed_oss.py +3 -3
  663. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  664. transformers/models/segformer/modeling_segformer.py +6 -3
  665. transformers/models/segformer/modular_segformer.py +0 -1
  666. transformers/models/seggpt/modeling_seggpt.py +2 -0
  667. transformers/models/sew/modeling_sew.py +3 -0
  668. transformers/models/sew/modular_sew.py +1 -0
  669. transformers/models/sew_d/modeling_sew_d.py +3 -0
  670. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  671. transformers/models/siglip/modeling_siglip.py +24 -2
  672. transformers/models/siglip2/modeling_siglip2.py +67 -41
  673. transformers/models/siglip2/modular_siglip2.py +4 -0
  674. transformers/models/smollm3/modeling_smollm3.py +5 -5
  675. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  676. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  677. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  678. transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
  679. transformers/models/speecht5/modeling_speecht5.py +41 -1
  680. transformers/models/splinter/modeling_splinter.py +12 -3
  681. transformers/models/splinter/tokenization_splinter.py +9 -28
  682. transformers/models/squeezebert/modeling_squeezebert.py +8 -0
  683. transformers/models/stablelm/modeling_stablelm.py +4 -2
  684. transformers/models/starcoder2/modeling_starcoder2.py +5 -4
  685. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  686. transformers/models/superglue/modeling_superglue.py +1 -0
  687. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  688. transformers/models/superpoint/modeling_superpoint.py +1 -0
  689. transformers/models/swiftformer/modeling_swiftformer.py +6 -0
  690. transformers/models/swin/modeling_swin.py +20 -12
  691. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  692. transformers/models/swin2sr/modeling_swin2sr.py +51 -33
  693. transformers/models/swinv2/modeling_swinv2.py +45 -33
  694. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  695. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  696. transformers/models/t5/configuration_t5.py +7 -1
  697. transformers/models/t5/modeling_t5.py +8 -7
  698. transformers/models/t5/tokenization_t5.py +4 -8
  699. transformers/models/t5gemma/modeling_t5gemma.py +6 -6
  700. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  701. transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
  702. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  703. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  704. transformers/models/table_transformer/modeling_table_transformer.py +5 -1
  705. transformers/models/tapas/modeling_tapas.py +3 -0
  706. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  707. transformers/models/textnet/modeling_textnet.py +11 -2
  708. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  709. transformers/models/timesfm/modeling_timesfm.py +14 -0
  710. transformers/models/timesfm/modular_timesfm.py +14 -0
  711. transformers/models/timesformer/modeling_timesformer.py +2 -0
  712. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  713. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  714. transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
  715. transformers/models/trocr/modeling_trocr.py +3 -2
  716. transformers/models/tvp/configuration_tvp.py +5 -1
  717. transformers/models/tvp/modeling_tvp.py +6 -4
  718. transformers/models/udop/configuration_udop.py +1 -0
  719. transformers/models/udop/modeling_udop.py +7 -7
  720. transformers/models/udop/tokenization_udop.py +5 -13
  721. transformers/models/umt5/configuration_umt5.py +2 -2
  722. transformers/models/umt5/modeling_umt5.py +7 -6
  723. transformers/models/unispeech/modeling_unispeech.py +4 -0
  724. transformers/models/unispeech/modular_unispeech.py +2 -0
  725. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  726. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  727. transformers/models/univnet/modeling_univnet.py +1 -0
  728. transformers/models/upernet/modeling_upernet.py +1 -0
  729. transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
  730. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  731. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  732. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  733. transformers/models/video_llava/modeling_video_llava.py +7 -3
  734. transformers/models/vilt/configuration_vilt.py +2 -2
  735. transformers/models/vilt/modeling_vilt.py +13 -0
  736. transformers/models/vipllava/modeling_vipllava.py +7 -3
  737. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  738. transformers/models/visual_bert/modeling_visual_bert.py +8 -0
  739. transformers/models/vitdet/modeling_vitdet.py +2 -0
  740. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  741. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  742. transformers/models/vitmatte/modeling_vitmatte.py +5 -0
  743. transformers/models/vitpose/configuration_vitpose.py +1 -1
  744. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  745. transformers/models/vits/modeling_vits.py +1 -0
  746. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  747. transformers/models/voxtral/modeling_voxtral.py +2 -2
  748. transformers/models/voxtral/modular_voxtral.py +2 -2
  749. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  750. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
  751. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
  752. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
  753. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  754. transformers/models/wavlm/modeling_wavlm.py +5 -0
  755. transformers/models/whisper/generation_whisper.py +1 -0
  756. transformers/models/whisper/modeling_whisper.py +11 -3
  757. transformers/models/whisper/tokenization_whisper.py +4 -15
  758. transformers/models/x_clip/modeling_x_clip.py +5 -0
  759. transformers/models/xcodec/modeling_xcodec.py +5 -0
  760. transformers/models/xglm/modeling_xglm.py +11 -0
  761. transformers/models/xglm/tokenization_xglm.py +4 -9
  762. transformers/models/xlm/modeling_xlm.py +18 -14
  763. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  764. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  765. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  766. transformers/models/xlnet/modeling_xlnet.py +3 -1
  767. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  768. transformers/models/xmod/modeling_xmod.py +3 -0
  769. transformers/models/yoso/modeling_yoso.py +10 -1
  770. transformers/models/zamba/modeling_zamba.py +4 -1
  771. transformers/models/zamba2/modeling_zamba2.py +7 -4
  772. transformers/models/zamba2/modular_zamba2.py +1 -1
  773. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  774. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  775. transformers/models/zoedepth/modeling_zoedepth.py +8 -0
  776. transformers/pipelines/__init__.py +11 -9
  777. transformers/pipelines/automatic_speech_recognition.py +20 -12
  778. transformers/pipelines/base.py +2 -10
  779. transformers/pipelines/document_question_answering.py +4 -2
  780. transformers/pipelines/question_answering.py +1 -1
  781. transformers/pipelines/text_generation.py +1 -1
  782. transformers/pipelines/text_to_audio.py +2 -2
  783. transformers/processing_utils.py +133 -50
  784. transformers/quantizers/auto.py +2 -4
  785. transformers/quantizers/base.py +44 -174
  786. transformers/quantizers/quantizer_aqlm.py +2 -23
  787. transformers/quantizers/quantizer_auto_round.py +2 -12
  788. transformers/quantizers/quantizer_awq.py +20 -89
  789. transformers/quantizers/quantizer_bitnet.py +4 -14
  790. transformers/quantizers/quantizer_bnb_4bit.py +18 -155
  791. transformers/quantizers/quantizer_bnb_8bit.py +24 -110
  792. transformers/quantizers/quantizer_compressed_tensors.py +2 -9
  793. transformers/quantizers/quantizer_eetq.py +16 -74
  794. transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
  795. transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
  796. transformers/quantizers/quantizer_fp_quant.py +52 -82
  797. transformers/quantizers/quantizer_gptq.py +8 -28
  798. transformers/quantizers/quantizer_higgs.py +42 -60
  799. transformers/quantizers/quantizer_hqq.py +144 -153
  800. transformers/quantizers/quantizer_mxfp4.py +14 -194
  801. transformers/quantizers/quantizer_quanto.py +35 -79
  802. transformers/quantizers/quantizer_quark.py +36 -17
  803. transformers/quantizers/quantizer_spqr.py +4 -12
  804. transformers/quantizers/quantizer_torchao.py +50 -325
  805. transformers/quantizers/quantizer_vptq.py +4 -27
  806. transformers/quantizers/quantizers_utils.py +20 -0
  807. transformers/testing_utils.py +324 -47
  808. transformers/tokenization_mistral_common.py +7 -2
  809. transformers/tokenization_utils_base.py +116 -224
  810. transformers/tokenization_utils_tokenizers.py +190 -106
  811. transformers/trainer.py +51 -32
  812. transformers/trainer_callback.py +8 -0
  813. transformers/trainer_jit_checkpoint.py +126 -0
  814. transformers/trainer_seq2seq.py +4 -0
  815. transformers/trainer_utils.py +1 -1
  816. transformers/training_args.py +74 -38
  817. transformers/utils/__init__.py +7 -4
  818. transformers/utils/attention_visualizer.py +4 -4
  819. transformers/utils/auto_docstring.py +35 -25
  820. transformers/utils/generic.py +47 -1
  821. transformers/utils/hub.py +5 -15
  822. transformers/utils/import_utils.py +112 -25
  823. transformers/utils/kernel_config.py +74 -19
  824. transformers/utils/loading_report.py +19 -10
  825. transformers/utils/quantization_config.py +78 -245
  826. transformers/video_processing_utils.py +17 -14
  827. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
  828. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
  829. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
  830. transformers/kernels/__init__.py +0 -0
  831. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  832. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  833. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  834. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
  835. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from collections import defaultdict
 from typing import TYPE_CHECKING
 
 from .base import HfQuantizer
@@ -38,43 +37,20 @@ if is_torch_available():
     import torch
 
     from ..core_model_loading import WeightConverter
-    from ..pytorch_utils import Conv1D
 
 logger = logging.get_logger(__name__)
 
 
 class Bnb4BitHfQuantizer(HfQuantizer):
     """
-    4-bit quantization from bitsandbytes quantization method:
-    before loading: converts transformer layers into Linear4bit during loading: load 16bit weight and pass to the
-    layer object after: quantizes individual weights in Linear4bit into 4bit at the first .cuda() call
-    saving:
-        from state dict, as usual; saves weights and `quant_state` components
-    loading:
-        need to locate `quant_state` components and pass to Param4bit constructor
+    4-bit quantization from bitsandbytes quantization method
     """
 
-    use_keep_in_fp32_modules = True
-    requires_parameters_quantization = True
     requires_calibration = False
 
-    required_packages = ["bitsandbytes", "accelerate"]
-
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)
 
-        if self.quantization_config.llm_int8_skip_modules is not None:
-            self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules
-
-        # This describes the additional items that are saved on the state dict (on the params themselves)
-        self.bnb_keys = [
-            f"quant_state.bitsandbytes__{self.quantization_config.bnb_4bit_quant_type}",
-            "absmax",
-            "quant_map",
-        ]
-        if self.quantization_config.bnb_4bit_use_double_quant:
-            self.bnb_keys.extend(["nested_absmax", "nested_quant_map"])
-
     def validate_environment(self, *args, **kwargs):
         if not is_accelerate_available():
             raise ImportError(
@@ -90,17 +66,9 @@ class Bnb4BitHfQuantizer(HfQuantizer):
         validate_bnb_backend_availability(raise_exception=True)
 
         device_map = kwargs.get("device_map")
-        if (
-            device_map is not None
-            and isinstance(device_map, dict)
-            and not self.quantization_config.llm_int8_enable_fp32_cpu_offload
-        ):
-            device_map_without_lm_head = {
-                key: device_map[key] for key in device_map if key not in self.modules_to_not_convert
-            }
-            if set(device_map.values()) == {"cpu"}:
-                pass
-            elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
+        if not self.quantization_config.llm_int8_enable_fp32_cpu_offload and isinstance(device_map, dict):
+            values = set(device_map.values())
+            if values != {"cpu"} and ("cpu" in values or "disk" in values):
                 raise ValueError(
                     "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
                     "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
@@ -110,115 +78,25 @@ class Bnb4BitHfQuantizer(HfQuantizer):
                     "for more details. "
                 )
 
-    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
-        from accelerate.utils import CustomDtype
-
-        if target_dtype != torch.int8:
-            logger.info("target_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantization")
-        return CustomDtype.INT4
+    def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
+        "Return the element size (in bytes) for `param_name`."
+        if self.param_needs_quantization(model, param_name):
+            # 4 bit
+            return 0.5
 
-    def update_unexpected_keys(self, model, unexpected_keys: list[str]) -> list[str]:
-        return [k for k in unexpected_keys if not any(k.endswith(x) for x in self.bnb_keys)]
+        return super().param_element_size(model, param_name, param)
 
     def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
         import bitsandbytes as bnb
 
-        # They are on the params themselves, so we cannot easily extract the module from the name
-        if any(param_name.endswith(x) for x in self.bnb_keys):
-            return True
         module, name = get_module_from_name(model, param_name)
         return isinstance(module, bnb.nn.Linear4bit) and name != "bias"
 
-    def get_param_name(self, param_name: str) -> str:
-        """
-        Get the right param_name in order to get the module associated with the param.
-        This is useful for quantized stats lile absmax or quant_map as we need to update the param_name to get the module as they are stored in ...weight.absmax.
-        """
-        if self.pre_quantized:
-            # We need to get the param name of quantized weights and not its components. Otherwise, we won't be able to get the nn.Module associated.
-            if any(param_name.endswith(x) for x in self.bnb_keys):
-                param_name = (
-                    param_name.rsplit(".", 1)[0] if "quant_state." not in param_name else param_name.rsplit(".", 2)[0]
-                )
-        return param_name
-
-    def create_quantized_param(
-        self,
-        model: "PreTrainedModel",
-        param_value: "torch.Tensor",
-        param_name: str,
-        target_device: "torch.device",
-        **kwargs,
-    ):
-        import bitsandbytes as bnb
-
-        full_name = param_name
-
-        # update param name to get the weights instead of the quantized stats
-        param_name = self.get_param_name(param_name)
-        module, tensor_name = get_module_from_name(model, param_name)
-
-        # `torch.Tensor.to(<int num>)` is not supported by `torch_npu` (see this [issue](https://github.com/Ascend/pytorch/issues/16)).
-        if isinstance(target_device, int) and is_torch_npu_available():
-            target_device = f"npu:{target_device}"
-
-        # construct `new_value` for the module._parameters[tensor_name]
-        if self.pre_quantized:
-            module_name = param_name.rsplit(".", 1)[0]
-            # Save the states for later quantization when they are all gathered
-            if not hasattr(self, "param_quant_stats"):
-                self.param_quant_stats = defaultdict(dict)
-            self.param_quant_stats[module_name].update({full_name: param_value})
-
-            # We are ready for quantization in this case (note, the +1 is for the weight itself)
-            if len(self.param_quant_stats[module_name]) == len(self.bnb_keys) + 1:
-                weight = self.param_quant_stats[module_name].pop(f"{module_name}.weight")
-                new_value = bnb.nn.Params4bit.from_prequantized(
-                    data=weight,
-                    quantized_stats=self.param_quant_stats[module_name],
-                    requires_grad=False,
-                    device=target_device,
-                    module=module,
-                )
-                # Set it
-                module._parameters[tensor_name] = new_value
-                # Delete the states
-                del self.param_quant_stats[module_name]
-        else:
-            new_value = param_value.to("cpu")
-            old_value = getattr(module, tensor_name)
-
-            # Support models using `Conv1D` in place of `nn.Linear` (e.g. openai-community/gpt2) by transposing the weight matrix prior to quantization.
-            # Since weights are saved in the correct "orientation", we skip transposing when loading.
-            if issubclass(module.source_cls, Conv1D):
-                new_value = new_value.T
-
-            kwargs = old_value.__dict__
-            kwargs.pop("_is_hf_initialized", None)
-            new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(target_device)
-
-            module._parameters[tensor_name] = new_value
-
-    # Copied from transformers.quantizers.quantizer_bnb_8bit.Bnb8BitHfQuantizer.adjust_max_memory
     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
         # need more space for buffers that are created during quantization
         max_memory = {key: val * 0.90 for key, val in max_memory.items()}
         return max_memory
 
-    # Copied from transformers.quantizers.quantizer_bnb_8bit.Bnb8BitHfQuantizer.update_dtype
-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            # We force the `dtype` to be float16, this is a requirement from `bitsandbytes`
-            logger.info(
-                "Overriding dtype=%s with `dtype=torch.float16` due to "
-                "requirements of `bitsandbytes` to enable model loading in 8-bit or 4-bit. "
-                "Pass your own dtype to specify the dtype of the remaining non-linear layers or pass"
-                " dtype=torch.float16 to remove this warning.",
-                dtype,
-            )
-            dtype = torch.float16
-        return dtype
-
     def update_device_map(self, device_map):
         if device_map is None:
             if torch.cuda.is_available():
@@ -238,33 +116,23 @@ class Bnb4BitHfQuantizer(HfQuantizer):
             )
         return device_map
 
-    # Copied from transformers.quantizers.quantizer_bnb_8bit.Bnb8BitHfQuantizer._process_model_before_weight_loading
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
         device_map,
-        keep_in_fp32_modules: list[str] | None = None,
         **kwargs,
     ):
         from ..integrations import replace_with_bnb_linear
 
-        llm_int8_enable_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload
-
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.llm_int8_skip_modules, keep_in_fp32_modules
+            model, self.quantization_config.llm_int8_skip_modules, model._keep_in_fp32_modules
         )
 
-        # Extend `self.modules_to_not_convert` to keys that are supposed to be offloaded to `cpu` or `disk`
-        if isinstance(device_map, dict) and len(device_map.keys()) > 1:
-            keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]
+        if self.quantization_config.llm_int8_enable_fp32_cpu_offload:
+            if isinstance(device_map, dict):
+                keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]
+                self.modules_to_not_convert.extend(keys_on_cpu)
 
-            if len(keys_on_cpu) > 0 and not llm_int8_enable_fp32_cpu_offload:
-                raise ValueError(
-                    "If you want to offload some keys to `cpu` or `disk`, you need to set "
-                    "`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be "
-                    " converted to 8-bit but kept in 32-bit."
-                )
-            self.modules_to_not_convert.extend(keys_on_cpu)
         model = replace_with_bnb_linear(
             model,
             modules_to_not_convert=self.modules_to_not_convert,
@@ -272,27 +140,22 @@ class Bnb4BitHfQuantizer(HfQuantizer):
             pre_quantized=self.pre_quantized,
         )
 
-        model.config.quantization_config = self.quantization_config
-
-    # Copied from transformers.quantizers.quantizer_bnb_8bit.Bnb8BitHfQuantizer._process_model_after_weight_loading with 8bit->4bit
     def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
         model.is_loaded_in_4bit = True
         model.is_4bit_serializable = self.is_serializable()
         return model
 
-    def is_serializable(self, safe_serialization=None):
+    def is_serializable(self):
         return True
 
     @property
     def is_trainable(self) -> bool:
         return True
 
-    def _dequantize(self, model):
+    def _dequantize(self, model, dtype=None):
         from ..integrations import dequantize_and_replace
 
-        model = dequantize_and_replace(
-            model, self.modules_to_not_convert, quantization_config=self.quantization_config
-        )
+        model = dequantize_and_replace(model, quantization_config=self.quantization_config, dtype=dtype)
         return model
 
     def get_quantize_ops(self):
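Taken together, the hunks above remove the per-parameter `create_quantized_param`/`bnb_keys` machinery from the 4-bit quantizer and replace `adjust_target_dtype` with `param_element_size`, which simply reports 0.5 bytes per element for weights that will be quantized. A rough, self-contained sketch of the memory accounting this kind of hook enables (hypothetical helper, not the library's loader):

    import torch

    def estimate_param_bytes(named_tensors, quantized_names):
        # Sum element_size * numel, using 0.5 bytes for 4-bit quantized weights
        # and the tensor's native element size for everything else.
        total = 0.0
        for name, tensor in named_tensors:
            element_size = 0.5 if name in quantized_names else tensor.element_size()
            total += element_size * tensor.numel()
        return total

    w = torch.empty(4096, 4096, dtype=torch.float16)
    print(estimate_param_bytes([("w", w)], {"w"}))   # ~8.4e6 bytes once packed to 4-bit
    print(estimate_param_bytes([("w", w)], set()))   # ~3.4e7 bytes kept in fp16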
@@ -25,6 +25,8 @@ from ..utils import (
     is_accelerate_available,
     is_bitsandbytes_available,
     is_torch_available,
+    is_torch_hpu_available,
+    is_torch_npu_available,
     is_torch_xpu_available,
     logging,
 )
@@ -35,34 +37,20 @@ if is_torch_available():
     import torch
 
     from ..core_model_loading import WeightConverter
-    from ..pytorch_utils import Conv1D
 
 logger = logging.get_logger(__name__)
 
 
 class Bnb8BitHfQuantizer(HfQuantizer):
     """
-    8-bit quantization from bitsandbytes quantization method:
-    before loading: converts transformer layers into Linear8bitLt during loading: load 16bit weight and pass to the
-    layer object after: quantizes individual weights in Linear8bitLt into 8bit at fitst .cuda() call
-    saving:
-        from state dict, as usual; saves weights and 'SCB' component
-    loading:
-        need to locate SCB component and pass to the Linear8bitLt object
+    8-bit quantization from bitsandbytes quantization method
     """
 
-    use_keep_in_fp32_modules = True
-    requires_parameters_quantization = True
     requires_calibration = False
 
-    required_packages = ["bitsandbytes", "accelerate"]
-
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)
 
-        if self.quantization_config.llm_int8_skip_modules is not None:
-            self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules
-
     def validate_environment(self, *args, **kwargs):
         if not is_accelerate_available():
             raise ImportError(
@@ -78,17 +66,9 @@ class Bnb8BitHfQuantizer(HfQuantizer):
         validate_bnb_backend_availability(raise_exception=True)
 
         device_map = kwargs.get("device_map")
-        if (
-            device_map is not None
-            and isinstance(device_map, dict)
-            and not self.quantization_config.llm_int8_enable_fp32_cpu_offload
-        ):
-            device_map_without_lm_head = {
-                key: device_map[key] for key in device_map if key not in self.modules_to_not_convert
-            }
-            if set(device_map.values()) == {"cpu"}:
-                pass
-            elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
+        if not self.quantization_config.llm_int8_enable_fp32_cpu_offload and isinstance(device_map, dict):
+            values = set(device_map.values())
+            if values != {"cpu"} and ("cpu" in values or "disk" in values):
                 raise ValueError(
                     "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
                     "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
@@ -103,23 +83,14 @@ class Bnb8BitHfQuantizer(HfQuantizer):
         max_memory = {key: val * 0.90 for key, val in max_memory.items()}
         return max_memory
 
-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            # We force the `dtype` to be float16, this is a requirement from `bitsandbytes`
-            logger.info(
-                "Overriding dtype=%s with `dtype=torch.float16` due to "
-                "requirements of `bitsandbytes` to enable model loading in 8-bit or 4-bit. "
-                "Pass your own dtype to specify the dtype of the remaining non-linear layers or pass"
-                " dtype=torch.float16 to remove this warning.",
-                dtype,
-            )
-            dtype = torch.float16
-        return dtype
-
     def update_device_map(self, device_map):
         if device_map is None:
             if torch.cuda.is_available():
                 device_map = {"": torch.cuda.current_device()}
+            elif is_torch_npu_available():
+                device_map = {"": f"npu:{torch.npu.current_device()}"}
+            elif is_torch_hpu_available():
+                device_map = {"": f"hpu:{torch.hpu.current_device()}"}
             elif is_torch_xpu_available():
                 device_map = {"": torch.xpu.current_device()}
             else:
@@ -131,14 +102,12 @@ class Bnb8BitHfQuantizer(HfQuantizer):
             )
         return device_map
 
-    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
-        if target_dtype != torch.int8:
-            logger.info("target_dtype {target_dtype} is replaced by `torch.int8` for 8-bit BnB quantization")
-        return torch.int8
-
-    def update_unexpected_keys(self, model, unexpected_keys: list[str]) -> list[str]:
-        bnb_keys = ["SCB", "weight_format"]
-        return [k for k in unexpected_keys if not any(k.endswith(x) for x in bnb_keys)]
+    def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
+        "Return the element size (in bytes) for `param_name`."
+        if self.param_needs_quantization(model, param_name):
+            # 8-bit
+            return 1
+        return super().param_element_size(model, param_name, param)
 
     def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
         import bitsandbytes as bnb
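The `update_device_map` hunk above adds Ascend NPU and Intel Gaudi (HPU) devices to the fallback chain used when no device_map is supplied. A simplified, self-contained sketch of that probing order (plain attribute checks stand in for the `is_torch_*_available` helpers so the snippet runs anywhere):

    import torch

    def default_single_device_map():
        # Probe accelerators in the same order as the hunk: CUDA, NPU, HPU, then XPU.
        if torch.cuda.is_available():
            return {"": torch.cuda.current_device()}
        if hasattr(torch, "npu") and torch.npu.is_available():
            return {"": f"npu:{torch.npu.current_device()}"}
        if hasattr(torch, "hpu") and torch.hpu.is_available():
            return {"": f"hpu:{torch.hpu.current_device()}"}
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            return {"": torch.xpu.current_device()}
        return None  # the no-accelerator case is handled separately (not shown in the hunk)

    print(default_single_device_map())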
@@ -146,47 +115,6 @@ class Bnb8BitHfQuantizer(HfQuantizer):
         module, name = get_module_from_name(model, param_name)
         return isinstance(module, bnb.nn.Linear8bitLt) and name != "bias"
 
-    def create_quantized_param(
-        self,
-        model: "PreTrainedModel",
-        param_value: "torch.Tensor",
-        param_name: str,
-        target_device: "torch.device",
-        **kwargs,
-    ):
-        import bitsandbytes as bnb
-
-        module, tensor_name = get_module_from_name(model, param_name)
-
-        if self.pre_quantized and not self.is_serializable():
-            raise ValueError(
-                "Detected int8 weights but the version of bitsandbytes is not compatible with int8 serialization. "
-                "Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`."
-            )
-        # Those 2 can only happen when self.pre_quantized == True
-        if tensor_name == "SCB":
-            setattr(module.weight, "SCB", param_value.to(target_device))
-            return
-        # It's not used, but it's getting serialized for BC reason...
-        elif tensor_name == "weight_format":
-            return
-
-        # Support models using `Conv1D` in place of `nn.Linear` (e.g. openai-community/gpt2) by transposing the weight matrix prior to quantization.
-        # Since weights are saved in the correct "orientation", we skip transposing when loading.
-        if issubclass(module.source_cls, Conv1D) and not self.pre_quantized:
-            param_value = param_value.T
-
-        old_value = getattr(module, tensor_name)
-        kwargs = old_value.__dict__
-        kwargs.pop("_is_hf_initialized", None)
-        # Need to pop SCB and reset it because of bnb internals that modifies its value when switching devices ...
-        SCB = kwargs.pop("SCB", None)
-        new_value = bnb.nn.Int8Params(param_value.to("cpu"), requires_grad=False, **kwargs).to(target_device)
-        if SCB is not None:
-            setattr(new_value, "SCB", SCB)
-        # Set it to the module
-        module._parameters[tensor_name] = new_value
-
     def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
         model.is_loaded_in_8bit = True
         model.is_8bit_serializable = self.is_serializable()
@@ -196,28 +124,18 @@ class Bnb8BitHfQuantizer(HfQuantizer):
         self,
         model: "PreTrainedModel",
         device_map,
-        keep_in_fp32_modules: list[str] | None = None,
         **kwargs,
     ):
         from ..integrations import replace_with_bnb_linear
 
-        llm_int8_enable_fp32_cpu_offload = self.quantization_config.llm_int8_enable_fp32_cpu_offload
-
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.llm_int8_skip_modules, keep_in_fp32_modules
+            model, self.quantization_config.llm_int8_skip_modules, model._keep_in_fp32_modules
         )
 
-        # Extend `self.modules_to_not_convert` to keys that are supposed to be offloaded to `cpu` or `disk`
-        if isinstance(device_map, dict) and len(device_map.keys()) > 1:
-            keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]
-
-            if len(keys_on_cpu) > 0 and not llm_int8_enable_fp32_cpu_offload:
-                raise ValueError(
-                    "If you want to offload some keys to `cpu` or `disk`, you need to set "
-                    "`llm_int8_enable_fp32_cpu_offload=True`. Note that these modules will not be "
-                    " converted to 8-bit but kept in 32-bit."
-                )
-            self.modules_to_not_convert.extend(keys_on_cpu)
+        if self.quantization_config.llm_int8_enable_fp32_cpu_offload:
+            if isinstance(device_map, dict):
+                keys_on_cpu = [key for key, value in device_map.items() if value in ["disk", "cpu"]]
+                self.modules_to_not_convert.extend(keys_on_cpu)
 
         model = replace_with_bnb_linear(
             model,
@@ -226,21 +144,17 @@ class Bnb8BitHfQuantizer(HfQuantizer):
             pre_quantized=self.pre_quantized,
         )
 
-        model.config.quantization_config = self.quantization_config
-
-    def is_serializable(self, safe_serialization=None):
+    def is_serializable(self):
         return True
 
     @property
     def is_trainable(self) -> bool:
         return True
 
-    def _dequantize(self, model):
+    def _dequantize(self, model, dtype=None):
         from ..integrations import dequantize_and_replace
 
-        model = dequantize_and_replace(
-            model, self.modules_to_not_convert, quantization_config=self.quantization_config
-        )
+        model = dequantize_and_replace(model, quantization_config=self.quantization_config, dtype=dtype)
         return model
 
     def get_quantize_ops(self):
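`_dequantize` now forwards an optional `dtype` to `dequantize_and_replace`, so the caller can choose the precision of the restored weights. As a self-contained illustration of what that choice means for a bitsandbytes-style int8 linear weight (the row-wise rescaling below is the usual LLM.int8() formula, written out as an assumption rather than the integration's actual code):

    import torch

    def dequantize_int8_rowwise(weight_int8, scb, dtype=torch.float16):
        # Each row was scaled so its absolute maximum maps to 127; SCB stores that
        # per-row maximum. `dtype` selects the output precision.
        return (weight_int8.to(torch.float32) * scb.unsqueeze(1) / 127.0).to(dtype)

    w_q = torch.randint(-127, 128, (8, 16), dtype=torch.int8)
    scb = torch.rand(8) * 2.0
    print(dequantize_int8_rowwise(w_q, scb, dtype=torch.bfloat16).dtype)  # torch.bfloat16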
@@ -31,7 +31,6 @@ class CompressedTensorsHfQuantizer(HfQuantizer):
     """
 
     requires_calibration = True
-    required_packages = ["compressed_tensors"]
 
     def __init__(self, quantization_config: CompressedTensorsConfig, **kwargs):
         super().__init__(quantization_config, **kwargs)
@@ -58,15 +57,9 @@ class CompressedTensorsHfQuantizer(HfQuantizer):
                 "Using `compressed_tensors` quantized models requires the compressed-tensors library: "
                 "`pip install compressed-tensors`"
             )
-        if not is_torch_available():
-            # torch already should be installed as part of compressed tensors
-            raise ImportError("torch is required for using compressed-tensors quantization")
 
     def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            logger.info("Loading model using torch.float16 for compressed-tensors quantization")
-            dtype = torch.float16
-        elif dtype != torch.float16:
+        if dtype != torch.float16:
             logger.info("We suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.")
         return dtype
 
@@ -113,6 +106,6 @@ class CompressedTensorsHfQuantizer(HfQuantizer):
         # models need to be decompressed carry out qat
         return not self.run_compressed or not self.quantization_config.is_quantization_compressed
 
-    def is_serializable(self, safe_serialization=None) -> bool:
+    def is_serializable(self) -> bool:
         """Models quantized using compressed tensors can be saved to disk"""
         return True
@@ -19,7 +19,7 @@ from .base import HfQuantizer
 if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel
 
-from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
+from ..utils import is_accelerate_available, is_kernels_available, is_torch_available, logging
 from .quantizers_utils import get_module_from_name
 
 
@@ -32,40 +32,17 @@ logger = logging.get_logger(__name__)
 
 class EetqHfQuantizer(HfQuantizer):
     """
-    8-bit quantization from EETQ quantization method:
-    before loading: converts transformer layers into W8A16Linear during loading: load 16bit weight and pass to the
-    layer object after: quantizes individual weights in Linear8bitLt into 8bit at first .cuda() call
+    8-bit quantization from EETQ quantization method
     """
 
-    requires_parameters_quantization = True
     requires_calibration = False
 
-    required_packages = ["eetq", "accelerate"]
-
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)
-        self.quantization_config = quantization_config
 
     def validate_environment(self, *args, **kwargs):
-        if not is_eetq_available():
-            raise ImportError(
-                "Using `eetq` 8-bit quantization requires eetq."
-                "Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQ"
-            )
-
-        try:
-            import eetq  # noqa: F401
-        except ImportError as exc:
-            if "shard_checkpoint" in str(exc):
-                # EETQ 1.0.0 is currently broken with the latest transformers because it tries to import the removed
-                # shard_checkpoint function, see https://github.com/NetEase-FuXi/EETQ/issues/34.
-                # TODO: Update message once eetq releases a fix
-                raise ImportError(
-                    "You are using a version of EETQ that is incompatible with the current transformers version. "
-                    "Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
-                ) from exc
-            else:
-                raise
+        if not is_kernels_available():
+            raise ImportError("Loading an EETQ quantized model requires kernels (`pip install kernels`)")
 
         if not is_accelerate_available():
             raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")
@@ -79,29 +56,20 @@ class EetqHfQuantizer(HfQuantizer):
                 "You have loaded an EETQ model on CPU and have a CUDA device available, make sure to set "
                 "your model on a GPU device in order to run your model."
             )
-        elif device_map is not None:
-            if isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
+        elif isinstance(device_map, dict):
+            if len(device_map) > 1 and "cpu" in device_map.values() or "disk" in device_map.values():
                 raise ValueError(
                     "You are attempting to load an EETQ model with a device_map that contains a CPU or disk device."
                     " This is not supported. Please remove the CPU or disk device from the device_map."
                 )
 
     def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            dtype = torch.float16
-            logger.info(
-                "Overriding dtype=%s with `dtype=torch.float16` due to "
-                "requirements of `eetq` to enable model loading in 8-bit. "
-                "Pass your own dtype to specify the dtype of the remaining non-linear layers or pass"
-                " dtype=torch.float16 to remove this warning.",
-                dtype,
-            )
-        elif dtype != torch.float16:
+        if dtype != torch.float16:
             logger.info("We suggest you to set `dtype=torch.float16` for better efficiency with EETQ.")
         return dtype
 
     def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
-        from eetq import EetqLinear
+        from ..integrations.eetq import EetqLinear
 
         module, tensor_name = get_module_from_name(model, param_name)
 
@@ -112,55 +80,29 @@ class EetqHfQuantizer(HfQuantizer):
             return True
         return False
 
-    def create_quantized_param(
-        self,
-        model: "PreTrainedModel",
-        param_value: "torch.Tensor",
-        param_name: str,
-        target_device: "torch.device",
-        **kwargs,
-    ):
-        from eetq import EetqLinear, quantize_and_preprocess_weights
-
-        module, tensor_name = get_module_from_name(model, param_name)
-        new_value, weight_scale = quantize_and_preprocess_weights(param_value)
-
-        # Samity check
-        if isinstance(module, EetqLinear):
-            if self.pre_quantized or tensor_name == "bias":
-                if tensor_name == "weight" and param_value.dtype != torch.int8:
-                    raise ValueError("Expect quantized weights but got an unquantized weight")
-            else:
-                if tensor_name == "weight_scale":
-                    raise ValueError("Expect unquantized weights but got a quantized weight_scale")
-
-        module._buffers[tensor_name] = new_value.to(target_device)
-        module.register("weight_scales", weight_scale.to(target_device))
-
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
-        keep_in_fp32_modules: list[str] | None = None,
         **kwargs,
     ):
         from ..integrations import replace_with_eetq_linear
 
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
        )
 
        model = replace_with_eetq_linear(
-            model,
-            modules_to_not_convert=self.modules_to_not_convert,
-            quantization_config=self.quantization_config,
-            pre_quantized=self.pre_quantized,
+            model, modules_to_not_convert=self.modules_to_not_convert, pre_quantized=self.pre_quantized
        )
 
-        model.config.quantization_config = self.quantization_config
-
-    def is_serializable(self, safe_serialization=None):
+    def is_serializable(self):
         return True
 
     @property
     def is_trainable(self) -> bool:
         return True
+
+    def get_quantize_ops(self):
+        from ..integrations.eetq import EetqQuantize
+
+        return EetqQuantize(self)
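The EETQ quantizer now ends with `get_quantize_ops` returning an `EetqQuantize` object from `..integrations.eetq`, instead of quantizing parameters one by one inside the quantizer (the deleted `create_quantized_param` above). The interface of that object is not shown in this diff; purely as an illustration of the kind of weight-only 8-bit transform such an op performs, a hypothetical sketch (class and method names are made up, only the constructor-takes-the-quantizer shape comes from the added lines):

    import torch

    class QuantizeOpSketch:
        """Hypothetical stand-in for the object returned by get_quantize_ops."""

        def __init__(self, quantizer):
            self.quantizer = quantizer

        def quantize(self, weight: torch.Tensor):
            # Symmetric per-channel int8 weight-only quantization, standing in for
            # the W8A16 preprocessing an EETQ kernel would perform.
            scale = weight.abs().amax(dim=1).clamp(min=1e-8) / 127.0
            q = torch.round(weight / scale.unsqueeze(1)).to(torch.int8)
            return q, scale.to(torch.float16)

    q, scale = QuantizeOpSketch(quantizer=None).quantize(torch.randn(32, 64))
    print(q.dtype, scale.shape)  # torch.int8 torch.Size([32])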