transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (835)
  1. transformers/__init__.py +49 -3
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/cli/serve.py +47 -17
  6. transformers/configuration_utils.py +114 -70
  7. transformers/conversion_mapping.py +83 -7
  8. transformers/convert_slow_tokenizer.py +225 -10
  9. transformers/core_model_loading.py +374 -147
  10. transformers/data/data_collator.py +12 -4
  11. transformers/dependency_versions_table.py +2 -3
  12. transformers/dynamic_module_utils.py +1 -2
  13. transformers/feature_extraction_utils.py +55 -24
  14. transformers/file_utils.py +0 -1
  15. transformers/generation/__init__.py +11 -1
  16. transformers/generation/candidate_generator.py +79 -31
  17. transformers/generation/configuration_utils.py +165 -124
  18. transformers/generation/continuous_batching/__init__.py +4 -0
  19. transformers/generation/continuous_batching/cache.py +47 -18
  20. transformers/generation/continuous_batching/cache_manager.py +131 -34
  21. transformers/generation/continuous_batching/continuous_api.py +228 -136
  22. transformers/generation/continuous_batching/requests.py +28 -1
  23. transformers/generation/continuous_batching/scheduler.py +11 -4
  24. transformers/generation/stopping_criteria.py +1 -1
  25. transformers/generation/utils.py +108 -110
  26. transformers/generation/watermarking.py +8 -5
  27. transformers/image_processing_base.py +3 -14
  28. transformers/image_processing_utils_fast.py +15 -4
  29. transformers/initialization.py +37 -0
  30. transformers/integrations/__init__.py +16 -2
  31. transformers/integrations/accelerate.py +58 -113
  32. transformers/integrations/aqlm.py +36 -66
  33. transformers/integrations/awq.py +46 -515
  34. transformers/integrations/bitnet.py +47 -105
  35. transformers/integrations/bitsandbytes.py +91 -202
  36. transformers/integrations/deepspeed.py +18 -2
  37. transformers/integrations/eetq.py +84 -81
  38. transformers/integrations/fbgemm_fp8.py +191 -145
  39. transformers/integrations/finegrained_fp8.py +241 -208
  40. transformers/integrations/flash_attention.py +2 -2
  41. transformers/integrations/fp_quant.py +92 -0
  42. transformers/integrations/ggml.py +11 -1
  43. transformers/integrations/higgs.py +37 -62
  44. transformers/integrations/hub_kernels.py +65 -8
  45. transformers/integrations/integration_utils.py +45 -0
  46. transformers/integrations/mistral.py +12 -0
  47. transformers/integrations/moe.py +240 -0
  48. transformers/integrations/mxfp4.py +28 -74
  49. transformers/integrations/peft.py +12 -29
  50. transformers/integrations/quanto.py +77 -56
  51. transformers/integrations/quark.py +55 -0
  52. transformers/integrations/spqr.py +42 -90
  53. transformers/integrations/tensor_parallel.py +167 -221
  54. transformers/integrations/torchao.py +32 -38
  55. transformers/integrations/vptq.py +40 -59
  56. transformers/modelcard.py +1 -2
  57. transformers/modeling_gguf_pytorch_utils.py +74 -19
  58. transformers/modeling_rope_utils.py +107 -86
  59. transformers/modeling_utils.py +611 -527
  60. transformers/models/__init__.py +22 -0
  61. transformers/models/afmoe/modeling_afmoe.py +10 -19
  62. transformers/models/afmoe/modular_afmoe.py +5 -13
  63. transformers/models/aimv2/modeling_aimv2.py +4 -0
  64. transformers/models/aimv2/modular_aimv2.py +4 -0
  65. transformers/models/albert/modeling_albert.py +3 -0
  66. transformers/models/albert/tokenization_albert.py +6 -12
  67. transformers/models/align/modeling_align.py +14 -6
  68. transformers/models/altclip/modeling_altclip.py +11 -3
  69. transformers/models/apertus/modeling_apertus.py +8 -6
  70. transformers/models/apertus/modular_apertus.py +4 -1
  71. transformers/models/arcee/modeling_arcee.py +5 -5
  72. transformers/models/aria/modeling_aria.py +12 -8
  73. transformers/models/aria/modular_aria.py +7 -3
  74. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  75. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  76. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  77. transformers/models/auto/auto_factory.py +1 -1
  78. transformers/models/auto/configuration_auto.py +38 -0
  79. transformers/models/auto/feature_extraction_auto.py +9 -3
  80. transformers/models/auto/image_processing_auto.py +5 -2
  81. transformers/models/auto/modeling_auto.py +37 -0
  82. transformers/models/auto/processing_auto.py +22 -10
  83. transformers/models/auto/tokenization_auto.py +147 -566
  84. transformers/models/auto/video_processing_auto.py +5 -2
  85. transformers/models/autoformer/modeling_autoformer.py +4 -0
  86. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  87. transformers/models/bamba/modeling_bamba.py +21 -21
  88. transformers/models/bamba/modular_bamba.py +17 -16
  89. transformers/models/bark/modeling_bark.py +11 -0
  90. transformers/models/bart/configuration_bart.py +0 -1
  91. transformers/models/bart/modeling_bart.py +14 -0
  92. transformers/models/barthez/tokenization_barthez.py +5 -10
  93. transformers/models/beit/image_processing_beit_fast.py +0 -1
  94. transformers/models/beit/modeling_beit.py +6 -1
  95. transformers/models/bert/modeling_bert.py +3 -0
  96. transformers/models/bert/tokenization_bert.py +8 -21
  97. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  98. transformers/models/big_bird/modeling_big_bird.py +9 -0
  99. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  100. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +15 -2
  101. transformers/models/biogpt/modeling_biogpt.py +2 -0
  102. transformers/models/biogpt/modular_biogpt.py +2 -0
  103. transformers/models/bit/modeling_bit.py +16 -3
  104. transformers/models/bitnet/modeling_bitnet.py +5 -5
  105. transformers/models/blenderbot/modeling_blenderbot.py +12 -0
  106. transformers/models/blenderbot/tokenization_blenderbot.py +18 -23
  107. transformers/models/blenderbot_small/modeling_blenderbot_small.py +12 -0
  108. transformers/models/blip/modeling_blip.py +2 -0
  109. transformers/models/blip/modeling_blip_text.py +10 -0
  110. transformers/models/blip_2/modeling_blip_2.py +4 -1
  111. transformers/models/bloom/modeling_bloom.py +17 -44
  112. transformers/models/blt/modeling_blt.py +164 -4
  113. transformers/models/blt/modular_blt.py +170 -5
  114. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  115. transformers/models/bridgetower/modeling_bridgetower.py +11 -1
  116. transformers/models/bros/modeling_bros.py +12 -0
  117. transformers/models/camembert/modeling_camembert.py +109 -106
  118. transformers/models/camembert/tokenization_camembert.py +8 -12
  119. transformers/models/canine/modeling_canine.py +11 -0
  120. transformers/models/canine/tokenization_canine.py +2 -0
  121. transformers/models/chameleon/modeling_chameleon.py +11 -5
  122. transformers/models/chinese_clip/modeling_chinese_clip.py +9 -3
  123. transformers/models/clap/feature_extraction_clap.py +2 -2
  124. transformers/models/clap/modeling_clap.py +30 -15
  125. transformers/models/clip/modeling_clip.py +2 -0
  126. transformers/models/clip/tokenization_clip.py +22 -44
  127. transformers/models/clipseg/modeling_clipseg.py +9 -0
  128. transformers/models/clvp/modeling_clvp.py +19 -3
  129. transformers/models/clvp/tokenization_clvp.py +1 -63
  130. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  131. transformers/models/codegen/modeling_codegen.py +13 -4
  132. transformers/models/codegen/tokenization_codegen.py +14 -43
  133. transformers/models/cohere/modeling_cohere.py +5 -4
  134. transformers/models/cohere/modular_cohere.py +2 -1
  135. transformers/models/cohere/tokenization_cohere.py +12 -42
  136. transformers/models/cohere2/modeling_cohere2.py +8 -7
  137. transformers/models/cohere2/modular_cohere2.py +5 -5
  138. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -4
  139. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  140. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  141. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  142. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  143. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  144. transformers/models/conditional_detr/modeling_conditional_detr.py +9 -1
  145. transformers/models/convbert/modeling_convbert.py +9 -0
  146. transformers/models/convnext/image_processing_convnext.py +2 -2
  147. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  148. transformers/models/convnext/modeling_convnext.py +2 -4
  149. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  150. transformers/models/csm/generation_csm.py +19 -22
  151. transformers/models/csm/modeling_csm.py +7 -4
  152. transformers/models/csm/modular_csm.py +2 -0
  153. transformers/models/ctrl/modeling_ctrl.py +15 -2
  154. transformers/models/cvt/modeling_cvt.py +7 -1
  155. transformers/models/cwm/modeling_cwm.py +5 -5
  156. transformers/models/d_fine/configuration_d_fine.py +3 -4
  157. transformers/models/d_fine/modeling_d_fine.py +48 -39
  158. transformers/models/d_fine/modular_d_fine.py +16 -4
  159. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  160. transformers/models/dab_detr/modeling_dab_detr.py +5 -1
  161. transformers/models/dac/modeling_dac.py +6 -6
  162. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  163. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  164. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  165. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  166. transformers/models/dbrx/configuration_dbrx.py +9 -1
  167. transformers/models/dbrx/modeling_dbrx.py +3 -3
  168. transformers/models/deberta/modeling_deberta.py +7 -0
  169. transformers/models/deberta/tokenization_deberta.py +11 -20
  170. transformers/models/deberta_v2/modeling_deberta_v2.py +8 -0
  171. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  172. transformers/models/decision_transformer/modeling_decision_transformer.py +12 -6
  173. transformers/models/deepseek_v2/modeling_deepseek_v2.py +9 -7
  174. transformers/models/deepseek_v2/modular_deepseek_v2.py +6 -4
  175. transformers/models/deepseek_v3/modeling_deepseek_v3.py +12 -7
  176. transformers/models/deepseek_v3/modular_deepseek_v3.py +7 -2
  177. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  178. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  179. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  180. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  181. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  182. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  183. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  184. transformers/models/deformable_detr/modeling_deformable_detr.py +5 -1
  185. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  186. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  187. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  188. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  189. transformers/models/detr/configuration_detr.py +1 -1
  190. transformers/models/detr/modeling_detr.py +13 -1
  191. transformers/models/dia/generation_dia.py +3 -10
  192. transformers/models/dia/modeling_dia.py +16 -4
  193. transformers/models/dia/modular_dia.py +11 -1
  194. transformers/models/dia/processing_dia.py +1 -1
  195. transformers/models/diffllama/modeling_diffllama.py +5 -5
  196. transformers/models/diffllama/modular_diffllama.py +2 -2
  197. transformers/models/dinat/modeling_dinat.py +3 -0
  198. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  199. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  200. transformers/models/dinov3_vit/modeling_dinov3_vit.py +5 -2
  201. transformers/models/dinov3_vit/modular_dinov3_vit.py +5 -2
  202. transformers/models/distilbert/modeling_distilbert.py +11 -9
  203. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  204. transformers/models/doge/modeling_doge.py +3 -4
  205. transformers/models/doge/modular_doge.py +0 -1
  206. transformers/models/donut/image_processing_donut_fast.py +0 -1
  207. transformers/models/donut/modeling_donut_swin.py +18 -12
  208. transformers/models/dots1/modeling_dots1.py +23 -11
  209. transformers/models/dots1/modular_dots1.py +5 -3
  210. transformers/models/dpr/modeling_dpr.py +5 -0
  211. transformers/models/dpr/tokenization_dpr.py +12 -0
  212. transformers/models/dpt/configuration_dpt.py +1 -1
  213. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  214. transformers/models/dpt/modular_dpt.py +1 -2
  215. transformers/models/edgetam/configuration_edgetam.py +1 -1
  216. transformers/models/edgetam/modeling_edgetam.py +6 -3
  217. transformers/models/edgetam/modular_edgetam.py +15 -14
  218. transformers/models/edgetam_video/modeling_edgetam_video.py +56 -43
  219. transformers/models/edgetam_video/modular_edgetam_video.py +14 -19
  220. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  221. transformers/models/efficientloftr/modeling_efficientloftr.py +16 -3
  222. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  223. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  224. transformers/models/efficientnet/modeling_efficientnet.py +7 -1
  225. transformers/models/electra/modeling_electra.py +7 -0
  226. transformers/models/emu3/modeling_emu3.py +12 -6
  227. transformers/models/emu3/modular_emu3.py +7 -1
  228. transformers/models/encodec/modeling_encodec.py +14 -0
  229. transformers/models/eomt/image_processing_eomt.py +13 -1
  230. transformers/models/eomt/image_processing_eomt_fast.py +60 -16
  231. transformers/models/eomt/modeling_eomt.py +7 -0
  232. transformers/models/eomt/modular_eomt.py +7 -0
  233. transformers/models/ernie/modeling_ernie.py +6 -0
  234. transformers/models/ernie/modular_ernie.py +6 -0
  235. transformers/models/ernie4_5/modeling_ernie4_5.py +5 -5
  236. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  237. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +20 -17
  238. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +11 -37
  239. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  240. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  241. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  242. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  243. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  244. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  245. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  246. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  247. transformers/models/esm/modeling_esm.py +6 -0
  248. transformers/models/esm/modeling_esmfold.py +11 -5
  249. transformers/models/evolla/modeling_evolla.py +13 -5
  250. transformers/models/evolla/modular_evolla.py +8 -0
  251. transformers/models/exaone4/modeling_exaone4.py +3 -3
  252. transformers/models/exaone4/modular_exaone4.py +0 -1
  253. transformers/models/falcon/modeling_falcon.py +9 -4
  254. transformers/models/falcon_h1/modeling_falcon_h1.py +32 -26
  255. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  256. transformers/models/falcon_mamba/modeling_falcon_mamba.py +31 -37
  257. transformers/models/falcon_mamba/modular_falcon_mamba.py +19 -33
  258. transformers/models/fast_vlm/__init__.py +27 -0
  259. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  260. transformers/models/fast_vlm/modeling_fast_vlm.py +459 -0
  261. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  262. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +31 -13
  263. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  264. transformers/models/flaubert/modeling_flaubert.py +21 -15
  265. transformers/models/flava/image_processing_flava_fast.py +0 -2
  266. transformers/models/flava/modeling_flava.py +10 -2
  267. transformers/models/flex_olmo/modeling_flex_olmo.py +10 -8
  268. transformers/models/florence2/modeling_florence2.py +22 -4
  269. transformers/models/florence2/modular_florence2.py +15 -1
  270. transformers/models/fnet/modeling_fnet.py +14 -0
  271. transformers/models/focalnet/modeling_focalnet.py +4 -0
  272. transformers/models/fsmt/modeling_fsmt.py +2 -0
  273. transformers/models/funnel/modeling_funnel.py +8 -0
  274. transformers/models/funnel/tokenization_funnel.py +17 -24
  275. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  276. transformers/models/fuyu/modeling_fuyu.py +3 -1
  277. transformers/models/fuyu/processing_fuyu.py +19 -3
  278. transformers/models/gemma/modeling_gemma.py +14 -16
  279. transformers/models/gemma/modular_gemma.py +9 -11
  280. transformers/models/gemma/tokenization_gemma.py +10 -27
  281. transformers/models/gemma2/modeling_gemma2.py +5 -5
  282. transformers/models/gemma2/modular_gemma2.py +3 -2
  283. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  284. transformers/models/gemma3/modeling_gemma3.py +42 -91
  285. transformers/models/gemma3/modular_gemma3.py +38 -87
  286. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  287. transformers/models/gemma3n/modeling_gemma3n.py +65 -218
  288. transformers/models/gemma3n/modular_gemma3n.py +68 -68
  289. transformers/models/git/modeling_git.py +183 -126
  290. transformers/models/glm/modeling_glm.py +5 -5
  291. transformers/models/glm4/modeling_glm4.py +5 -5
  292. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  293. transformers/models/glm46v/modeling_glm46v.py +3 -1
  294. transformers/models/glm46v/modular_glm46v.py +3 -0
  295. transformers/models/glm4_moe/modeling_glm4_moe.py +13 -7
  296. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  297. transformers/models/glm4v/configuration_glm4v.py +3 -1
  298. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  299. transformers/models/glm4v/modeling_glm4v.py +18 -8
  300. transformers/models/glm4v/modular_glm4v.py +17 -7
  301. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  302. transformers/models/glm4v_moe/modeling_glm4v_moe.py +44 -27
  303. transformers/models/glm4v_moe/modular_glm4v_moe.py +13 -1
  304. transformers/models/glmasr/__init__.py +30 -0
  305. transformers/models/glmasr/configuration_glmasr.py +197 -0
  306. transformers/models/glmasr/modeling_glmasr.py +512 -0
  307. transformers/models/glmasr/modular_glmasr.py +433 -0
  308. transformers/models/glmasr/processing_glmasr.py +332 -0
  309. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  310. transformers/models/glpn/modeling_glpn.py +2 -0
  311. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  312. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  313. transformers/models/gpt2/modeling_gpt2.py +13 -6
  314. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  315. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +4 -8
  316. transformers/models/gpt_neo/modeling_gpt_neo.py +19 -3
  317. transformers/models/gpt_neox/modeling_gpt_neox.py +6 -3
  318. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  319. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  320. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +4 -2
  321. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  322. transformers/models/gpt_oss/modeling_gpt_oss.py +10 -14
  323. transformers/models/gpt_oss/modular_gpt_oss.py +8 -12
  324. transformers/models/gptj/modeling_gptj.py +18 -6
  325. transformers/models/granite/modeling_granite.py +5 -5
  326. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  327. transformers/models/granitemoe/modeling_granitemoe.py +6 -9
  328. transformers/models/granitemoe/modular_granitemoe.py +1 -4
  329. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  330. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +36 -28
  331. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  332. transformers/models/granitemoeshared/modeling_granitemoeshared.py +6 -9
  333. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  334. transformers/models/grounding_dino/modeling_grounding_dino.py +8 -4
  335. transformers/models/groupvit/modeling_groupvit.py +9 -1
  336. transformers/models/helium/modeling_helium.py +5 -4
  337. transformers/models/herbert/tokenization_herbert.py +9 -25
  338. transformers/models/hgnet_v2/modeling_hgnet_v2.py +16 -1
  339. transformers/models/hgnet_v2/modular_hgnet_v2.py +16 -1
  340. transformers/models/hiera/modeling_hiera.py +4 -0
  341. transformers/models/hubert/modeling_hubert.py +7 -0
  342. transformers/models/hubert/modular_hubert.py +5 -0
  343. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +5 -5
  344. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  345. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  346. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +15 -7
  347. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  348. transformers/models/ibert/modeling_ibert.py +22 -0
  349. transformers/models/idefics/modeling_idefics.py +15 -21
  350. transformers/models/idefics2/modeling_idefics2.py +7 -1
  351. transformers/models/idefics3/modeling_idefics3.py +5 -1
  352. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  353. transformers/models/imagegpt/modeling_imagegpt.py +11 -3
  354. transformers/models/informer/modeling_informer.py +4 -0
  355. transformers/models/informer/modular_informer.py +1 -0
  356. transformers/models/instructblip/modeling_instructblip.py +2 -0
  357. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  358. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  359. transformers/models/internvl/modeling_internvl.py +13 -12
  360. transformers/models/internvl/modular_internvl.py +7 -13
  361. transformers/models/internvl/video_processing_internvl.py +0 -1
  362. transformers/models/jais2/__init__.py +27 -0
  363. transformers/models/jais2/configuration_jais2.py +152 -0
  364. transformers/models/jais2/modeling_jais2.py +486 -0
  365. transformers/models/jais2/modular_jais2.py +196 -0
  366. transformers/models/jamba/modeling_jamba.py +25 -20
  367. transformers/models/jamba/modular_jamba.py +17 -17
  368. transformers/models/janus/image_processing_janus_fast.py +0 -1
  369. transformers/models/janus/modeling_janus.py +16 -7
  370. transformers/models/janus/modular_janus.py +17 -7
  371. transformers/models/jetmoe/modeling_jetmoe.py +4 -4
  372. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  373. transformers/models/kosmos2/modeling_kosmos2.py +15 -2
  374. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  375. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  376. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +12 -4
  377. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  378. transformers/models/lasr/__init__.py +29 -0
  379. transformers/models/lasr/configuration_lasr.py +248 -0
  380. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  381. transformers/models/lasr/modeling_lasr.py +730 -0
  382. transformers/models/lasr/modular_lasr.py +576 -0
  383. transformers/models/lasr/processing_lasr.py +94 -0
  384. transformers/models/lasr/tokenization_lasr.py +186 -0
  385. transformers/models/layoutlm/modeling_layoutlm.py +10 -3
  386. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  387. transformers/models/layoutlmv2/modeling_layoutlmv2.py +16 -0
  388. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +11 -53
  389. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  390. transformers/models/layoutlmv3/modeling_layoutlmv3.py +33 -5
  391. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  392. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  393. transformers/models/led/modeling_led.py +12 -0
  394. transformers/models/levit/modeling_levit.py +21 -0
  395. transformers/models/lfm2/modeling_lfm2.py +5 -6
  396. transformers/models/lfm2/modular_lfm2.py +0 -1
  397. transformers/models/lfm2_moe/modeling_lfm2_moe.py +17 -8
  398. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  399. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  400. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  401. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  402. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  403. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  404. transformers/models/lightglue/modeling_lightglue.py +3 -1
  405. transformers/models/lightglue/modular_lightglue.py +1 -0
  406. transformers/models/lilt/modeling_lilt.py +23 -15
  407. transformers/models/llama/modeling_llama.py +5 -5
  408. transformers/models/llama/tokenization_llama.py +15 -43
  409. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  410. transformers/models/llama4/modeling_llama4.py +11 -6
  411. transformers/models/llava/image_processing_llava_fast.py +0 -1
  412. transformers/models/llava/modeling_llava.py +12 -7
  413. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  414. transformers/models/llava_next/modeling_llava_next.py +7 -3
  415. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  416. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  417. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  418. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  419. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  420. transformers/models/longcat_flash/modeling_longcat_flash.py +6 -5
  421. transformers/models/longcat_flash/modular_longcat_flash.py +3 -2
  422. transformers/models/longformer/modeling_longformer.py +6 -0
  423. transformers/models/longt5/modeling_longt5.py +4 -4
  424. transformers/models/luke/modeling_luke.py +9 -0
  425. transformers/models/luke/tokenization_luke.py +11 -38
  426. transformers/models/lxmert/modeling_lxmert.py +2 -0
  427. transformers/models/m2m_100/modeling_m2m_100.py +14 -0
  428. transformers/models/mamba/modeling_mamba.py +16 -23
  429. transformers/models/mamba2/modeling_mamba2.py +24 -23
  430. transformers/models/marian/configuration_marian.py +1 -1
  431. transformers/models/marian/modeling_marian.py +8 -0
  432. transformers/models/markuplm/modeling_markuplm.py +9 -8
  433. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  434. transformers/models/mask2former/configuration_mask2former.py +3 -3
  435. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  436. transformers/models/mask2former/modeling_mask2former.py +11 -0
  437. transformers/models/maskformer/configuration_maskformer.py +3 -3
  438. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  439. transformers/models/maskformer/modeling_maskformer.py +11 -1
  440. transformers/models/maskformer/modeling_maskformer_swin.py +21 -15
  441. transformers/models/mbart/configuration_mbart.py +1 -0
  442. transformers/models/mbart/modeling_mbart.py +14 -0
  443. transformers/models/mbart/tokenization_mbart.py +11 -52
  444. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  445. transformers/models/megatron_bert/modeling_megatron_bert.py +9 -0
  446. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  447. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  448. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  449. transformers/models/mimi/modeling_mimi.py +28 -5
  450. transformers/models/minimax/modeling_minimax.py +19 -6
  451. transformers/models/minimax/modular_minimax.py +12 -1
  452. transformers/models/ministral/modeling_ministral.py +5 -5
  453. transformers/models/ministral3/configuration_ministral3.py +1 -1
  454. transformers/models/ministral3/modeling_ministral3.py +5 -4
  455. transformers/models/mistral/modeling_mistral.py +5 -4
  456. transformers/models/mistral3/modeling_mistral3.py +10 -4
  457. transformers/models/mistral3/modular_mistral3.py +3 -1
  458. transformers/models/mixtral/modeling_mixtral.py +15 -7
  459. transformers/models/mixtral/modular_mixtral.py +6 -2
  460. transformers/models/mlcd/modeling_mlcd.py +6 -0
  461. transformers/models/mlcd/modular_mlcd.py +4 -0
  462. transformers/models/mllama/modeling_mllama.py +15 -4
  463. transformers/models/mluke/tokenization_mluke.py +6 -6
  464. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  465. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +8 -4
  466. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  467. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  468. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  469. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  470. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  471. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  472. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  473. transformers/models/mobilevit/modeling_mobilevit.py +7 -0
  474. transformers/models/mobilevitv2/modeling_mobilevitv2.py +7 -0
  475. transformers/models/modernbert/modeling_modernbert.py +16 -2
  476. transformers/models/modernbert/modular_modernbert.py +14 -1
  477. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +17 -10
  478. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +15 -8
  479. transformers/models/moonshine/modeling_moonshine.py +5 -3
  480. transformers/models/moshi/modeling_moshi.py +26 -53
  481. transformers/models/mpnet/modeling_mpnet.py +7 -0
  482. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  483. transformers/models/mpt/modeling_mpt.py +2 -0
  484. transformers/models/mra/modeling_mra.py +10 -1
  485. transformers/models/mt5/configuration_mt5.py +2 -3
  486. transformers/models/mt5/modeling_mt5.py +7 -10
  487. transformers/models/musicgen/modeling_musicgen.py +7 -9
  488. transformers/models/musicgen_melody/modeling_musicgen_melody.py +7 -0
  489. transformers/models/mvp/modeling_mvp.py +14 -0
  490. transformers/models/nanochat/modeling_nanochat.py +5 -5
  491. transformers/models/nemotron/modeling_nemotron.py +7 -5
  492. transformers/models/nllb/tokenization_nllb.py +8 -22
  493. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  494. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  495. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  496. transformers/models/nougat/tokenization_nougat.py +15 -68
  497. transformers/models/nystromformer/modeling_nystromformer.py +13 -0
  498. transformers/models/olmo/modeling_olmo.py +5 -5
  499. transformers/models/olmo/modular_olmo.py +2 -2
  500. transformers/models/olmo2/modeling_olmo2.py +5 -6
  501. transformers/models/olmo2/modular_olmo2.py +0 -1
  502. transformers/models/olmo3/modeling_olmo3.py +5 -5
  503. transformers/models/olmoe/modeling_olmoe.py +15 -7
  504. transformers/models/olmoe/modular_olmoe.py +4 -2
  505. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  506. transformers/models/omdet_turbo/modeling_omdet_turbo.py +6 -0
  507. transformers/models/oneformer/configuration_oneformer.py +3 -3
  508. transformers/models/oneformer/modeling_oneformer.py +11 -39
  509. transformers/models/openai/modeling_openai.py +15 -0
  510. transformers/models/openai/tokenization_openai.py +10 -46
  511. transformers/models/opt/modeling_opt.py +2 -0
  512. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  513. transformers/models/ovis2/modeling_ovis2.py +15 -3
  514. transformers/models/ovis2/modular_ovis2.py +8 -0
  515. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  516. transformers/models/owlv2/modeling_owlv2.py +11 -3
  517. transformers/models/owlv2/modular_owlv2.py +0 -2
  518. transformers/models/owlvit/modeling_owlvit.py +11 -3
  519. transformers/models/paddleocr_vl/__init__.py +32 -0
  520. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  521. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +504 -0
  522. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  523. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1682 -0
  524. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1359 -0
  525. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  526. transformers/models/paligemma/modeling_paligemma.py +25 -17
  527. transformers/models/parakeet/configuration_parakeet.py +4 -6
  528. transformers/models/parakeet/modeling_parakeet.py +14 -6
  529. transformers/models/parakeet/modular_parakeet.py +7 -2
  530. transformers/models/parakeet/processing_parakeet.py +1 -0
  531. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  532. transformers/models/patchtsmixer/modeling_patchtsmixer.py +10 -0
  533. transformers/models/patchtst/modeling_patchtst.py +25 -6
  534. transformers/models/pe_audio/__init__.py +30 -0
  535. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  536. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  537. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  538. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  539. transformers/{kernels/falcon_mamba/__init__.py → models/pe_audio/processing_pe_audio.py} +11 -2
  540. transformers/models/pe_audio_video/__init__.py +29 -0
  541. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  542. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  543. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  544. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  545. transformers/models/pe_video/__init__.py +30 -0
  546. transformers/models/pe_video/configuration_pe_video.py +211 -0
  547. transformers/models/pe_video/modeling_pe_video.py +636 -0
  548. transformers/models/pe_video/modular_pe_video.py +219 -0
  549. transformers/models/pe_video/processing_pe_video.py +10 -0
  550. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  551. transformers/models/pegasus/configuration_pegasus.py +1 -0
  552. transformers/models/pegasus/modeling_pegasus.py +8 -0
  553. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  554. transformers/models/pegasus_x/modeling_pegasus_x.py +5 -0
  555. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  556. transformers/models/perceiver/modeling_perceiver.py +13 -1
  557. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  558. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  559. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  560. transformers/models/persimmon/modeling_persimmon.py +3 -2
  561. transformers/models/phi/modeling_phi.py +5 -6
  562. transformers/models/phi/modular_phi.py +0 -1
  563. transformers/models/phi3/modeling_phi3.py +3 -2
  564. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +9 -6
  565. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +7 -4
  566. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  567. transformers/models/phimoe/modeling_phimoe.py +15 -7
  568. transformers/models/phimoe/modular_phimoe.py +3 -3
  569. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  570. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  571. transformers/models/pixio/__init__.py +30 -0
  572. transformers/models/pixio/configuration_pixio.py +151 -0
  573. transformers/models/pixio/modeling_pixio.py +507 -0
  574. transformers/models/pixio/modular_pixio.py +404 -0
  575. transformers/models/pixtral/modeling_pixtral.py +3 -2
  576. transformers/models/pixtral/processing_pixtral.py +3 -1
  577. transformers/models/plbart/configuration_plbart.py +1 -0
  578. transformers/models/plbart/modeling_plbart.py +13 -0
  579. transformers/models/plbart/modular_plbart.py +8 -0
  580. transformers/models/plbart/tokenization_plbart.py +0 -2
  581. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  582. transformers/models/poolformer/modeling_poolformer.py +13 -1
  583. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  584. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  585. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  586. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  587. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  588. transformers/models/prophetnet/modeling_prophetnet.py +5 -1
  589. transformers/models/pvt/modeling_pvt.py +2 -0
  590. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  591. transformers/models/qwen2/modeling_qwen2.py +5 -5
  592. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  593. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  594. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +116 -79
  595. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +71 -33
  596. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  597. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +23 -11
  598. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +29 -27
  599. transformers/models/qwen2_audio/modeling_qwen2_audio.py +4 -2
  600. transformers/models/qwen2_moe/modeling_qwen2_moe.py +15 -7
  601. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  602. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  603. transformers/models/qwen2_vl/modeling_qwen2_vl.py +23 -20
  604. transformers/models/qwen3/modeling_qwen3.py +5 -5
  605. transformers/models/qwen3_moe/modeling_qwen3_moe.py +15 -7
  606. transformers/models/qwen3_next/modeling_qwen3_next.py +7 -8
  607. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  608. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +112 -68
  609. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +62 -20
  610. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  611. transformers/models/qwen3_vl/modeling_qwen3_vl.py +57 -42
  612. transformers/models/qwen3_vl/modular_qwen3_vl.py +59 -46
  613. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  614. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +132 -148
  615. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +36 -82
  616. transformers/models/rag/configuration_rag.py +0 -8
  617. transformers/models/rag/modeling_rag.py +8 -9
  618. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +18 -3
  619. transformers/models/reformer/modeling_reformer.py +13 -1
  620. transformers/models/reformer/tokenization_reformer.py +11 -28
  621. transformers/models/regnet/modeling_regnet.py +10 -1
  622. transformers/models/rembert/modeling_rembert.py +13 -1
  623. transformers/models/rembert/tokenization_rembert.py +3 -10
  624. transformers/models/resnet/modeling_resnet.py +19 -5
  625. transformers/models/roberta/modeling_roberta.py +3 -0
  626. transformers/models/roberta/modular_roberta.py +3 -0
  627. transformers/models/roberta/tokenization_roberta.py +18 -27
  628. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  629. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  630. transformers/models/roformer/modeling_roformer.py +6 -0
  631. transformers/models/roformer/tokenization_roformer.py +77 -412
  632. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  633. transformers/models/rt_detr/modeling_rt_detr.py +6 -0
  634. transformers/models/rt_detr/modeling_rt_detr_resnet.py +13 -4
  635. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  636. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +9 -0
  637. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  638. transformers/models/rwkv/modeling_rwkv.py +2 -1
  639. transformers/models/sam/configuration_sam.py +1 -0
  640. transformers/models/sam/image_processing_sam_fast.py +0 -1
  641. transformers/models/sam/modeling_sam.py +4 -1
  642. transformers/models/sam2/configuration_sam2.py +1 -1
  643. transformers/models/sam2/modeling_sam2.py +7 -3
  644. transformers/models/sam2/modular_sam2.py +7 -3
  645. transformers/models/sam2_video/modeling_sam2_video.py +52 -43
  646. transformers/models/sam2_video/modular_sam2_video.py +32 -18
  647. transformers/models/sam3/configuration_sam3.py +21 -1
  648. transformers/models/sam3/modeling_sam3.py +100 -80
  649. transformers/models/sam3_tracker/modeling_sam3_tracker.py +8 -1
  650. transformers/models/sam3_tracker/modular_sam3_tracker.py +8 -1
  651. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  652. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +27 -15
  653. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  654. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  655. transformers/models/sam3_video/modeling_sam3_video.py +4 -3
  656. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  657. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  658. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  659. transformers/models/seamless_m4t/modeling_seamless_m4t.py +32 -12
  660. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  661. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +11 -1
  662. transformers/models/seed_oss/modeling_seed_oss.py +3 -3
  663. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  664. transformers/models/segformer/modeling_segformer.py +6 -3
  665. transformers/models/segformer/modular_segformer.py +0 -1
  666. transformers/models/seggpt/modeling_seggpt.py +2 -0
  667. transformers/models/sew/modeling_sew.py +3 -0
  668. transformers/models/sew/modular_sew.py +1 -0
  669. transformers/models/sew_d/modeling_sew_d.py +3 -0
  670. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  671. transformers/models/siglip/modeling_siglip.py +24 -2
  672. transformers/models/siglip2/modeling_siglip2.py +67 -41
  673. transformers/models/siglip2/modular_siglip2.py +4 -0
  674. transformers/models/smollm3/modeling_smollm3.py +5 -5
  675. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  676. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  677. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  678. transformers/models/speech_to_text/modeling_speech_to_text.py +14 -0
  679. transformers/models/speecht5/modeling_speecht5.py +41 -1
  680. transformers/models/splinter/modeling_splinter.py +12 -3
  681. transformers/models/splinter/tokenization_splinter.py +9 -28
  682. transformers/models/squeezebert/modeling_squeezebert.py +8 -0
  683. transformers/models/stablelm/modeling_stablelm.py +4 -2
  684. transformers/models/starcoder2/modeling_starcoder2.py +5 -4
  685. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  686. transformers/models/superglue/modeling_superglue.py +1 -0
  687. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  688. transformers/models/superpoint/modeling_superpoint.py +1 -0
  689. transformers/models/swiftformer/modeling_swiftformer.py +6 -0
  690. transformers/models/swin/modeling_swin.py +20 -12
  691. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  692. transformers/models/swin2sr/modeling_swin2sr.py +51 -33
  693. transformers/models/swinv2/modeling_swinv2.py +45 -33
  694. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  695. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  696. transformers/models/t5/configuration_t5.py +7 -1
  697. transformers/models/t5/modeling_t5.py +8 -7
  698. transformers/models/t5/tokenization_t5.py +4 -8
  699. transformers/models/t5gemma/modeling_t5gemma.py +6 -6
  700. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  701. transformers/models/t5gemma2/modeling_t5gemma2.py +19 -10
  702. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  703. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  704. transformers/models/table_transformer/modeling_table_transformer.py +5 -1
  705. transformers/models/tapas/modeling_tapas.py +3 -0
  706. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  707. transformers/models/textnet/modeling_textnet.py +11 -2
  708. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  709. transformers/models/timesfm/modeling_timesfm.py +14 -0
  710. transformers/models/timesfm/modular_timesfm.py +14 -0
  711. transformers/models/timesformer/modeling_timesformer.py +2 -0
  712. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  713. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  714. transformers/models/timm_wrapper/modeling_timm_wrapper.py +20 -14
  715. transformers/models/trocr/modeling_trocr.py +3 -2
  716. transformers/models/tvp/configuration_tvp.py +5 -1
  717. transformers/models/tvp/modeling_tvp.py +6 -4
  718. transformers/models/udop/configuration_udop.py +1 -0
  719. transformers/models/udop/modeling_udop.py +7 -7
  720. transformers/models/udop/tokenization_udop.py +5 -13
  721. transformers/models/umt5/configuration_umt5.py +2 -2
  722. transformers/models/umt5/modeling_umt5.py +7 -6
  723. transformers/models/unispeech/modeling_unispeech.py +4 -0
  724. transformers/models/unispeech/modular_unispeech.py +2 -0
  725. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  726. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  727. transformers/models/univnet/modeling_univnet.py +1 -0
  728. transformers/models/upernet/modeling_upernet.py +1 -0
  729. transformers/models/vaultgemma/modeling_vaultgemma.py +5 -5
  730. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  731. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  732. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  733. transformers/models/video_llava/modeling_video_llava.py +7 -3
  734. transformers/models/vilt/configuration_vilt.py +2 -2
  735. transformers/models/vilt/modeling_vilt.py +13 -0
  736. transformers/models/vipllava/modeling_vipllava.py +7 -3
  737. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  738. transformers/models/visual_bert/modeling_visual_bert.py +8 -0
  739. transformers/models/vitdet/modeling_vitdet.py +2 -0
  740. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  741. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  742. transformers/models/vitmatte/modeling_vitmatte.py +5 -0
  743. transformers/models/vitpose/configuration_vitpose.py +1 -1
  744. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  745. transformers/models/vits/modeling_vits.py +1 -0
  746. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  747. transformers/models/voxtral/modeling_voxtral.py +2 -2
  748. transformers/models/voxtral/modular_voxtral.py +2 -2
  749. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  750. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +21 -10
  751. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +12 -0
  752. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +27 -11
  753. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  754. transformers/models/wavlm/modeling_wavlm.py +5 -0
  755. transformers/models/whisper/generation_whisper.py +1 -0
  756. transformers/models/whisper/modeling_whisper.py +11 -3
  757. transformers/models/whisper/tokenization_whisper.py +4 -15
  758. transformers/models/x_clip/modeling_x_clip.py +5 -0
  759. transformers/models/xcodec/modeling_xcodec.py +5 -0
  760. transformers/models/xglm/modeling_xglm.py +11 -0
  761. transformers/models/xglm/tokenization_xglm.py +4 -9
  762. transformers/models/xlm/modeling_xlm.py +18 -14
  763. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  764. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  765. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  766. transformers/models/xlnet/modeling_xlnet.py +3 -1
  767. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  768. transformers/models/xmod/modeling_xmod.py +3 -0
  769. transformers/models/yoso/modeling_yoso.py +10 -1
  770. transformers/models/zamba/modeling_zamba.py +4 -1
  771. transformers/models/zamba2/modeling_zamba2.py +7 -4
  772. transformers/models/zamba2/modular_zamba2.py +1 -1
  773. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  774. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  775. transformers/models/zoedepth/modeling_zoedepth.py +8 -0
  776. transformers/pipelines/__init__.py +11 -9
  777. transformers/pipelines/automatic_speech_recognition.py +20 -12
  778. transformers/pipelines/base.py +2 -10
  779. transformers/pipelines/document_question_answering.py +4 -2
  780. transformers/pipelines/question_answering.py +1 -1
  781. transformers/pipelines/text_generation.py +1 -1
  782. transformers/pipelines/text_to_audio.py +2 -2
  783. transformers/processing_utils.py +133 -50
  784. transformers/quantizers/auto.py +2 -4
  785. transformers/quantizers/base.py +44 -174
  786. transformers/quantizers/quantizer_aqlm.py +2 -23
  787. transformers/quantizers/quantizer_auto_round.py +2 -12
  788. transformers/quantizers/quantizer_awq.py +20 -89
  789. transformers/quantizers/quantizer_bitnet.py +4 -14
  790. transformers/quantizers/quantizer_bnb_4bit.py +18 -155
  791. transformers/quantizers/quantizer_bnb_8bit.py +24 -110
  792. transformers/quantizers/quantizer_compressed_tensors.py +2 -9
  793. transformers/quantizers/quantizer_eetq.py +16 -74
  794. transformers/quantizers/quantizer_fbgemm_fp8.py +38 -138
  795. transformers/quantizers/quantizer_finegrained_fp8.py +26 -113
  796. transformers/quantizers/quantizer_fp_quant.py +52 -82
  797. transformers/quantizers/quantizer_gptq.py +8 -28
  798. transformers/quantizers/quantizer_higgs.py +42 -60
  799. transformers/quantizers/quantizer_hqq.py +144 -153
  800. transformers/quantizers/quantizer_mxfp4.py +14 -194
  801. transformers/quantizers/quantizer_quanto.py +35 -79
  802. transformers/quantizers/quantizer_quark.py +36 -17
  803. transformers/quantizers/quantizer_spqr.py +4 -12
  804. transformers/quantizers/quantizer_torchao.py +50 -325
  805. transformers/quantizers/quantizer_vptq.py +4 -27
  806. transformers/quantizers/quantizers_utils.py +20 -0
  807. transformers/testing_utils.py +324 -47
  808. transformers/tokenization_mistral_common.py +7 -2
  809. transformers/tokenization_utils_base.py +116 -224
  810. transformers/tokenization_utils_tokenizers.py +190 -106
  811. transformers/trainer.py +51 -32
  812. transformers/trainer_callback.py +8 -0
  813. transformers/trainer_jit_checkpoint.py +126 -0
  814. transformers/trainer_seq2seq.py +4 -0
  815. transformers/trainer_utils.py +1 -1
  816. transformers/training_args.py +74 -38
  817. transformers/utils/__init__.py +7 -4
  818. transformers/utils/attention_visualizer.py +4 -4
  819. transformers/utils/auto_docstring.py +35 -25
  820. transformers/utils/generic.py +47 -1
  821. transformers/utils/hub.py +5 -15
  822. transformers/utils/import_utils.py +112 -25
  823. transformers/utils/kernel_config.py +74 -19
  824. transformers/utils/loading_report.py +19 -10
  825. transformers/utils/quantization_config.py +78 -245
  826. transformers/video_processing_utils.py +17 -14
  827. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +275 -229
  828. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +832 -777
  829. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +1 -1
  830. transformers/kernels/__init__.py +0 -0
  831. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  832. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  833. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  834. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info/licenses}/LICENSE +0 -0
  835. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/quantizers/base.py +44 -174

@@ -12,17 +12,13 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  from abc import ABC, abstractmethod
- from copy import deepcopy
  from typing import TYPE_CHECKING, Any

- from ..utils import is_accelerate_available, is_torch_available, logging
+ from ..utils import is_torch_available, logging
  from ..utils.quantization_config import QuantizationConfigMixin, QuantizationMethod
  from .quantizers_utils import get_module_from_name


- if is_accelerate_available():
-     from accelerate.utils import find_tied_parameters
-
  if TYPE_CHECKING:
      from ..modeling_utils import PreTrainedModel

@@ -35,60 +31,31 @@ else:
 logger = logging.get_logger(__file__)


-def _assign_original_dtype(module, original_dtype):
-    # not very nice in a recursive function but it avoids a circular import
-    from ..modeling_utils import PreTrainedModel
-
-    for child in module.children():
-        if isinstance(child, PreTrainedModel):
-            child.config._pre_quantization_dtype = original_dtype
-        _assign_original_dtype(child, original_dtype)
-
-
-def get_keys_to_not_convert(model):
+def get_keys_to_not_convert(model) -> list:
     r"""
-    An utility function to get the key of the module to keep in full precision if any For example for CausalLM modules
-    we may want to keep the lm_head in full precision for numerical stability reasons. For other architectures, we want
-    to keep the tied weights of the model. The function will return a list of the keys of the modules to not convert in
-    int8.
-
-    Parameters:
-        model (`torch.nn.Module`):
-            Input model
+    Function to automatically detect keys to not convert for usage like quantization. For example for CausalLM modules
+    we may want to keep the lm_head in full precision for numerical stability reasons.
     """
-    # Create a copy of the model and tie the weights, then
-    # check if it contains tied weights
-    tied_model = deepcopy(model)  # this has 0 cost since it is done inside `init_empty_weights` context manager`
-    tied_model.tie_weights()
-
-    tied_params = find_tied_parameters(tied_model)
-    tied_keys = sum(tied_params, [])
-    has_tied_params = len(tied_keys) > 0
-
-    # If there is not tied weights, we want to keep the lm_head(output_embedding) in full precision
-    if not has_tied_params:
-        output_emb = model.get_output_embeddings()
-        if output_emb is not None:
-            list_last_module = [name for name, module in model.named_modules() if id(module) == id(output_emb)]
-            return list_last_module
-
-    # otherwise, no tied weights, no output embedding defined, simply keep the last module in full precision
-    list_modules = list(model.named_parameters())
-    list_last_module = [list_modules[-1][0]]
-    # add last module together with tied weights
-    intersection = set(list_last_module) - set(tied_keys)
-    list_untouched = list(set(tied_keys)) + list(intersection)
-
-    # remove ".weight" from the keys
-    names_to_remove = [".weight", ".bias"]
-    filtered_module_names = []
-    for name in list_untouched:
-        for name_to_remove in names_to_remove:
-            if name_to_remove in name:
-                name = name.replace(name_to_remove, "")
-        filtered_module_names.append(name)
-
-    return filtered_module_names
+    # remove tied weights
+    tied_keys = set()
+    if len(model.all_tied_weights_keys) > 0:
+        tied_keys = set(model.all_tied_weights_keys.values()) | set(model.all_tied_weights_keys.keys())
+
+    # remove last module
+    last_module_key = {list(model.named_parameters())[-1][0]}
+
+    # remove output emb
+    output_emb_module = model.get_output_embeddings()
+    output_emb_keys = {
+        name
+        for name, module in model.named_modules()
+        if output_emb_module is not None and id(module) == id(output_emb_module)
+    }
+    modules_to_not_convert = tied_keys | last_module_key | output_emb_keys
+
+    modules_to_not_convert = list({k.removesuffix(".weight") for k in modules_to_not_convert})
+
+    return list(modules_to_not_convert)


 class HfQuantizer(ABC):
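A minimal usage sketch of the reworked `get_keys_to_not_convert` above; the `transformers.quantizers.base` import path and the `gpt2` checkpoint are assumptions made purely for illustration, and the printed names are indicative rather than guaranteed output:

    from transformers import AutoModelForCausalLM
    from transformers.quantizers.base import get_keys_to_not_convert  # assumed import path

    model = AutoModelForCausalLM.from_pretrained("gpt2")
    # Tied weights, the module holding the last parameter, and the output embedding are
    # all kept in full precision; ".weight" suffixes are stripped from the returned names.
    print(get_keys_to_not_convert(model))  # e.g. ["lm_head", "transformer.wte", ...]
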
@@ -100,26 +67,14 @@ class HfQuantizer(ABC):
     Attributes
         quantization_config (`transformers.utils.quantization_config.QuantizationConfigMixin`):
             The quantization config that defines the quantization parameters of your model that you want to quantize.
-        modules_to_not_convert (`list[str]`, *optional*):
-            The list of module names to not convert when quantizing the model.
-        required_packages (`list[str]`, *optional*):
-            The list of required pip packages to install prior to using the quantizer
         requires_calibration (`bool`):
             Whether the quantization method requires to calibrate the model before using it.
-        requires_parameters_quantization (`bool`):
-            Whether the quantization method requires to create a new Parameter. For example, for bitsandbytes, it is
-            required to create a new xxxParameter in order to properly quantize the model.
     """

     requires_calibration = False
-    required_packages = None
-    requires_parameters_quantization = False

     def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
         self.quantization_config = quantization_config
-
-        # -- Handle extra kwargs below --
-        self.modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
         self.pre_quantized = kwargs.pop("pre_quantized", True)

         if not self.pre_quantized and self.requires_calibration:
@@ -153,82 +108,19 @@ class HfQuantizer(ABC):
         """
         return device_map

-    def adjust_target_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        """
-        Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained`
-        to compute the device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype`
-        to `torch.int8` and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`.
-
-        Args:
-            dtype (`torch.dtype`, *optional*):
-                The dtype that is used to compute the device_map.
-        """
-        return dtype
-
     def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
-        "Return the element size (in bytes) for `param_name`."
-
-        if self.param_needs_quantization(model, param_name):
-            from accelerate.utils import CustomDtype
-
-            mapping = {
-                torch.int8: 1,
-                CustomDtype.INT4: 0.5,
-                CustomDtype.FP8: 1,
-                CustomDtype.INT2: 0.25,
-            }
-            # The value passed is actually not used when the method is overridden
-            if (custom_dtype := self.adjust_target_dtype(torch.float16)) in mapping:
-                return mapping[custom_dtype]
         return param.element_size()

-    def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]:
-        """
-        Override this method if you want to adjust the `missing_keys`.
-
-        Args:
-            missing_keys (`list[str]`, *optional*):
-                The list of missing keys in the checkpoint compared to the state dict of the model
-        """
-        return missing_keys
-
-    def update_expected_keys(self, model, expected_keys: list[str], loaded_keys: list[str]) -> list[str]:
-        """
-        Override this method if you want to adjust the `update_expected_keys`.
-
-        Args:
-            expected_keys (`list[str]`, *optional*):
-                The list of the expected keys in the initialized model.
-            loaded_keys (`list[str]`, *optional*):
-                The list of the loaded keys in the checkpoint.
-        """
-        return expected_keys
-
-    def update_unexpected_keys(self, model, unexpected_keys: list[str]) -> list[str]:
-        return unexpected_keys
-
     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
         """adjust max_memory argument for infer_auto_device_map() if extra memory is needed for quantization"""
         return max_memory

     def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
         """
-        Check whether a given param needs quantization as defined by `create_quantized_param`.
+        Check whether a given param needs to be quantized.
         """
         return False

-    def create_quantized_param(self, *args, **kwargs):
-        """
-        Take needed components from state_dict (those from which `param_needs_quantization` is True) and create
-        quantized param.
-        It usually also load the new param directly in the `model`.
-        Note: only applicable if requires_parameters_quantization == True.
-        """
-        if not self.requires_parameters_quantization:
-            raise AttributeError(
-                f"`.create_quantized_param()` method is not supported by quantizer class {self.__class__.__name__}."
-            )
-
     def validate_environment(self, *args, **kwargs):
         """
         This method is used to potentially check for potential conflicts with arguments that are
@@ -248,7 +140,7 @@ class HfQuantizer(ABC):
     def _process_model_before_weight_loading(self, model, **kwargs):
         return model

-    def preprocess_model(self, model: "PreTrainedModel", config, dtype=None, checkpoint_files=None, **kwargs):
+    def preprocess_model(self, model: "PreTrainedModel", dtype=None, **kwargs):
         """
         Setting model attributes and/or converting model before weights loading. At this point
         the model should be initialized on the meta device so you can freely manipulate the skeleton
@@ -266,14 +158,6 @@ class HfQuantizer(ABC):
         self._convert_model_for_quantization(model)
         self._process_model_before_weight_loading(model, **kwargs)

-        # We store the original dtype for quantized models as we cannot easily retrieve it
-        # once the weights have been quantized
-        # Note that once you have loaded a quantized model, you can't change its dtype so this will
-        # remain a single source of truth
-        original_dtype = dtype if dtype is not None else torch.get_default_dtype()
-        config._pre_quantization_dtype = original_dtype
-        _assign_original_dtype(model, original_dtype)
-
     def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
         return model

@@ -288,6 +172,11 @@ class HfQuantizer(ABC):
             kwargs (`dict`, *optional*):
                 The keyword arguments that are passed along `_process_model_after_weight_loading`.
         """
+        model.config.quantization_config = self.quantization_config
+
+        if self.pre_quantized and getattr(self.quantization_config, "dequantize", False):
+            self.remove_quantization_config(model)
+
         return self._process_model_after_weight_loading(model, **kwargs)

     def remove_quantization_config(self, model):
@@ -298,40 +187,25 @@ class HfQuantizer(ABC):
         del model.hf_quantizer
         if hasattr(model.config, "quantization_config"):
             del model.config.quantization_config
-        if hasattr(model.config, "_pre_quantization_dtype"):
-            del model.config._pre_quantization_dtype
         if hasattr(model, "quantization_method"):
             del model.quantization_method
         model.is_quantized = False

-    def dequantize(self, model):
+    def dequantize(self, model, dtype=None):
         """
         Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance.
         Note not all quantization schemes support this.
         """
-        model = self._dequantize(model)
-
-        # Delete quantizer and quantization config
-        del model.hf_quantizer
-        del model.config.quantization_config
-        del model.config._pre_quantization_dtype
-        del model.quantization_method
-        model.is_quantized = False
+        if dtype is None:
+            # using the same dtype we used to load the model. If we don't do that, we might have issues with modules we didn't quantize.
+            # or we need to upcast everything to the same dtype
+            dtype = model.config.dtype
+        model = self._dequantize(model, dtype=dtype)
+        self.remove_quantization_config(model)

         return model

-    def get_accelerator_warm_up_factor(self):
-        """
-        The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up accelerator.
-        A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
-        we allocate half the memory of the weights residing in the empty model, etc...
-        """
-        # By default we return 4, i.e. half the model size (this corresponds to the case where the model is not
-        # really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual
-        # weight loading)
-        return 4
-
-    def _dequantize(self, model):
+    def _dequantize(self, model, dtype=None):
         raise NotImplementedError(
             f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
         )
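A short sketch of the reworked `dequantize` flow shown above, assuming a model loaded with an `hf_quantizer` attached and a backend that implements `_dequantize`; the checkpoint name is a placeholder:

    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("org/some-quantized-checkpoint")  # placeholder repo id
    # With dtype=None, dequantize() now falls back to model.config.dtype, then calls
    # _dequantize(model, dtype=dtype) and clears quantization state via remove_quantization_config().
    model = model.hf_quantizer.dequantize(model)
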
@@ -360,6 +234,8 @@ class HfQuantizer(ABC):
         if keep_in_fp32_modules is not None:
             modules_to_not_convert.extend(keep_in_fp32_modules)

+        modules_to_not_convert = list(set(modules_to_not_convert))
+
         return modules_to_not_convert

     @property
@@ -372,31 +248,25 @@ class HfQuantizer(ABC):
         """Flag indicating whether the quantized model can be compiled"""
         return False

-    def get_state_dict_and_metadata(self, model, safe_serialization=False):
+    def get_state_dict_and_metadata(self, model):
         """Get state dict and metadata. Useful when we need to modify a bit the state dict due to quantization"""
         return None, {}

-    def update_state_dict_with_metadata(self, state_dict, metadata):
-        """Update state dict with metadata. Default behaviour returns state_dict"""
-        return state_dict
-
     @abstractmethod
-    def is_serializable(self, safe_serialization=None): ...
+    def is_serializable(self): ...

     @property
     @abstractmethod
     def is_trainable(self): ...

     def _convert_model_for_quantization(self, model):
-        from accelerate import init_empty_weights
-
         for name, module in model.named_modules():
             module_class_name = module.__class__.__name__
             if module_class_name in MODULES_TO_PATCH_FOR_QUANTIZATION and (
                 self.quantization_config.quant_method
                 in MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["quantization_methods"]
             ):
-                with init_empty_weights():
+                with torch.device("meta"):
                     parent_module, name = get_module_from_name(model, name)
                     parent_module._modules[name] = MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["module_name"](
                         model.config.get_text_config()
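Taken together, the base-class hunks above shrink the surface a backend has to implement. A minimal, purely illustrative subclass under the new contract (the class and its behaviour are hypothetical, not a real backend; only the method names shown in the diff are assumed):

    from transformers.quantizers.base import HfQuantizer  # assumed import path

    class NoopQuantizer(HfQuantizer):  # hypothetical example backend
        requires_calibration = False

        def validate_environment(self, *args, **kwargs):
            # check optional dependencies here
            pass

        def _process_model_before_weight_loading(self, model, **kwargs):
            # swap nn.Linear modules for quantized ones here
            return model

        def is_serializable(self):  # no longer takes a safe_serialization argument
            return True

        @property
        def is_trainable(self):
            return False
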
@@ -23,13 +23,10 @@ if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel

 from ..integrations import replace_with_aqlm_linear
-from ..utils import is_accelerate_available, is_aqlm_available, is_torch_available, logging
+from ..utils import is_accelerate_available, is_aqlm_available, logging
 from ..utils.quantization_config import QuantizationConfigMixin


-if is_torch_available():
-    import torch
-
 logger = logging.get_logger(__name__)


@@ -39,12 +36,9 @@ class AqlmHfQuantizer(HfQuantizer):
     """

     requires_calibration = True
-    required_packages = ["aqlm"]
-    optimum_quantizer = None

     def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
         super().__init__(quantization_config, **kwargs)
-        self.quantization_config = quantization_config

     def validate_environment(self, *args, **kwargs):
         if not is_accelerate_available():
@@ -53,20 +47,6 @@ class AqlmHfQuantizer(HfQuantizer):
         if not is_aqlm_available():
             raise ImportError("Using `aqlm` quantization requires AQLM: `pip install aqlm[gpu,cpu]`")

-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            if torch.cuda.is_available():
-                dtype = torch.float16
-                logger.info(
-                    "CUDA available. Assuming AQLM inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
-                )
-            else:
-                dtype = torch.float32
-                logger.info(
-                    "CUDA is unavailable. Assuming AQLM inference on CPU and loading the model in `torch.float32`. To overwrite it, set `dtype` manually."
-                )
-        return dtype
-
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
@@ -77,7 +57,6 @@ class AqlmHfQuantizer(HfQuantizer):
             quantization_config=self.quantization_config,
             linear_weights_not_to_quantize=self.quantization_config.linear_weights_not_to_quantize,
         )
-        model.config.quantization_config = self.quantization_config

     @property
     def is_trainable(self) -> bool:
@@ -90,5 +69,5 @@ class AqlmHfQuantizer(HfQuantizer):
             )
         return False

-    def is_serializable(self, safe_serialization=None):
+    def is_serializable(self):
         return True
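With AQLM's `update_dtype` override removed above, the quantizer no longer picks float16 or float32 automatically. A hedged loading sketch under that assumption (the repo id is a placeholder, and `aqlm` plus `accelerate` must be installed as enforced by `validate_environment`):

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "org/some-aqlm-checkpoint",  # placeholder repo id
        dtype=torch.float16,         # set explicitly now that the quantizer no longer guesses
    )
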
@@ -19,13 +19,10 @@ from .base import HfQuantizer
 if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel

-from ..utils import is_auto_round_available, is_torch_available, logging
+from ..utils import is_auto_round_available, logging
 from ..utils.quantization_config import QuantizationConfigMixin


-if is_torch_available():
-    import torch
-
 logger = logging.get_logger(__name__)


@@ -36,7 +33,6 @@ class AutoRoundQuantizer(HfQuantizer):

     # AutoRound requires data calibration - we support only inference
     requires_calibration = True
-    required_packages = ["auto_round"]

     def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
         super().__init__(quantization_config, **kwargs)
@@ -48,12 +44,6 @@ class AutoRoundQuantizer(HfQuantizer):
                 "Loading an AutoRound quantized model requires auto-round library (`pip install 'auto-round>=0.5'`)"
             )

-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            dtype = torch.bfloat16
-            logger.info("Loading the model in `torch.bfloat16`. To overwrite it, set `dtype` manually.")
-        return dtype
-
     def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
         if model.__class__.main_input_name != "input_ids":
             logger.warning("AutoRound offers only limited support for models that are not strictly text-based.")
@@ -76,6 +66,6 @@ class AutoRoundQuantizer(HfQuantizer):
     def is_trainable(self) -> bool:
         return False

-    def is_serializable(self, safe_serialization=None):
+    def is_serializable(self):
         ## for gptq/awq models, the quantization config will be changed
         return True
@@ -22,8 +22,8 @@
 if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel

-from ..utils import is_accelerate_available, is_auto_awq_available, is_torch_available, logging
-from ..utils.quantization_config import AWQLinearVersion
+from ..utils import is_accelerate_available, is_gptqmodel_available, is_torch_available, logging
+from ..utils.quantization_config import AwqBackend


 if is_torch_available():
@@ -40,65 +40,20 @@ class AwqQuantizer(HfQuantizer):
     # AWQ requires data calibration - we support only inference
     requires_calibration = True

-    required_packages = ["awq", "accelerate"]
-
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)

-    def validate_environment(self, device_map, **kwargs):
-        if not is_auto_awq_available():
-            raise ImportError("Loading an AWQ quantized model requires auto-awq library (`pip install autoawq`)")
+    def validate_environment(self, **kwargs):
+        if not is_gptqmodel_available():
+            raise ImportError(
+                "Loading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`"
+            )

         if not is_accelerate_available():
             raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")

-        if (
-            self.quantization_config.version == AWQLinearVersion.GEMM
-            and not torch.cuda.is_available()
-            and not torch.xpu.is_available()
-        ):
-            logger.warning_once("No CUDA or XPU found, consider switching to the IPEX version for CPU-only execution.")
-            self.quantization_config.version = AWQLinearVersion.IPEX
-
-        if self.quantization_config.version == AWQLinearVersion.IPEX:
-            if version.parse(importlib.metadata.version("autoawq")) < version.parse("0.2.6"):
-                raise RuntimeError(
-                    "To use IPEX backend, you need autoawq>0.2.6. Please install the latest version or from source."
-                )
-            if device_map is None:
-                logger.warning_once(
-                    "You have loaded an AWQ model without setting device_map, please set 'cpu' or 'xpu' or 'auto'"
-                )
-            elif isinstance(device_map, dict) and "disk" in device_map.values():
-                raise ValueError(
-                    "You are attempting to load an IPEX version AWQ model with a device_map that contains disk device."
-                    " This is not supported. Please make sure only cpu and xpu in the device_map."
-                )
-        else:
-            if not torch.cuda.is_available() and not torch.xpu.is_available():
-                raise RuntimeError(
-                    "GPU is required to run AWQ quantized model. You can use IPEX version AWQ if you have an Intel CPU"
-                )
-
-            if device_map is None:
-                logger.warning_once(
-                    "You have loaded an AWQ model on CPU and have a CUDA/XPU device available, make sure to set "
-                    "your model on a GPU device in order to run your model."
-                )
-            elif device_map is not None:
-                if isinstance(device_map, dict) and any(
-                    forbidden in device_map.values() for forbidden in ("cpu", torch.device("cpu"), "disk")
-                ):
-                    raise ValueError(
-                        "You are attempting to load an AWQ model with a device_map that contains a CPU or disk device."
-                        " This is not supported. Please remove the CPU or disk device from the device_map."
-                    )
-
     def update_dtype(self, dtype):
-        if dtype is None:
-            dtype = torch.float16
-            logger.info("Loading the model in `torch.float16`. To overwrite it, set `dtype` manually.")
-        elif dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
+        if dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
             logger.warning(
                 "`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`."
             )
@@ -107,51 +62,29 @@ class AwqQuantizer(HfQuantizer):
             logger.warning("We suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.")
         return dtype

-    def _process_model_before_weight_loading(
-        self, model: "PreTrainedModel", keep_in_fp32_modules: list[str] | None = None, **kwargs
-    ):
+    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
         from ..integrations import replace_quantization_scales, replace_with_awq_linear

         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules, add_default_skips=True
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules, add_default_skips=True
         )

-        model, has_been_replaced = replace_with_awq_linear(
-            model, quantization_config=self.quantization_config, modules_to_not_convert=self.modules_to_not_convert
+        model = replace_with_awq_linear(
+            model,
+            quantization_config=self.quantization_config,
+            modules_to_not_convert=self.modules_to_not_convert,
+            device_map=kwargs.get("device_map"),
         )

         model = replace_quantization_scales(model, model.config.model_type)

-        if not has_been_replaced:
-            logger.warning(
-                "You are loading an AWQ model but no linear modules were found in your model."
-                " Please double check your model architecture, or submit an issue on github if you think this is a bug."
-            )
-
     def _process_model_after_weight_loading(self, model, **kwargs):
-        if self.quantization_config.do_fuse:
-            from ..integrations import fuse_awq_modules
+        from gptqmodel.utils.model import hf_gptqmodel_post_init

-            model = fuse_awq_modules(model, self.quantization_config)
-            model._awq_is_fused = True  # TODO: consider storing this flag in model.config instead
-
-        if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
-            from ..integrations import post_init_awq_exllama_modules
-
-            model = post_init_awq_exllama_modules(model, self.quantization_config.exllama_config)
-
-        if self.quantization_config.version == AWQLinearVersion.IPEX:
-            from ..integrations import post_init_awq_ipex_modules
-
-            model = post_init_awq_ipex_modules(model)
-
-    def is_serializable(self, safe_serialization=None):
-        # AWQ through auto-awq has been always serializable, except if the model is fused.
-        if self.quantization_config.do_fuse:
-            logger.warning("You cannot save an AWQ model that uses fused modules!")
-            return False
+        hf_gptqmodel_post_init(model, use_act_order=self.quantization_config.desc_act)

-        if self.quantization_config.version == AWQLinearVersion.EXLLAMA:
+    def is_serializable(self):
+        if self.quantization_config.backend in [AwqBackend.EXLLAMA_V1, AwqBackend.EXLLAMA_V2]:
             logger.warning("You cannot save an AWQ model that uses Exllama backend!")
             return False

@@ -159,6 +92,4 @@ class AwqQuantizer(HfQuantizer):

     @property
     def is_trainable(self):
-        # AWQ supports PEFT fine-tuning from version 0.2.0
-        MIN_AWQ_VERSION_FOR_PEFT = "0.2.0"
-        return version.parse(importlib.metadata.version("autoawq")) >= version.parse(MIN_AWQ_VERSION_FOR_PEFT)
+        return version.parse(importlib.metadata.version("gptqmodel")) >= version.parse("5.0.0")
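The AWQ hunks above move the backend dependency from `autoawq` to `gptqmodel` (see `validate_environment`, `_process_model_after_weight_loading`, and `is_trainable`). A hedged loading sketch under that assumption, after `pip install gptqmodel accelerate`; the repo id is a placeholder:

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "org/some-awq-checkpoint",  # placeholder repo id
        dtype=torch.float16,        # bfloat16 would be cast to float16 by update_dtype above
    )
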
@@ -37,14 +37,10 @@ class BitNetHfQuantizer(HfQuantizer):
     Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
     """

-    requires_parameters_quantization = False
     requires_calibration = True

-    required_packages = ["accelerate"]
-
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)
-        self.quantization_config = quantization_config

     def validate_environment(self, *args, **kwargs):
         if not is_accelerate_available():
@@ -62,8 +58,8 @@ class BitNetHfQuantizer(HfQuantizer):
                 "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set "
                 "your model on a GPU device in order to run your model."
             )
-        elif device_map is not None:
-            if isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
+        elif isinstance(device_map, dict):
+            if len(device_map) > 1 and "cpu" in device_map.values() or "disk" in device_map.values():
                 raise ValueError(
                     "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                     "This is not supported. Please remove the CPU or disk device from the device_map."
@@ -72,31 +68,25 @@ class BitNetHfQuantizer(HfQuantizer):
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
-        keep_in_fp32_modules: list[str] | None = None,
        **kwargs,
     ):
         from ..integrations import replace_with_bitnet_linear

         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
         )

         model = replace_with_bitnet_linear(
             model,
             modules_to_not_convert=self.modules_to_not_convert,
             quantization_config=self.quantization_config,
-            pre_quantized=self.pre_quantized,
         )

     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
         max_memory = {key: val * 0.90 for key, val in max_memory.items()}
         return max_memory

-    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
-        target_dtype = torch.int8
-        return target_dtype
-
-    def is_serializable(self, safe_serialization=None):
+    def is_serializable(self):
         return True

     @property