transformers 5.0.0rc1__py3-none-any.whl → 5.0.0rc2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (671)
  1. transformers/__init__.py +20 -1
  2. transformers/activations.py +1 -1
  3. transformers/audio_utils.py +0 -1
  4. transformers/cache_utils.py +17 -15
  5. transformers/configuration_utils.py +114 -70
  6. transformers/conversion_mapping.py +68 -5
  7. transformers/core_model_loading.py +201 -35
  8. transformers/dependency_versions_table.py +1 -1
  9. transformers/feature_extraction_utils.py +54 -22
  10. transformers/generation/candidate_generator.py +79 -31
  11. transformers/generation/configuration_utils.py +162 -122
  12. transformers/generation/continuous_batching/cache.py +47 -18
  13. transformers/generation/continuous_batching/cache_manager.py +131 -34
  14. transformers/generation/continuous_batching/continuous_api.py +101 -64
  15. transformers/generation/continuous_batching/requests.py +28 -1
  16. transformers/generation/continuous_batching/scheduler.py +11 -4
  17. transformers/generation/stopping_criteria.py +1 -1
  18. transformers/generation/utils.py +108 -110
  19. transformers/generation/watermarking.py +8 -5
  20. transformers/image_processing_base.py +2 -12
  21. transformers/image_processing_utils_fast.py +15 -4
  22. transformers/initialization.py +37 -0
  23. transformers/integrations/__init__.py +12 -0
  24. transformers/integrations/accelerate.py +44 -111
  25. transformers/integrations/aqlm.py +3 -5
  26. transformers/integrations/awq.py +2 -5
  27. transformers/integrations/bitnet.py +5 -8
  28. transformers/integrations/bitsandbytes.py +16 -15
  29. transformers/integrations/deepspeed.py +18 -3
  30. transformers/integrations/eetq.py +3 -5
  31. transformers/integrations/fbgemm_fp8.py +1 -1
  32. transformers/integrations/finegrained_fp8.py +6 -16
  33. transformers/integrations/flash_attention.py +2 -2
  34. transformers/integrations/higgs.py +2 -5
  35. transformers/integrations/hub_kernels.py +23 -5
  36. transformers/integrations/integration_utils.py +35 -0
  37. transformers/integrations/mistral.py +12 -0
  38. transformers/integrations/moe.py +240 -0
  39. transformers/integrations/mxfp4.py +4 -10
  40. transformers/integrations/peft.py +5 -0
  41. transformers/integrations/quanto.py +5 -2
  42. transformers/integrations/spqr.py +3 -5
  43. transformers/integrations/tensor_parallel.py +167 -221
  44. transformers/integrations/vptq.py +3 -5
  45. transformers/modeling_gguf_pytorch_utils.py +66 -19
  46. transformers/modeling_rope_utils.py +78 -81
  47. transformers/modeling_utils.py +583 -503
  48. transformers/models/__init__.py +19 -0
  49. transformers/models/afmoe/modeling_afmoe.py +7 -16
  50. transformers/models/afmoe/modular_afmoe.py +5 -13
  51. transformers/models/aimv2/modeling_aimv2.py +4 -0
  52. transformers/models/aimv2/modular_aimv2.py +4 -0
  53. transformers/models/albert/modeling_albert.py +3 -0
  54. transformers/models/align/modeling_align.py +12 -6
  55. transformers/models/altclip/modeling_altclip.py +7 -3
  56. transformers/models/apertus/modeling_apertus.py +4 -2
  57. transformers/models/apertus/modular_apertus.py +4 -1
  58. transformers/models/arcee/modeling_arcee.py +1 -1
  59. transformers/models/aria/modeling_aria.py +8 -4
  60. transformers/models/aria/modular_aria.py +7 -3
  61. transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
  62. transformers/models/auto/auto_factory.py +1 -1
  63. transformers/models/auto/configuration_auto.py +27 -0
  64. transformers/models/auto/feature_extraction_auto.py +7 -3
  65. transformers/models/auto/image_processing_auto.py +4 -2
  66. transformers/models/auto/modeling_auto.py +31 -0
  67. transformers/models/auto/processing_auto.py +4 -0
  68. transformers/models/auto/tokenization_auto.py +132 -153
  69. transformers/models/auto/video_processing_auto.py +5 -2
  70. transformers/models/aya_vision/modeling_aya_vision.py +7 -3
  71. transformers/models/bamba/modeling_bamba.py +18 -19
  72. transformers/models/bamba/modular_bamba.py +17 -16
  73. transformers/models/bark/modeling_bark.py +9 -0
  74. transformers/models/bart/configuration_bart.py +0 -1
  75. transformers/models/bart/modeling_bart.py +7 -0
  76. transformers/models/beit/image_processing_beit_fast.py +0 -1
  77. transformers/models/bert/modeling_bert.py +3 -0
  78. transformers/models/bert_generation/modeling_bert_generation.py +2 -0
  79. transformers/models/big_bird/modeling_big_bird.py +3 -0
  80. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
  81. transformers/models/bit/modeling_bit.py +5 -1
  82. transformers/models/bitnet/modeling_bitnet.py +1 -1
  83. transformers/models/blenderbot/modeling_blenderbot.py +7 -0
  84. transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
  85. transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
  86. transformers/models/blip/modeling_blip.py +2 -0
  87. transformers/models/blip/modeling_blip_text.py +8 -0
  88. transformers/models/blip_2/modeling_blip_2.py +2 -0
  89. transformers/models/bloom/modeling_bloom.py +13 -44
  90. transformers/models/blt/modeling_blt.py +162 -2
  91. transformers/models/blt/modular_blt.py +168 -3
  92. transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
  93. transformers/models/bridgetower/modeling_bridgetower.py +6 -0
  94. transformers/models/bros/modeling_bros.py +8 -0
  95. transformers/models/camembert/modeling_camembert.py +109 -106
  96. transformers/models/canine/modeling_canine.py +6 -0
  97. transformers/models/canine/tokenization_canine.py +2 -0
  98. transformers/models/chameleon/modeling_chameleon.py +9 -4
  99. transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
  100. transformers/models/clap/feature_extraction_clap.py +2 -2
  101. transformers/models/clap/modeling_clap.py +25 -15
  102. transformers/models/clip/modeling_clip.py +2 -0
  103. transformers/models/clipseg/modeling_clipseg.py +4 -0
  104. transformers/models/clvp/modeling_clvp.py +14 -3
  105. transformers/models/code_llama/tokenization_code_llama.py +1 -1
  106. transformers/models/codegen/modeling_codegen.py +13 -4
  107. transformers/models/cohere/modeling_cohere.py +1 -1
  108. transformers/models/cohere2/modeling_cohere2.py +1 -1
  109. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
  110. transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
  111. transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
  112. transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
  113. transformers/models/convbert/modeling_convbert.py +3 -0
  114. transformers/models/convnext/image_processing_convnext.py +2 -2
  115. transformers/models/convnext/image_processing_convnext_fast.py +9 -13
  116. transformers/models/csm/generation_csm.py +19 -22
  117. transformers/models/csm/modeling_csm.py +3 -1
  118. transformers/models/csm/modular_csm.py +2 -0
  119. transformers/models/ctrl/modeling_ctrl.py +14 -2
  120. transformers/models/cvt/modeling_cvt.py +5 -1
  121. transformers/models/cwm/modeling_cwm.py +1 -1
  122. transformers/models/d_fine/configuration_d_fine.py +3 -4
  123. transformers/models/d_fine/modeling_d_fine.py +46 -39
  124. transformers/models/d_fine/modular_d_fine.py +15 -4
  125. transformers/models/dab_detr/configuration_dab_detr.py +2 -2
  126. transformers/models/dab_detr/modeling_dab_detr.py +1 -1
  127. transformers/models/dac/modeling_dac.py +4 -4
  128. transformers/models/data2vec/modeling_data2vec_text.py +7 -0
  129. transformers/models/data2vec/modular_data2vec_text.py +7 -0
  130. transformers/models/dbrx/configuration_dbrx.py +9 -1
  131. transformers/models/dbrx/modeling_dbrx.py +1 -1
  132. transformers/models/deberta/modeling_deberta.py +2 -0
  133. transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
  134. transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
  135. transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
  136. transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
  137. transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
  138. transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
  139. transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
  140. transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
  141. transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
  142. transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
  143. transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
  144. transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
  145. transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
  146. transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
  147. transformers/models/depth_anything/configuration_depth_anything.py +2 -3
  148. transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
  149. transformers/models/detr/configuration_detr.py +1 -1
  150. transformers/models/detr/modeling_detr.py +8 -1
  151. transformers/models/dia/generation_dia.py +3 -10
  152. transformers/models/dia/modeling_dia.py +12 -1
  153. transformers/models/dia/modular_dia.py +11 -0
  154. transformers/models/dia/processing_dia.py +1 -1
  155. transformers/models/diffllama/modeling_diffllama.py +3 -3
  156. transformers/models/diffllama/modular_diffllama.py +2 -2
  157. transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
  158. transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
  159. transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
  160. transformers/models/distilbert/modeling_distilbert.py +11 -9
  161. transformers/models/doge/modeling_doge.py +1 -1
  162. transformers/models/donut/image_processing_donut_fast.py +0 -1
  163. transformers/models/donut/modeling_donut_swin.py +16 -12
  164. transformers/models/dots1/modeling_dots1.py +14 -5
  165. transformers/models/dpt/configuration_dpt.py +1 -1
  166. transformers/models/dpt/image_processing_dpt_fast.py +1 -2
  167. transformers/models/dpt/modular_dpt.py +1 -2
  168. transformers/models/edgetam/configuration_edgetam.py +1 -1
  169. transformers/models/edgetam/modeling_edgetam.py +5 -2
  170. transformers/models/edgetam/modular_edgetam.py +15 -14
  171. transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
  172. transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
  173. transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
  174. transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
  175. transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
  176. transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
  177. transformers/models/efficientnet/modeling_efficientnet.py +5 -1
  178. transformers/models/electra/modeling_electra.py +7 -0
  179. transformers/models/emu3/modeling_emu3.py +8 -2
  180. transformers/models/emu3/modular_emu3.py +7 -1
  181. transformers/models/encodec/modeling_encodec.py +14 -0
  182. transformers/models/eomt/image_processing_eomt_fast.py +46 -14
  183. transformers/models/eomt/modeling_eomt.py +7 -0
  184. transformers/models/eomt/modular_eomt.py +7 -0
  185. transformers/models/ernie/modeling_ernie.py +6 -0
  186. transformers/models/ernie/modular_ernie.py +6 -0
  187. transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
  188. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
  189. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
  190. transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
  191. transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
  192. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
  193. transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
  194. transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
  195. transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
  196. transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
  197. transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
  198. transformers/models/esm/modeling_esm.py +6 -0
  199. transformers/models/esm/modeling_esmfold.py +6 -1
  200. transformers/models/evolla/modeling_evolla.py +9 -1
  201. transformers/models/evolla/modular_evolla.py +8 -0
  202. transformers/models/exaone4/modeling_exaone4.py +1 -1
  203. transformers/models/falcon/modeling_falcon.py +3 -3
  204. transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
  205. transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
  206. transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
  207. transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
  208. transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
  209. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
  210. transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
  211. transformers/models/flaubert/modeling_flaubert.py +14 -15
  212. transformers/models/flava/image_processing_flava_fast.py +0 -2
  213. transformers/models/flava/modeling_flava.py +4 -1
  214. transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
  215. transformers/models/florence2/modeling_florence2.py +20 -3
  216. transformers/models/florence2/modular_florence2.py +13 -0
  217. transformers/models/fnet/modeling_fnet.py +7 -0
  218. transformers/models/fuyu/image_processing_fuyu.py +1 -1
  219. transformers/models/fuyu/modeling_fuyu.py +3 -1
  220. transformers/models/fuyu/processing_fuyu.py +16 -0
  221. transformers/models/gemma/modeling_gemma.py +10 -12
  222. transformers/models/gemma/modular_gemma.py +9 -11
  223. transformers/models/gemma2/modeling_gemma2.py +1 -1
  224. transformers/models/gemma2/modular_gemma2.py +1 -1
  225. transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
  226. transformers/models/gemma3/modeling_gemma3.py +28 -7
  227. transformers/models/gemma3/modular_gemma3.py +26 -6
  228. transformers/models/gemma3n/configuration_gemma3n.py +3 -0
  229. transformers/models/gemma3n/modeling_gemma3n.py +47 -9
  230. transformers/models/gemma3n/modular_gemma3n.py +51 -9
  231. transformers/models/git/modeling_git.py +181 -126
  232. transformers/models/glm/modeling_glm.py +1 -1
  233. transformers/models/glm4/modeling_glm4.py +1 -1
  234. transformers/models/glm46v/image_processing_glm46v.py +0 -4
  235. transformers/models/glm46v/modeling_glm46v.py +3 -1
  236. transformers/models/glm46v/modular_glm46v.py +3 -0
  237. transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
  238. transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
  239. transformers/models/glm4v/image_processing_glm4v.py +0 -4
  240. transformers/models/glm4v/modeling_glm4v.py +15 -5
  241. transformers/models/glm4v/modular_glm4v.py +11 -3
  242. transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
  243. transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
  244. transformers/models/glmasr/__init__.py +30 -0
  245. transformers/models/glmasr/configuration_glmasr.py +197 -0
  246. transformers/models/glmasr/modeling_glmasr.py +512 -0
  247. transformers/models/glmasr/modular_glmasr.py +433 -0
  248. transformers/models/glmasr/processing_glmasr.py +332 -0
  249. transformers/models/glpn/image_processing_glpn_fast.py +0 -1
  250. transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
  251. transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
  252. transformers/models/gpt2/modeling_gpt2.py +8 -5
  253. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
  254. transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
  255. transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
  256. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
  257. transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
  258. transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
  259. transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
  260. transformers/models/gptj/modeling_gptj.py +15 -6
  261. transformers/models/granite/modeling_granite.py +1 -1
  262. transformers/models/granite_speech/modeling_granite_speech.py +15 -1
  263. transformers/models/granitemoe/modeling_granitemoe.py +2 -3
  264. transformers/models/granitemoe/modular_granitemoe.py +1 -2
  265. transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
  266. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
  267. transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
  268. transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
  269. transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
  270. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
  271. transformers/models/groupvit/modeling_groupvit.py +6 -1
  272. transformers/models/helium/modeling_helium.py +1 -1
  273. transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
  274. transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
  275. transformers/models/hubert/modeling_hubert.py +4 -0
  276. transformers/models/hubert/modular_hubert.py +4 -0
  277. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
  278. transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
  279. transformers/models/hunyuan_v1_moe/__init__.py +1 -1
  280. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
  281. transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
  282. transformers/models/ibert/modeling_ibert.py +16 -0
  283. transformers/models/idefics/modeling_idefics.py +10 -0
  284. transformers/models/idefics2/modeling_idefics2.py +7 -1
  285. transformers/models/idefics3/modeling_idefics3.py +5 -1
  286. transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
  287. transformers/models/imagegpt/modeling_imagegpt.py +9 -2
  288. transformers/models/instructblip/modeling_instructblip.py +2 -0
  289. transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
  290. transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
  291. transformers/models/internvl/modeling_internvl.py +11 -8
  292. transformers/models/internvl/modular_internvl.py +5 -9
  293. transformers/models/internvl/video_processing_internvl.py +0 -1
  294. transformers/models/jais2/__init__.py +27 -0
  295. transformers/models/jais2/configuration_jais2.py +152 -0
  296. transformers/models/jais2/modeling_jais2.py +486 -0
  297. transformers/models/jais2/modular_jais2.py +196 -0
  298. transformers/models/jamba/modeling_jamba.py +24 -19
  299. transformers/models/jamba/modular_jamba.py +17 -17
  300. transformers/models/janus/image_processing_janus_fast.py +0 -1
  301. transformers/models/janus/modeling_janus.py +15 -7
  302. transformers/models/janus/modular_janus.py +16 -7
  303. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  304. transformers/models/jetmoe/modular_jetmoe.py +1 -0
  305. transformers/models/kosmos2/modeling_kosmos2.py +14 -2
  306. transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
  307. transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
  308. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
  309. transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
  310. transformers/models/lasr/configuration_lasr.py +4 -0
  311. transformers/models/lasr/modeling_lasr.py +3 -2
  312. transformers/models/lasr/modular_lasr.py +8 -1
  313. transformers/models/lasr/processing_lasr.py +0 -2
  314. transformers/models/layoutlm/modeling_layoutlm.py +5 -3
  315. transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
  316. transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
  317. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
  318. transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
  319. transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
  320. transformers/models/led/modeling_led.py +6 -0
  321. transformers/models/levit/modeling_levit.py +18 -0
  322. transformers/models/lfm2/modeling_lfm2.py +1 -1
  323. transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
  324. transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
  325. transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
  326. transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
  327. transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
  328. transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
  329. transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
  330. transformers/models/lilt/modeling_lilt.py +19 -15
  331. transformers/models/llama/modeling_llama.py +1 -1
  332. transformers/models/llama4/image_processing_llama4_fast.py +1 -2
  333. transformers/models/llama4/modeling_llama4.py +8 -4
  334. transformers/models/llava/image_processing_llava_fast.py +0 -1
  335. transformers/models/llava/modeling_llava.py +12 -7
  336. transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
  337. transformers/models/llava_next/modeling_llava_next.py +7 -3
  338. transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
  339. transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
  340. transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
  341. transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
  342. transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
  343. transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
  344. transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
  345. transformers/models/longt5/modeling_longt5.py +0 -4
  346. transformers/models/m2m_100/modeling_m2m_100.py +10 -0
  347. transformers/models/mamba/modeling_mamba.py +2 -1
  348. transformers/models/mamba2/modeling_mamba2.py +24 -23
  349. transformers/models/marian/configuration_marian.py +1 -1
  350. transformers/models/marian/modeling_marian.py +3 -0
  351. transformers/models/markuplm/modeling_markuplm.py +5 -8
  352. transformers/models/mask2former/configuration_mask2former.py +3 -3
  353. transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
  354. transformers/models/mask2former/modeling_mask2former.py +9 -0
  355. transformers/models/maskformer/configuration_maskformer.py +3 -3
  356. transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
  357. transformers/models/maskformer/modeling_maskformer.py +9 -1
  358. transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
  359. transformers/models/mbart/configuration_mbart.py +1 -0
  360. transformers/models/mbart/modeling_mbart.py +7 -0
  361. transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
  362. transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
  363. transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
  364. transformers/models/mimi/modeling_mimi.py +25 -4
  365. transformers/models/minimax/modeling_minimax.py +16 -3
  366. transformers/models/minimax/modular_minimax.py +12 -1
  367. transformers/models/ministral/modeling_ministral.py +1 -1
  368. transformers/models/ministral3/modeling_ministral3.py +1 -1
  369. transformers/models/mistral/modeling_mistral.py +1 -1
  370. transformers/models/mistral3/modeling_mistral3.py +10 -4
  371. transformers/models/mistral3/modular_mistral3.py +3 -1
  372. transformers/models/mixtral/modeling_mixtral.py +12 -4
  373. transformers/models/mixtral/modular_mixtral.py +6 -2
  374. transformers/models/mlcd/modeling_mlcd.py +6 -0
  375. transformers/models/mlcd/modular_mlcd.py +4 -0
  376. transformers/models/mllama/modeling_mllama.py +13 -2
  377. transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
  378. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
  379. transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
  380. transformers/models/mobilebert/modeling_mobilebert.py +2 -0
  381. transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
  382. transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
  383. transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
  384. transformers/models/mobilevit/modeling_mobilevit.py +4 -0
  385. transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
  386. transformers/models/modernbert/modeling_modernbert.py +12 -1
  387. transformers/models/modernbert/modular_modernbert.py +12 -1
  388. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
  389. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
  390. transformers/models/moonshine/modeling_moonshine.py +1 -1
  391. transformers/models/moshi/modeling_moshi.py +21 -51
  392. transformers/models/mpnet/modeling_mpnet.py +2 -0
  393. transformers/models/mra/modeling_mra.py +4 -1
  394. transformers/models/mt5/configuration_mt5.py +2 -3
  395. transformers/models/mt5/modeling_mt5.py +0 -10
  396. transformers/models/musicgen/modeling_musicgen.py +5 -9
  397. transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
  398. transformers/models/mvp/modeling_mvp.py +7 -0
  399. transformers/models/nanochat/modeling_nanochat.py +1 -1
  400. transformers/models/nemotron/modeling_nemotron.py +3 -3
  401. transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
  402. transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
  403. transformers/models/nougat/image_processing_nougat_fast.py +0 -1
  404. transformers/models/nougat/tokenization_nougat.py +11 -16
  405. transformers/models/nystromformer/modeling_nystromformer.py +7 -0
  406. transformers/models/olmo/modeling_olmo.py +1 -1
  407. transformers/models/olmo2/modeling_olmo2.py +1 -1
  408. transformers/models/olmo3/modeling_olmo3.py +1 -1
  409. transformers/models/olmoe/modeling_olmoe.py +12 -4
  410. transformers/models/olmoe/modular_olmoe.py +4 -2
  411. transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
  412. transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
  413. transformers/models/oneformer/configuration_oneformer.py +3 -3
  414. transformers/models/oneformer/modeling_oneformer.py +7 -38
  415. transformers/models/openai/modeling_openai.py +12 -0
  416. transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
  417. transformers/models/ovis2/modeling_ovis2.py +15 -3
  418. transformers/models/ovis2/modular_ovis2.py +8 -0
  419. transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
  420. transformers/models/owlv2/modeling_owlv2.py +7 -3
  421. transformers/models/owlv2/modular_owlv2.py +0 -2
  422. transformers/models/owlvit/modeling_owlvit.py +7 -3
  423. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
  424. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
  425. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
  426. transformers/models/paligemma/modeling_paligemma.py +25 -17
  427. transformers/models/parakeet/modeling_parakeet.py +5 -0
  428. transformers/models/parakeet/modular_parakeet.py +5 -0
  429. transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
  430. transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
  431. transformers/models/patchtst/modeling_patchtst.py +5 -4
  432. transformers/models/pe_audio/__init__.py +30 -0
  433. transformers/models/pe_audio/configuration_pe_audio.py +206 -0
  434. transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
  435. transformers/models/pe_audio/modeling_pe_audio.py +820 -0
  436. transformers/models/pe_audio/modular_pe_audio.py +299 -0
  437. transformers/models/pe_audio/processing_pe_audio.py +24 -0
  438. transformers/models/pe_audio_video/__init__.py +29 -0
  439. transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
  440. transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
  441. transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
  442. transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
  443. transformers/models/pe_video/__init__.py +30 -0
  444. transformers/models/pe_video/configuration_pe_video.py +211 -0
  445. transformers/models/pe_video/modeling_pe_video.py +636 -0
  446. transformers/models/pe_video/modular_pe_video.py +219 -0
  447. transformers/models/pe_video/processing_pe_video.py +10 -0
  448. transformers/models/pe_video/video_processing_pe_video.py +66 -0
  449. transformers/models/pegasus/configuration_pegasus.py +1 -0
  450. transformers/models/pegasus/modeling_pegasus.py +3 -0
  451. transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
  452. transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
  453. transformers/models/perceiver/modeling_perceiver.py +5 -1
  454. transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
  455. transformers/models/perception_lm/modeling_perception_lm.py +7 -3
  456. transformers/models/perception_lm/modular_perception_lm.py +7 -3
  457. transformers/models/persimmon/modeling_persimmon.py +1 -1
  458. transformers/models/phi/modeling_phi.py +1 -1
  459. transformers/models/phi3/modeling_phi3.py +1 -1
  460. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
  461. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
  462. transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
  463. transformers/models/phimoe/modeling_phimoe.py +12 -4
  464. transformers/models/phimoe/modular_phimoe.py +1 -1
  465. transformers/models/pix2struct/processing_pix2struct.py +0 -4
  466. transformers/models/pixio/__init__.py +30 -0
  467. transformers/models/pixio/configuration_pixio.py +151 -0
  468. transformers/models/pixio/modeling_pixio.py +507 -0
  469. transformers/models/pixio/modular_pixio.py +404 -0
  470. transformers/models/pixtral/modeling_pixtral.py +1 -1
  471. transformers/models/pixtral/processing_pixtral.py +3 -1
  472. transformers/models/plbart/configuration_plbart.py +1 -0
  473. transformers/models/plbart/modeling_plbart.py +7 -0
  474. transformers/models/plbart/modular_plbart.py +6 -0
  475. transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
  476. transformers/models/poolformer/modeling_poolformer.py +11 -1
  477. transformers/models/pop2piano/configuration_pop2piano.py +0 -1
  478. transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
  479. transformers/models/prophetnet/modeling_prophetnet.py +2 -1
  480. transformers/models/qwen2/modeling_qwen2.py +1 -1
  481. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
  482. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
  483. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
  484. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
  485. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
  486. transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
  487. transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
  488. transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
  489. transformers/models/qwen3/modeling_qwen3.py +1 -1
  490. transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
  491. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
  492. transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
  493. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
  494. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
  495. transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
  496. transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
  497. transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
  498. transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
  499. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
  500. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
  501. transformers/models/rag/configuration_rag.py +0 -8
  502. transformers/models/rag/modeling_rag.py +7 -9
  503. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
  504. transformers/models/reformer/modeling_reformer.py +9 -1
  505. transformers/models/regnet/modeling_regnet.py +4 -0
  506. transformers/models/rembert/modeling_rembert.py +7 -1
  507. transformers/models/resnet/modeling_resnet.py +8 -3
  508. transformers/models/roberta/modeling_roberta.py +3 -0
  509. transformers/models/roberta/modular_roberta.py +3 -0
  510. transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
  511. transformers/models/roc_bert/modeling_roc_bert.py +3 -0
  512. transformers/models/rt_detr/configuration_rt_detr.py +1 -1
  513. transformers/models/rt_detr/modeling_rt_detr.py +4 -0
  514. transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
  515. transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
  516. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
  517. transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
  518. transformers/models/rwkv/modeling_rwkv.py +1 -1
  519. transformers/models/sam/configuration_sam.py +1 -0
  520. transformers/models/sam/image_processing_sam_fast.py +0 -1
  521. transformers/models/sam/modeling_sam.py +4 -1
  522. transformers/models/sam2/configuration_sam2.py +1 -1
  523. transformers/models/sam2/modeling_sam2.py +5 -1
  524. transformers/models/sam2/modular_sam2.py +5 -1
  525. transformers/models/sam2_video/modeling_sam2_video.py +51 -43
  526. transformers/models/sam2_video/modular_sam2_video.py +31 -18
  527. transformers/models/sam3/configuration_sam3.py +21 -1
  528. transformers/models/sam3/modeling_sam3.py +23 -0
  529. transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
  530. transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
  531. transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
  532. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
  533. transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
  534. transformers/models/sam3_video/configuration_sam3_video.py +14 -0
  535. transformers/models/sam3_video/modeling_sam3_video.py +3 -3
  536. transformers/models/sam3_video/processing_sam3_video.py +1 -1
  537. transformers/models/sam_hq/configuration_sam_hq.py +1 -0
  538. transformers/models/sam_hq/modeling_sam_hq.py +26 -23
  539. transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
  540. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
  541. transformers/models/seed_oss/modeling_seed_oss.py +1 -1
  542. transformers/models/segformer/image_processing_segformer_fast.py +0 -1
  543. transformers/models/segformer/modeling_segformer.py +2 -2
  544. transformers/models/segformer/modular_segformer.py +0 -1
  545. transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
  546. transformers/models/siglip/modeling_siglip.py +24 -2
  547. transformers/models/siglip2/modeling_siglip2.py +63 -41
  548. transformers/models/smollm3/modeling_smollm3.py +1 -1
  549. transformers/models/smolvlm/modeling_smolvlm.py +5 -1
  550. transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
  551. transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
  552. transformers/models/speecht5/modeling_speecht5.py +28 -0
  553. transformers/models/splinter/modeling_splinter.py +9 -3
  554. transformers/models/squeezebert/modeling_squeezebert.py +2 -0
  555. transformers/models/stablelm/modeling_stablelm.py +1 -1
  556. transformers/models/starcoder2/modeling_starcoder2.py +1 -1
  557. transformers/models/superglue/image_processing_superglue_fast.py +1 -2
  558. transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
  559. transformers/models/swiftformer/modeling_swiftformer.py +4 -0
  560. transformers/models/swin/modeling_swin.py +16 -12
  561. transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
  562. transformers/models/swin2sr/modeling_swin2sr.py +49 -33
  563. transformers/models/swinv2/modeling_swinv2.py +41 -33
  564. transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
  565. transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
  566. transformers/models/t5/configuration_t5.py +7 -1
  567. transformers/models/t5/modeling_t5.py +1 -7
  568. transformers/models/t5gemma/modeling_t5gemma.py +1 -1
  569. transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
  570. transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
  571. transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
  572. transformers/models/table_transformer/configuration_table_transformer.py +1 -1
  573. transformers/models/table_transformer/modeling_table_transformer.py +1 -1
  574. transformers/models/textnet/image_processing_textnet_fast.py +0 -1
  575. transformers/models/timesfm/modeling_timesfm.py +12 -0
  576. transformers/models/timesfm/modular_timesfm.py +12 -0
  577. transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
  578. transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
  579. transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
  580. transformers/models/trocr/modeling_trocr.py +1 -2
  581. transformers/models/tvp/configuration_tvp.py +5 -1
  582. transformers/models/tvp/modeling_tvp.py +4 -4
  583. transformers/models/udop/configuration_udop.py +1 -0
  584. transformers/models/udop/modeling_udop.py +3 -7
  585. transformers/models/umt5/configuration_umt5.py +2 -2
  586. transformers/models/umt5/modeling_umt5.py +0 -6
  587. transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
  588. transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
  589. transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
  590. transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
  591. transformers/models/video_llava/modeling_video_llava.py +7 -3
  592. transformers/models/vilt/configuration_vilt.py +2 -2
  593. transformers/models/vilt/modeling_vilt.py +7 -0
  594. transformers/models/vipllava/modeling_vipllava.py +7 -3
  595. transformers/models/visual_bert/modeling_visual_bert.py +2 -0
  596. transformers/models/vitmatte/configuration_vitmatte.py +1 -1
  597. transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
  598. transformers/models/vitmatte/modeling_vitmatte.py +4 -0
  599. transformers/models/vitpose/configuration_vitpose.py +1 -1
  600. transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
  601. transformers/models/voxtral/modeling_voxtral.py +2 -2
  602. transformers/models/voxtral/modular_voxtral.py +2 -2
  603. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
  604. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
  605. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
  606. transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
  607. transformers/models/whisper/generation_whisper.py +1 -0
  608. transformers/models/whisper/modeling_whisper.py +5 -3
  609. transformers/models/x_clip/modeling_x_clip.py +2 -0
  610. transformers/models/xcodec/modeling_xcodec.py +5 -0
  611. transformers/models/xglm/modeling_xglm.py +10 -0
  612. transformers/models/xlm/modeling_xlm.py +13 -14
  613. transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
  614. transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
  615. transformers/models/xlnet/modeling_xlnet.py +3 -1
  616. transformers/models/xmod/modeling_xmod.py +3 -0
  617. transformers/models/yoso/modeling_yoso.py +4 -1
  618. transformers/models/zamba/modeling_zamba.py +2 -1
  619. transformers/models/zamba2/modeling_zamba2.py +3 -2
  620. transformers/models/zoedepth/configuration_zoedepth.py +1 -1
  621. transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
  622. transformers/models/zoedepth/modeling_zoedepth.py +7 -0
  623. transformers/pipelines/__init__.py +9 -6
  624. transformers/pipelines/automatic_speech_recognition.py +20 -12
  625. transformers/pipelines/base.py +1 -1
  626. transformers/pipelines/document_question_answering.py +1 -1
  627. transformers/pipelines/question_answering.py +1 -1
  628. transformers/pipelines/text_to_audio.py +2 -2
  629. transformers/processing_utils.py +127 -56
  630. transformers/quantizers/auto.py +2 -4
  631. transformers/quantizers/base.py +9 -64
  632. transformers/quantizers/quantizer_aqlm.py +1 -18
  633. transformers/quantizers/quantizer_auto_round.py +1 -10
  634. transformers/quantizers/quantizer_awq.py +3 -8
  635. transformers/quantizers/quantizer_bitnet.py +1 -6
  636. transformers/quantizers/quantizer_bnb_4bit.py +9 -49
  637. transformers/quantizers/quantizer_bnb_8bit.py +9 -19
  638. transformers/quantizers/quantizer_compressed_tensors.py +1 -4
  639. transformers/quantizers/quantizer_eetq.py +2 -12
  640. transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
  641. transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
  642. transformers/quantizers/quantizer_fp_quant.py +4 -4
  643. transformers/quantizers/quantizer_gptq.py +1 -4
  644. transformers/quantizers/quantizer_higgs.py +2 -6
  645. transformers/quantizers/quantizer_mxfp4.py +2 -28
  646. transformers/quantizers/quantizer_quanto.py +14 -14
  647. transformers/quantizers/quantizer_spqr.py +3 -8
  648. transformers/quantizers/quantizer_torchao.py +28 -124
  649. transformers/quantizers/quantizer_vptq.py +1 -10
  650. transformers/testing_utils.py +28 -12
  651. transformers/tokenization_mistral_common.py +3 -2
  652. transformers/tokenization_utils_base.py +3 -2
  653. transformers/tokenization_utils_tokenizers.py +25 -2
  654. transformers/trainer.py +24 -2
  655. transformers/trainer_callback.py +8 -0
  656. transformers/trainer_seq2seq.py +4 -0
  657. transformers/training_args.py +8 -10
  658. transformers/utils/__init__.py +4 -0
  659. transformers/utils/attention_visualizer.py +4 -4
  660. transformers/utils/auto_docstring.py +34 -25
  661. transformers/utils/generic.py +20 -0
  662. transformers/utils/import_utils.py +51 -9
  663. transformers/utils/kernel_config.py +71 -18
  664. transformers/utils/quantization_config.py +8 -8
  665. transformers/video_processing_utils.py +16 -12
  666. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
  667. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
  668. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
  669. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
  670. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
  671. {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/pipelines/automatic_speech_recognition.py

@@ -198,11 +198,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
             self.type = "seq2seq_whisper"
         elif model.__class__.__name__ in MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.values():
             self.type = "seq2seq"
-        elif (
-            feature_extractor._processor_class
-            and feature_extractor._processor_class.endswith("WithLM")
-            and decoder is not None
-        ):
+        elif decoder is not None:
             self.decoder = decoder
             self.type = "ctc_with_lm"
         else:
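The change above means that passing any `decoder` now selects the `ctc_with_lm` path; the old check that the feature extractor's `_processor_class` ends with `WithLM` is gone. A hedged sketch of how such a decoder is typically supplied (the checkpoint name and the LM-less decoder are illustrative, not taken from this diff; requires `pyctcdecode`):

    # Sketch only: build a CTC beam-search decoder and hand it to the pipeline.
    from pyctcdecode import build_ctcdecoder
    from transformers import AutoModelForCTC, AutoProcessor, pipeline

    checkpoint = "facebook/wav2vec2-base-960h"  # illustrative checkpoint
    processor = AutoProcessor.from_pretrained(checkpoint)
    model = AutoModelForCTC.from_pretrained(checkpoint)

    # pyctcdecode expects the labels ordered by token id.
    vocab = processor.tokenizer.get_vocab()
    labels = [tok for tok, _ in sorted(vocab.items(), key=lambda kv: kv[1])]
    decoder = build_ctcdecoder(labels)  # no language model attached, for brevity

    # After this change, supplying `decoder` alone selects the "ctc_with_lm" type.
    asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        decoder=decoder,
    )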
@@ -350,6 +346,20 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
 
         return preprocess_params, forward_params, postprocess_params
 
+    @property
+    def _align_to(self):
+        """Sample stride per output."""
+        # XXX: Carefully, this variable will not exist in `seq2seq` setting.
+        # Currently chunking is not possible at this level for `seq2seq` so
+        # it's ok.
+        align_to = getattr(self.model.config, "inputs_to_logits_ratio", 1)
+        if self.model.config.model_type == "lasr_ctc":
+            # TODO: find a standard for that but not easy because input length -> mel length depends on the feature extractor
+            # specific way of doing it
+            # means the model take mel features as input, we align according to the hop length
+            align_to *= self.feature_extractor.hop_length
+        return align_to
+
     def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
         if isinstance(inputs, str):
             if inputs.startswith("http://") or inputs.startswith("https://"):
@@ -444,10 +454,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
         if isinstance(stride_length_s, (int, float)):
             stride_length_s = [stride_length_s, stride_length_s]
 
-        # XXX: Carefully, this variable will not exist in `seq2seq` setting.
-        # Currently chunking is not possible at this level for `seq2seq` so
-        # it's ok.
-        align_to = getattr(self.model.config, "inputs_to_logits_ratio", 1)
+        align_to = self._align_to
         chunk_len = int(round(chunk_length_s * self.feature_extractor.sampling_rate / align_to) * align_to)
         stride_left = int(round(stride_length_s[0] * self.feature_extractor.sampling_rate / align_to) * align_to)
         stride_right = int(round(stride_length_s[1] * self.feature_extractor.sampling_rate / align_to) * align_to)
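To make the rounding concrete, a small self-contained sketch of the arithmetic above (all values illustrative; 320 is the typical `inputs_to_logits_ratio` of wav2vec2-style CTC models):

    # Sketch of the chunk-length alignment; the numbers are illustrative.
    sampling_rate = 16000
    align_to = 320            # e.g. inputs_to_logits_ratio for a wav2vec2-style model
    chunk_length_s = 10.007   # deliberately not an exact multiple

    raw_samples = chunk_length_s * sampling_rate               # 160112.0
    chunk_len = int(round(raw_samples / align_to) * align_to)  # 160000

    # Chunk boundaries are snapped to multiples of align_to so that sample
    # positions and logit frames stay in lock step across chunks.
    assert chunk_len % align_to == 0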
@@ -567,7 +574,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
                 # Send stride to `postprocess`.
                 # it needs to be handled there where
                 # the pieces are to be concatenated.
-                ratio = 1 / self.model.config.inputs_to_logits_ratio
+                ratio = 1 / self._align_to
                 if isinstance(stride, tuple):
                     out["stride"] = rescale_stride([stride], ratio)[0]
                 else:
@@ -650,11 +657,12 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
 
         if return_timestamps and self.type not in {"seq2seq", "seq2seq_whisper"}:
             chunks = []
+            align_to = self._align_to
             for item in offsets:
-                start = item["start_offset"] * self.model.config.inputs_to_logits_ratio
+                start = item["start_offset"] * align_to
                 start /= self.feature_extractor.sampling_rate
 
-                stop = item["end_offset"] * self.model.config.inputs_to_logits_ratio
+                stop = item["end_offset"] * align_to
                 stop /= self.feature_extractor.sampling_rate
 
                 chunks.append({"text": item[return_timestamps], "timestamp": (start, stop)})
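A worked example of the offset-to-seconds conversion above (offsets illustrative): with `align_to = 320` and a 16 kHz feature extractor, a token spanning output offsets 75 to 120 maps to 75 * 320 / 16000 = 1.5 s through 120 * 320 / 16000 = 2.4 s:

    # Sketch of the timestamp conversion; the offsets are illustrative.
    align_to = 320
    sampling_rate = 16000
    item = {"start_offset": 75, "end_offset": 120}

    start = item["start_offset"] * align_to / sampling_rate  # 1.5 seconds
    stop = item["end_offset"] * align_to / sampling_rate     # 2.4 seconds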
transformers/pipelines/base.py

@@ -884,7 +884,7 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
             # NOTE: _prepare_generation_config creates a deep copy of the generation config before updating it,
             # and returns all kwargs that were not used to update the generation config
             prepared_generation_config, kwargs = self.model._prepare_generation_config(
-                generation_config=default_pipeline_generation_config, use_model_defaults=True, **kwargs
+                generation_config=default_pipeline_generation_config, **kwargs
             )
             self.generation_config = prepared_generation_config
             # if the `max_new_tokens` is set to the pipeline default, but `max_length` is set to a non-default
transformers/pipelines/document_question_answering.py

@@ -201,7 +201,7 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
             postprocess_params["top_k"] = top_k
         if max_answer_len is not None:
             if max_answer_len < 1:
-                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
+                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
             postprocess_params["max_answer_len"] = max_answer_len
         if handle_impossible_answer is not None:
             postprocess_params["handle_impossible_answer"] = handle_impossible_answer
transformers/pipelines/question_answering.py

@@ -328,7 +328,7 @@ class QuestionAnsweringPipeline(ChunkPipeline):
             postprocess_params["top_k"] = top_k
         if max_answer_len is not None:
             if max_answer_len < 1:
-                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
+                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
             postprocess_params["max_answer_len"] = max_answer_len
         if handle_impossible_answer is not None:
             postprocess_params["handle_impossible_answer"] = handle_impossible_answer
transformers/pipelines/text_to_audio.py

@@ -117,8 +117,8 @@ class TextToAudioPipeline(Pipeline):
             else vocoder
         )
 
-        if self.model.config.model_type in ["musicgen"]:
-            # MusicGen expect to use the tokenizer
+        if self.model.config.model_type in ["musicgen", "speecht5"]:
+            # MusicGen and SpeechT5 expect to use their tokenizer instead
             self.processor = None
 
         self.sampling_rate = sampling_rate
transformers/processing_utils.py

@@ -129,6 +129,26 @@ MODALITY_TO_BASE_CLASS_MAPPING = {
     "video_processor": "BaseVideoProcessor",
 }
 
+
+def _get_modality_for_attribute(attribute_name: str) -> str:
+    """
+    Get the canonical modality type for a given attribute name.
+
+    For example:
+    - "image_processor" -> "image_processor"
+    - "encoder_image_processor" -> "image_processor"
+    - "text_tokenizer" -> "tokenizer"
+    - "my_feature_extractor" -> "feature_extractor"
+    """
+    for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys():
+        if modality in attribute_name:
+            return modality
+    raise ValueError(
+        f"Cannot determine modality for attribute '{attribute_name}'. "
+        f"Attribute name must contain one of: {list(MODALITY_TO_AUTOPROCESSOR_MAPPING.keys())}"
+    )
+
+
 if sys.version_info >= (3, 11):
     Unpack = typing.Unpack
 else:
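The new helper resolves any attribute name to its canonical modality by substring match. A standalone illustration of that contract (the key set below is an assumption mirroring MODALITY_TO_BASE_CLASS_MAPPING above; the real mapping lives elsewhere in processing_utils.py):

    # Standalone sketch of _get_modality_for_attribute's behavior; the key set is assumed.
    MODALITY_TO_AUTOPROCESSOR_MAPPING = {
        "tokenizer": None,
        "image_processor": None,
        "feature_extractor": None,
        "video_processor": None,
    }

    def get_modality(attribute_name: str) -> str:
        for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING:
            if modality in attribute_name:
                return modality
        raise ValueError(f"Cannot determine modality for attribute '{attribute_name}'.")

    assert get_modality("decoder_tokenizer") == "tokenizer"
    assert get_modality("encoder_image_processor") == "image_processor"
    # Unrecognized names fail loudly instead of silently falling back to "tokenizer".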
@@ -663,8 +683,10 @@ class ProcessorMixin(PushToHubMixin):
         mismatch between expected and actual class, an error is raise. Otherwise, the proper retrieved class
         is returned.
         """
-        if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING and "tokenizer" in argument_name:
-            argument_name = "tokenizer"
+        # If the exact attribute name is not in the mapping, use its canonical modality
+        # (e.g., "encoder_tokenizer" -> "tokenizer")
+        if argument_name not in MODALITY_TO_BASE_CLASS_MAPPING:
+            argument_name = _get_modality_for_attribute(argument_name)
         class_name = MODALITY_TO_BASE_CLASS_MAPPING.get(argument_name)
         if isinstance(class_name, tuple):
             proper_class = tuple(self.get_possibly_dynamic_module(n) for n in class_name if n is not None)
@@ -695,24 +717,17 @@ class ProcessorMixin(PushToHubMixin):
         # extra attributes to be kept
         attrs_to_save += ["auto_map"]
 
+        # Remove tokenizers from output - they have their own vocab files and are saved separately.
+        # All other sub-processors (image_processor, feature_extractor, etc.) are kept in processor_config.json.
         for attribute in self.__class__.get_attributes():
-            if "tokenizer" in attribute and attribute in output:
-                del output[attribute]
+            if attribute in output:
+                modality = _get_modality_for_attribute(attribute)
+                if modality == "tokenizer":
+                    del output[attribute]
 
         if "chat_template" in output:
             del output["chat_template"]
 
-        def save_public_processor_class(dictionary):
-            # make sure private name "_processor_class" is correctly
-            # saved as "processor_class"
-            _processor_class = dictionary.pop("_processor_class", None)
-            if _processor_class is not None:
-                dictionary["processor_class"] = _processor_class
-            for value in dictionary.values():
-                if isinstance(value, dict):
-                    save_public_processor_class(value)
-            return dictionary
-
         def cast_array_to_list(dictionary):
             """
             Numpy arrays are not serialiazable but can be in pre-processing dicts.
@@ -743,7 +758,6 @@ class ProcessorMixin(PushToHubMixin):
             )
         }
         output = cast_array_to_list(output)
-        output = save_public_processor_class(output)
         output["processor_class"] = self.__class__.__name__
 
         return output
@@ -816,16 +830,17 @@ class ProcessorMixin(PushToHubMixin):
 
         for attribute_name in self.get_attributes():
             attribute = getattr(self, attribute_name)
-            if hasattr(attribute, "_set_processor_class"):
-                attribute._set_processor_class(self.__class__.__name__)
 
-            # Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
-            if attribute_name == "tokenizer":
-                attribute.save_pretrained(save_directory)
-            # if a model has multiple tokenizers, save the additional tokenizers in their own folders.
-            # Note that the additional tokenizers must have "tokenizer" in their attribute name.
-            elif "tokenizer" in attribute_name:
-                attribute.save_pretrained(os.path.join(save_directory, attribute_name))
+            modality = _get_modality_for_attribute(attribute_name)
+            is_primary = attribute_name == modality
+            if modality == "tokenizer":
+                attribute._set_processor_class(self.__class__.__name__)
+                # Save the tokenizer in its own vocab file. The other attributes are saved as part of `processor_config.json`
+                if is_primary:
+                    attribute.save_pretrained(save_directory)
+                else:
+                    # if a model has multiple tokenizers, save the additional tokenizers in their own folders.
+                    attribute.save_pretrained(os.path.join(save_directory, attribute_name))
             elif attribute._auto_class is not None:
                 custom_object_save(attribute, save_directory, config=attribute)
@@ -1393,9 +1408,10 @@ class ProcessorMixin(PushToHubMixin):
         if token is not None:
             kwargs["token"] = token
 
-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
-        return cls.from_args_and_dict(args, processor_dict, **kwargs)
+        # Get processor_dict first so we can use it to instantiate non-tokenizer sub-processors
+        processor_dict, instantiation_kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
+        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, processor_dict, **kwargs)
+        return cls.from_args_and_dict(args, processor_dict, **instantiation_kwargs)
 
     @classmethod
     def get_attributes(cls):
@@ -1405,7 +1421,7 @@ class ProcessorMixin(PushToHubMixin):
             # don't treat audio_tokenizer as an attribute
             if sub_processor_type == "audio_tokenizer":
                 continue
-            if sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING or "tokenizer" in sub_processor_type:
+            if any(modality in sub_processor_type for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
                 attributes.append(sub_processor_type)
 
         # Legacy processors may not override `__init__` and instead expose modality
@@ -1419,7 +1435,7 @@ class ProcessorMixin(PushToHubMixin):
             inferred_attribute = attribute_name[: -len("_class")]
             if inferred_attribute == "audio_tokenizer":
                 continue
-            if inferred_attribute in MODALITY_TO_AUTOPROCESSOR_MAPPING or "tokenizer" in inferred_attribute:
+            if any(modality in inferred_attribute for modality in MODALITY_TO_AUTOPROCESSOR_MAPPING.keys()):
                 attributes.append(inferred_attribute)
 
         return attributes
@@ -1447,49 +1463,104 @@ class ProcessorMixin(PushToHubMixin):
         cls._auto_class = auto_class
 
     @classmethod
-    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+    def _load_tokenizer_from_pretrained(
+        cls, sub_processor_type, pretrained_model_name_or_path, subfolder="", **kwargs
+    ):
+        auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
+        is_primary = sub_processor_type == "tokenizer"
+
+        if is_primary:
+            # Primary tokenizer: load from the root directory
+            tokenizer = auto_processor_class.from_pretrained(
+                pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+            )
+        else:
+            # Additional tokenizer: load from a subfolder (e.g., "decoder_tokenizer")
+            tokenizer_subfolder = os.path.join(subfolder, sub_processor_type) if subfolder else sub_processor_type
+            tokenizer = auto_processor_class.from_pretrained(
+                pretrained_model_name_or_path, subfolder=tokenizer_subfolder, **kwargs
+            )
+        return tokenizer
+
+    @classmethod
+    def _get_arguments_from_pretrained(cls, pretrained_model_name_or_path, processor_dict=None, **kwargs):
         """
         Identify and instantiate the subcomponents of Processor classes, such as image processors, tokenizers,
         and feature extractors. This method inspects the processor's `__init__` signature to identify parameters
         that correspond to known modality types (image_processor, tokenizer, feature_extractor, etc.) or contain
-        "tokenizer" in their name. It then uses the appropriate Auto class (AutoImageProcessor, AutoTokenizer, etc.)
-        from `MODALITY_TO_AUTOPROCESSOR_MAPPING` to load each subcomponent via `.from_pretrained()`. For tokenizer-like
-        parameters not explicitly in the mapping, the method uses AutoTokenizer with a subfolder argument.
+        modality names in their attribute name.
+
+        For tokenizers: uses the appropriate Auto class (AutoTokenizer) to load via `.from_pretrained()`.
+        Additional tokenizers (e.g., "decoder_tokenizer") are loaded from subfolders.
+
+        For other sub-processors (image_processor, feature_extractor, etc.): primary ones are loaded via the
+        Auto class. Additional ones are instantiated from the config stored in `processor_config.json`
+        (passed as `processor_dict`).
+
+        Args:
+            pretrained_model_name_or_path: Path or model id to load from.
+            processor_dict: Optional dict containing the processor config (from `processor_config.json`).
+                Required when loading additional non-tokenizer sub-processors.
         """
         args = []
+        processor_dict = processor_dict if processor_dict is not None else {}
+        # Remove subfolder from kwargs to avoid duplicate keyword arguments
+        subfolder = kwargs.pop("subfolder", "")
+
         # get args from processor init signature
         sub_processors = cls.get_attributes()
         for sub_processor_type in sub_processors:
-            if "FuyuProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
-                from .tokenization_utils_tokenizers import TokenizersBackend
-
-                tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
-                if "token_type_ids" in tokenizer.model_input_names:
-                    tokenizer.model_input_names.remove("token_type_ids")
-                args.append(tokenizer)
-            elif "PixtralProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
-                from tokenizers import pre_tokenizers
+            modality = _get_modality_for_attribute(sub_processor_type)
+            is_primary = sub_processor_type == modality
 
-                from .models.llama import LlamaTokenizer
+            if (
+                "tokenizer" in sub_processor_type
+            ):  # This is only necessary for the checkpoint in test_processing_mistral3.py, which has no config.json
+                # and whose tokenizer_config.json references LlamaTokenizerFast. TODO: update the config on the hub.
+                if "PixtralProcessor" in cls.__name__:
+                    from .tokenization_utils_tokenizers import TokenizersBackend
 
-                tokenizer = LlamaTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
-                tokenizer._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
-                    [pre_tokenizers.ByteLevel(False), tokenizer._tokenizer.pre_tokenizer]
-                )
+                    tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
+                else:
+                    tokenizer = cls._load_tokenizer_from_pretrained(
+                        sub_processor_type, pretrained_model_name_or_path, subfolder=subfolder, **kwargs
+                    )
                 args.append(tokenizer)
-            elif sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING:
+            elif is_primary:
+                # Primary non-tokenizer sub-processor: load via the Auto class
                 auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
-                sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
-                args.append(sub_processor)
-            elif "tokenizer" in sub_processor_type:
-                # Special case: tokenizer-like parameters not in the mapping (e.g., "protein_tokenizer")
-                # Load using AutoTokenizer with subfolder
-                auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING["tokenizer"]
                 sub_processor = auto_processor_class.from_pretrained(
-                    pretrained_model_name_or_path, subfolder=sub_processor_type, **kwargs
+                    pretrained_model_name_or_path, subfolder=subfolder, **kwargs
                 )
                 args.append(sub_processor)
 
+            elif sub_processor_type in processor_dict:
+                # Additional non-tokenizer sub-processor: instantiate from its config in processor_dict
+                sub_processor_config = processor_dict[sub_processor_type]
+                if isinstance(sub_processor_config, dict):
+                    # Determine the class to instantiate.
+                    # Image processors have 'image_processor_type', feature extractors have 'feature_extractor_type'
+                    type_key = f"{modality}_type"
+                    class_name = sub_processor_config.get(type_key)
+                    if class_name is None:
+                        raise ValueError(
+                            f"Cannot instantiate {sub_processor_type}: missing '{type_key}' in config. "
+                            f"Config keys: {list(sub_processor_config.keys())}"
+                        )
+                    processor_class = cls.get_possibly_dynamic_module(class_name)
+                    sub_processor = processor_class(**sub_processor_config)
+                    args.append(sub_processor)
+                else:
+                    raise ValueError(
+                        f"Expected dict for {sub_processor_type} in processor_config.json, "
+                        f"got {type(sub_processor_config)}"
+                    )
+            else:
+                raise ValueError(
+                    f"Cannot find config for {sub_processor_type} in processor_config.json. "
+                    f"Available keys: {list(processor_dict.keys())}"
+                )
+
         return args
 
     @staticmethod
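For the new `processor_dict` branch, the additional sub-processor's config is expected to live inline in `processor_config.json` under its attribute name, carrying a `{modality}_type` key naming the class. A hand-written sketch (the `moderation_image_processor` attribute and its values are illustrative, not from a real checkpoint):

```python
from transformers import CLIPImageProcessor

# Hypothetical excerpt of a processor_config.json carrying a second image processor.
processor_dict = {
    "processor_class": "MyProcessor",
    "moderation_image_processor": {
        "image_processor_type": "CLIPImageProcessor",  # the `{modality}_type` key the loader checks
        "size": {"shortest_edge": 224},
        "do_normalize": True,
    },
}

sub_processor_type = "moderation_image_processor"
modality = "image_processor"  # what _get_modality_for_attribute would infer
sub_processor_config = processor_dict[sub_processor_type]
class_name = sub_processor_config[f"{modality}_type"]  # -> "CLIPImageProcessor"

# The real code resolves `class_name` with cls.get_possibly_dynamic_module (handling
# dynamic/remote classes); a direct class reference stands in for it here.
sub_processor = CLIPImageProcessor(**sub_processor_config)
print(type(sub_processor).__name__, sub_processor.size)
```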
@@ -302,7 +302,7 @@ def register_quantizer(name: str):
     return register_quantizer_fn
 
 
-def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_only, user_agent):
+def get_hf_quantizer(config, quantization_config, device_map, weights_only, user_agent):
     pre_quantized = hasattr(config, "quantization_config")
     if pre_quantized and not AutoHfQuantizer.supports_quant_method(config.quantization_config):
         pre_quantized = False
@@ -324,11 +324,9 @@ def get_hf_quantizer(config, quantization_config, dtype, device_map, weights_onl
 
     if hf_quantizer is not None:
         hf_quantizer.validate_environment(
-            dtype=dtype,
             device_map=device_map,
             weights_only=weights_only,
         )
-        dtype = hf_quantizer.update_dtype(dtype)
         device_map = hf_quantizer.update_device_map(device_map)
         config = hf_quantizer.update_tp_plan(config)
         config = hf_quantizer.update_ep_plan(config)
@@ -337,4 +335,4 @@
     if not getattr(hf_quantizer.quantization_config, "dequantize", False):
         quant_method = hf_quantizer.quantization_config.quant_method
         user_agent["quant"] = getattr(quant_method, "value", quant_method)
-    return hf_quantizer, config, dtype, device_map
+    return hf_quantizer, config, device_map
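The signature change propagates to the quantizer contract: `validate_environment` is no longer called with a `dtype` kwarg, and `dtype` is no longer returned to the caller. For third-party quantizers registered via `register_quantizer`, a minimal sketch of a class targeting the rc2 contract (the method set and defaults shown are assumptions, not the full required interface):

```python
from transformers.quantizers import HfQuantizer

class MyQuantizer(HfQuantizer):
    # Sketch of a third-party quantizer against the rc2 contract.
    requires_calibration = False

    def validate_environment(self, device_map=None, weights_only=None, **kwargs):
        # rc2: called without `dtype`; any dtype-dependent validation has to live elsewhere.
        pass

    @property
    def is_trainable(self):
        return False
```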
@@ -31,16 +31,6 @@ else:
 logger = logging.get_logger(__file__)
 
 
-def _assign_original_dtype(module, original_dtype):
-    # not very nice in a recursive function but it avoids a circular import
-    from ..modeling_utils import PreTrainedModel
-
-    for child in module.children():
-        if isinstance(child, PreTrainedModel):
-            child.config._pre_quantization_dtype = original_dtype
-        _assign_original_dtype(child, original_dtype)
-
-
 def get_keys_to_not_convert(model) -> list:
     r"""
     Function to automatically detect keys to not convert for usage like quantization. For example for CausalLM modules
@@ -118,33 +108,7 @@ class HfQuantizer(ABC):
         """
         return device_map
 
-    def adjust_target_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        """
-        Override this method if you want to adjust the `target_dtype` variable used in `from_pretrained`
-        to compute the device_map in case the device_map is a `str`. E.g. for bitsandbytes we force-set `target_dtype`
-        to `torch.int8` and for 4-bit we pass a custom enum `accelerate.CustomDtype.int4`.
-
-        Args:
-            dtype (`torch.dtype`, *optional*):
-                The dtype that is used to compute the device_map.
-        """
-        return dtype
-
     def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
-        "Return the element size (in bytes) for `param_name`."
-
-        if self.param_needs_quantization(model, param_name):
-            from accelerate.utils import CustomDtype
-
-            mapping = {
-                torch.int8: 1,
-                CustomDtype.INT4: 0.5,
-                CustomDtype.FP8: 1,
-                CustomDtype.INT2: 0.25,
-            }
-            # The value passed is actually not used when the method is overridden
-            if (custom_dtype := self.adjust_target_dtype(torch.float16)) in mapping:
-                return mapping[custom_dtype]
         return param.element_size()
 
     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
@@ -176,7 +140,7 @@ class HfQuantizer(ABC):
     def _process_model_before_weight_loading(self, model, **kwargs):
         return model
 
-    def preprocess_model(self, model: "PreTrainedModel", config, dtype=None, checkpoint_files=None, **kwargs):
+    def preprocess_model(self, model: "PreTrainedModel", dtype=None, **kwargs):
         """
         Setting model attributes and/or converting model before weights loading. At this point
         the model should be initialized on the meta device so you can freely manipulate the skeleton
@@ -194,14 +158,6 @@
         self._convert_model_for_quantization(model)
         self._process_model_before_weight_loading(model, **kwargs)
 
-        # We store the original dtype for quantized models as we cannot easily retrieve it
-        # once the weights have been quantized
-        # Note that once you have loaded a quantized model, you can't change its dtype so this will
-        # remain a single source of truth
-        original_dtype = dtype if dtype is not None else torch.get_default_dtype()
-        config._pre_quantization_dtype = original_dtype
-        _assign_original_dtype(model, original_dtype)
-
     def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
         return model
 
@@ -231,34 +187,25 @@ class HfQuantizer(ABC):
         del model.hf_quantizer
         if hasattr(model.config, "quantization_config"):
             del model.config.quantization_config
-        if hasattr(model.config, "_pre_quantization_dtype"):
-            del model.config._pre_quantization_dtype
         if hasattr(model, "quantization_method"):
             del model.quantization_method
         model.is_quantized = False
 
-    def dequantize(self, model):
+    def dequantize(self, model, dtype=None):
         """
         Potentially dequantize the model to retrieve the original model, with some loss in accuracy / performance.
         Note not all quantization schemes support this.
         """
-        model = self._dequantize(model)
+        if dtype is None:
+            # Use the same dtype the model was loaded in. Otherwise we might hit issues with modules we didn't
+            # quantize, or we would need to upcast everything to a common dtype.
+            dtype = model.config.dtype
+        model = self._dequantize(model, dtype=dtype)
         self.remove_quantization_config(model)
 
         return model
 
-    def get_accelerator_warm_up_factor(self):
-        """
-        The factor to be used in `caching_allocator_warmup` to get the number of bytes to pre-allocate to warm up accelerator.
-        A factor of 2 means we allocate all bytes in the empty model (since we allocate in fp16), a factor of 4 means
-        we allocate half the memory of the weights residing in the empty model, etc...
-        """
-        # By default we return 4, i.e. half the model size (this corresponds to the case where the model is not
-        # really pre-processed, i.e. we do not have the info that weights are going to be 8 bits before actual
-        # weight loading)
-        return 4
-
-    def _dequantize(self, model):
+    def _dequantize(self, model, dtype=None):
         raise NotImplementedError(
             f"{self.quantization_config.quant_method} has no implementation of `dequantize`, please raise an issue on GitHub."
         )
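`_dequantize` now receives the target `dtype`, so backends that support dequantization can rebuild float modules in the dtype the model was loaded in. A self-contained sketch of what an override could do; `MyQuantizedLinear` and the traversal are illustrative, not a transformers API:

```python
import torch
import torch.nn as nn

class MyQuantizedLinear(nn.Module):
    """Stand-in for a backend-specific quantized linear layer."""
    def __init__(self, weight_int8: torch.Tensor, scale: torch.Tensor):
        super().__init__()
        self.weight_int8 = weight_int8
        self.scale = scale

    def dequantize_weight(self, dtype: torch.dtype) -> torch.Tensor:
        # Rescale the int8 weights back to a floating dtype chosen by the caller.
        return self.weight_int8.to(dtype) * self.scale.to(dtype)

def _dequantize(model: nn.Module, dtype: torch.dtype | None = None) -> nn.Module:
    # Mirrors the rc2 hook: the caller decides the output dtype.
    dtype = dtype or torch.get_default_dtype()
    for name, module in model.named_children():
        if isinstance(module, MyQuantizedLinear):
            out_f, in_f = module.weight_int8.shape
            linear = nn.Linear(in_f, out_f, bias=False, dtype=dtype)
            linear.weight.data = module.dequantize_weight(dtype)
            setattr(model, name, linear)
        else:
            _dequantize(module, dtype)
    return model
```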
@@ -313,15 +260,13 @@ class HfQuantizer(ABC):
     def is_trainable(self): ...
 
     def _convert_model_for_quantization(self, model):
-        from accelerate import init_empty_weights
-
         for name, module in model.named_modules():
             module_class_name = module.__class__.__name__
             if module_class_name in MODULES_TO_PATCH_FOR_QUANTIZATION and (
                 self.quantization_config.quant_method
                 in MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["quantization_methods"]
             ):
-                with init_empty_weights():
+                with torch.device("meta"):
                     parent_module, name = get_module_from_name(model, name)
                     parent_module._modules[name] = MODULES_TO_PATCH_FOR_QUANTIZATION[module_class_name]["module_name"](
                         model.config.get_text_config()
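The swap from `accelerate.init_empty_weights` to `torch.device("meta")` relies on PyTorch's native device context: modules constructed inside it allocate no parameter storage, only shapes and dtypes. A quick self-contained illustration:

```python
import torch
import torch.nn as nn

# Inside the context, new modules place their parameters on the meta device:
# no memory is allocated for the data, only metadata is tracked.
with torch.device("meta"):
    layer = nn.Linear(4096, 4096)

print(layer.weight.device)  # meta
print(layer.weight.shape)   # torch.Size([4096, 4096])

# Materialize later, e.g. with to_empty(), before loading real weights:
layer = layer.to_empty(device="cpu")
```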
@@ -23,13 +23,10 @@ if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel
 
 from ..integrations import replace_with_aqlm_linear
-from ..utils import is_accelerate_available, is_aqlm_available, is_torch_available, logging
+from ..utils import is_accelerate_available, is_aqlm_available, logging
 from ..utils.quantization_config import QuantizationConfigMixin
 
 
-if is_torch_available():
-    import torch
-
 logger = logging.get_logger(__name__)
 
 
@@ -50,20 +47,6 @@ class AqlmHfQuantizer(HfQuantizer):
         if not is_aqlm_available():
             raise ImportError("Using `aqlm` quantization requires AQLM: `pip install aqlm[gpu,cpu]`")
 
-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            if torch.cuda.is_available():
-                dtype = torch.float16
-                logger.info(
-                    "CUDA available. Assuming AQLM inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
-                )
-            else:
-                dtype = torch.float32
-                logger.info(
-                    "CUDA is unavailable. Assuming AQLM inference on CPU and loading the model in `torch.float32`. To overwrite it, set `dtype` manually."
-                )
-        return dtype
-
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
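With `update_dtype` removed here (and from the loading path in the earlier `get_hf_quantizer` hunks), AQLM loads no longer auto-select `float16` on CUDA or `float32` on CPU. Users who relied on that default can pass `dtype` explicitly; a sketch (the checkpoint id is illustrative):

```python
import torch
from transformers import AutoModelForCausalLM

# rc1: dtype=None let AqlmHfQuantizer.update_dtype pick float16 (CUDA) or float32 (CPU).
# rc2: request the dtype explicitly when loading an AQLM-quantized checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf",  # illustrative AQLM checkpoint id
    dtype=torch.float16,
    device_map="auto",
)
```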
@@ -19,13 +19,10 @@ from .base import HfQuantizer
 if TYPE_CHECKING:
     from ..modeling_utils import PreTrainedModel
 
-from ..utils import is_auto_round_available, is_torch_available, logging
+from ..utils import is_auto_round_available, logging
 from ..utils.quantization_config import QuantizationConfigMixin
 
 
-if is_torch_available():
-    import torch
-
 logger = logging.get_logger(__name__)
 
 
@@ -47,12 +44,6 @@ class AutoRoundQuantizer(HfQuantizer):
                 "Loading an AutoRound quantized model requires auto-round library (`pip install 'auto-round>=0.5'`)"
             )
 
-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            dtype = torch.bfloat16
-            logger.info("Loading the model in `torch.bfloat16`. To overwrite it, set `dtype` manually.")
-        return dtype
-
     def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
         if model.__class__.main_input_name != "input_ids":
             logger.warning("AutoRound offers only limited support for models that are not strictly text-based.")
@@ -53,10 +53,7 @@ class AwqQuantizer(HfQuantizer):
             raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")
 
     def update_dtype(self, dtype):
-        if dtype is None:
-            dtype = torch.float16
-            logger.info("Loading the model in `torch.float16`. To overwrite it, set `dtype` manually.")
-        elif dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
+        if dtype == torch.bfloat16 and (torch.cuda.is_available() or torch.xpu.is_available()):
             logger.warning(
                 "`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`."
             )
@@ -65,13 +62,11 @@
             logger.warning("We suggest you set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.")
         return dtype
 
-    def _process_model_before_weight_loading(
-        self, model: "PreTrainedModel", keep_in_fp32_modules: list[str] | None = None, **kwargs
-    ):
+    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
         from ..integrations import replace_quantization_scales, replace_with_awq_linear
 
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules, add_default_skips=True
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules, add_default_skips=True
         )
 
         model = replace_with_awq_linear(
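The final hunk stops threading `keep_in_fp32_modules` through as a kwarg and reads `model._keep_in_fp32_modules` off the model instead. A toy sketch of the resulting call pattern; `get_modules_to_not_convert` is simplified here and the merge semantics are an assumption:

```python
def get_modules_to_not_convert(model, user_skips, keep_in_fp32, add_default_skips=False):
    # Simplified stand-in: merge the skip lists the way the quantizer needs them.
    # (The `model` argument is unused in this toy version.)
    skips = set(user_skips or []) | set(keep_in_fp32 or [])
    if add_default_skips:
        skips.add("lm_head")
    return sorted(skips)

class DummyModel:
    # In rc2, the model itself carries the fp32-pinned module names;
    # the loading code sets this attribute, not the user.
    _keep_in_fp32_modules = ["layernorm"]

model = DummyModel()
# rc1: the caller had to pass keep_in_fp32_modules explicitly.
# rc2: the quantizer reads model._keep_in_fp32_modules directly.
print(get_modules_to_not_convert(model, ["visual"], model._keep_in_fp32_modules, add_default_skips=True))
# ['layernorm', 'lm_head', 'visual']
```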