transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
transformers/integrations/quark.py ADDED
@@ -0,0 +1,55 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+from ..core_model_loading import ConversionOps
+from ..utils import is_torch_available
+
+
+if is_torch_available():
+    import torch
+
+
+class QuarkDeserialize(ConversionOps):
+    def __init__(self, hf_quantizer):
+        self.hf_quantizer = hf_quantizer
+
+    def convert(
+        self,
+        input_dict: torch.Tensor,
+        model: Optional[torch.nn.Module] = None,
+        missing_keys: Optional[list[str]] = None,
+        full_layer_name: str | None = None,
+        **kwargs,
+    ) -> dict[str, torch.Tensor]:
+        # target_key should be in the form of weight_scale, bias_scale, input_scale, output_scale, weight_zero_point, bias_zero_point, input_zero_point, output_zero_point
+        target_key, value = tuple(input_dict.items())[0]
+        value = value[0] if isinstance(value, list) else value
+        # this will get the param name : weight, input, bias or output
+        param = target_key.split("_", 1)[0]
+        # quant_state should be in the form of scale, or zero_point
+        quant_state = target_key.split("_", 1)[-1]
+
+        # here we change the name for example from the form of :
+        # model.layers.0.mlp.down_proj.weight_scale to model.layers.0.mlp.down_proj.weight_quantizer.scale to fit within
+        # the QParamsLinear module of quark
+        sub_module_state = full_layer_name.rsplit(".", 1)[0] + "." + param + "_quantizer" + "." + quant_state
+
+        # since quark module was expecting keys in the form of model.layers.0.mlp.down_proj.weight_scale
+        # we need to remove it from the missing_keys list
+        missing_keys.discard(full_layer_name)
+
+        return {sub_module_state: value}
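The new transformers/integrations/quark.py file (item 34 in the list above) exists mainly to rename Quark checkpoint keys into the layout that quark's QParamsLinear modules expect. For reference, a minimal self-contained sketch of that rename, where remap_quark_key is an illustrative helper and not part of the file itself:

```python
# Illustrative sketch (not part of the diff): the key rewrite performed by
# QuarkDeserialize.convert, applied to one checkpoint entry.

def remap_quark_key(full_layer_name: str, target_key: str) -> str:
    # e.g. target_key = "weight_scale" -> param = "weight", quant_state = "scale"
    param = target_key.split("_", 1)[0]
    quant_state = target_key.split("_", 1)[-1]
    # "model.layers.0.mlp.down_proj" + ".weight_quantizer." + "scale"
    return full_layer_name.rsplit(".", 1)[0] + "." + param + "_quantizer" + "." + quant_state

print(remap_quark_key("model.layers.0.mlp.down_proj.weight_scale", "weight_scale"))
# model.layers.0.mlp.down_proj.weight_quantizer.scale
```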
transformers/integrations/spqr.py CHANGED
@@ -13,110 +13,64 @@
 # limitations under the License.
 "SpQR (Sparse-Quantized Representation) integration file"
 
-from ..utils import is_accelerate_available, is_spqr_available, is_torch_available
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_accelerate_available, is_spqr_available, is_torch_available, logging
 
 
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
 if is_torch_available():
     import torch.nn as nn
 
+logger = logging.get_logger(__name__)
+
 
-def replace_with_spqr_linear(
-    model,
-    quantization_config=None,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    has_been_replaced=False,
-):
+def replace_with_spqr_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
-    Public method that recursively replaces the Linear layers of the given model with SpQR quantized layers.
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.
+    Public method that replaces the Linear layers of the given model with SPQR quantized layers.
 
     Args:
         model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
-        quantization_config (`SpQRConfig`):
-            The quantization config object that contains the quantization parameters.
-        modules_to_not_convert (`list[str]`, *optional*):
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
            converted.
-        current_key_name (`list`, *optional*):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
+        quantization_config (`SpQRConfig`):
+            The quantization config object that contains the quantization parameters.
     """
-    if modules_to_not_convert is None:
-        modules_to_not_convert = []
-
-    if is_accelerate_available():
-        from accelerate import init_empty_weights
     if is_spqr_available():
        from spqr_quant import QuantizedLinear
 
-    for name, module in model.named_children():
-        if current_key_name is None:
-            current_key_name = []
-        current_key_name.append(name)
-
-        if isinstance(module, nn.Linear):
-            # Check if the current key is not in the `modules_to_not_convert`
-            if ".".join(current_key_name) + ".weight" not in modules_to_not_convert:
-                with init_empty_weights():
-                    tensor_name = ".".join(current_key_name)
-
-                    shapes = quantization_config.shapes
-                    shapes_keys = shapes.keys()
-
-                    shapes_valid = (
-                        f"{tensor_name}.dense_weights.shape" in shapes_keys
-                        and f"{tensor_name}.row_offsets.shape" in shapes_keys
-                        and f"{tensor_name}.col_vals.shape" in shapes_keys
-                        and f"{tensor_name}.in_perm.shape" in shapes_keys
-                    )
-
-                    if not shapes_valid:
-                        raise ValueError(
-                            f"The SpQR quantization config does not contain the shape "
-                            f"configuration for {tensor_name}. This indicates that the "
-                            f"configuration is either invalid or corrupted."
-                        )
-
-                    dense_weights_shape = shapes[f"{tensor_name}.dense_weights.shape"]
-                    row_offsets_shape = shapes[f"{tensor_name}.row_offsets.shape"]
-                    col_vals_shape = shapes[f"{tensor_name}.col_vals.shape"]
-                    in_perm_shape = shapes[f"{tensor_name}.in_perm.shape"]
-
-                    in_features = module.in_features
-                    out_features = module.out_features
-
-                    model._modules[name] = QuantizedLinear.create_placehodler(
-                        rows=out_features,
-                        cols=in_features,
-                        bits=quantization_config.bits,
-                        beta1=quantization_config.beta1,
-                        beta2=quantization_config.beta2,
-                        dense_weights_shape=dense_weights_shape,
-                        row_offsets_shape=row_offsets_shape,
-                        col_vals_shape=col_vals_shape,
-                        in_perm_shape=in_perm_shape,
-                    )
-                    has_been_replaced = True
-
-                    # Store the module class in case we need to transpose the weight later
-                    model._modules[name].source_cls = type(module)
-                    # Force requires grad to False to avoid unexpected errors
-                    model._modules[name].requires_grad_(False)
-            else:
-                pass
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = replace_with_spqr_linear(
-                module,
-                quantization_config=quantization_config,
-                modules_to_not_convert=modules_to_not_convert,
-                current_key_name=current_key_name,
-                has_been_replaced=has_been_replaced,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    # we need this to correctly materialize the weights during quantization
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            if isinstance(module, nn.Linear):
+                shapes = quantization_config.shapes
+
+                new_module = QuantizedLinear.create_placehodler(
+                    rows=module.out_features,
+                    cols=module.in_features,
+                    bits=quantization_config.bits,
+                    beta1=quantization_config.beta1,
+                    beta2=quantization_config.beta2,
+                    dense_weights_shape=shapes[f"{module_name}.dense_weights.shape"],
+                    row_offsets_shape=shapes[f"{module_name}.row_offsets.shape"],
+                    col_vals_shape=shapes[f"{module_name}.col_vals.shape"],
+                    in_perm_shape=shapes[f"{module_name}.in_perm.shape"],
+                )
+                # Force requires grad to False to avoid unexpected errors
+                model._modules[module_name].requires_grad_(False)
+                model.set_submodule(module_name, new_module)
+                has_been_replaced = True
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
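The rewritten replace_with_spqr_linear (like the VPTQ rewrite further down) flattens the old recursion into a single pass over model.named_modules() and delegates the skip logic to the new should_convert_module helper from transformers/quantizers/quantizers_utils.py, whose implementation is not shown in this diff. A rough sketch of the name-based filtering it appears to provide, under that assumption:

```python
# Assumed behavior only: the real helper lives in transformers/quantizers/quantizers_utils.py
# and its exact matching rules are not visible in this diff.
def should_convert_module(module_name: str, modules_to_not_convert: list[str] | None) -> bool:
    # Convert by default; skip a module when any configured pattern appears in its dotted path.
    if not modules_to_not_convert:
        return True
    return not any(pattern in module_name for pattern in modules_to_not_convert)

print(should_convert_module("model.layers.0.mlp.down_proj", ["lm_head"]))  # True
print(should_convert_module("lm_head", ["lm_head"]))                       # False
```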
transformers/integrations/torchao.py CHANGED
@@ -32,7 +32,7 @@ from ..quantizers.quantizers_utils import get_module_from_name
 
 if is_torchao_available():
     TORCHAO_VERSION = version.parse(importlib.metadata.version("torchao"))
-    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.14.0"):
+    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.15.0"):
         from torchao.prototype.safetensors.safetensors_support import (
             unflatten_tensor_state_dict,
         )
@@ -210,61 +210,55 @@ class TorchAoDeserialize(ConversionOps):
     def convert(
         self,
         input_dict: dict[str, torch.Tensor],
+        source_patterns: list[str] | None = None,
         model: Optional[torch.nn.Module] = None,
         full_layer_name: str | None = None,
         missing_keys=None,
         **kwargs,
     ) -> dict[str, torch.Tensor]:
-        if isinstance(self.hf_quantizer.quantization_config.quant_type, str):
-            is_int_4 = "int4" in self.hf_quantizer.quantization_config.quant_type
-        else:
-            config_name = self.hf_quantizer.quantization_config.quant_type.__class__.__name__
-            is_int_4 = fuzzy_match_size(config_name) == "4"
-
-        # Simple case if we gather layermsnorm weights, we can just return the value since they are not quantized
-        if "weight:_data" in input_dict.keys():
-            value = (
-                input_dict["weight:_data"][0]
-                if isinstance(input_dict["weight:_data"], list)
-                else input_dict["weight:_data"]
-            )
-            return {full_layer_name: value}
-
-        is_unsafe_serialization = ":" not in list(input_dict.keys())[0]
+        """
+        Consolidates tensor subclass components before reconstructing the object
+
+        For example:
+            input_dict: {
+                "_weight_qdata": torch.Tensor,
+                "_weight_scale": torch.Tensor,
+            }
+            full_layer_name: "model.layers.0.self_attn.k_proj.weight"
+
+        Given this, we reconstruct a Float8Tensor instance using the qdata and scale
+        and return it as a dictionary with the full_layer_name as the key and the recovered
+        Float8Tensor instance as the value.
+        """
+        is_unsafe_serialization = list(input_dict.keys())[0] not in source_patterns
 
         param_data = {}
+        layer_name = ".".join(full_layer_name.split(".")[:-1])
         if is_unsafe_serialization:
             if isinstance(input_dict["weight"], list):
                 weight = input_dict["weight"][0]
             else:
                 weight = input_dict["weight"]
         else:
-            if isinstance(input_dict["weight:qdata"], list):
-                param_data[f"{full_layer_name}:qdata"] = input_dict["weight:qdata"][0]
-            else:
-                param_data[f"{full_layer_name}:qdata"] = input_dict["weight:qdata"]
-
-            if isinstance(input_dict["weight:scale"], list):
-                param_data[f"{full_layer_name}:scale"] = input_dict["weight:scale"][0]
-            else:
-                param_data[f"{full_layer_name}:scale"] = input_dict["weight:scale"]
-
-            if is_int_4:
-                if isinstance(input_dict["weight:zero_point"], list):
-                    param_data[f"{full_layer_name}:zero_point"] = input_dict["weight:zero_point"][0]
-                else:
-                    param_data[f"{full_layer_name}:zero_point"] = input_dict["weight:zero_point"]
+            for suffix in input_dict.keys():
+                if len(input_dict[suffix]) != 1:
+                    raise ValueError(
+                        f"Expected a single tensor for {suffix} but got {len(input_dict[suffix])} tensors instead"
+                    )
+                param_data[f"{layer_name}.{suffix}"] = input_dict[suffix][0]
 
-        # If it's a bias, no need to do anything special (except removing the ":_data" part of the key, but was
-        # already done) - if it's unsafe-serialized (i.e. not safetensors), not need for anything either
+        # If it's unsafe-serialized (i.e. not safetensors), no need for anything
         if is_unsafe_serialization:
             return {full_layer_name: weight}
         # Sanity check for the new serialization format
-        elif not (TORCHAO_VERSION >= version.parse("0.14.0") and is_metadata_torchao(self.hf_quantizer.metadata)):
-            # print("metadata", self.hf_quantizer.metadata)
-            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.14.0` installed")
+        elif not (TORCHAO_VERSION >= version.parse("0.15.0") and is_metadata_torchao(self.hf_quantizer.metadata)):
+            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.15.0` installed")
 
-        new_param = unflatten_tensor_state_dict(param_data, self.hf_quantizer.metadata)[full_layer_name]
+        unflattened_state_dict, leftover_state_dict = unflatten_tensor_state_dict(
+            param_data, self.hf_quantizer.metadata
+        )
+        assert not leftover_state_dict  # there should be no unprocessed tensors
+        new_param = unflattened_state_dict[full_layer_name]
 
         module, _ = get_module_from_name(model, full_layer_name)
         # Add repr to the module
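The new TorchAoDeserialize.convert path keys the collected shards as "{layer_name}.{suffix}" before handing them to torchao's unflatten_tensor_state_dict. A toy illustration of just that key consolidation, with placeholder strings standing in for tensors and no torchao calls:

```python
# Toy illustration of the key consolidation done in the rewritten TorchAoDeserialize.convert
# (tensor values replaced by strings; unflatten_tensor_state_dict is not called here).
full_layer_name = "model.layers.0.self_attn.k_proj.weight"
layer_name = ".".join(full_layer_name.split(".")[:-1])   # "model.layers.0.self_attn.k_proj"

input_dict = {
    "_weight_qdata": ["<qdata tensor>"],
    "_weight_scale": ["<scale tensor>"],
}

# each suffix must carry exactly one tensor; its key is rebuilt under the layer prefix
param_data = {f"{layer_name}.{suffix}": tensors[0] for suffix, tensors in input_dict.items()}
print(param_data)
# {'model.layers.0.self_attn.k_proj._weight_qdata': '<qdata tensor>',
#  'model.layers.0.self_attn.k_proj._weight_scale': '<scale tensor>'}
```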
transformers/integrations/vptq.py CHANGED
@@ -13,64 +13,49 @@
 # limitations under the License.
 "VPTQ (Vector Post-Training Quantization) integration file"
 
-import torch.nn as nn
-from accelerate import init_empty_weights
-from vptq import VQuantLinear
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_accelerate_available, is_torch_available, logging
 
 
-def replace_with_vptq_linear(
-    model,
-    quantization_config=None,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    has_been_replaced=False,
-):
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
+if is_torch_available():
+    import torch.nn as nn
+
+logger = logging.get_logger(__name__)
+
+
+def replace_with_vptq_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
-    Public method that recursively replaces the Linear layers of the given model with VPTQ quantized layers.
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.
+    Public method that replaces the Linear layers of the given model with SPQR quantized layers.
 
     Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
+            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
+            converted.
        quantization_config (`VptqConfig`):
            The quantization config object that contains the quantization parameters.
-        modules_to_not_convert (`list[`str`]`, *optional*, defaults to `["lm_head"]`):
-            Names of the modules to not convert in `VQuantLinear`. In practice we keep the `lm_head` in full precision
-            for numerical stability reasons.
-        current_key_name (`list`, *optional*):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
+    from vptq import VQuantLinear
 
-    modules_to_not_convert = modules_to_not_convert if modules_to_not_convert else ["lm_head"]
-
-    for name, module in model.named_children():
-        if current_key_name is None:
-            current_key_name = []
-        current_key_name.append(name)
-        layer_name = ".".join(current_key_name)
-        shared_layer_config = quantization_config.shared_layer_config
-        config_for_layers = quantization_config.config_for_layers
-
-        if (
-            isinstance(module, nn.Linear)
-            and layer_name not in modules_to_not_convert
-            and ((layer_name in config_for_layers) or (current_key_name[-1] in shared_layer_config))
-        ):
-            layer_params = config_for_layers.get(layer_name, None) or shared_layer_config.get(
-                current_key_name[-1], None
-            )
+    has_been_replaced = False
+    shared_layer_config = quantization_config.shared_layer_config
+    config_for_layers = quantization_config.config_for_layers
 
-            with init_empty_weights():
-                in_features = module.in_features
-                out_features = module.out_features
-
-                model._modules[name] = VQuantLinear(
-                    in_features,
-                    out_features,
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            if isinstance(module, nn.Linear):
+                layer_params = config_for_layers.get(module_name, None) or shared_layer_config.get(
+                    module_name.rsplit(".")[1], None
+                )
+                new_module = VQuantLinear(
+                    module.in_features,
+                    module.out_features,
                     vector_lens=layer_params["vector_lens"],
                     num_centroids=layer_params["num_centroids"],
                     num_res_centroids=layer_params["num_res_centroids"],
@@ -84,18 +69,16 @@ def replace_with_vptq_linear(
                     enable_proxy_error=False,
                     bias=module.bias is not None,
                 )
+                # Force requires grad to False to avoid unexpected errors
+                model._modules[module_name].requires_grad_(False)
+                model.set_submodule(module_name, new_module)
                 has_been_replaced = True
 
-                # Force requires grad to False to avoid unexpected errors
-                model._modules[name].requires_grad_(False)
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = replace_with_vptq_linear(
-                module,
-                quantization_config=quantization_config,
-                modules_to_not_convert=modules_to_not_convert,
-                current_key_name=current_key_name,
-                has_been_replaced=has_been_replaced,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
transformers/modelcard.py CHANGED
@@ -23,7 +23,7 @@ from typing import Any, Optional, Union
 
 import httpx
 import yaml
-from huggingface_hub import model_info
+from huggingface_hub import is_offline_mode, model_info
 from huggingface_hub.errors import OfflineModeIsEnabled
 from huggingface_hub.utils import HFValidationError
 
@@ -50,7 +50,6 @@ from .utils import (
     MODEL_CARD_NAME,
     cached_file,
     is_datasets_available,
-    is_offline_mode,
     is_tokenizers_available,
     is_torch_available,
     logging,
transformers/modeling_gguf_pytorch_utils.py CHANGED
@@ -20,6 +20,7 @@ import numpy as np
 from tqdm.auto import tqdm
 
 from .integrations import (
+    GGUF_CONFIG_DEFAULTS_MAPPING,
     GGUF_CONFIG_MAPPING,
     GGUF_TOKENIZER_MAPPING,
     _gguf_parse_value,
@@ -437,6 +438,13 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
         all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
     )
 
+    # Set GGUF-specific default values
+    config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get(
+        updated_architecture, GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture) or {}
+    )
+    for key, value in config_defaults.items():
+        parsed_parameters["config"].setdefault(key, value)
+
     # List all key-value pairs in a columnized format
     for gguf_key, field in reader.fields.items():
         gguf_key = gguf_key.replace(architecture, updated_architecture)
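Because the merge uses dict.setdefault, values already parsed from the GGUF file take precedence over the architecture-level defaults, which only fill missing keys. A toy illustration of that precedence, using made-up mapping contents rather than the real GGUF_CONFIG_DEFAULTS_MAPPING entries:

```python
# Placeholder contents only; the real GGUF_CONFIG_DEFAULTS_MAPPING lives in
# transformers.integrations and its entries are not shown in this diff.
GGUF_CONFIG_DEFAULTS_MAPPING = {"llama": {"tie_word_embeddings": False, "vocab_size": 32000}}

parsed_config = {"vocab_size": 128256}  # value read from the GGUF file itself
config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get("llama", {})
for key, value in config_defaults.items():
    parsed_config.setdefault(key, value)  # only fills keys the file did not provide

print(parsed_config)
# {'vocab_size': 128256, 'tie_word_embeddings': False}
```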
transformers/modeling_rope_utils.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import math
+import warnings
 from functools import wraps
 from typing import TYPE_CHECKING, Optional, TypedDict
 
@@ -653,20 +654,26 @@
         Helper to standardize the config's rope params field by ensuring the params are defined for each
         later type. For old model the fn will duplicate a single rope param in each layer type (backward compatibility)
         """
-        # Move `rope_theta` and `partial_rotary_factor` to the params dict, if not there yet
+        # Move `rope_theta` and `partial_rotary_factor` to the `rope_parameters`, if not there yet
         rope_theta = getattr(self, "rope_theta", None)
         partial_rotary_factor = getattr(self, "partial_rotary_factor", None)
-        rope_parameters = self.rope_parameters
+        rope_parameters = getattr(self, "rope_parameters", None) or {}
+        layer_types = getattr(self, "layer_types", None)
 
+        # Case 0: no RoPE params defined
+        if not (rope_parameters or rope_theta):
+            # partial_rotary_factor without rope_theta is invalid, so we don't check for it here
+            logger.warning("`standardize_rope_params` was called but no RoPE parameters were found.")
+            return
         # Case 1: RoPE param keys do not intersect with possible `layer_types` -> one global dict
-        if getattr(self, "layer_types", None) is None or not set(rope_parameters.keys()).issubset(self.layer_types):
+        elif layer_types is None or rope_parameters == {} or not set(rope_parameters.keys()).issubset(layer_types):
             rope_parameters.setdefault("rope_type", rope_parameters.get("type", "default"))
             rope_parameters.setdefault("rope_theta", rope_theta)
             if partial_rotary_factor is not None:
                 rope_parameters["partial_rotary_factor"] = partial_rotary_factor
         # Case 2: different RoPE for each layer -> several params as nested dict
         else:
-            for layer_type in self.layer_types:
+            for layer_type in layer_types:
                 rope_parameters[layer_type].setdefault("rope_type", rope_parameters[layer_type].get("type", "default"))
                 rope_parameters[layer_type].setdefault("rope_theta", rope_theta)
                 if partial_rotary_factor is not None:
@@ -691,14 +698,14 @@ class RotaryEmbeddingConfigMixin:
 
         for rope_parameters in rope_parameters_dict.values():
             rope_type = rope_parameters.get("rope_type", rope_parameters.get("type", "default"))
-            validation_fn = getattr(self, f"_validate_{rope_type}_rope_parameters")
+            validation_fn = getattr(self, f"_validate_{rope_type}_rope_parameters", None)
             rope_parameters["rope_type"] = rope_type
 
             if validation_fn is not None:
                 validation_fn(rope_parameters, ignore_keys=ignore_keys)
             else:
                 logger.warning(
-                    f"Missing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='{rope_type}'"
+                    f"Missing validation function in 'RotaryEmbeddingConfigMixin' for 'rope_type'='{rope_type}'"
                 )
 
     def _validate_default_rope_parameters(self, rope_parameters: dict, ignore_keys: Optional[set] = None):
@@ -913,3 +920,20 @@ class RotaryEmbeddingConfigMixin:
         unused_keys = received_keys - required_keys
         if unused_keys:
             logger.warning(f"Unrecognized keys in `rope_parameters` for 'rope_type'='{rope_type}': {unused_keys}")
+
+
+def rope_config_validation(config: RotaryEmbeddingConfigMixin, ignore_keys: Optional[set] = None):
+    """
+    This is a deprecated function.
+    It has been kept for backward compatibility with custom code models.
+    """
+    warnings.warn(
+        "`rope_config_validation` is deprecated and has been removed. "
+        "Its functionality has been moved to RotaryEmbeddingConfigMixin.validate_rope method. "
+        "PreTrainedConfig inherits this class, so please call self.validate_rope() instead. "
+        "Also, make sure to use the new rope_parameters syntax. "
+        "You can call self.standardize_rope_params() in the meantime.",
+        FutureWarning,
+    )
+    config.standardize_rope_params()
+    config.validate_rope(ignore_keys=ignore_keys)
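The re-added rope_config_validation shim points custom-code models at the new mixin methods. A hedged migration sketch, assuming `config` is an instance of a class that inherits RotaryEmbeddingConfigMixin (per the warning text, PreTrainedConfig does in v5):

```python
# Migration sketch based on the deprecation message above; `config` is assumed to be an
# instance of a class inheriting RotaryEmbeddingConfigMixin (e.g. a PreTrainedConfig subclass).

def validate_rope_config(config):
    # Old custom-code pattern, still importable but now emits a FutureWarning:
    #     from transformers.modeling_rope_utils import rope_config_validation
    #     rope_config_validation(config)

    # New pattern:
    config.standardize_rope_params()  # fold rope_theta / partial_rotary_factor into rope_parameters
    config.validate_rope()            # dispatch to the _validate_<rope_type>_rope_parameters checks
```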