transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc1-py3-none-any.whl

This diff compares the contents of two publicly released package versions as they were published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
transformers/integrations/ggml.py
@@ -76,7 +76,7 @@ GGUF_CONFIG_MAPPING = {
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",
         "vocab_size": "vocab_size",
     },
-    "qwen2moe": {
+    "qwen2_moe": {
        "context_length": "max_position_embeddings",
        "block_count": "num_hidden_layers",
        "feed_forward_length": "intermediate_size",
@@ -313,6 +313,16 @@ GGUF_TOKENIZER_MAPPING = {
     },
 }
 
+# We only need to set here the parameters that default to different values between transformers and llamacpp.
+GGUF_CONFIG_DEFAULTS_MAPPING = {
+    "qwen3_moe": {
+        # NOTE: Qwen3MoeConfig defaults to false but llama.cpp needs this to be true.
+        # See: https://github.com/ggml-org/llama.cpp/blob/17f7f4baad8b3a716ee139da7bb56ae984e8c0fa/src/models/qwen3moe.cpp#L85-L96
+        # (the parameter right after LLM_FFN_SILU corresponds to norm_topk_prob)
+        "norm_topk_prob": True,
+    },
+}
+
 
 def _gguf_parse_value(_value, data_type):
     if not isinstance(data_type, list):
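The new `GGUF_CONFIG_DEFAULTS_MAPPING` table carries config defaults that differ between transformers and llama.cpp. A hedged sketch of how such a table can be folded into config kwargs built from GGUF metadata; the helper below is illustrative only, not the library's actual loading code:

```python
# Sketch of applying a defaults table like GGUF_CONFIG_DEFAULTS_MAPPING to parsed GGUF metadata.
GGUF_CONFIG_DEFAULTS_MAPPING = {
    "qwen3_moe": {"norm_topk_prob": True},
}


def apply_gguf_defaults(architecture: str, config_kwargs: dict) -> dict:
    """Fill in defaults that llama.cpp assumes but the transformers config does not set."""
    for key, value in GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture, {}).items():
        config_kwargs.setdefault(key, value)
    return config_kwargs


print(apply_gguf_defaults("qwen3_moe", {"vocab_size": 151936}))
# {'vocab_size': 151936, 'norm_topk_prob': True}
```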
transformers/integrations/higgs.py
@@ -15,17 +15,16 @@
 
 from math import sqrt
 
-from ..utils import (
-    is_flute_available,
-    is_hadamard_available,
-    is_torch_available,
-)
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_accelerate_available, is_flute_available, is_hadamard_available, is_torch_available, logging
 
 
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
 if is_torch_available():
     import torch
-    from torch import nn
-
+    import torch.nn as nn
 
 if is_flute_available():
     from flute.integrations.higgs import prepare_data_transposed
@@ -34,6 +33,8 @@ if is_flute_available():
 if is_hadamard_available():
     from fast_hadamard_transform import hadamard_transform
 
+logger = logging.get_logger(__name__)
+
 
 def pad_to_block(tensor, dims, had_block_size, value=0):
     pad_dims = [0 for _ in range(2 * len(tensor.shape))]
@@ -549,70 +550,47 @@ class HiggsLinear(torch.nn.Module):
         )
 
 
-def replace_with_higgs_linear(
-    model,
-    quantization_config=None,
-    current_key_name=None,
-    has_been_replaced=False,
-    modules_to_not_convert=None,
-):
+def replace_with_higgs_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
-    Public method that recursively replaces the Linear layers of the given model with HIGGS quantized layers.
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.
+    Public method that replaces the Linear layers of the given model with HIGGS quantized layers.
 
     Args:
         model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
+            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
+            converted.
         quantization_config (`HiggsConfig`):
            The quantization config object that contains the quantization parameters.
-        current_key_name (`list`, *optional*):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
 
-    from accelerate import init_empty_weights
-
-    for name, module in model.named_children():
-        if current_key_name is None:
-            current_key_name = []
-        current_key_name.append(name)
-
-        if isinstance(module, nn.Linear):
-            # Check if the current key is not in the `quantization_config.modules_to_not_convert`
-            current_key_name_str = ".".join(current_key_name)
-            if not any(current_key_name_str.endswith(key) for key in modules_to_not_convert):
-                with init_empty_weights():
-                    in_features = module.in_features
-                    out_features = module.out_features
-
-                    model._modules[name] = HiggsLinear(
-                        in_features,
-                        out_features,
-                        bias=module.bias is not None,
-                        num_bits=quantization_config.bits,
-                        hadamard_size=quantization_config.hadamard_size,
-                        group_size=quantization_config.group_size,
-                    )
-                    has_been_replaced = True
-
-                    # Store the module class in case we need to transpose the weight later
-                    model._modules[name].source_cls = type(module)
-                    # Force requires grad to False to avoid unexpected errors
-                    model._modules[name].requires_grad_(False)
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = replace_with_higgs_linear(
-                module,
-                quantization_config=quantization_config,
-                current_key_name=current_key_name,
-                has_been_replaced=has_been_replaced,
-                modules_to_not_convert=modules_to_not_convert,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    # we need this to correctly materialize the weights during quantization
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            if isinstance(module, nn.Linear):
+                new_module = HiggsLinear(
+                    module.in_features,
+                    module.out_features,
+                    bias=module.bias is not None,
+                    num_bits=quantization_config.bits,
+                    hadamard_size=quantization_config.hadamard_size,
+                    group_size=quantization_config.group_size,
+                )
+                new_module.source_cls = type(module)
+                new_module.requires_grad_(False)
+                model.set_submodule(module_name, new_module)
+                has_been_replaced = True
+
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+    return model
 
 
 def dequantize_higgs(model, current_key_name=None):
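The HIGGS integration drops its recursive `named_children` walk: it now makes one pass over `named_modules()`, asks the shared `should_convert_module` helper whether a module is excluded, swaps layers in place with `nn.Module.set_submodule`, and returns only the model. A minimal sketch of that replacement pattern, with a stand-in class instead of `HiggsLinear` (assumes a PyTorch version that provides `set_submodule`):

```python
# Flat module replacement using named_modules() + set_submodule(), as in the refactor above.
import torch.nn as nn


class Dummy8bitLinear(nn.Linear):
    """Stand-in for a quantized linear layer such as HiggsLinear."""


model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 4))
for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        replacement = Dummy8bitLinear(module.in_features, module.out_features, bias=module.bias is not None)
        model.set_submodule(name, replacement)  # dotted path, e.g. "0" or "2"

print([type(m).__name__ for m in model])  # ['Dummy8bitLinear', 'ReLU', 'Dummy8bitLinear']
```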
transformers/integrations/hub_kernels.py
@@ -78,7 +78,7 @@ try:
             )
             return lambda func: func
 
-    _KERNEL_MAPPING: dict[str, dict[Device | str, LayerRepository]] = {
+    _KERNEL_MAPPING: dict[str, dict[Device | str, LayerRepository | dict[Mode, LayerRepository]]] = {
         "MultiScaleDeformableAttention": {
             "cuda": LayerRepository(
                 repo_id="kernels-community/deformable-detr",
@@ -111,6 +111,12 @@ try:
                    layer_name="RMSNorm",
                )
            },
+            "mps": {
+                Mode.INFERENCE: LayerRepository(
+                    repo_id="kernels-community/mlx_rmsnorm",
+                    layer_name="RMSNorm",
+                )
+            },
            "npu": {
                Mode.INFERENCE: LayerRepository(
                    repo_id="kernels-community/liger_kernels",
@@ -253,6 +259,8 @@ except ImportError:
 
 _HUB_KERNEL_MAPPING: dict[str, dict[str, str]] = {
     "causal-conv1d": {"repo_id": "kernels-community/causal-conv1d"},
+    "mamba-ssm": {"repo_id": "kernels-community/mamba-ssm", "revision": "v0.0.4"},
+    "falcon_mamba-ssm": {"repo_id": "kernels-community/mamba-ssm", "revision": "v0.0.4"},
 }
 
 _KERNEL_MODULE_MAPPING: dict[str, ModuleType | None] = {}
@@ -328,7 +336,7 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
     if kernel_name in mapping and isinstance(mapping[kernel_name], ModuleType):
         return mapping[kernel_name]
     if kernel_name not in _HUB_KERNEL_MAPPING:
-        logger.warning(f"Kernel {kernel_name} not found in _HUB_KERNEL_MAPPING")
+        logger.warning_once(f"Kernel {kernel_name} not found in _HUB_KERNEL_MAPPING")
        mapping[kernel_name] = None
        return None
     if _kernels_available:
@@ -336,11 +344,15 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
 
         try:
             repo_id = _HUB_KERNEL_MAPPING[kernel_name]["repo_id"]
+            revision = _HUB_KERNEL_MAPPING[kernel_name].get("revision", None)
             version = _HUB_KERNEL_MAPPING[kernel_name].get("version", None)
-            kernel = get_kernel(repo_id, version=version)
+            kernel = get_kernel(repo_id, revision=revision, version=version)
             mapping[kernel_name] = kernel
         except FileNotFoundError:
             mapping[kernel_name] = None
+        except AssertionError:
+            # Happens when torch is built without an accelerator backend; fall back to slow path.
+            mapping[kernel_name] = None
 
     else:
         # Try to import is_{kernel_name}_available from ..utils
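With these changes, `_HUB_KERNEL_MAPPING` entries may pin a Hub revision (used here for the mamba-ssm kernels), and `lazy_load_kernel` degrades gracefully when torch has no accelerator backend. A hedged usage sketch, assuming the optional `kernels` dependency and network access; the function returns `None` whenever the kernel cannot be resolved:

```python
# lazy_load_kernel resolves "mamba-ssm" to kernels-community/mamba-ssm pinned at revision v0.0.4,
# or returns None (missing `kernels` package, fetch failure, or torch without an accelerator backend).
from transformers.integrations.hub_kernels import lazy_load_kernel

mamba_ssm = lazy_load_kernel("mamba-ssm")
if mamba_ssm is None:
    print("Falling back to the slow (non-kernel) path")
```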
@@ -369,6 +381,32 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
     return mapping[kernel_name]
 
 
+def use_kernelized_func(module_names: list[Callable] | Callable):
+    """
+    This decorator attaches the target function as an attribute of the module.
+    The function must already be decorated with @use_kernel_func_from_hub
+    this decorator then wraps it as an nn.Module internally.
+    When kernelize is later applied to the full model, the function can be accessed as a regular module attribute and kernelized just like any other layer.
+    The kernelization is performed in place, modifying the module directly.
+    """
+    if isinstance(module_names, Callable):
+        module_names = [module_names]
+
+    def decorator(cls):
+        orig_init = cls.__init__
+
+        def new_init(self, *args, **kwargs):
+            orig_init(self, *args, **kwargs)
+            for fn in module_names:
+                # we hardcode the name of the function to "rotary_fn" for now
+                setattr(self, "rotary_fn", fn)
+
+        cls.__init__ = new_init
+        return cls
+
+    return decorator
+
+
 __all__ = [
     "LayerRepository",
     "use_kernel_forward_from_hub",
@@ -377,4 +415,5 @@ __all__ = [
     "register_kernel_mapping_transformers",
     "replace_kernel_forward_from_hub",
     "lazy_load_kernel",
+    "use_kernelized_func",
 ]
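The new `use_kernelized_func` decorator stores a hub-kernelizable function on every instance of the decorated module class, under the hard-coded attribute name `rotary_fn`, so that later kernelization can treat it like any other layer. A hedged sketch based on the docstring and body above; `apply_rotary` is a made-up placeholder for a function that would normally already be wrapped for hub kernels:

```python
import torch
import torch.nn as nn

from transformers.integrations.hub_kernels import use_kernelized_func


def apply_rotary(q, k, cos, sin):  # placeholder; would normally be kernel-wrapped
    return q * cos + k * sin


@use_kernelized_func(apply_rotary)
class ToyAttention(nn.Module):
    def forward(self, q, k, cos, sin):
        # the decorator attached the function in __init__, so it is a regular instance attribute
        return self.rotary_fn(q, k, cos, sin)


layer = ToyAttention()
out = layer(torch.ones(2), torch.ones(2), torch.ones(2), torch.zeros(2))
```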
transformers/integrations/integration_utils.py
@@ -26,6 +26,7 @@ import re
 import shutil
 import sys
 import tempfile
+import warnings
 from dataclasses import fields
 from enum import Enum
 from pathlib import Path
@@ -1455,6 +1456,10 @@ class NeptuneMissingConfiguration(Exception):
 class NeptuneCallback(TrainerCallback):
     """TrainerCallback that sends the logs to [Neptune](https://app.neptune.ai).
 
+    > [!WARNING]
+    > Neptune integration is deprecated and will be removed in a future version of Transformers. We recommend using
+    > other supported experiment tracking integrations.
+
     Args:
         api_token (`str`, *optional*): Neptune API token obtained upon registration.
             You can leave this argument out if you have saved your token to the `NEPTUNE_API_TOKEN` environment
@@ -1500,6 +1505,11 @@ class NeptuneCallback(TrainerCallback):
         log_checkpoints: str | None = None,
         **neptune_run_kwargs,
     ):
+        warnings.warn(
+            "The NeptuneCallback is deprecated and will be removed in a future version of Transformers. We recommend "
+            "using other supported experiment tracking integrations.",
+            FutureWarning,
+        )
         if not is_neptune_available():
             raise ValueError(
                 "NeptuneCallback requires the Neptune client library to be installed. "
transformers/integrations/mxfp4.py
@@ -26,10 +26,9 @@ from ..core_model_loading import ConversionOps
 if is_accelerate_available():
     from accelerate import init_empty_weights
 
-import re
 from contextlib import contextmanager
 
-from ..quantizers.quantizers_utils import get_module_from_name
+from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
 
 
 logger = logging.get_logger(__name__)
@@ -436,15 +435,6 @@ def mlp_forward(self, hidden_states):
     return routed_out, router_logits
 
 
-def should_convert_module(current_key_name, patterns):
-    current_key_name_str = ".".join(current_key_name)
-    if not any(
-        re.match(f"{key}\\.", current_key_name_str) or re.match(f"{key}", current_key_name_str) for key in patterns
-    ):
-        return True
-    return False
-
-
 def dequantize(module, param_name, param_value, target_device, dq_param_name, **kwargs):
     from ..integrations.tensor_parallel import shard_and_distribute_module
 
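The local `should_convert_module` helper is deleted here in favor of a shared one in `transformers/quantizers/quantizers_utils.py` (that file gains 20 lines in this release), which now takes a dotted module name rather than a list of key parts. A hedged approximation of its semantics, adapted from the deleted local version; the shared implementation may differ in details:

```python
# Approximation of the exclusion check used by the quantization integrations in this diff.
import re


def should_convert_module(module_name: str, patterns: list[str] | None) -> bool:
    """Return True unless `module_name` matches one of the exclusion patterns."""
    if not patterns:
        return True
    return not any(
        re.match(rf"{key}\.", module_name) or re.match(rf"{key}", module_name)
        for key in patterns
    )


print(should_convert_module("model.layers.0.mlp.experts", ["lm_head"]))  # True
print(should_convert_module("lm_head", ["lm_head"]))                     # False
```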
@@ -604,70 +594,40 @@ def swizzle_mxfp4_convertops(blocks, scales, module, proj, target_device, triton
     )
 
 
-def _replace_with_mxfp4_linear(
-    model,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    quantization_config=None,
-    has_been_replaced=False,
-    config=None,
-):
-    if current_key_name is None:
-        current_key_name = []
+def replace_with_mxfp4_linear(model, quantization_config=None, modules_to_not_convert: list[str] | None = None):
+    """
+    Public method that replaces the expert layers of the given model with mxfp4 quantized layers.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model to convert, can be any `torch.nn.Module` instance.
+        quantization_config (`Mxfp4Config`, defaults to `None`):
+            The quantization config object that contains the quantization parameters.
+        modules_to_not_convert (`list`, *optional*, defaults to `None`):
+            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
+            converted.
+    """
+    if quantization_config.dequantize:
+        return model
+
+    from kernels import get_kernel
 
-    for name, module in model.named_children():
-        current_key_name.append(name)
-        if not should_convert_module(current_key_name, modules_to_not_convert):
-            current_key_name.pop(-1)
+    global triton_kernels_hub
+    triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
+
+    has_been_replaced = False
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
            continue
        if module.__class__.__name__ == "GptOssExperts" and not quantization_config.dequantize:
            with init_empty_weights():
-                model._modules[name] = Mxfp4GptOssExperts(config)
+                model.set_submodule(module_name, Mxfp4GptOssExperts(model.config))
                has_been_replaced = True
        if module.__class__.__name__ == "GptOssMLP" and not quantization_config.dequantize:
            from types import MethodType
 
            module.forward = MethodType(mlp_forward, module)
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = _replace_with_mxfp4_linear(
-                module,
-                modules_to_not_convert,
-                current_key_name,
-                quantization_config,
-                has_been_replaced=has_been_replaced,
-                config=config,
-            )
-        current_key_name.pop(-1)
-    return model, has_been_replaced
 
-
-def replace_with_mxfp4_linear(
-    model,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    quantization_config=None,
-    config=None,
-):
-    if quantization_config.dequantize:
-        return model
-    else:
-        from kernels import get_kernel
-
-        global triton_kernels_hub
-        triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
-
-    modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
-
-    if quantization_config.modules_to_not_convert is not None:
-        modules_to_not_convert.extend(quantization_config.modules_to_not_convert)
-    modules_to_not_convert = list(set(modules_to_not_convert))
-    model, has_been_replaced = _replace_with_mxfp4_linear(
-        model,
-        modules_to_not_convert,
-        current_key_name,
-        quantization_config,
-        config=config,
-    )
     if not has_been_replaced:
         logger.warning(
             "You are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model."
transformers/integrations/peft.py
@@ -17,6 +17,7 @@ import json
 import os
 from typing import Any, Literal
 
+from ..conversion_mapping import get_model_conversion_mapping
 from ..core_model_loading import WeightRenaming, rename_source_key
 from ..utils import (
     CONFIG_NAME,
@@ -46,26 +47,6 @@ MIN_PEFT_VERSION = "0.18.0"
 logger = logging.get_logger(__name__)
 
 
-# DO NOT MODIFY, KEPT FOR BC ONLY
-VLMS = [
-    "aria",
-    "ayavision",
-    "emu3",
-    "fuyu",
-    "gotocr2",
-    "gemma3",
-    "internvl",
-    "llava",  # all llava prefixed models fall under this check
-    "mistral3",
-    "mllama",
-    "paligemma",
-    "qwen2vl",
-    "qwen2_5_vl",
-    "videollava",
-    "vipllava",
-]
-
-
 class PeftAdapterMixin:
     """
     A class containing all functions for loading and using adapters weights that are supported in PEFT library. For
@@ -211,11 +192,10 @@ class PeftAdapterMixin:
         if any(conf.peft_type != PeftType.LORA for conf in self.peft_config.values()):
             raise ValueError("Hotswapping is currently only supported for LoRA, please set `hotswap=False`.")
 
+        key_mapping = adapter_kwargs.pop("key_mapping", None) if adapter_kwargs is not None else None
+        weight_conversions = get_model_conversion_mapping(self, key_mapping=key_mapping)
         # peft only supports low_cpu_mem_usage starting from v0.13.0
         peft_load_kwargs = {}
-        key_mapping = adapter_kwargs.pop("key_mapping", None) if adapter_kwargs is not None else None
-        if key_mapping is None and any(allowed_name in self.__class__.__name__.lower() for allowed_name in VLMS):
-            key_mapping = self._checkpoint_conversion_mapping
         peft_load_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage
 
         adapter_name = adapter_name if adapter_name is not None else "default"
@@ -279,9 +259,6 @@
             )
         peft_config.inference_mode = not is_trainable
 
-        if peft_config.peft_type != PeftType.LORA:
-            raise ValueError("Hotswapping is currently only supported for LoRA, please set `hotswap=False`.")
-
         if not hotswap:
             # TODO: WE NEED TOO APPLY OUR DYNAMIC WEIGHT CONVERSION AT SOME POINT HERE!
             # Create and add fresh new adapters into the model, unless the weights are hotswapped
@@ -295,17 +272,18 @@
 
         # We need to pre-process the state dict to remove unneeded prefixes - for backward compatibility
         renamings = []
-        if key_mapping:
-            renamings = [entry for entry in key_mapping if isinstance(entry, WeightRenaming)]
+        if weight_conversions:
+            renamings = [entry for entry in weight_conversions if isinstance(entry, WeightRenaming)]
         processed_adapter_state_dict = {}
         prefix = "base_model.model."
+        state_dict = self.state_dict()
         for key, value in adapter_state_dict.items():
             if key.startswith(prefix):
                 new_key = key[len(prefix) :]
             else:
                 new_key = key
 
-            new_key = rename_source_key(new_key, renamings, [])[0]
+            new_key = rename_source_key(new_key, renamings, [], self.base_model_prefix, state_dict)[0]
 
             # For hotswapping, we need the adapter name to be present in the state dict keys
             if hotswap:
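`load_adapter` now derives weight-name conversions from `get_model_conversion_mapping` instead of the hard-coded `VLMS` allowlist, so adapters saved against older checkpoint layouts are renamed for any architecture with a registered conversion mapping. A hedged usage sketch (requires `peft`; both identifiers below are placeholders, not real repositories):

```python
# Nothing changes in the public API; the key renaming now simply happens for every
# architecture that registers a conversion mapping, not just the old VLM allowlist.
from transformers import AutoModelForImageTextToText

model = AutoModelForImageTextToText.from_pretrained("org/some-vlm")  # placeholder model id
model.load_adapter("org/some-vlm-lora-adapter")                      # placeholder adapter id
```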
transformers/integrations/quanto.py
@@ -12,21 +12,53 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ..utils import is_optimum_quanto_available, is_torch_available, logging
+from ..core_model_loading import ConversionOps
+from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
+from ..utils import is_torch_available, logging
 
 
 if is_torch_available():
     import torch
+    import torch.nn as nn
 
 logger = logging.get_logger(__name__)
 
 
+class QuantoQuantize(ConversionOps):
+    def __init__(self, hf_quantizer):
+        self.hf_quantizer = hf_quantizer
+
+    def convert(
+        self,
+        input_dict: dict[str, list[torch.Tensor]],
+        model: torch.nn.Module | None = None,
+        full_layer_name: str | None = None,
+        missing_keys: list[str] | None = None,
+        **kwargs,
+    ) -> dict[str, torch.Tensor]:
+        _, value = tuple(input_dict.items())[0]
+        value = value[0]
+
+        from ..modeling_utils import _load_parameter_into_model
+
+        _load_parameter_into_model(model, full_layer_name, value)
+        module, _ = get_module_from_name(model, full_layer_name)
+        module.freeze()
+        module.weight.requires_grad = False
+        module._is_hf_initialized = True
+
+        # need to discard some missing keys we already updated the module in freeze.
+        module_name = full_layer_name.rsplit(".", 1)[0]
+        missing_keys.discard(f"{module_name}.weight")
+        missing_keys.discard(f"{module_name}.input_scale")
+        missing_keys.discard(f"{module_name}.output_scale")
+        return {}
+
+
 def replace_with_quanto_layers(
     model,
     quantization_config=None,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    has_been_replaced=False,
+    modules_to_not_convert: list[str] | None = None,
 ):
     """
     Public method that recursively replaces the Linear layers of the given model with Quanto quantized layers.
@@ -35,64 +67,50 @@ def replace_with_quanto_layers(
     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
-        quantization_config (`AqlmConfig`, defaults to `None`):
+        quantization_config (`QuantoConfig`, defaults to `None`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*, defaults to `None`):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
-        current_key_name (`list`, *optional*, defaults to `None`):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*, defaults to `None`):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
     from accelerate import init_empty_weights
-
-    if is_optimum_quanto_available():
-        from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
+    from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
 
     w_mapping = {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}
     a_mapping = {None: None, "float8": qfloat8, "int8": qint8}
 
-    if modules_to_not_convert is None:
-        modules_to_not_convert = []
-
-    for name, module in model.named_children():
-        if current_key_name is None:
-            current_key_name = []
-        current_key_name.append(name)
-
-        if not any(key in ".".join(current_key_name) for key in modules_to_not_convert):
-            with init_empty_weights():
-                if isinstance(module, torch.nn.Linear):
-                    model._modules[name] = QLinear(
-                        in_features=module.in_features,
-                        out_features=module.out_features,
-                        bias=module.bias is not None,
-                        dtype=module.weight.dtype,
-                        weights=w_mapping[quantization_config.weights],
-                        activations=a_mapping[quantization_config.activations],
-                    )
-                    model._modules[name].requires_grad_(False)
-                    has_been_replaced = True
-                elif isinstance(module, torch.nn.LayerNorm):
-                    if quantization_config.activations is not None:
-                        model._modules[name] = QLayerNorm(
-                            module.normalized_shape,
-                            module.eps,
-                            module.elementwise_affine,
-                            module.bias is not None,
-                            activations=a_mapping[quantization_config.activations],
-                        )
-                        has_been_replaced = True
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = replace_with_quanto_layers(
-                module,
-                quantization_config=quantization_config,
-                modules_to_not_convert=modules_to_not_convert,
-                current_key_name=current_key_name,
-                has_been_replaced=has_been_replaced,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            new_module = None
+            if isinstance(module, nn.Linear):
+                new_module = QLinear(
+                    in_features=module.in_features,
+                    out_features=module.out_features,
+                    bias=module.bias is not None,
+                    dtype=module.weight.dtype,
+                    weights=w_mapping[quantization_config.weights],
+                    activations=a_mapping[quantization_config.activations],
+                )
+            elif isinstance(module, torch.nn.LayerNorm) and quantization_config.activations is not None:
+                new_module = QLayerNorm(
+                    module.normalized_shape,
+                    module.eps,
+                    module.elementwise_affine,
+                    module.bias is not None,
+                    activations=a_mapping[quantization_config.activations],
+                )
+            if new_module is not None:
+                has_been_replaced = True
+                model.set_submodule(module_name, new_module)
+
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using quanto but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
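`replace_with_quanto_layers` loses its recursion bookkeeping and now returns only the model, while the new `QuantoQuantize` op plugs quanto quantization into the weight-conversion pipeline. A hedged sketch of the normal entry point that drives this code (requires the optional `optimum-quanto` dependency; the checkpoint name is only an example):

```python
# Loading with a QuantoConfig triggers the quanto quantizer, which calls replace_with_quanto_layers.
from transformers import AutoModelForCausalLM, QuantoConfig

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B",                               # example checkpoint
    quantization_config=QuantoConfig(weights="int8"),  # weights: "int8", "int4", "int2", or "float8"
)
```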