transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py
@@ -43,6 +43,7 @@ from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, check_torch_load_is_safe, logging
 from ...utils.deprecation import deprecate_kwarg
+from ...utils.generic import maybe_autocast
 from ...utils.hub import cached_file
 from ..qwen2.modeling_qwen2 import Qwen2RMSNorm
 from .configuration_qwen2_5_omni import (
@@ -1291,7 +1292,7 @@ class Qwen2_5OmniRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, :, None, :].float()  # shape (3, bs, 1, positions)

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(2, 3)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
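Note: every rotary embedding in this release swaps `torch.autocast(...)` for `maybe_autocast(...)`, newly imported from `transformers.utils.generic`. The sketch below is an assumption about the intent, not the actual implementation: a wrapper that behaves like `torch.autocast` where the device type supports it and degrades to a no-op context where it does not (the name `maybe_autocast_sketch` is ours).

```python
# Hypothetical sketch of a maybe_autocast-style helper; NOT the real
# transformers.utils.generic.maybe_autocast, whose logic may differ.
from contextlib import nullcontext

import torch


def maybe_autocast_sketch(device_type: str, enabled: bool = True):
    try:
        # Same signature subset as the call sites in this diff.
        return torch.autocast(device_type=device_type, enabled=enabled)
    except RuntimeError:
        # torch.autocast raises for unsupported device types; fall back
        # to a context manager that does nothing.
        return nullcontext()


# Mirrors the rotary-embedding call sites: disable autocast so the
# frequency matmul runs in float32.
with maybe_autocast_sketch(device_type="cpu", enabled=False):
    pass
```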
@@ -1958,11 +1959,8 @@ class Qwen2_5OmniThinkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCo
             audio_feature_lengths = None

         if attention_mask is not None and position_ids is None:
-            if (
-                cache_position is None
-                or (cache_position is not None and cache_position[0] == 0)
-                or self.rope_deltas is None
-            ):
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if past_key_values_length == 0 or self.rope_deltas is None:
                 delta0 = (1 - attention_mask).sum(dim=-1).unsqueeze(1)
                 position_ids, rope_deltas = self.get_rope_index(
                     input_ids,
@@ -1977,7 +1975,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCo
                 self.rope_deltas = rope_deltas
             else:
                 batch_size, seq_length = input_ids.shape
-                delta = cache_position[0] + self.rope_deltas if cache_position is not None else 0
+                delta = (past_key_values_length + self.rope_deltas).to(input_ids.device)
                 position_ids = torch.arange(seq_length, device=input_ids.device)
                 position_ids = position_ids.view(1, -1).expand(batch_size, -1)
                 position_ids = position_ids.add(delta)
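Note: these two hunks (repeated across the Qwen2-VL/Omni family below) change how prefill is detected: instead of inspecting `cache_position`, which may be `None` or unavailable, the models now ask the cache directly. A minimal standalone restatement of the new rule, assuming a `Cache`-like object exposing `get_seq_length()`:

```python
# Standalone restatement of the new prefill check used throughout this diff.
# `past_key_values` is assumed to expose get_seq_length() like a transformers
# Cache; `rope_deltas` is the per-sequence RoPE offset cached after prefill.


def needs_rope_recompute(past_key_values, rope_deltas) -> bool:
    # An empty or missing cache means we are in the prefill step, so the
    # multimodal RoPE indices must be (re)computed; likewise if the deltas
    # were never cached. During decode, the cached deltas are reused and
    # shifted by the number of tokens already in the cache.
    past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
    return past_key_values_length == 0 or rope_deltas is None
```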
@@ -2317,6 +2315,7 @@ class Qwen2_5OmniTalkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCon
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Qwen2_5OmniTalkerCausalLMOutputWithPast]:
         r"""
         thinker_reply_part (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
@@ -2366,11 +2365,8 @@ class Qwen2_5OmniTalkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCon
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict

         if attention_mask is not None and position_ids is None:
-            if (
-                cache_position is None
-                or (cache_position is not None and cache_position[0] == 0)
-                or self.rope_deltas is None
-            ):
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if past_key_values_length == 0 or self.rope_deltas is None:
                 position_ids, rope_deltas = self.get_rope_index(
                     input_text_ids,
                     image_grid_thw,
@@ -2390,8 +2386,8 @@ class Qwen2_5OmniTalkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCon
                 self.rope_deltas = rope_deltas

             else:
-                batch_size, seq_length = input_ids.shape
-                delta = cache_position[0] + self.rope_deltas if cache_position is not None else 0
+                batch_size, seq_length, _ = inputs_embeds.shape
+                delta = (past_key_values_length + self.rope_deltas).to(input_ids.device)
                 position_ids = torch.arange(seq_length, device=input_ids.device)
                 position_ids = position_ids.view(1, -1).expand(batch_size, -1)
                 position_ids = position_ids.add(delta)
@@ -2564,7 +2560,7 @@ class Qwen2_5OmniDiTRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -3459,7 +3455,7 @@ class Qwen2_5OmniToken2WavBigVGANModel(Qwen2_5OmniPreTrainedModel):
         decibel_spectrum = self.amplitude_to_db(amplitude_spectrum, -115) - 20
         return self.normalize_spectrogram(decibel_spectrum, 1, -115)

-    def forward(self, mel_spectrogram):
+    def forward(self, mel_spectrogram, **kwargs):
         processed_spectrogram = self.process_mel_spectrogram(mel_spectrogram)
         hidden_representation = self.conv_pre(processed_spectrogram)

@@ -3592,6 +3588,7 @@ class Qwen2_5OmniToken2WavDiTModel(Qwen2_5OmniPreTrainedModel):
         drop_audio_conditioning=False,
         drop_code=False,
         apply_cfg=True,
+        **kwargs,
     ):
         batch_size = hidden_states.shape[0]
         if time_step.ndim == 0:
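Note: several `forward` signatures in this file gain a trailing `**kwargs`. The effect, shown with illustrative names: a composite model can fan one shared kwargs dict out to every submodule, and submodules that do not consume a given key simply ignore it instead of raising `TypeError`.

```python
# Illustrative only; the function name and keys below are made up.


def vocoder_forward(mel_spectrogram, **kwargs):
    # Unconsumed keys land in kwargs and are ignored.
    return mel_spectrogram


shared_kwargs = {"output_attentions": False, "some_new_flag": True}
vocoder_forward("mel", **shared_kwargs)  # no TypeError on extra keys
```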

transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py
@@ -399,7 +399,7 @@ class Qwen2_5OmniTextConfig(PreTrainedConfig):
         self.rope_parameters = rope_parameters
         super().__init__(
             tie_word_embeddings=tie_word_embeddings,
-            ignore_keys_at_rope_validation={"mrope"},
+            ignore_keys_at_rope_validation={"mrope_section"},
             **kwargs,
         )

@@ -747,7 +747,9 @@ class Qwen2_5OmniTalkerConfig(PreTrainedConfig):
         layer_type_validation(self.layer_types, self.num_hidden_layers)

         self.rope_parameters = rope_parameters
-        super().__init__(tie_word_embeddings=tie_word_embeddings, ignore_keys_at_rope_validation={"mrope"}, **kwargs)
+        super().__init__(
+            tie_word_embeddings=tie_word_embeddings, ignore_keys_at_rope_validation={"mrope_section"}, **kwargs
+        )


 class Qwen2_5OmniDiTConfig(PreTrainedConfig):
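Note: the config hunks above (and their counterparts in the other Qwen configs below) replace the ignored key `"mrope"` with `"mrope_section"`, matching the actual name of the config entry. The sketch below is a guess at why the exact name matters; the real validation lives in `transformers.modeling_rope_utils` and differs in detail.

```python
# Hypothetical sketch of key-filtered RoPE-parameter validation; it only
# illustrates why the ignored key must match the config key exactly.


def validate_rope_parameters(rope_parameters: dict, ignore_keys: set):
    standard_keys = {"rope_type", "rope_theta", "factor"}
    for key in rope_parameters:
        if key in ignore_keys:
            continue  # skipped: "mrope" would never match "mrope_section"
        if key not in standard_keys:
            raise KeyError(f"Unrecognized RoPE parameter: {key}")


rope = {"rope_type": "default", "mrope_section": [16, 24, 24]}
validate_rope_parameters(rope, ignore_keys={"mrope_section"})  # passes
# validate_rope_parameters(rope, ignore_keys={"mrope"})  # would raise
```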
@@ -2306,11 +2308,8 @@ class Qwen2_5OmniThinkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCo
             audio_feature_lengths = None

         if attention_mask is not None and position_ids is None:
-            if (
-                cache_position is None
-                or (cache_position is not None and cache_position[0] == 0)
-                or self.rope_deltas is None
-            ):
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if past_key_values_length == 0 or self.rope_deltas is None:
                 delta0 = (1 - attention_mask).sum(dim=-1).unsqueeze(1)
                 position_ids, rope_deltas = self.get_rope_index(
                     input_ids,
@@ -2325,7 +2324,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCo
                 self.rope_deltas = rope_deltas
             else:
                 batch_size, seq_length = input_ids.shape
-                delta = cache_position[0] + self.rope_deltas if cache_position is not None else 0
+                delta = (past_key_values_length + self.rope_deltas).to(input_ids.device)
                 position_ids = torch.arange(seq_length, device=input_ids.device)
                 position_ids = position_ids.view(1, -1).expand(batch_size, -1)
                 position_ids = position_ids.add(delta)
@@ -2518,6 +2517,7 @@ class Qwen2_5OmniTalkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCon
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
+        **kwargs,
     ) -> Union[tuple, Qwen2_5OmniTalkerCausalLMOutputWithPast]:
         r"""
         thinker_reply_part (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
@@ -2567,11 +2567,8 @@ class Qwen2_5OmniTalkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCon
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict

         if attention_mask is not None and position_ids is None:
-            if (
-                cache_position is None
-                or (cache_position is not None and cache_position[0] == 0)
-                or self.rope_deltas is None
-            ):
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if past_key_values_length == 0 or self.rope_deltas is None:
                 position_ids, rope_deltas = self.get_rope_index(
                     input_text_ids,
                     image_grid_thw,
@@ -2591,8 +2588,8 @@ class Qwen2_5OmniTalkerForConditionalGeneration(Qwen2_5OmniPreTrainedModelForCon
                 self.rope_deltas = rope_deltas

             else:
-                batch_size, seq_length = input_ids.shape
-                delta = cache_position[0] + self.rope_deltas if cache_position is not None else 0
+                batch_size, seq_length, _ = inputs_embeds.shape
+                delta = (past_key_values_length + self.rope_deltas).to(input_ids.device)
                 position_ids = torch.arange(seq_length, device=input_ids.device)
                 position_ids = position_ids.view(1, -1).expand(batch_size, -1)
                 position_ids = position_ids.add(delta)
@@ -3617,7 +3614,7 @@ class Qwen2_5OmniToken2WavBigVGANModel(Qwen2_5OmniPreTrainedModel):
         decibel_spectrum = self.amplitude_to_db(amplitude_spectrum, -115) - 20
         return self.normalize_spectrogram(decibel_spectrum, 1, -115)

-    def forward(self, mel_spectrogram):
+    def forward(self, mel_spectrogram, **kwargs):
         processed_spectrogram = self.process_mel_spectrogram(mel_spectrogram)
         hidden_representation = self.conv_pre(processed_spectrogram)

@@ -3750,6 +3747,7 @@ class Qwen2_5OmniToken2WavDiTModel(Qwen2_5OmniPreTrainedModel):
         drop_audio_conditioning=False,
         drop_code=False,
         apply_cfg=True,
+        **kwargs,
     ):
         batch_size = hidden_states.shape[0]
         if time_step.ndim == 0:

transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py
@@ -230,7 +230,7 @@ class Qwen2_5_VLTextConfig(PreTrainedConfig):
             bos_token_id=bos_token_id,
             eos_token_id=eos_token_id,
             pad_token_id=pad_token_id,
-            ignore_keys_at_rope_validation={"mrope"},
+            ignore_keys_at_rope_validation={"mrope_section"},
             **kwargs,
         )


transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -43,6 +43,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
+from ...utils.generic import maybe_autocast
 from ..qwen2.modeling_qwen2 import Qwen2RMSNorm
 from .configuration_qwen2_5_vl import Qwen2_5_VLConfig, Qwen2_5_VLTextConfig, Qwen2_5_VLVisionConfig

@@ -547,7 +548,7 @@ class Qwen2_5_VLRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, :, None, :].float()  # shape (3, bs, 1, positions)

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(2, 3)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -1290,7 +1291,8 @@ class Qwen2_5_VLModel(Qwen2_5_VLPreTrainedModel):
                 inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)

         if position_ids is None:
-            if self.rope_deltas is None or cache_position is None or cache_position[0] == 0:
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if self.rope_deltas is None or past_key_values_length == 0:
                 position_ids, rope_deltas = self.get_rope_index(
                     input_ids,
                     image_grid_thw,
@@ -1303,10 +1305,7 @@ class Qwen2_5_VLModel(Qwen2_5_VLPreTrainedModel):
                 batch_size, seq_length, _ = inputs_embeds.shape
                 position_ids = torch.arange(seq_length, device=inputs_embeds.device)
                 position_ids = position_ids.view(1, 1, -1).expand(3, batch_size, -1)
-                if cache_position is not None:
-                    delta = (cache_position[0] + self.rope_deltas).to(inputs_embeds.device)
-                else:
-                    delta = torch.zeros((batch_size, seq_length), device=inputs_embeds.device)
+                delta = (past_key_values_length + self.rope_deltas).to(inputs_embeds.device)
                 delta = delta.repeat_interleave(batch_size // delta.shape[0], dim=1)
                 position_ids = position_ids + delta.to(position_ids.device)


transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py
@@ -595,7 +595,8 @@ class Qwen2_5_VLModel(Qwen2VLModel):
                 inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)

         if position_ids is None:
-            if self.rope_deltas is None or cache_position is None or cache_position[0] == 0:
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if self.rope_deltas is None or past_key_values_length == 0:
                 position_ids, rope_deltas = self.get_rope_index(
                     input_ids,
                     image_grid_thw,
@@ -608,10 +609,7 @@ class Qwen2_5_VLModel(Qwen2VLModel):
                 batch_size, seq_length, _ = inputs_embeds.shape
                 position_ids = torch.arange(seq_length, device=inputs_embeds.device)
                 position_ids = position_ids.view(1, 1, -1).expand(3, batch_size, -1)
-                if cache_position is not None:
-                    delta = (cache_position[0] + self.rope_deltas).to(inputs_embeds.device)
-                else:
-                    delta = torch.zeros((batch_size, seq_length), device=inputs_embeds.device)
+                delta = (past_key_values_length + self.rope_deltas).to(inputs_embeds.device)
                 delta = delta.repeat_interleave(batch_size // delta.shape[0], dim=1)
                 position_ids = position_ids + delta.to(position_ids.device)


transformers/models/qwen2_audio/modeling_qwen2_audio.py
@@ -323,6 +323,7 @@ class Qwen2AudioEncoder(Qwen2AudioPreTrainedModel):
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
+        **kwargs,
     ):
         r"""
         Args:
@@ -685,6 +686,7 @@ class Qwen2AudioForConditionalGeneration(Qwen2AudioPreTrainedModel, GenerationMi
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
     ) -> Union[tuple, Qwen2AudioCausalLMOutputWithPast]:
         r"""
         feature_attention_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`):

transformers/models/qwen2_moe/modeling_qwen2_moe.py
@@ -35,7 +35,7 @@ from ... import initialization as init
 from ...activations import ACT2FN
 from ...cache_utils import Cache, DynamicCache
 from ...generation import GenerationMixin
-from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
+from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
 from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
 from ...modeling_layers import (
     GenericForQuestionAnswering,
@@ -48,7 +48,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import OutputRecorder, check_model_inputs
+from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
 from .configuration_qwen2_moe import Qwen2MoeConfig


@@ -129,7 +129,7 @@ class Qwen2MoeRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -227,6 +227,7 @@ def eager_attention_forward(
     return attn_output, attn_weights


+@use_kernelized_func(apply_rotary_pos_emb)
 class Qwen2MoeAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

@@ -244,7 +245,6 @@ class Qwen2MoeAttention(nn.Module):
         self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.qkv_bias)
         self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.qkv_bias)
         self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)
-        self.rotary_fn = apply_rotary_pos_emb
         if self.config.layer_types[layer_idx] == "sliding_attention":
             self.sliding_window = config.sliding_window

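Note: the two hunks above (repeated for Qwen3, Qwen3-MoE, and Qwen3-Next below) move rotary-function selection from a per-instance assignment (`self.rotary_fn = apply_rotary_pos_emb` in `__init__`) to a class decorator, `@use_kernelized_func(apply_rotary_pos_emb)`, imported from `transformers.integrations`. The sketch below is a speculative reading of the pattern, not the real decorator, which may substitute a hub-provided fused kernel before attaching the function.

```python
# Speculative sketch of a use_kernelized_func-style class decorator; the
# real transformers decorator may resolve a fused kernel from the hub.


def use_kernelized_func_sketch(func):
    def decorator(cls):
        # Attach the (possibly kernel-substituted) function once at class
        # level instead of assigning self.rotary_fn in every __init__.
        cls.rotary_fn = staticmethod(func)
        return cls

    return decorator


def apply_rotary_pos_emb_ref(q, k, cos, sin):
    return q, k  # stand-in for the actual rotation


@use_kernelized_func_sketch(apply_rotary_pos_emb_ref)
class Attention:
    def forward(self, q, k, cos, sin):
        return self.rotary_fn(q, k, cos, sin)
```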

transformers/models/qwen2_vl/configuration_qwen2_vl.py
@@ -218,7 +218,7 @@ class Qwen2VLTextConfig(PreTrainedConfig):
             bos_token_id=bos_token_id,
             eos_token_id=eos_token_id,
             pad_token_id=pad_token_id,
-            ignore_keys_at_rope_validation={"mrope"},
+            ignore_keys_at_rope_validation={"mrope_section"},
             **kwargs,
         )


transformers/models/qwen2_vl/modeling_qwen2_vl.py
@@ -42,9 +42,9 @@ from ...utils import (
     TransformersKwargs,
     auto_docstring,
     can_return_tuple,
-    is_torchdynamo_compiling,
     logging,
 )
+from ...utils.generic import maybe_autocast
 from ..qwen2.modeling_qwen2 import (
     Qwen2RMSNorm,
 )
@@ -165,7 +165,7 @@ class Qwen2VLRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, :, None, :].float()  # shape (3, bs, 1, positions)

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(2, 3)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -1222,7 +1222,8 @@ class Qwen2VLModel(Qwen2VLPreTrainedModel):
                 inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)

         if position_ids is None:
-            if self.rope_deltas is None or cache_position is None or cache_position[0] == 0:
+            past_key_values_length = 0 if past_key_values is None else past_key_values.get_seq_length()
+            if self.rope_deltas is None or past_key_values_length == 0:
                 position_ids, rope_deltas = self.get_rope_index(
                     input_ids, image_grid_thw, video_grid_thw, attention_mask
                 )
@@ -1232,10 +1233,7 @@ class Qwen2VLModel(Qwen2VLPreTrainedModel):
                 batch_size, seq_length, _ = inputs_embeds.shape
                 position_ids = torch.arange(seq_length, device=inputs_embeds.device)
                 position_ids = position_ids.view(1, 1, -1).expand(3, batch_size, -1)
-                if cache_position is not None:
-                    delta = (cache_position[0] + self.rope_deltas).to(inputs_embeds.device)
-                else:
-                    delta = torch.zeros((batch_size, seq_length), device=inputs_embeds.device)
+                delta = (past_key_values_length + self.rope_deltas).to(inputs_embeds.device)
                 delta = delta.repeat_interleave(batch_size // delta.shape[0], dim=0)
                 position_ids = position_ids + delta.to(position_ids.device)

@@ -1443,15 +1441,7 @@ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel, GenerationMixin):
         # When compiling, we can't check tensor values thus we check only input length
         # It is safe to assume that `length!=1` means we're in pre-fill because compiled
         # models currently cannot do asssisted decoding
-        prefill_compiled_stage = is_torchdynamo_compiling() and (
-            (input_ids is not None and input_ids.shape[1] != 1)
-            or (inputs_embeds is not None and inputs_embeds.shape[1] != 1)
-        )
-        prefill_noncompiled_stage = not is_torchdynamo_compiling() and (
-            (cache_position is not None and cache_position[0] == 0)
-            or (past_key_values is None or past_key_values.get_seq_length() == 0)
-        )
-        if (prefill_compiled_stage or prefill_noncompiled_stage) or self.model.rope_deltas is None:
+        if model_inputs["cache_position"][0] == 0 or self.model.rope_deltas is None:
             vision_positions, rope_deltas = self.model.get_rope_index(
                 model_inputs.get("input_ids", None),
                 image_grid_thw=image_grid_thw,
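Note: the eight-line compiled/non-compiled prefill detection collapses to one check because `prepare_inputs_for_generation` has already materialized `cache_position` inside `model_inputs` at this point, so `cache_position[0] == 0` identifies prefill under both eager and compiled execution. A minimal illustration (`model_inputs` here is a stand-in for the dict built by `prepare_inputs_for_generation`):

```python
import torch

# Prefill: the cache positions for the current step start at 0.
model_inputs = {"cache_position": torch.arange(0, 7)}
is_prefill = bool(model_inputs["cache_position"][0] == 0)  # True
```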

transformers/models/qwen3/modeling_qwen3.py
@@ -28,7 +28,7 @@ from torch import nn
 from ...activations import ACT2FN
 from ...cache_utils import Cache, DynamicCache
 from ...generation import GenerationMixin
-from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
+from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
 from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
 from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import (
@@ -42,7 +42,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import check_model_inputs
+from ...utils.generic import check_model_inputs, maybe_autocast
 from .configuration_qwen3 import Qwen3Config


@@ -139,7 +139,7 @@ class Qwen3RotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -221,6 +221,7 @@ def eager_attention_forward(
     return attn_output, attn_weights


+@use_kernelized_func(apply_rotary_pos_emb)
 class Qwen3Attention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

@@ -247,7 +248,6 @@ class Qwen3Attention(nn.Module):
         self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
-        self.rotary_fn = apply_rotary_pos_emb
         self.q_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)  # unlike olmo, only on the head dim!
         self.k_norm = Qwen3RMSNorm(self.head_dim, eps=config.rms_norm_eps)  # thus post q_norm does not need reshape
         self.sliding_window = config.sliding_window if self.layer_type == "sliding_attention" else None

transformers/models/qwen3_moe/modeling_qwen3_moe.py
@@ -30,7 +30,7 @@ from ... import initialization as init
 from ...activations import ACT2FN
 from ...cache_utils import Cache, DynamicCache
 from ...generation import GenerationMixin
-from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
+from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
 from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
 from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import (
@@ -44,7 +44,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
-from ...utils.generic import OutputRecorder, check_model_inputs
+from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
 from .configuration_qwen3_moe import Qwen3MoeConfig


@@ -121,6 +121,7 @@ def eager_attention_forward(
     return attn_output, attn_weights


+@use_kernelized_func(apply_rotary_pos_emb)
 class Qwen3MoeAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

@@ -146,7 +147,6 @@ class Qwen3MoeAttention(nn.Module):
         self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
-        self.rotary_fn = apply_rotary_pos_emb
         self.q_norm = Qwen3MoeRMSNorm(self.head_dim, eps=config.rms_norm_eps)  # unlike olmo, only on the head dim!
         self.k_norm = Qwen3MoeRMSNorm(self.head_dim, eps=config.rms_norm_eps)  # thus post q_norm does not need reshape
         self.sliding_window = getattr(config, "sliding_window", None)
@@ -440,7 +440,7 @@ class Qwen3MoeRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling

transformers/models/qwen3_next/modeling_qwen3_next.py
@@ -30,6 +30,7 @@ from ... import initialization as init
 from ...activations import ACT2FN
 from ...cache_utils import Cache
 from ...generation import GenerationMixin
+from ...integrations import use_kernelized_func
 from ...masking_utils import create_causal_mask
 from ...modeling_flash_attention_utils import FlashAttentionKwargs
 from ...modeling_layers import (
@@ -43,7 +44,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
-from ...utils.generic import OutputRecorder, check_model_inputs
+from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
 from ...utils.import_utils import (
     is_causal_conv1d_available,
     is_flash_linear_attention_available,
@@ -232,7 +233,7 @@ class Qwen3NextRotaryEmbedding(nn.Module):
         position_ids_expanded = position_ids[:, None, :].float()

         device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
-        with torch.autocast(device_type=device_type, enabled=False):  # Force float32
+        with maybe_autocast(device_type=device_type, enabled=False):  # Force float32
             freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
             emb = torch.cat((freqs, freqs), dim=-1)
             cos = emb.cos() * self.attention_scaling
@@ -347,6 +348,7 @@ def eager_attention_forward(
     return attn_output, attn_weights


+@use_kernelized_func(apply_rotary_pos_emb)
 class Qwen3NextAttention(nn.Module):
     """Multi-headed attention from 'Attention Is All You Need' paper"""

@@ -371,7 +373,6 @@ class Qwen3NextAttention(nn.Module):
         self.o_proj = nn.Linear(
             config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
         )
-        self.rotary_fn = apply_rotary_pos_emb
         self.q_norm = Qwen3NextRMSNorm(self.head_dim, eps=config.rms_norm_eps)  # unlike olmo, only on the head dim!
         self.k_norm = Qwen3NextRMSNorm(
             self.head_dim, eps=config.rms_norm_eps