transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (539)
  1. transformers/__init__.py +30 -3
  2. transformers/cli/serve.py +47 -17
  3. transformers/conversion_mapping.py +15 -2
  4. transformers/convert_slow_tokenizer.py +225 -10
  5. transformers/core_model_loading.py +196 -135
  6. transformers/data/data_collator.py +12 -4
  7. transformers/dependency_versions_table.py +1 -2
  8. transformers/dynamic_module_utils.py +1 -2
  9. transformers/feature_extraction_utils.py +1 -2
  10. transformers/file_utils.py +0 -1
  11. transformers/generation/__init__.py +11 -1
  12. transformers/generation/configuration_utils.py +3 -2
  13. transformers/generation/continuous_batching/__init__.py +4 -0
  14. transformers/generation/continuous_batching/continuous_api.py +134 -79
  15. transformers/image_processing_base.py +1 -2
  16. transformers/integrations/__init__.py +4 -2
  17. transformers/integrations/accelerate.py +15 -3
  18. transformers/integrations/aqlm.py +38 -66
  19. transformers/integrations/awq.py +48 -514
  20. transformers/integrations/bitnet.py +45 -100
  21. transformers/integrations/bitsandbytes.py +79 -191
  22. transformers/integrations/deepspeed.py +1 -0
  23. transformers/integrations/eetq.py +84 -79
  24. transformers/integrations/fbgemm_fp8.py +191 -145
  25. transformers/integrations/finegrained_fp8.py +236 -193
  26. transformers/integrations/fp_quant.py +92 -0
  27. transformers/integrations/ggml.py +11 -1
  28. transformers/integrations/higgs.py +40 -62
  29. transformers/integrations/hub_kernels.py +42 -3
  30. transformers/integrations/integration_utils.py +10 -0
  31. transformers/integrations/mxfp4.py +25 -65
  32. transformers/integrations/peft.py +7 -29
  33. transformers/integrations/quanto.py +73 -55
  34. transformers/integrations/quark.py +55 -0
  35. transformers/integrations/spqr.py +44 -90
  36. transformers/integrations/torchao.py +32 -38
  37. transformers/integrations/vptq.py +42 -59
  38. transformers/modelcard.py +1 -2
  39. transformers/modeling_gguf_pytorch_utils.py +8 -0
  40. transformers/modeling_rope_utils.py +30 -6
  41. transformers/modeling_utils.py +116 -112
  42. transformers/models/__init__.py +3 -0
  43. transformers/models/afmoe/modeling_afmoe.py +4 -4
  44. transformers/models/albert/tokenization_albert.py +6 -12
  45. transformers/models/align/modeling_align.py +2 -0
  46. transformers/models/altclip/modeling_altclip.py +4 -0
  47. transformers/models/apertus/modeling_apertus.py +4 -4
  48. transformers/models/arcee/modeling_arcee.py +4 -4
  49. transformers/models/aria/modeling_aria.py +4 -4
  50. transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
  51. transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
  52. transformers/models/auto/configuration_auto.py +11 -0
  53. transformers/models/auto/feature_extraction_auto.py +2 -0
  54. transformers/models/auto/image_processing_auto.py +1 -0
  55. transformers/models/auto/modeling_auto.py +6 -0
  56. transformers/models/auto/processing_auto.py +18 -10
  57. transformers/models/auto/tokenization_auto.py +74 -472
  58. transformers/models/autoformer/modeling_autoformer.py +4 -0
  59. transformers/models/bamba/modeling_bamba.py +4 -3
  60. transformers/models/bark/modeling_bark.py +2 -0
  61. transformers/models/bart/modeling_bart.py +7 -0
  62. transformers/models/barthez/tokenization_barthez.py +5 -10
  63. transformers/models/beit/modeling_beit.py +6 -1
  64. transformers/models/bert/tokenization_bert.py +8 -21
  65. transformers/models/big_bird/modeling_big_bird.py +6 -0
  66. transformers/models/big_bird/tokenization_big_bird.py +18 -42
  67. transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
  68. transformers/models/biogpt/modeling_biogpt.py +2 -0
  69. transformers/models/biogpt/modular_biogpt.py +2 -0
  70. transformers/models/bit/modeling_bit.py +11 -2
  71. transformers/models/bitnet/modeling_bitnet.py +4 -4
  72. transformers/models/blenderbot/modeling_blenderbot.py +5 -0
  73. transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
  74. transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
  75. transformers/models/blip/modeling_blip_text.py +2 -0
  76. transformers/models/blip_2/modeling_blip_2.py +2 -1
  77. transformers/models/bloom/modeling_bloom.py +4 -0
  78. transformers/models/blt/modeling_blt.py +2 -2
  79. transformers/models/blt/modular_blt.py +2 -2
  80. transformers/models/bridgetower/modeling_bridgetower.py +5 -1
  81. transformers/models/bros/modeling_bros.py +4 -0
  82. transformers/models/camembert/tokenization_camembert.py +8 -12
  83. transformers/models/canine/modeling_canine.py +5 -0
  84. transformers/models/chameleon/modeling_chameleon.py +2 -1
  85. transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
  86. transformers/models/clap/modeling_clap.py +5 -0
  87. transformers/models/clip/tokenization_clip.py +22 -44
  88. transformers/models/clipseg/modeling_clipseg.py +5 -0
  89. transformers/models/clvp/modeling_clvp.py +5 -0
  90. transformers/models/clvp/tokenization_clvp.py +1 -63
  91. transformers/models/code_llama/tokenization_code_llama.py +20 -43
  92. transformers/models/codegen/tokenization_codegen.py +14 -43
  93. transformers/models/cohere/modeling_cohere.py +4 -3
  94. transformers/models/cohere/modular_cohere.py +2 -1
  95. transformers/models/cohere/tokenization_cohere.py +12 -42
  96. transformers/models/cohere2/modeling_cohere2.py +7 -6
  97. transformers/models/cohere2/modular_cohere2.py +5 -5
  98. transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
  99. transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
  100. transformers/models/colqwen2/modeling_colqwen2.py +1 -0
  101. transformers/models/colqwen2/modular_colqwen2.py +1 -0
  102. transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
  103. transformers/models/convbert/modeling_convbert.py +6 -0
  104. transformers/models/convnext/modeling_convnext.py +2 -4
  105. transformers/models/convnextv2/modeling_convnextv2.py +2 -4
  106. transformers/models/csm/modeling_csm.py +4 -3
  107. transformers/models/ctrl/modeling_ctrl.py +1 -0
  108. transformers/models/cvt/modeling_cvt.py +2 -0
  109. transformers/models/cwm/modeling_cwm.py +4 -4
  110. transformers/models/d_fine/modeling_d_fine.py +2 -0
  111. transformers/models/d_fine/modular_d_fine.py +1 -0
  112. transformers/models/dab_detr/modeling_dab_detr.py +4 -0
  113. transformers/models/dac/modeling_dac.py +2 -2
  114. transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
  115. transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
  116. transformers/models/dbrx/modeling_dbrx.py +2 -2
  117. transformers/models/deberta/modeling_deberta.py +5 -0
  118. transformers/models/deberta/tokenization_deberta.py +11 -20
  119. transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
  120. transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
  121. transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
  122. transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
  123. transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
  124. transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
  125. transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
  126. transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
  127. transformers/models/depth_anything/modeling_depth_anything.py +1 -0
  128. transformers/models/depth_pro/modeling_depth_pro.py +2 -0
  129. transformers/models/detr/modeling_detr.py +5 -0
  130. transformers/models/dia/modeling_dia.py +4 -3
  131. transformers/models/dia/modular_dia.py +0 -1
  132. transformers/models/diffllama/modeling_diffllama.py +2 -2
  133. transformers/models/dinat/modeling_dinat.py +3 -0
  134. transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
  135. transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
  136. transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
  137. transformers/models/distilbert/tokenization_distilbert.py +13 -0
  138. transformers/models/doge/modeling_doge.py +2 -3
  139. transformers/models/doge/modular_doge.py +0 -1
  140. transformers/models/donut/modeling_donut_swin.py +2 -0
  141. transformers/models/dots1/modeling_dots1.py +10 -7
  142. transformers/models/dots1/modular_dots1.py +5 -3
  143. transformers/models/dpr/modeling_dpr.py +5 -0
  144. transformers/models/dpr/tokenization_dpr.py +12 -0
  145. transformers/models/edgetam/modeling_edgetam.py +1 -1
  146. transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
  147. transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
  148. transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
  149. transformers/models/efficientnet/modeling_efficientnet.py +2 -0
  150. transformers/models/emu3/modeling_emu3.py +4 -4
  151. transformers/models/eomt/image_processing_eomt.py +13 -1
  152. transformers/models/eomt/image_processing_eomt_fast.py +14 -2
  153. transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
  154. transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
  155. transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
  156. transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
  157. transformers/models/esm/modeling_esmfold.py +5 -4
  158. transformers/models/evolla/modeling_evolla.py +4 -4
  159. transformers/models/exaone4/modeling_exaone4.py +2 -2
  160. transformers/models/exaone4/modular_exaone4.py +0 -1
  161. transformers/models/falcon/modeling_falcon.py +6 -1
  162. transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
  163. transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
  164. transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
  165. transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
  166. transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
  167. transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
  168. transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
  169. transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
  170. transformers/models/flaubert/modeling_flaubert.py +7 -0
  171. transformers/models/flava/modeling_flava.py +6 -1
  172. transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
  173. transformers/models/florence2/modeling_florence2.py +2 -1
  174. transformers/models/florence2/modular_florence2.py +2 -1
  175. transformers/models/fnet/modeling_fnet.py +7 -0
  176. transformers/models/focalnet/modeling_focalnet.py +4 -0
  177. transformers/models/fsmt/modeling_fsmt.py +2 -0
  178. transformers/models/funnel/modeling_funnel.py +8 -0
  179. transformers/models/funnel/tokenization_funnel.py +17 -24
  180. transformers/models/fuyu/processing_fuyu.py +3 -3
  181. transformers/models/gemma/modeling_gemma.py +4 -4
  182. transformers/models/gemma/tokenization_gemma.py +10 -27
  183. transformers/models/gemma2/modeling_gemma2.py +4 -4
  184. transformers/models/gemma2/modular_gemma2.py +2 -1
  185. transformers/models/gemma3/modeling_gemma3.py +14 -84
  186. transformers/models/gemma3/modular_gemma3.py +12 -81
  187. transformers/models/gemma3n/modeling_gemma3n.py +18 -209
  188. transformers/models/gemma3n/modular_gemma3n.py +17 -59
  189. transformers/models/git/modeling_git.py +2 -0
  190. transformers/models/glm/modeling_glm.py +4 -4
  191. transformers/models/glm4/modeling_glm4.py +4 -4
  192. transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
  193. transformers/models/glm4v/configuration_glm4v.py +3 -1
  194. transformers/models/glm4v/modeling_glm4v.py +3 -3
  195. transformers/models/glm4v/modular_glm4v.py +6 -4
  196. transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
  197. transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
  198. transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
  199. transformers/models/glpn/modeling_glpn.py +2 -0
  200. transformers/models/gpt2/modeling_gpt2.py +5 -1
  201. transformers/models/gpt2/tokenization_gpt2.py +16 -44
  202. transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
  203. transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
  204. transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
  205. transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
  206. transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
  207. transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
  208. transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
  209. transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
  210. transformers/models/gptj/modeling_gptj.py +3 -0
  211. transformers/models/granite/modeling_granite.py +4 -4
  212. transformers/models/granitemoe/modeling_granitemoe.py +4 -6
  213. transformers/models/granitemoe/modular_granitemoe.py +0 -2
  214. transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
  215. transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
  216. transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
  217. transformers/models/groupvit/modeling_groupvit.py +3 -0
  218. transformers/models/helium/modeling_helium.py +4 -3
  219. transformers/models/herbert/tokenization_herbert.py +9 -25
  220. transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
  221. transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
  222. transformers/models/hiera/modeling_hiera.py +4 -0
  223. transformers/models/hubert/modeling_hubert.py +3 -0
  224. transformers/models/hubert/modular_hubert.py +1 -0
  225. transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
  226. transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
  227. transformers/models/ibert/modeling_ibert.py +6 -0
  228. transformers/models/idefics/modeling_idefics.py +5 -21
  229. transformers/models/imagegpt/modeling_imagegpt.py +2 -1
  230. transformers/models/informer/modeling_informer.py +4 -0
  231. transformers/models/informer/modular_informer.py +1 -0
  232. transformers/models/internvl/modeling_internvl.py +2 -4
  233. transformers/models/internvl/modular_internvl.py +2 -4
  234. transformers/models/jamba/modeling_jamba.py +2 -2
  235. transformers/models/janus/modeling_janus.py +1 -0
  236. transformers/models/janus/modular_janus.py +1 -0
  237. transformers/models/jetmoe/modeling_jetmoe.py +2 -2
  238. transformers/models/kosmos2/modeling_kosmos2.py +1 -0
  239. transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
  240. transformers/models/lasr/__init__.py +29 -0
  241. transformers/models/lasr/configuration_lasr.py +244 -0
  242. transformers/models/lasr/feature_extraction_lasr.py +277 -0
  243. transformers/models/lasr/modeling_lasr.py +729 -0
  244. transformers/models/lasr/modular_lasr.py +569 -0
  245. transformers/models/lasr/processing_lasr.py +96 -0
  246. transformers/models/lasr/tokenization_lasr.py +186 -0
  247. transformers/models/layoutlm/modeling_layoutlm.py +5 -0
  248. transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
  249. transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
  250. transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
  251. transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
  252. transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
  253. transformers/models/led/modeling_led.py +6 -0
  254. transformers/models/levit/modeling_levit.py +3 -0
  255. transformers/models/lfm2/modeling_lfm2.py +4 -5
  256. transformers/models/lfm2/modular_lfm2.py +0 -1
  257. transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
  258. transformers/models/lightglue/modeling_lightglue.py +3 -1
  259. transformers/models/lightglue/modular_lightglue.py +1 -0
  260. transformers/models/lilt/modeling_lilt.py +4 -0
  261. transformers/models/llama/modeling_llama.py +4 -4
  262. transformers/models/llama/tokenization_llama.py +15 -43
  263. transformers/models/llama4/modeling_llama4.py +3 -2
  264. transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
  265. transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
  266. transformers/models/longformer/modeling_longformer.py +6 -0
  267. transformers/models/longt5/modeling_longt5.py +4 -0
  268. transformers/models/luke/modeling_luke.py +9 -0
  269. transformers/models/luke/tokenization_luke.py +11 -38
  270. transformers/models/lxmert/modeling_lxmert.py +2 -0
  271. transformers/models/m2m_100/modeling_m2m_100.py +4 -0
  272. transformers/models/mamba/modeling_mamba.py +14 -22
  273. transformers/models/marian/modeling_marian.py +5 -0
  274. transformers/models/markuplm/modeling_markuplm.py +4 -0
  275. transformers/models/markuplm/tokenization_markuplm.py +28 -61
  276. transformers/models/mask2former/modeling_mask2former.py +2 -0
  277. transformers/models/maskformer/modeling_maskformer.py +2 -0
  278. transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
  279. transformers/models/mbart/modeling_mbart.py +7 -0
  280. transformers/models/mbart/tokenization_mbart.py +11 -52
  281. transformers/models/mbart50/tokenization_mbart50.py +7 -10
  282. transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
  283. transformers/models/mgp_str/modeling_mgp_str.py +2 -0
  284. transformers/models/mimi/modeling_mimi.py +3 -1
  285. transformers/models/minimax/modeling_minimax.py +4 -4
  286. transformers/models/ministral/modeling_ministral.py +4 -4
  287. transformers/models/ministral3/configuration_ministral3.py +1 -1
  288. transformers/models/ministral3/modeling_ministral3.py +4 -3
  289. transformers/models/mistral/modeling_mistral.py +4 -3
  290. transformers/models/mixtral/modeling_mixtral.py +4 -4
  291. transformers/models/mllama/modeling_mllama.py +2 -2
  292. transformers/models/mluke/tokenization_mluke.py +6 -6
  293. transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
  294. transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
  295. transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
  296. transformers/models/mobilevit/modeling_mobilevit.py +3 -0
  297. transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
  298. transformers/models/modernbert/modeling_modernbert.py +4 -1
  299. transformers/models/modernbert/modular_modernbert.py +2 -0
  300. transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
  301. transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
  302. transformers/models/moonshine/modeling_moonshine.py +4 -2
  303. transformers/models/moshi/modeling_moshi.py +5 -2
  304. transformers/models/mpnet/modeling_mpnet.py +5 -0
  305. transformers/models/mpnet/tokenization_mpnet.py +5 -13
  306. transformers/models/mpt/modeling_mpt.py +2 -0
  307. transformers/models/mra/modeling_mra.py +6 -0
  308. transformers/models/mt5/modeling_mt5.py +7 -0
  309. transformers/models/musicgen/modeling_musicgen.py +2 -0
  310. transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
  311. transformers/models/mvp/modeling_mvp.py +7 -0
  312. transformers/models/nanochat/modeling_nanochat.py +4 -4
  313. transformers/models/nemotron/modeling_nemotron.py +4 -2
  314. transformers/models/nllb/tokenization_nllb.py +8 -22
  315. transformers/models/nougat/tokenization_nougat.py +11 -59
  316. transformers/models/nystromformer/modeling_nystromformer.py +6 -0
  317. transformers/models/olmo/modeling_olmo.py +4 -4
  318. transformers/models/olmo/modular_olmo.py +2 -2
  319. transformers/models/olmo2/modeling_olmo2.py +4 -5
  320. transformers/models/olmo2/modular_olmo2.py +0 -1
  321. transformers/models/olmo3/modeling_olmo3.py +4 -4
  322. transformers/models/olmoe/modeling_olmoe.py +4 -4
  323. transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
  324. transformers/models/oneformer/modeling_oneformer.py +4 -1
  325. transformers/models/openai/modeling_openai.py +3 -0
  326. transformers/models/openai/tokenization_openai.py +10 -46
  327. transformers/models/opt/modeling_opt.py +2 -0
  328. transformers/models/owlv2/modeling_owlv2.py +4 -0
  329. transformers/models/owlvit/modeling_owlvit.py +4 -0
  330. transformers/models/paddleocr_vl/__init__.py +32 -0
  331. transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
  332. transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
  333. transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
  334. transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
  335. transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
  336. transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
  337. transformers/models/parakeet/configuration_parakeet.py +4 -6
  338. transformers/models/parakeet/modeling_parakeet.py +9 -6
  339. transformers/models/parakeet/modular_parakeet.py +2 -2
  340. transformers/models/parakeet/processing_parakeet.py +1 -0
  341. transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
  342. transformers/models/patchtst/modeling_patchtst.py +20 -2
  343. transformers/models/pegasus/modeling_pegasus.py +5 -0
  344. transformers/models/pegasus/tokenization_pegasus.py +17 -44
  345. transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
  346. transformers/models/perceiver/modeling_perceiver.py +8 -0
  347. transformers/models/persimmon/modeling_persimmon.py +2 -1
  348. transformers/models/phi/modeling_phi.py +4 -5
  349. transformers/models/phi/modular_phi.py +0 -1
  350. transformers/models/phi3/modeling_phi3.py +2 -1
  351. transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
  352. transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
  353. transformers/models/phimoe/modeling_phimoe.py +4 -4
  354. transformers/models/phimoe/modular_phimoe.py +2 -2
  355. transformers/models/pix2struct/modeling_pix2struct.py +2 -0
  356. transformers/models/pixtral/modeling_pixtral.py +2 -1
  357. transformers/models/plbart/modeling_plbart.py +6 -0
  358. transformers/models/plbart/modular_plbart.py +2 -0
  359. transformers/models/plbart/tokenization_plbart.py +0 -2
  360. transformers/models/poolformer/modeling_poolformer.py +2 -0
  361. transformers/models/pop2piano/modeling_pop2piano.py +2 -0
  362. transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
  363. transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
  364. transformers/models/prophetnet/modeling_prophetnet.py +3 -0
  365. transformers/models/pvt/modeling_pvt.py +2 -0
  366. transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
  367. transformers/models/qwen2/modeling_qwen2.py +4 -4
  368. transformers/models/qwen2/tokenization_qwen2.py +14 -18
  369. transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
  370. transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
  371. transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
  372. transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
  373. transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
  374. transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
  375. transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
  376. transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
  377. transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
  378. transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
  379. transformers/models/qwen3/modeling_qwen3.py +4 -4
  380. transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
  381. transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
  382. transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
  383. transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
  384. transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
  385. transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
  386. transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
  387. transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
  388. transformers/models/rag/modeling_rag.py +1 -0
  389. transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
  390. transformers/models/reformer/modeling_reformer.py +4 -0
  391. transformers/models/reformer/tokenization_reformer.py +11 -28
  392. transformers/models/regnet/modeling_regnet.py +6 -1
  393. transformers/models/rembert/modeling_rembert.py +6 -0
  394. transformers/models/rembert/tokenization_rembert.py +3 -10
  395. transformers/models/resnet/modeling_resnet.py +11 -2
  396. transformers/models/roberta/tokenization_roberta.py +18 -27
  397. transformers/models/roformer/modeling_roformer.py +6 -0
  398. transformers/models/roformer/tokenization_roformer.py +77 -412
  399. transformers/models/rt_detr/modeling_rt_detr.py +2 -0
  400. transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
  401. transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
  402. transformers/models/rwkv/modeling_rwkv.py +1 -0
  403. transformers/models/sam2/modeling_sam2.py +2 -2
  404. transformers/models/sam2/modular_sam2.py +2 -2
  405. transformers/models/sam2_video/modeling_sam2_video.py +1 -0
  406. transformers/models/sam2_video/modular_sam2_video.py +1 -0
  407. transformers/models/sam3/modeling_sam3.py +77 -80
  408. transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
  409. transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
  410. transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
  411. transformers/models/sam3_video/modeling_sam3_video.py +1 -0
  412. transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
  413. transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
  414. transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
  415. transformers/models/seed_oss/modeling_seed_oss.py +2 -2
  416. transformers/models/segformer/modeling_segformer.py +4 -1
  417. transformers/models/seggpt/modeling_seggpt.py +2 -0
  418. transformers/models/sew/modeling_sew.py +3 -0
  419. transformers/models/sew/modular_sew.py +1 -0
  420. transformers/models/sew_d/modeling_sew_d.py +3 -0
  421. transformers/models/siglip2/modeling_siglip2.py +4 -0
  422. transformers/models/siglip2/modular_siglip2.py +4 -0
  423. transformers/models/smollm3/modeling_smollm3.py +4 -4
  424. transformers/models/smolvlm/processing_smolvlm.py +0 -7
  425. transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
  426. transformers/models/speecht5/modeling_speecht5.py +13 -1
  427. transformers/models/splinter/modeling_splinter.py +3 -0
  428. transformers/models/splinter/tokenization_splinter.py +9 -28
  429. transformers/models/squeezebert/modeling_squeezebert.py +6 -0
  430. transformers/models/stablelm/modeling_stablelm.py +3 -1
  431. transformers/models/starcoder2/modeling_starcoder2.py +4 -3
  432. transformers/models/superglue/modeling_superglue.py +1 -0
  433. transformers/models/superpoint/modeling_superpoint.py +1 -0
  434. transformers/models/swiftformer/modeling_swiftformer.py +2 -0
  435. transformers/models/swin/modeling_swin.py +4 -0
  436. transformers/models/swin2sr/modeling_swin2sr.py +2 -0
  437. transformers/models/swinv2/modeling_swinv2.py +4 -0
  438. transformers/models/t5/modeling_t5.py +7 -0
  439. transformers/models/t5/tokenization_t5.py +4 -8
  440. transformers/models/t5gemma/modeling_t5gemma.py +5 -5
  441. transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
  442. transformers/models/table_transformer/modeling_table_transformer.py +4 -0
  443. transformers/models/tapas/modeling_tapas.py +3 -0
  444. transformers/models/textnet/modeling_textnet.py +11 -2
  445. transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
  446. transformers/models/timesfm/modeling_timesfm.py +2 -0
  447. transformers/models/timesfm/modular_timesfm.py +2 -0
  448. transformers/models/timesformer/modeling_timesformer.py +2 -0
  449. transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
  450. transformers/models/trocr/modeling_trocr.py +2 -0
  451. transformers/models/tvp/modeling_tvp.py +2 -0
  452. transformers/models/udop/modeling_udop.py +4 -0
  453. transformers/models/udop/tokenization_udop.py +5 -13
  454. transformers/models/umt5/modeling_umt5.py +7 -0
  455. transformers/models/unispeech/modeling_unispeech.py +4 -0
  456. transformers/models/unispeech/modular_unispeech.py +2 -0
  457. transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
  458. transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
  459. transformers/models/univnet/modeling_univnet.py +1 -0
  460. transformers/models/upernet/modeling_upernet.py +1 -0
  461. transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
  462. transformers/models/vilt/modeling_vilt.py +6 -0
  463. transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
  464. transformers/models/visual_bert/modeling_visual_bert.py +6 -0
  465. transformers/models/vitdet/modeling_vitdet.py +2 -0
  466. transformers/models/vitmatte/modeling_vitmatte.py +1 -0
  467. transformers/models/vits/modeling_vits.py +1 -0
  468. transformers/models/vjepa2/modeling_vjepa2.py +1 -0
  469. transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
  470. transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
  471. transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
  472. transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
  473. transformers/models/wavlm/modeling_wavlm.py +5 -0
  474. transformers/models/whisper/modeling_whisper.py +6 -0
  475. transformers/models/whisper/tokenization_whisper.py +4 -15
  476. transformers/models/x_clip/modeling_x_clip.py +3 -0
  477. transformers/models/xglm/modeling_xglm.py +1 -0
  478. transformers/models/xglm/tokenization_xglm.py +4 -9
  479. transformers/models/xlm/modeling_xlm.py +5 -0
  480. transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
  481. transformers/models/xlnet/tokenization_xlnet.py +3 -7
  482. transformers/models/yoso/modeling_yoso.py +6 -0
  483. transformers/models/zamba/modeling_zamba.py +2 -0
  484. transformers/models/zamba2/modeling_zamba2.py +4 -2
  485. transformers/models/zamba2/modular_zamba2.py +1 -1
  486. transformers/models/zoedepth/modeling_zoedepth.py +1 -0
  487. transformers/pipelines/__init__.py +2 -3
  488. transformers/pipelines/base.py +1 -9
  489. transformers/pipelines/document_question_answering.py +3 -1
  490. transformers/pipelines/text_generation.py +1 -1
  491. transformers/processing_utils.py +23 -11
  492. transformers/quantizers/base.py +35 -110
  493. transformers/quantizers/quantizer_aqlm.py +1 -5
  494. transformers/quantizers/quantizer_auto_round.py +1 -2
  495. transformers/quantizers/quantizer_awq.py +17 -81
  496. transformers/quantizers/quantizer_bitnet.py +3 -8
  497. transformers/quantizers/quantizer_bnb_4bit.py +13 -110
  498. transformers/quantizers/quantizer_bnb_8bit.py +16 -92
  499. transformers/quantizers/quantizer_compressed_tensors.py +1 -5
  500. transformers/quantizers/quantizer_eetq.py +14 -62
  501. transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
  502. transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
  503. transformers/quantizers/quantizer_fp_quant.py +48 -78
  504. transformers/quantizers/quantizer_gptq.py +7 -24
  505. transformers/quantizers/quantizer_higgs.py +40 -54
  506. transformers/quantizers/quantizer_hqq.py +144 -153
  507. transformers/quantizers/quantizer_mxfp4.py +13 -167
  508. transformers/quantizers/quantizer_quanto.py +20 -64
  509. transformers/quantizers/quantizer_quark.py +36 -17
  510. transformers/quantizers/quantizer_spqr.py +1 -4
  511. transformers/quantizers/quantizer_torchao.py +23 -202
  512. transformers/quantizers/quantizer_vptq.py +8 -22
  513. transformers/quantizers/quantizers_utils.py +20 -0
  514. transformers/testing_utils.py +297 -36
  515. transformers/tokenization_mistral_common.py +4 -0
  516. transformers/tokenization_utils_base.py +113 -222
  517. transformers/tokenization_utils_tokenizers.py +168 -107
  518. transformers/trainer.py +28 -31
  519. transformers/trainer_jit_checkpoint.py +126 -0
  520. transformers/trainer_utils.py +1 -1
  521. transformers/training_args.py +66 -28
  522. transformers/utils/__init__.py +3 -4
  523. transformers/utils/auto_docstring.py +1 -0
  524. transformers/utils/generic.py +27 -1
  525. transformers/utils/hub.py +5 -15
  526. transformers/utils/import_utils.py +61 -16
  527. transformers/utils/kernel_config.py +4 -2
  528. transformers/utils/loading_report.py +19 -10
  529. transformers/utils/quantization_config.py +75 -242
  530. transformers/video_processing_utils.py +1 -2
  531. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
  532. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
  533. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
  534. transformers/kernels/__init__.py +0 -0
  535. transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
  536. transformers/models/roformer/tokenization_roformer_fast.py +0 -160
  537. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
  538. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  539. {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -31,7 +31,7 @@ from ... import initialization as init
31
31
  from ...activations import ACT2FN
32
32
  from ...cache_utils import Cache, DynamicCache
33
33
  from ...generation import GenerationMixin
34
- from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
34
+ from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
35
35
  from ...masking_utils import create_bidirectional_mask, create_causal_mask
36
36
  from ...modeling_layers import GradientCheckpointingLayer
37
37
  from ...modeling_outputs import (
@@ -45,7 +45,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
45
45
  from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
46
46
  from ...processing_utils import Unpack
47
47
  from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
48
- from ...utils.generic import OutputRecorder, check_model_inputs
48
+ from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
49
49
  from .configuration_evolla import EvollaConfig, SaProtConfig
50
50
 
51
51
 
@@ -1019,7 +1019,7 @@ class EvollaRotaryEmbedding(nn.Module):
1019
1019
  position_ids_expanded = position_ids[:, None, :].float()
1020
1020
 
1021
1021
  device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
1022
- with torch.autocast(device_type=device_type, enabled=False): # Force float32
1022
+ with maybe_autocast(device_type=device_type, enabled=False): # Force float32
1023
1023
  freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
1024
1024
  emb = torch.cat((freqs, freqs), dim=-1)
1025
1025
  cos = emb.cos() * self.attention_scaling
@@ -1091,6 +1091,7 @@ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
1091
1091
  return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
1092
1092
 
1093
1093
 
1094
+ @use_kernelized_func(apply_rotary_pos_emb)
1094
1095
  class EvollaAttention(nn.Module):
1095
1096
  """Multi-headed attention from 'Attention Is All You Need' paper"""
1096
1097
 
@@ -1116,7 +1117,6 @@ class EvollaAttention(nn.Module):
1116
1117
  self.o_proj = nn.Linear(
1117
1118
  config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
1118
1119
  )
1119
- self.rotary_fn = apply_rotary_pos_emb
1120
1120
 
1121
1121
  def forward(
1122
1122
  self,
@@ -44,6 +44,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
44
44
  from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
45
45
  from ...processing_utils import Unpack
46
46
  from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
47
+ from ...utils.generic import maybe_autocast
47
48
  from .configuration_exaone4 import Exaone4Config
48
49
 
49
50
 
@@ -124,7 +125,7 @@ class Exaone4RotaryEmbedding(nn.Module):
124
125
  position_ids_expanded = position_ids[:, None, :].float()
125
126
 
126
127
  device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
127
- with torch.autocast(device_type=device_type, enabled=False): # Force float32
128
+ with maybe_autocast(device_type=device_type, enabled=False): # Force float32
128
129
  freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
129
130
  emb = torch.cat((freqs, freqs), dim=-1)
130
131
  cos = emb.cos() * self.attention_scaling
@@ -239,7 +240,6 @@ class Exaone4Attention(nn.Module):
239
240
  attention_mask: Optional[torch.Tensor] = None,
240
241
  past_key_values: Optional[Cache] = None,
241
242
  cache_position: Optional[torch.LongTensor] = None,
242
- position_ids: Optional[torch.LongTensor] = None,
243
243
  **kwargs: Unpack[TransformersKwargs],
244
244
  ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
245
245
  input_shape = hidden_states.shape[:-1]
@@ -260,7 +260,6 @@ class Exaone4Attention(nn.Module):
260
260
  attention_mask: Optional[torch.Tensor] = None,
261
261
  past_key_values: Optional[Cache] = None,
262
262
  cache_position: Optional[torch.LongTensor] = None,
263
- position_ids: Optional[torch.LongTensor] = None,
264
263
  **kwargs: Unpack[TransformersKwargs],
265
264
  ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
266
265
  input_shape = hidden_states.shape[:-1]
@@ -48,6 +48,7 @@ from ...utils import (
48
48
  auto_docstring,
49
49
  logging,
50
50
  )
51
+ from ...utils.generic import maybe_autocast
51
52
  from .configuration_falcon import FalconConfig
52
53
 
53
54
 
@@ -160,7 +161,7 @@ class FalconRotaryEmbedding(nn.Module):
160
161
  position_ids_expanded = position_ids[:, None, :].float()
161
162
 
162
163
  device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
163
- with torch.autocast(device_type=device_type, enabled=False): # Force float32
164
+ with maybe_autocast(device_type=device_type, enabled=False): # Force float32
164
165
  freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
165
166
  emb = torch.cat((freqs, freqs), dim=-1)
166
167
  cos = emb.cos() * self.attention_scaling
@@ -739,6 +740,7 @@ class FalconModel(FalconPreTrainedModel):
739
740
  output_hidden_states: Optional[bool] = None,
740
741
  return_dict: Optional[bool] = None,
741
742
  cache_position: Optional[torch.LongTensor] = None,
743
+ **kwargs,
742
744
  ) -> Union[tuple[torch.Tensor, ...], BaseModelOutputWithPastAndCrossAttentions]:
743
745
  r"""
744
746
  input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -1119,6 +1121,7 @@ class FalconForSequenceClassification(FalconPreTrainedModel):
1119
1121
  output_attentions: Optional[bool] = None,
1120
1122
  output_hidden_states: Optional[bool] = None,
1121
1123
  return_dict: Optional[bool] = None,
1124
+ **kwargs,
1122
1125
  ) -> Union[tuple[torch.Tensor], SequenceClassifierOutputWithPast]:
1123
1126
  r"""
1124
1127
  input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -1243,6 +1246,7 @@ class FalconForTokenClassification(FalconPreTrainedModel):
1243
1246
  output_attentions: Optional[bool] = None,
1244
1247
  output_hidden_states: Optional[bool] = None,
1245
1248
  return_dict: Optional[bool] = None,
1249
+ **kwargs,
1246
1250
  ) -> Union[tuple[torch.Tensor], TokenClassifierOutput]:
1247
1251
  r"""
1248
1252
  input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -1320,6 +1324,7 @@ class FalconForQuestionAnswering(FalconPreTrainedModel):
1320
1324
  output_attentions: Optional[bool] = None,
1321
1325
  output_hidden_states: Optional[bool] = None,
1322
1326
  return_dict: Optional[bool] = None,
1327
+ **kwargs,
1323
1328
  ) -> Union[tuple, QuestionAnsweringModelOutput]:
1324
1329
  r"""
1325
1330
  input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
@@ -36,7 +36,7 @@ from transformers.activations import ACT2FN
36
36
  from ... import initialization as init
37
37
  from ...cache_utils import Cache
38
38
  from ...generation import GenerationMixin
39
- from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
39
+ from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
40
40
  from ...modeling_attn_mask_utils import AttentionMaskConverter
41
41
  from ...modeling_flash_attention_utils import FlashAttentionKwargs
42
42
  from ...modeling_layers import GradientCheckpointingLayer
@@ -45,6 +45,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
45
45
  from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
46
46
  from ...processing_utils import Unpack
47
47
  from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging
48
+ from ...utils.generic import maybe_autocast
48
49
  from ...utils.import_utils import is_causal_conv1d_available, is_mamba_2_ssm_available
49
50
  from .configuration_falcon_h1 import FalconH1Config
50
51
 
@@ -279,7 +280,7 @@ class FalconH1RotaryEmbedding(nn.Module):
279
280
  position_ids_expanded = position_ids[:, None, :].float()
280
281
 
281
282
  device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
282
- with torch.autocast(device_type=device_type, enabled=False): # Force float32
283
+ with maybe_autocast(device_type=device_type, enabled=False): # Force float32
283
284
  freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
284
285
  emb = torch.cat((freqs, freqs), dim=-1)
285
286
  cos = emb.cos() * self.attention_scaling
@@ -361,6 +362,7 @@ def eager_attention_forward(
361
362
  return attn_output, attn_weights
362
363
 
363
364
 
365
+ @use_kernelized_func(apply_rotary_pos_emb)
364
366
  class FalconH1Attention(nn.Module):
365
367
  """Multi-headed attention from 'Attention Is All You Need' paper"""
366
368
 
@@ -386,7 +388,6 @@ class FalconH1Attention(nn.Module):
386
388
  self.o_proj = nn.Linear(
387
389
  config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
388
390
  )
389
- self.rotary_fn = apply_rotary_pos_emb
390
391
  self.key_multiplier = config.key_multiplier
391
392
 
392
393
  def forward(
@@ -35,11 +35,7 @@ from ...integrations.hub_kernels import lazy_load_kernel
35
35
  from ...modeling_layers import GradientCheckpointingLayer
36
36
  from ...modeling_utils import PreTrainedModel
37
37
  from ...utils import ModelOutput, auto_docstring, logging
38
- from ...utils.import_utils import (
39
- is_mamba_ssm_available,
40
- is_mambapy_available,
41
- is_torchdynamo_compiling,
42
- )
38
+ from ...utils.import_utils import is_mambapy_available, is_torchdynamo_compiling
43
39
  from .configuration_falcon_mamba import FalconMambaConfig
44
40
 
45
41
 
@@ -48,14 +44,6 @@ if is_mambapy_available():
48
44
  else:
49
45
  pscan = None
50
46
 
51
- if is_mamba_ssm_available():
52
- from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
53
- from mamba_ssm.ops.triton.selective_state_update import selective_state_update
54
-
55
- from ...kernels.falcon_mamba import mamba_inner_fn
56
- else:
57
- selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
58
-
59
47
 
60
48
  logger = logging.get_logger(__name__)
61
49
 
@@ -231,7 +219,27 @@ class FalconMambaMixer(nn.Module):
231
219
  self.out_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=config.use_bias)
232
220
  self.use_bias = config.use_bias
233
221
 
222
+ global causal_conv1d, causal_conv1d_update, causal_conv1d_fn
223
+ causal_conv1d = lazy_load_kernel("causal-conv1d")
224
+ causal_conv1d_update, causal_conv1d_fn = (
225
+ (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
226
+ if causal_conv1d is not None
227
+ else (None, None)
228
+ )
229
+ global falcon_mamba_ssm, selective_state_update, selective_scan_fn, falcon_mamba_inner_fn
230
+ falcon_mamba_ssm = lazy_load_kernel("falcon_mamba-ssm")
231
+ selective_state_update, selective_scan_fn, falcon_mamba_inner_fn = (
232
+ (
233
+ falcon_mamba_ssm.selective_state_update,
234
+ falcon_mamba_ssm.selective_scan_fn,
235
+ falcon_mamba_ssm.falcon_mamba_inner_fn,
236
+ )
237
+ if falcon_mamba_ssm is not None
238
+ else (None, None, None)
239
+ )
240
+
234
241
  self.warn_slow_implementation()
242
+
235
243
  # Triton expects to pass RMS weights even if they are non learnable, thus we need to create these weights here
236
244
  self.register_buffer(
237
245
  "b_c_rms", torch.nn.Parameter(torch.ones(self.ssm_state_size), requires_grad=False), persistent=False
@@ -242,14 +250,8 @@ class FalconMambaMixer(nn.Module):
242
250
  self.rms_eps = config.mixer_rms_eps
243
251
 
244
252
  def warn_slow_implementation(self):
245
- causal_conv1d = lazy_load_kernel("causal-conv1d")
246
- causal_conv1d_update, causal_conv1d_fn = (
247
- (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
248
- if causal_conv1d is not None
249
- else (None, None)
250
- )
251
253
  is_fast_path_available = all(
252
- (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
254
+ (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
253
255
  )
254
256
  if not is_fast_path_available:
255
257
  if self.use_falcon_mambapy:
@@ -279,9 +281,8 @@ class FalconMambaMixer(nn.Module):
279
281
  ):
280
282
  # 1. Gated MLP's linear projection
281
283
  projected_states = self.in_proj(hidden_states).transpose(1, 2)
282
-
283
284
  if self.training and cache_params is None: # Doesn't support outputting the states -> used for training
284
- contextualized_states = mamba_inner_fn(
285
+ contextualized_states = falcon_mamba_inner_fn(
285
286
  projected_states,
286
287
  self.conv1d.weight,
287
288
  self.conv1d.bias if self.use_conv_bias else None,
@@ -302,12 +303,6 @@ class FalconMambaMixer(nn.Module):
302
303
  )
303
304
 
304
305
  else:
305
- causal_conv1d = lazy_load_kernel("causal-conv1d")
306
- causal_conv1d_update, causal_conv1d_fn = (
307
- (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
308
- if causal_conv1d is not None
309
- else (None, None)
310
- )
311
306
  hidden_states, gate = projected_states.chunk(2, dim=1)
312
307
 
313
308
  if attention_mask is not None:
@@ -502,14 +497,8 @@ class FalconMambaMixer(nn.Module):
502
497
  cache_position: Optional[torch.LongTensor] = None,
503
498
  attention_mask: Optional[torch.LongTensor] = None,
504
499
  ):
505
- causal_conv1d = lazy_load_kernel("causal-conv1d")
506
- causal_conv1d_update, causal_conv1d_fn = (
507
- (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
508
- if causal_conv1d is not None
509
- else (None, None)
510
- )
511
500
  is_fast_path_available = all(
512
- (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
501
+ (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
513
502
  )
514
503
  if is_fast_path_available and "cuda" in self.x_proj.weight.device.type and not is_torchdynamo_compiling():
515
504
  return self.cuda_kernels_forward(hidden_states, cache_params, cache_position, attention_mask)
@@ -703,6 +692,7 @@ class FalconMambaModel(FalconMambaPreTrainedModel):
703
692
  return_dict: Optional[bool] = None,
704
693
  cache_position: Optional[torch.LongTensor] = None,
705
694
  attention_mask: Optional[torch.LongTensor] = None,
695
+ **kwargs,
706
696
  ) -> Union[tuple, FalconMambaOutput]:
707
697
  r"""
708
698
  cache_params (`FalconMambaCache`, *optional*):
@@ -19,9 +19,8 @@ from typing import Optional
19
19
  import torch
20
20
  from torch import nn
21
21
 
22
- from ...integrations.hub_kernels import lazy_load_kernel
23
22
  from ...utils import auto_docstring, logging
24
- from ...utils.import_utils import is_mamba_ssm_available, is_mambapy_available, is_torchdynamo_compiling
23
+ from ...utils.import_utils import is_mambapy_available, is_torchdynamo_compiling
25
24
  from ..mamba.configuration_mamba import MambaConfig
26
25
  from ..mamba.modeling_mamba import (
27
26
  MambaBlock,
@@ -43,13 +42,13 @@ if is_mambapy_available():
43
42
  else:
44
43
  pscan = None
45
44
 
46
- if is_mamba_ssm_available():
47
- from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
48
- from mamba_ssm.ops.triton.selective_state_update import selective_state_update
49
-
50
- from ...kernels.falcon_mamba import mamba_inner_fn
51
- else:
52
- selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
45
+ selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn = (
46
+ None,
47
+ None,
48
+ None,
49
+ None,
50
+ None,
51
+ )
53
52
 
54
53
 
55
54
  class FalconMambaConfig(MambaConfig):
@@ -251,14 +250,8 @@ def rms_forward(hidden_states, variance_epsilon=1e-6):
251
250
 
252
251
  class FalconMambaMixer(MambaMixer):
253
252
  def warn_slow_implementation(self):
254
- causal_conv1d = lazy_load_kernel("causal-conv1d")
255
- causal_conv1d_update, causal_conv1d_fn = (
256
- (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
257
- if causal_conv1d is not None
258
- else (None, None)
259
- )
260
253
  is_fast_path_available = all(
261
- (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
254
+ (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
262
255
  )
263
256
  if not is_fast_path_available:
264
257
  if self.use_falcon_mambapy:
@@ -281,6 +274,7 @@ class FalconMambaMixer(MambaMixer):
281
274
 
282
275
  def __init__(self, config: FalconMambaConfig, layer_idx: int):
283
276
  super().__init__(config, layer_idx)
277
+
284
278
  # Triton expects to pass RMS weights even if they are non learnable, thus we need to create these weights here
285
279
  self.register_buffer(
286
280
  "b_c_rms", torch.nn.Parameter(torch.ones(self.ssm_state_size), requires_grad=False), persistent=False
@@ -299,9 +293,8 @@ class FalconMambaMixer(MambaMixer):
299
293
  ):
300
294
  # 1. Gated MLP's linear projection
301
295
  projected_states = self.in_proj(hidden_states).transpose(1, 2)
302
-
303
296
  if self.training and cache_params is None: # Doesn't support outputting the states -> used for training
304
- contextualized_states = mamba_inner_fn(
297
+ contextualized_states = falcon_mamba_inner_fn(
305
298
  projected_states,
306
299
  self.conv1d.weight,
307
300
  self.conv1d.bias if self.use_conv_bias else None,
@@ -322,12 +315,6 @@ class FalconMambaMixer(MambaMixer):
322
315
  )
323
316
 
324
317
  else:
325
- causal_conv1d = lazy_load_kernel("causal-conv1d")
326
- causal_conv1d_update, causal_conv1d_fn = (
327
- (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
328
- if causal_conv1d is not None
329
- else (None, None)
330
- )
331
318
  hidden_states, gate = projected_states.chunk(2, dim=1)
332
319
 
333
320
  if attention_mask is not None:
@@ -521,14 +508,8 @@ class FalconMambaMixer(MambaMixer):
521
508
  cache_position: Optional[torch.LongTensor] = None,
522
509
  attention_mask: Optional[torch.LongTensor] = None,
523
510
  ):
524
- causal_conv1d = lazy_load_kernel("causal-conv1d")
525
- causal_conv1d_update, causal_conv1d_fn = (
526
- (causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
527
- if causal_conv1d is not None
528
- else (None, None)
529
- )
530
511
  is_fast_path_available = all(
531
- (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
512
+ (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
532
513
  )
533
514
  if is_fast_path_available and "cuda" in self.x_proj.weight.device.type and not is_torchdynamo_compiling():
534
515
  return self.cuda_kernels_forward(hidden_states, cache_params, cache_position, attention_mask)
@@ -1,5 +1,4 @@
1
- # coding=utf-8
2
- # Copyright 2024 Tri Dao, Albert Gu, Technological Innovation Institute and HuggingFace Inc. team.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
3
2
  #
4
3
  # Licensed under the Apache License, Version 2.0 (the "License");
5
4
  # you may not use this file except in compliance with the License.
@@ -12,4 +11,17 @@
12
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
12
  # See the License for the specific language governing permissions and
14
13
  # limitations under the License.
15
- from .selective_scan_with_ln_interface import mamba_inner_fn
14
+ from typing import TYPE_CHECKING
15
+
16
+ from ...utils import _LazyModule
17
+ from ...utils.import_utils import define_import_structure
18
+
19
+
20
+ if TYPE_CHECKING:
21
+ from .configuration_fast_vlm import *
22
+ from .modeling_fast_vlm import *
23
+ else:
24
+ import sys
25
+
26
+ _file = globals()["__file__"]
27
+ sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
@@ -0,0 +1,137 @@
1
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
2
+ # This file was automatically generated from src/transformers/models/fast_vlm/modular_fast_vlm.py.
3
+ # Do NOT edit this file manually as any edits will be overwritten by the generation of
4
+ # the file from the modular. If any change should be done, please apply the change to the
5
+ # modular_fast_vlm.py file directly. One of our CI enforces this.
6
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
7
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+
21
+ from ...configuration_utils import PreTrainedConfig
22
+ from ..auto import CONFIG_MAPPING, AutoConfig
23
+
24
+
25
+ class FastVlmConfig(PreTrainedConfig):
26
+ r"""
27
+ This is the configuration class to store the configuration of a [`FastVlmForConditionalGeneration`]. It is used to instantiate a
28
+ FastVLM model according to the specified arguments, defining the model architecture. Instantiating a configuration
29
+ with the defaults will yield the same configuration as the one of FastVLM-7B.
30
+
31
+ e.g. [KamilaMila/FastVLM-7B](https://huggingface.co/KamilaMila/FastVLM-7B)
32
+
33
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
34
+ documentation from [`PretrainedConfig`] for more information.
35
+
36
+ Args:
37
+ vision_config (`Union[AutoConfig, dict]`, *optional*, defaults to `TimmWrapperConfig` for `fastvit_mci3`):
38
+ The config object or dictionary of the vision backbone.
39
+ text_config (`Union[AutoConfig, dict]`, *optional*, defaults to `Qwen2Config`):
40
+ The config object or dictionary of the text backbone.
41
+ image_token_id (`int`, *optional*, defaults to 151646):
42
+ The image token index to encode the image prompt.
43
+ projector_hidden_act (`str`, *optional*, defaults to `"gelu"`):
44
+ The activation function used by the multimodal projector.
45
+ vision_feature_select_strategy (`str`, *optional*, defaults to `"full"`):
46
+ The feature selection strategy used to select the vision feature from the vision backbone.
47
+ Only "full" supported.
48
+ vision_feature_layer (`Union[int, list[int]]`, *optional*, defaults to -1):
49
+ The index of the layer to select the vision feature. If multiple indices are provided,
50
+ the vision feature of the corresponding indices will be concatenated to form the
51
+ vision features. Only -1 supported.
52
+ multimodal_projector_bias (`bool`, *optional*, defaults to `True`):
53
+ Whether to use bias in the multimodal projector.
54
+
55
+ Example:
56
+
57
+ ```python
58
+ >>> from transformers import FastVlmForConditionalGeneration, FastVlmConfig
59
+
60
+ >>> # Initializing a FastVLM-7B style configuration
61
+ >>> configuration = FastVlmConfig()
62
+
63
+ >>> # Initializing a model from the FastVLM-7B style configuration
64
+ >>> model = FastVlmForConditionalGeneration(configuration)
65
+
66
+ >>> # Accessing the model configuration
67
+ >>> configuration = model.config
68
+ ```"""
69
+
70
+ model_type = "fast_vlm"
71
+ attribute_map = {
72
+ "image_token_id": "image_token_index",
73
+ }
74
+ sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}
75
+
76
+ def __init__(
77
+ self,
78
+ vision_config=None,
79
+ text_config=None,
80
+ image_token_id=151646,
81
+ projector_hidden_act="gelu",
82
+ vision_feature_select_strategy="full",
83
+ vision_feature_layer=-1,
84
+ multimodal_projector_bias=True,
85
+ **kwargs,
86
+ ):
87
+ self.image_token_id = image_token_id
88
+ self.projector_hidden_act = projector_hidden_act
89
+
90
+ if vision_feature_select_strategy != "full":
91
+ raise ValueError(
92
+ f"Unexpected select feature strategy: {vision_feature_select_strategy}. Only 'full' is supported in FastVLM."
93
+ )
94
+
95
+ if vision_feature_layer != -1:
96
+ raise ValueError(
97
+ f"Unexpected vision feature layer: {vision_feature_layer}. Only -1 is supported in FastVLM."
98
+ )
99
+
100
+ self.vision_feature_select_strategy = vision_feature_select_strategy
101
+ self.vision_feature_layer = vision_feature_layer
102
+
103
+ if isinstance(vision_config, dict):
104
+ vision_config["model_type"] = vision_config.get("model_type", "timm_wrapper")
105
+ vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
106
+ elif vision_config is None:
107
+ vision_config = CONFIG_MAPPING["timm_wrapper"](
108
+ architecture="fastvit_mci3",
109
+ do_pooling=True,
110
+ global_pool="avg",
111
+ hidden_size=3072,
112
+ initializer_range=0.02,
113
+ model_args={"inference_mode": True},
114
+ )
115
+
116
+ self.vision_config = vision_config
117
+
118
+ if isinstance(text_config, dict):
119
+ text_config["model_type"] = text_config.get("model_type", "qwen2")
120
+ text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
121
+ elif text_config is None:
122
+ text_config = CONFIG_MAPPING["qwen2"](
123
+ hidden_size=3584,
124
+ vocab_size=152128,
125
+ intermediate_size=18944,
126
+ num_attention_heads=28,
127
+ num_key_value_heads=4,
128
+ num_hidden_layers=28,
129
+ )
130
+
131
+ self.text_config = text_config
132
+ self.multimodal_projector_bias = multimodal_projector_bias
133
+
134
+ super().__init__(**kwargs)
135
+
136
+
137
+ __all__ = ["FastVlmConfig"]