vllm-ascend 0.9.0rc2__tar.gz → 0.11.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (859)
  1. vllm_ascend-0.11.0rc1/.gemini/config.yaml +6 -0
  2. vllm_ascend-0.11.0rc1/.github/Dockerfile.buildwheel +45 -0
  3. vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/110-user-story.yml +37 -0
  4. vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/750-RFC.yml +49 -0
  5. vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml +104 -0
  6. vllm_ascend-0.11.0rc1/.github/PULL_REQUEST_TEMPLATE.md +27 -0
  7. vllm_ascend-0.11.0rc1/.github/actionlint.yaml +21 -0
  8. vllm_ascend-0.11.0rc1/.github/format_pr_body.sh +59 -0
  9. vllm_ascend-0.11.0rc1/.github/workflows/_accuracy_test.yaml +175 -0
  10. vllm_ascend-0.11.0rc1/.github/workflows/_e2e_nightly.yaml +115 -0
  11. vllm_ascend-0.11.0rc1/.github/workflows/_e2e_test.yaml +199 -0
  12. vllm_ascend-0.11.0rc1/.github/workflows/accuracy_test.yaml +72 -0
  13. vllm_ascend-0.11.0rc1/.github/workflows/format_pr_body.yaml +57 -0
  14. vllm_ascend-0.11.0rc1/.github/workflows/image_310p_openeuler.yml +135 -0
  15. vllm_ascend-0.11.0rc1/.github/workflows/image_310p_ubuntu.yml +131 -0
  16. vllm_ascend-0.11.0rc1/.github/workflows/image_a3_openeuler.yml +135 -0
  17. vllm_ascend-0.11.0rc1/.github/workflows/image_a3_ubuntu.yml +131 -0
  18. vllm_ascend-0.11.0rc1/.github/workflows/image_openeuler.yml +134 -0
  19. vllm_ascend-0.11.0rc1/.github/workflows/image_ubuntu.yml +131 -0
  20. vllm_ascend-0.11.0rc1/.github/workflows/label_merge_conflict.yml +20 -0
  21. vllm_ascend-0.11.0rc1/.github/workflows/labeler.yml +18 -0
  22. vllm_ascend-0.11.0rc1/.github/workflows/multi_node_test.yaml +118 -0
  23. vllm_ascend-0.11.0rc1/.github/workflows/nightly_benchmarks.yaml +206 -0
  24. vllm_ascend-0.11.0rc1/.github/workflows/pre-commit.yml +43 -0
  25. vllm_ascend-0.11.0rc1/.github/workflows/release_code.yml +75 -0
  26. vllm_ascend-0.11.0rc1/.github/workflows/release_whl.yml +119 -0
  27. vllm_ascend-0.11.0rc1/.github/workflows/reminder_comment.yml +26 -0
  28. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_dist.yaml +100 -0
  29. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_doctest.yaml +87 -0
  30. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test.yaml +149 -0
  31. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_310p.yaml +117 -0
  32. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_full.yaml +80 -0
  33. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +45 -0
  34. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_models.yaml +177 -0
  35. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_nightly.yaml +105 -0
  36. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_pd.yaml +112 -0
  37. vllm_ascend-0.11.0rc1/.gitignore +204 -0
  38. vllm_ascend-0.11.0rc1/.pre-commit-config.yaml +151 -0
  39. vllm_ascend-0.11.0rc1/CMakeLists.txt +111 -0
  40. vllm_ascend-0.11.0rc1/CODE_OF_CONDUCT.md +127 -0
  41. vllm_ascend-0.11.0rc1/CONTRIBUTING.md +3 -0
  42. vllm_ascend-0.11.0rc1/Dockerfile +60 -0
  43. vllm_ascend-0.11.0rc1/Dockerfile.310p +61 -0
  44. vllm_ascend-0.11.0rc1/Dockerfile.310p.openEuler +59 -0
  45. vllm_ascend-0.11.0rc1/Dockerfile.a3 +60 -0
  46. vllm_ascend-0.11.0rc1/Dockerfile.a3.openEuler +58 -0
  47. vllm_ascend-0.11.0rc1/Dockerfile.openEuler +58 -0
  48. vllm_ascend-0.11.0rc1/PKG-INFO +142 -0
  49. vllm_ascend-0.11.0rc1/README.md +91 -0
  50. vllm_ascend-0.11.0rc1/README.zh.md +90 -0
  51. vllm_ascend-0.11.0rc1/benchmarks/README.md +175 -0
  52. vllm_ascend-0.11.0rc1/benchmarks/ops/ben_vocabparallelembedding.py +158 -0
  53. vllm_ascend-0.11.0rc1/benchmarks/requirements-bench.txt +4 -0
  54. vllm_ascend-0.11.0rc1/benchmarks/scripts/convert_json_to_markdown.py +188 -0
  55. vllm_ascend-0.11.0rc1/benchmarks/scripts/perf_result_template.md +31 -0
  56. vllm_ascend-0.11.0rc1/benchmarks/scripts/run-performance-benchmarks.sh +323 -0
  57. vllm_ascend-0.11.0rc1/benchmarks/tests/latency-tests.json +23 -0
  58. vllm_ascend-0.11.0rc1/benchmarks/tests/serving-tests.json +78 -0
  59. vllm_ascend-0.11.0rc1/benchmarks/tests/throughput-tests.json +38 -0
  60. vllm_ascend-0.11.0rc1/codecov.yml +28 -0
  61. vllm_ascend-0.11.0rc1/csrc/camem_allocator.cpp +347 -0
  62. vllm_ascend-0.11.0rc1/csrc/kernels/bgmv_expand.cpp +369 -0
  63. vllm_ascend-0.11.0rc1/csrc/kernels/bgmv_shrink.cpp +252 -0
  64. vllm_ascend-0.11.0rc1/csrc/kernels/get_masked_input_and_mask_kernel.cpp +378 -0
  65. vllm_ascend-0.11.0rc1/csrc/kernels/pos_encoding_kernels.cpp +372 -0
  66. vllm_ascend-0.11.0rc1/csrc/kernels/sgmv_expand.cpp +389 -0
  67. vllm_ascend-0.11.0rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
  68. vllm_ascend-0.11.0rc1/csrc/kernels/utils.h +51 -0
  69. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_host/mla_preprocess.h +698 -0
  70. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +95 -0
  71. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/common.h +25 -0
  72. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/common_func.h +121 -0
  73. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/hardware.h +36 -0
  74. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterator.h +92 -0
  75. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +162 -0
  76. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +89 -0
  77. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +228 -0
  78. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +42 -0
  79. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +71 -0
  80. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +39 -0
  81. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +36 -0
  82. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +310 -0
  83. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +44 -0
  84. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +395 -0
  85. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/layout.h +18 -0
  86. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/mem.h +82 -0
  87. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/mma.h +67 -0
  88. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +38 -0
  89. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/simd.h +274 -0
  90. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/utils.h +69 -0
  91. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess.h +114 -0
  92. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +295 -0
  93. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +2914 -0
  94. vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +2503 -0
  95. vllm_ascend-0.11.0rc1/csrc/ops.h +161 -0
  96. vllm_ascend-0.11.0rc1/csrc/torch_binding.cpp +514 -0
  97. vllm_ascend-0.11.0rc1/csrc/torch_binding_meta.cpp +136 -0
  98. vllm_ascend-0.11.0rc1/csrc/utils.h +31 -0
  99. vllm_ascend-0.11.0rc1/docs/Makefile +25 -0
  100. vllm_ascend-0.11.0rc1/docs/README.md +24 -0
  101. vllm_ascend-0.11.0rc1/docs/requirements-docs.txt +10 -0
  102. vllm_ascend-0.11.0rc1/docs/requirements-test.txt +2 -0
  103. vllm_ascend-0.11.0rc1/docs/source/_templates/sections/header.html +58 -0
  104. vllm_ascend-0.11.0rc1/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  105. vllm_ascend-0.11.0rc1/docs/source/assets/multi_node_dp_kimi.png +0 -0
  106. vllm_ascend-0.11.0rc1/docs/source/community/contributors.md +171 -0
  107. vllm_ascend-0.11.0rc1/docs/source/community/governance.md +48 -0
  108. vllm_ascend-0.11.0rc1/docs/source/community/user_stories/index.md +19 -0
  109. vllm_ascend-0.11.0rc1/docs/source/community/user_stories/llamafactory.md +19 -0
  110. vllm_ascend-0.11.0rc1/docs/source/community/versioning_policy.md +135 -0
  111. vllm_ascend-0.11.0rc1/docs/source/conf.py +142 -0
  112. vllm_ascend-0.11.0rc1/docs/source/developer_guide/contribution/index.md +111 -0
  113. vllm_ascend-0.11.0rc1/docs/source/developer_guide/contribution/testing.md +285 -0
  114. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +20 -0
  115. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +19 -0
  116. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +21 -0
  117. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +21 -0
  118. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/index.md +10 -0
  119. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/index.md +10 -0
  120. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/using_evalscope.md +175 -0
  121. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +300 -0
  122. vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/using_opencompass.md +123 -0
  123. vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/ACL_Graph.md +102 -0
  124. vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +260 -0
  125. vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/index.md +11 -0
  126. vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/patch.md +75 -0
  127. vllm_ascend-0.11.0rc1/docs/source/developer_guide/modeling/adding_a_new_model.md +258 -0
  128. vllm_ascend-0.11.0rc1/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
  129. vllm_ascend-0.11.0rc1/docs/source/developer_guide/modeling/index.md +10 -0
  130. vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/index.md +9 -0
  131. vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/optimization_and_tuning.md +183 -0
  132. vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/performance_benchmark.md +194 -0
  133. vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/profile_execute_duration.md +40 -0
  134. vllm_ascend-0.11.0rc1/docs/source/faqs.md +216 -0
  135. vllm_ascend-0.11.0rc1/docs/source/index.md +71 -0
  136. vllm_ascend-0.11.0rc1/docs/source/installation.md +287 -0
  137. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +1647 -0
  138. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +204 -0
  139. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +103 -0
  140. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +87 -0
  141. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +624 -0
  142. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +187 -0
  143. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
  144. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +26 -0
  145. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +26 -0
  146. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +112 -0
  147. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +65 -0
  148. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +83 -0
  149. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +33 -0
  150. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
  151. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +333 -0
  152. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +29 -0
  153. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +32 -0
  154. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +26 -0
  155. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +88 -0
  156. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +81 -0
  157. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +479 -0
  158. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +79 -0
  159. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +293 -0
  160. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +149 -0
  161. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +29 -0
  162. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +192 -0
  163. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +62 -0
  164. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +86 -0
  165. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +82 -0
  166. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +71 -0
  167. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +110 -0
  168. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +107 -0
  169. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +77 -0
  170. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +99 -0
  171. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +70 -0
  172. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +286 -0
  173. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +28 -0
  174. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +30 -0
  175. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +121 -0
  176. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +30 -0
  177. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +58 -0
  178. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +183 -0
  179. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +156 -0
  180. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +220 -0
  181. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +1660 -0
  182. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +30 -0
  183. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +264 -0
  184. vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +214 -0
  185. vllm_ascend-0.11.0rc1/docs/source/quick_start.md +185 -0
  186. vllm_ascend-0.11.0rc1/docs/source/tutorials/index.md +24 -0
  187. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi-node_dsv3.2.md +405 -0
  188. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node.md +212 -0
  189. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_kimi.md +158 -0
  190. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md +244 -0
  191. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md +616 -0
  192. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_qwen3vl.md +165 -0
  193. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_ray.md +182 -0
  194. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu.md +107 -0
  195. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_moge.md +242 -0
  196. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_quantization.md +137 -0
  197. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_qwen3_moe.md +109 -0
  198. vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
  199. vllm_ascend-0.11.0rc1/docs/source/tutorials/single_node_300i.md +408 -0
  200. vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu.md +202 -0
  201. vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_audio.md +122 -0
  202. vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_multimodal.md +192 -0
  203. vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
  204. vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_qwen3_quantization.md +133 -0
  205. vllm_ascend-0.11.0rc1/docs/source/user_guide/configuration/additional_config.md +116 -0
  206. vllm_ascend-0.11.0rc1/docs/source/user_guide/configuration/env_vars.md +9 -0
  207. vllm_ascend-0.11.0rc1/docs/source/user_guide/configuration/index.md +10 -0
  208. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +91 -0
  209. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/graph_mode.md +78 -0
  210. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  211. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  212. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/index.md +14 -0
  213. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/lora.md +23 -0
  214. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/quantization.md +125 -0
  215. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/sleep_mode.md +114 -0
  216. vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/structured_output.md +163 -0
  217. vllm_ascend-0.11.0rc1/docs/source/user_guide/release_notes.md +689 -0
  218. vllm_ascend-0.11.0rc1/docs/source/user_guide/support_matrix/index.md +10 -0
  219. vllm_ascend-0.11.0rc1/docs/source/user_guide/support_matrix/supported_features.md +45 -0
  220. vllm_ascend-0.11.0rc1/docs/source/user_guide/support_matrix/supported_models.md +83 -0
  221. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/README.md +242 -0
  222. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/gen_ranktable.py +141 -0
  223. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/gen_ranktable.sh +88 -0
  224. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +576 -0
  225. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +657 -0
  226. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
  227. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +278 -0
  228. vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/run_server.sh +32 -0
  229. vllm_ascend-0.11.0rc1/examples/eplb/eplb_deepseek.py +205 -0
  230. vllm_ascend-0.11.0rc1/examples/eplb/eplb_strategy.py +186 -0
  231. vllm_ascend-0.11.0rc1/examples/external_online_dp/README.md +38 -0
  232. vllm_ascend-0.11.0rc1/examples/external_online_dp/launch_online_dp.py +97 -0
  233. vllm_ascend-0.11.0rc1/examples/external_online_dp/run_dp_template.sh +46 -0
  234. vllm_ascend-0.11.0rc1/examples/offline_data_parallel.py +257 -0
  235. vllm_ascend-0.11.0rc1/examples/offline_disaggregated_prefill_npu.py +147 -0
  236. vllm_ascend-0.11.0rc1/examples/offline_dualbatch_overlap_npu.py +52 -0
  237. vllm_ascend-0.11.0rc1/examples/offline_embed.py +58 -0
  238. vllm_ascend-0.11.0rc1/examples/offline_external_launcher.py +330 -0
  239. vllm_ascend-0.11.0rc1/examples/offline_inference_audio_language.py +105 -0
  240. vllm_ascend-0.11.0rc1/examples/offline_inference_npu.py +51 -0
  241. vllm_ascend-0.11.0rc1/examples/offline_inference_npu_tp2.py +55 -0
  242. vllm_ascend-0.11.0rc1/examples/offline_inference_sleep_mode_npu.py +57 -0
  243. vllm_ascend-0.11.0rc1/examples/offline_weight_load.py +326 -0
  244. vllm_ascend-0.11.0rc1/examples/prompt_embedding_inference.py +88 -0
  245. vllm_ascend-0.11.0rc1/examples/run_dp_server.sh +32 -0
  246. vllm_ascend-0.11.0rc1/format.sh +44 -0
  247. vllm_ascend-0.11.0rc1/mypy.ini +16 -0
  248. vllm_ascend-0.11.0rc1/pyproject.toml +35 -0
  249. vllm_ascend-0.11.0rc1/requirements-dev.txt +20 -0
  250. vllm_ascend-0.11.0rc1/requirements-lint.txt +9 -0
  251. vllm_ascend-0.11.0rc1/requirements.txt +28 -0
  252. vllm_ascend-0.11.0rc1/setup.py +399 -0
  253. vllm_ascend-0.11.0rc1/tests/e2e/310p/test_offline_inference_310p.py +72 -0
  254. vllm_ascend-0.11.0rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +62 -0
  255. vllm_ascend-0.11.0rc1/tests/e2e/common.sh +74 -0
  256. vllm_ascend-0.11.0rc1/tests/e2e/conftest.py +589 -0
  257. vllm_ascend-0.11.0rc1/tests/e2e/doctests/001-quickstart-test.sh +64 -0
  258. vllm_ascend-0.11.0rc1/tests/e2e/doctests/002-pip-binary-installation-test.sh +74 -0
  259. vllm_ascend-0.11.0rc1/tests/e2e/model_utils.py +74 -0
  260. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +17 -0
  261. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +11 -0
  262. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +10 -0
  263. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +10 -0
  264. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +20 -0
  265. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-8B-Base.yaml +15 -0
  266. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-8B.yaml +11 -0
  267. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +12 -0
  268. vllm_ascend-0.11.0rc1/tests/e2e/models/configs/accuracy.txt +8 -0
  269. vllm_ascend-0.11.0rc1/tests/e2e/models/conftest.py +72 -0
  270. vllm_ascend-0.11.0rc1/tests/e2e/models/report_template.md +34 -0
  271. vllm_ascend-0.11.0rc1/tests/e2e/models/test_lm_eval_correctness.py +157 -0
  272. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_data_parallel.py +73 -0
  273. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_expert_parallel.py +42 -0
  274. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_external_launcher.py +243 -0
  275. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_full_graph_mode.py +72 -0
  276. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +86 -0
  277. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_ilama_lora_tp2.py +23 -0
  278. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_offline_inference_distributed.py +228 -0
  279. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_pipeline_parallel.py +47 -0
  280. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_prefix_caching.py +148 -0
  281. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_qwen3_moe.py +103 -0
  282. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_single_request_aclgraph.py +84 -0
  283. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_torchair_graph_mode.py +227 -0
  284. vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_weight_loader.py +109 -0
  285. vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py +106 -0
  286. vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +110 -0
  287. vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +104 -0
  288. vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen3_32b.py +99 -0
  289. vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen3_32b_int8.py +118 -0
  290. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +126 -0
  291. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +76 -0
  292. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/multi_node_config.py +207 -0
  293. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/utils.py +95 -0
  294. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/scripts/lws.yaml +132 -0
  295. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/scripts/run.sh +145 -0
  296. vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/test_multi_node.py +30 -0
  297. vllm_ascend-0.11.0rc1/tests/e2e/pd_disaggreate/run_edge_case_test.sh +139 -0
  298. vllm_ascend-0.11.0rc1/tests/e2e/pd_disaggreate/test_edge_cases.py +81 -0
  299. vllm_ascend-0.11.0rc1/tests/e2e/prompts/example.txt +8 -0
  300. vllm_ascend-0.11.0rc1/tests/e2e/run_doctests.sh +32 -0
  301. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_bgmv_expand.py +46 -0
  302. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_bgmv_shrink.py +45 -0
  303. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_fused_moe.py +341 -0
  304. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +37 -0
  305. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_mla_preprocess.py +108 -0
  306. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_rotary_embedding.py +351 -0
  307. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +98 -0
  308. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +110 -0
  309. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +106 -0
  310. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +148 -0
  311. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_aclgraph.py +203 -0
  312. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_aclgraph_mem.py +100 -0
  313. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_ascend_scheduler.py +113 -0
  314. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_bge_model.py +49 -0
  315. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_camem.py +99 -0
  316. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_chunked.py +82 -0
  317. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_embedding.py +49 -0
  318. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_embedding_aclgraph.py +55 -0
  319. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_guided_decoding.py +153 -0
  320. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_ilama_lora.py +62 -0
  321. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +103 -0
  322. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_profile_execute_duration.py +71 -0
  323. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_quantization.py +35 -0
  324. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_sampler.py +49 -0
  325. vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_vlm.py +124 -0
  326. vllm_ascend-0.11.0rc1/tests/e2e/utils.py +106 -0
  327. vllm_ascend-0.11.0rc1/tests/e2e/vllm_interface/singlecard/test_sampler.py +36 -0
  328. vllm_ascend-0.11.0rc1/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
  329. vllm_ascend-0.11.0rc1/tests/ut/attention/test_attention_mask.py +95 -0
  330. vllm_ascend-0.11.0rc1/tests/ut/attention/test_attention_v1.py +702 -0
  331. vllm_ascend-0.11.0rc1/tests/ut/attention/test_mla_v1.py +675 -0
  332. vllm_ascend-0.11.0rc1/tests/ut/base.py +44 -0
  333. vllm_ascend-0.11.0rc1/tests/ut/compilation/test_acl_graph.py +720 -0
  334. vllm_ascend-0.11.0rc1/tests/ut/conftest.py +26 -0
  335. vllm_ascend-0.11.0rc1/tests/ut/core/test_schedule_config.py +148 -0
  336. vllm_ascend-0.11.0rc1/tests/ut/core/test_scheduler.py +807 -0
  337. vllm_ascend-0.11.0rc1/tests/ut/device_allocator/test_camem.py +188 -0
  338. vllm_ascend-0.11.0rc1/tests/ut/distributed/device_communicators/test_pyhccl.py +84 -0
  339. vllm_ascend-0.11.0rc1/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +173 -0
  340. vllm_ascend-0.11.0rc1/tests/ut/distributed/test_communicator.py +89 -0
  341. vllm_ascend-0.11.0rc1/tests/ut/distributed/test_parallel_state.py +58 -0
  342. vllm_ascend-0.11.0rc1/tests/ut/eplb/adaptor/test_abstract_adaptor.py +73 -0
  343. vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_abstract.py +31 -0
  344. vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +98 -0
  345. vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +99 -0
  346. vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_factor.py +23 -0
  347. vllm_ascend-0.11.0rc1/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +116 -0
  348. vllm_ascend-0.11.0rc1/tests/ut/eplb/core/test_eplb_utils.py +225 -0
  349. vllm_ascend-0.11.0rc1/tests/ut/fake_weight/config.json +28 -0
  350. vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_llmdatadist_connector.py +96 -0
  351. vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_mooncake_connector.py +1139 -0
  352. vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +924 -0
  353. vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_remote_decode_lifecycle.py +169 -0
  354. vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +239 -0
  355. vllm_ascend-0.11.0rc1/tests/ut/kv_connector/utils.py +208 -0
  356. vllm_ascend-0.11.0rc1/tests/ut/models/conftest.py +100 -0
  357. vllm_ascend-0.11.0rc1/tests/ut/models/test_qwen2_5_vl.py +492 -0
  358. vllm_ascend-0.11.0rc1/tests/ut/models/test_qwen2_5_vl_without_padding.py +422 -0
  359. vllm_ascend-0.11.0rc1/tests/ut/models/test_qwen2_vl.py +200 -0
  360. vllm_ascend-0.11.0rc1/tests/ut/multistream/test_base.py +32 -0
  361. vllm_ascend-0.11.0rc1/tests/ut/multistream/test_decorator.py +47 -0
  362. vllm_ascend-0.11.0rc1/tests/ut/multistream/test_layers.py +198 -0
  363. vllm_ascend-0.11.0rc1/tests/ut/multistream/test_metadata.py +246 -0
  364. vllm_ascend-0.11.0rc1/tests/ut/multistream/test_ms_split.py +147 -0
  365. vllm_ascend-0.11.0rc1/tests/ut/ops/expert_map.json +17 -0
  366. vllm_ascend-0.11.0rc1/tests/ut/ops/test_activation.py +72 -0
  367. vllm_ascend-0.11.0rc1/tests/ut/ops/test_comm_utils.py +98 -0
  368. vllm_ascend-0.11.0rc1/tests/ut/ops/test_common_fused_moe.py +56 -0
  369. vllm_ascend-0.11.0rc1/tests/ut/ops/test_expert_load_balancer.py +141 -0
  370. vllm_ascend-0.11.0rc1/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +271 -0
  371. vllm_ascend-0.11.0rc1/tests/ut/ops/test_fused_ops.py +597 -0
  372. vllm_ascend-0.11.0rc1/tests/ut/ops/test_layernorm.py +156 -0
  373. vllm_ascend-0.11.0rc1/tests/ut/ops/test_linear.py +147 -0
  374. vllm_ascend-0.11.0rc1/tests/ut/ops/test_moe_comm_method.py +230 -0
  375. vllm_ascend-0.11.0rc1/tests/ut/ops/test_rotary_embedding.py +462 -0
  376. vllm_ascend-0.11.0rc1/tests/ut/ops/test_token_dispatcher.py +515 -0
  377. vllm_ascend-0.11.0rc1/tests/ut/ops/test_vocab_parallel_embedding.py +240 -0
  378. vllm_ascend-0.11.0rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +119 -0
  379. vllm_ascend-0.11.0rc1/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +77 -0
  380. vllm_ascend-0.11.0rc1/tests/ut/quantization/test_quant_config.py +233 -0
  381. vllm_ascend-0.11.0rc1/tests/ut/quantization/test_utils.py +62 -0
  382. vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +246 -0
  383. vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +303 -0
  384. vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w8a8.py +977 -0
  385. vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +69 -0
  386. vllm_ascend-0.11.0rc1/tests/ut/sample/logits_processor/test_builtin.py +40 -0
  387. vllm_ascend-0.11.0rc1/tests/ut/sample/test_rejection_sampler.py +203 -0
  388. vllm_ascend-0.11.0rc1/tests/ut/sample/test_sampler.py +32 -0
  389. vllm_ascend-0.11.0rc1/tests/ut/test_ascend_config.py +362 -0
  390. vllm_ascend-0.11.0rc1/tests/ut/test_envs.py +62 -0
  391. vllm_ascend-0.11.0rc1/tests/ut/test_platform.py +765 -0
  392. vllm_ascend-0.11.0rc1/tests/ut/test_utils.py +381 -0
  393. vllm_ascend-0.11.0rc1/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +197 -0
  394. vllm_ascend-0.11.0rc1/tests/ut/torchair/models/test_torchair_deepseek_v2.py +357 -0
  395. vllm_ascend-0.11.0rc1/tests/ut/torchair/ops/test_torchair_fused_moe.py +422 -0
  396. vllm_ascend-0.11.0rc1/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +331 -0
  397. vllm_ascend-0.11.0rc1/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +296 -0
  398. vllm_ascend-0.11.0rc1/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +129 -0
  399. vllm_ascend-0.11.0rc1/tests/ut/torchair/test_torchair_attention.py +95 -0
  400. vllm_ascend-0.11.0rc1/tests/ut/torchair/test_torchair_mla.py +850 -0
  401. vllm_ascend-0.11.0rc1/tests/ut/torchair/test_utils.py +164 -0
  402. vllm_ascend-0.11.0rc1/tests/ut/worker/test_input_batch.py +372 -0
  403. vllm_ascend-0.11.0rc1/tests/ut/worker/test_model_runner_v1.py +111 -0
  404. vllm_ascend-0.11.0rc1/tests/ut/worker/test_worker_v1.py +1240 -0
  405. vllm_ascend-0.11.0rc1/tools/aisbench.py +227 -0
  406. vllm_ascend-0.11.0rc1/tools/check_python_src_init.py +76 -0
  407. vllm_ascend-0.11.0rc1/tools/enforce_regex_import.py +104 -0
  408. vllm_ascend-0.11.0rc1/tools/mypy.sh +40 -0
  409. vllm_ascend-0.11.0rc1/tools/send_mm_request.py +49 -0
  410. vllm_ascend-0.11.0rc1/tools/shellcheck.sh +45 -0
  411. vllm_ascend-0.11.0rc1/typos.toml +177 -0
  412. vllm_ascend-0.11.0rc1/vllm_ascend/__init__.py +33 -0
  413. vllm_ascend-0.11.0rc1/vllm_ascend/_version.py +34 -0
  414. vllm_ascend-0.11.0rc1/vllm_ascend/ascend_config.py +310 -0
  415. vllm_ascend-0.11.0rc1/vllm_ascend/ascend_forward_context.py +211 -0
  416. vllm_ascend-0.11.0rc1/vllm_ascend/attention/attention_mask.py +96 -0
  417. vllm_ascend-0.11.0rc1/vllm_ascend/attention/attention_v1.py +719 -0
  418. vllm_ascend-0.11.0rc1/vllm_ascend/attention/mla_v1.py +1323 -0
  419. vllm_ascend-0.11.0rc1/vllm_ascend/attention/sfa_v1.py +988 -0
  420. vllm_ascend-0.11.0rc1/vllm_ascend/attention/utils.py +180 -0
  421. vllm_ascend-0.11.0rc1/vllm_ascend/compilation/acl_graph.py +344 -0
  422. vllm_ascend-0.11.0rc1/vllm_ascend/core/recompute_schedule_config.py +39 -0
  423. vllm_ascend-0.11.0rc1/vllm_ascend/core/recompute_scheduler.py +1392 -0
  424. vllm_ascend-0.11.0rc1/vllm_ascend/core/schedule_config.py +108 -0
  425. vllm_ascend-0.11.0rc1/vllm_ascend/core/scheduler.py +587 -0
  426. vllm_ascend-0.11.0rc1/vllm_ascend/cpu_binding.py +330 -0
  427. vllm_ascend-0.11.0rc1/vllm_ascend/device_allocator/camem.py +278 -0
  428. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/__init__.py +40 -0
  429. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_connector.py +471 -0
  430. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +202 -0
  431. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_manager/metadata.py +269 -0
  432. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +994 -0
  433. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/config_data.py +449 -0
  434. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/kv_transfer.py +282 -0
  435. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/mooncake_engine.py +621 -0
  436. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/mooncake_store.py +126 -0
  437. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +492 -0
  438. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/transfer_engine.py +28 -0
  439. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake_connector.py +1263 -0
  440. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake_layerwise_connector.py +1153 -0
  441. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/parallel_state.py +196 -0
  442. vllm_ascend-0.11.0rc1/vllm_ascend/distributed/utils.py +61 -0
  443. vllm_ascend-0.11.0rc1/vllm_ascend/envs.py +188 -0
  444. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/__init__.py +0 -0
  445. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/adaptor/__init__.py +0 -0
  446. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/adaptor/abstract_adaptor.py +44 -0
  447. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/adaptor/vllm_adaptor.py +289 -0
  448. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/__init__.py +0 -0
  449. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +138 -0
  450. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/eplb_utils.py +190 -0
  451. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/eplb_worker.py +440 -0
  452. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/__init__.py +0 -0
  453. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_abstract.py +42 -0
  454. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +389 -0
  455. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +771 -0
  456. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_factory.py +33 -0
  457. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_flashlb.py +651 -0
  458. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_random.py +30 -0
  459. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/eplb_updator.py +209 -0
  460. vllm_ascend-0.11.0rc1/vllm_ascend/eplb/utils.py +77 -0
  461. vllm_ascend-0.11.0rc1/vllm_ascend/lora/__init__.py +0 -0
  462. vllm_ascend-0.11.0rc1/vllm_ascend/lora/lora_ops.py +113 -0
  463. vllm_ascend-0.11.0rc1/vllm_ascend/lora/punica_npu.py +356 -0
  464. vllm_ascend-0.11.0rc1/vllm_ascend/lora/utils.py +110 -0
  465. vllm_ascend-0.11.0rc1/vllm_ascend/meta_registration.py +105 -0
  466. vllm_ascend-0.11.0rc1/vllm_ascend/models/__init__.py +48 -0
  467. vllm_ascend-0.11.0rc1/vllm_ascend/models/deepseek_v3_2.py +633 -0
  468. vllm_ascend-0.11.0rc1/vllm_ascend/models/layers/__init__.py +0 -0
  469. vllm_ascend-0.11.0rc1/vllm_ascend/models/layers/mla.py +193 -0
  470. vllm_ascend-0.11.0rc1/vllm_ascend/models/layers/sfa.py +233 -0
  471. vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_5_omni_thinker.py +54 -0
  472. vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_5_vl.py +562 -0
  473. vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_5_vl_without_padding.py +605 -0
  474. vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_vl.py +369 -0
  475. vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen3_next.py +676 -0
  476. vllm_ascend-0.11.0rc1/vllm_ascend/multistream/__init__.py +0 -0
  477. vllm_ascend-0.11.0rc1/vllm_ascend/multistream/decorator.py +22 -0
  478. vllm_ascend-0.11.0rc1/vllm_ascend/multistream/ms_split.py +247 -0
  479. vllm_ascend-0.11.0rc1/vllm_ascend/ops/__init__.py +57 -0
  480. vllm_ascend-0.11.0rc1/vllm_ascend/ops/activation.py +44 -0
  481. vllm_ascend-0.11.0rc1/vllm_ascend/ops/attention.py +309 -0
  482. vllm_ascend-0.11.0rc1/vllm_ascend/ops/casual_conv1d.py +539 -0
  483. vllm_ascend-0.11.0rc1/vllm_ascend/ops/common_fused_moe.py +469 -0
  484. vllm_ascend-0.11.0rc1/vllm_ascend/ops/expert_load_balancer.py +117 -0
  485. vllm_ascend-0.11.0rc1/vllm_ascend/ops/fla.py +299 -0
  486. vllm_ascend-0.11.0rc1/vllm_ascend/ops/layernorm.py +213 -0
  487. vllm_ascend-0.11.0rc1/vllm_ascend/ops/linear.py +467 -0
  488. vllm_ascend-0.11.0rc1/vllm_ascend/ops/linear_op.py +531 -0
  489. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/__init__.py +0 -0
  490. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/comm_utils.py +113 -0
  491. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/experts_selector.py +277 -0
  492. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +520 -0
  493. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/moe_comm_method.py +273 -0
  494. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/moe_mlp.py +258 -0
  495. vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/token_dispatcher.py +726 -0
  496. vllm_ascend-0.11.0rc1/vllm_ascend/ops/register_custom_ops.py +315 -0
  497. vllm_ascend-0.11.0rc1/vllm_ascend/ops/rotary_embedding.py +431 -0
  498. vllm_ascend-0.11.0rc1/vllm_ascend/ops/sigmoid_gating.py +384 -0
  499. vllm_ascend-0.11.0rc1/vllm_ascend/ops/vocab_parallel_embedding.py +255 -0
  500. vllm_ascend-0.11.0rc1/vllm_ascend/ops/weight_prefetch.py +112 -0
  501. vllm_ascend-0.11.0rc1/vllm_ascend/patch/__init__.py +174 -0
  502. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/__init__.py +30 -0
  503. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_config.py +234 -0
  504. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_core.py +68 -0
  505. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_distributed.py +115 -0
  506. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_mamba_config.py +96 -0
  507. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_message_queue.py +164 -0
  508. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_multiproc_executor.py +151 -0
  509. vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_sched_yield.py +13 -0
  510. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/__init__.py +32 -0
  511. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_attention_layer.py +92 -0
  512. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_deepseek_mtp.py +94 -0
  513. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_distributed.py +115 -0
  514. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_logits.py +26 -0
  515. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_multimodal_merge.py +58 -0
  516. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_roberta.py +88 -0
  517. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_triton.py +16 -0
  518. vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_weight_loader.py +41 -0
  519. vllm_ascend-0.11.0rc1/vllm_ascend/platform.py +418 -0
  520. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/__init__.py +0 -0
  521. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/quant_config.py +474 -0
  522. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/utils.py +87 -0
  523. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +193 -0
  524. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w4a8_dynamic.py +490 -0
  525. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w8a8.py +674 -0
  526. vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w8a8_dynamic.py +284 -0
  527. vllm_ascend-0.11.0rc1/vllm_ascend/sample/__init__.py +0 -0
  528. vllm_ascend-0.11.0rc1/vllm_ascend/sample/logits_processor/__init__.py +50 -0
  529. vllm_ascend-0.11.0rc1/vllm_ascend/sample/logits_processor/builtin.py +35 -0
  530. vllm_ascend-0.11.0rc1/vllm_ascend/sample/rejection_sampler.py +504 -0
  531. vllm_ascend-0.11.0rc1/vllm_ascend/sample/sampler.py +74 -0
  532. vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/__init__.py +33 -0
  533. vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/eagle_proposer.py +661 -0
  534. vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/interface.py +53 -0
  535. vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/mtp_proposer.py +672 -0
  536. vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/ngram_proposer.py +68 -0
  537. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/__init__.py +0 -0
  538. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/__init__.py +0 -0
  539. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/qwen2.py +363 -0
  540. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/qwen3_moe.py +537 -0
  541. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
  542. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1301 -0
  543. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_deepseek_v3.py +28 -0
  544. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_pangu_moe.py +1118 -0
  545. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/__init__.py +0 -0
  546. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/sequence_parallel.py +120 -0
  547. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/shared_weight_layer.py +245 -0
  548. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_activation.py +37 -0
  549. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_fused_moe.py +1429 -0
  550. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_layernorm.py +78 -0
  551. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +365 -0
  552. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py +38 -0
  553. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/quantization/__init__.py +0 -0
  554. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +501 -0
  555. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1080 -0
  556. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_attention.py +463 -0
  557. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_mla.py +1310 -0
  558. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_model_runner.py +557 -0
  559. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_sfa.py +1333 -0
  560. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_worker.py +63 -0
  561. vllm_ascend-0.11.0rc1/vllm_ascend/torchair/utils.py +275 -0
  562. vllm_ascend-0.11.0rc1/vllm_ascend/utils.py +764 -0
  563. vllm_ascend-0.11.0rc1/vllm_ascend/worker/__init__.py +0 -0
  564. vllm_ascend-0.11.0rc1/vllm_ascend/worker/block_table.py +312 -0
  565. vllm_ascend-0.11.0rc1/vllm_ascend/worker/model_runner_v1.py +3674 -0
  566. vllm_ascend-0.11.0rc1/vllm_ascend/worker/npu_input_batch.py +842 -0
  567. vllm_ascend-0.11.0rc1/vllm_ascend/worker/worker_v1.py +442 -0
  568. vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/PKG-INFO +142 -0
  569. vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/SOURCES.txt +627 -0
  570. vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/entry_points.txt +6 -0
  571. vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/requires.txt +19 -0
  572. vllm_ascend-0.9.0rc2/.github/Dockerfile.buildwheel +0 -48
  573. vllm_ascend-0.9.0rc2/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -37
  574. vllm_ascend-0.9.0rc2/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -49
  575. vllm_ascend-0.9.0rc2/.github/PULL_REQUEST_TEMPLATE.md +0 -28
  576. vllm_ascend-0.9.0rc2/.github/actionlint.yaml +0 -8
  577. vllm_ascend-0.9.0rc2/.github/workflows/accuracy_report.yaml +0 -202
  578. vllm_ascend-0.9.0rc2/.github/workflows/accuracy_test.yaml +0 -255
  579. vllm_ascend-0.9.0rc2/.github/workflows/actionlint.yml +0 -53
  580. vllm_ascend-0.9.0rc2/.github/workflows/image_openeuler.yml +0 -99
  581. vllm_ascend-0.9.0rc2/.github/workflows/image_ubuntu.yml +0 -98
  582. vllm_ascend-0.9.0rc2/.github/workflows/label_merge_conflict.yml +0 -21
  583. vllm_ascend-0.9.0rc2/.github/workflows/labeler.yml +0 -18
  584. vllm_ascend-0.9.0rc2/.github/workflows/nightly_benchmarks.yaml +0 -193
  585. vllm_ascend-0.9.0rc2/.github/workflows/release_code.yml +0 -87
  586. vllm_ascend-0.9.0rc2/.github/workflows/release_whl.yml +0 -95
  587. vllm_ascend-0.9.0rc2/.github/workflows/shellcheck.yml +0 -49
  588. vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_doctest.yaml +0 -102
  589. vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test.yaml +0 -222
  590. vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test_long_term.yaml +0 -106
  591. vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test_pd.yaml +0 -106
  592. vllm_ascend-0.9.0rc2/.gitignore +0 -198
  593. vllm_ascend-0.9.0rc2/CMakeLists.txt +0 -100
  594. vllm_ascend-0.9.0rc2/CODE_OF_CONDUCT.md +0 -128
  595. vllm_ascend-0.9.0rc2/Dockerfile +0 -59
  596. vllm_ascend-0.9.0rc2/Dockerfile.openEuler +0 -56
  597. vllm_ascend-0.9.0rc2/PKG-INFO +0 -100
  598. vllm_ascend-0.9.0rc2/README.md +0 -79
  599. vllm_ascend-0.9.0rc2/README.zh.md +0 -78
  600. vllm_ascend-0.9.0rc2/benchmarks/README.md +0 -57
  601. vllm_ascend-0.9.0rc2/benchmarks/requirements-bench.txt +0 -5
  602. vllm_ascend-0.9.0rc2/benchmarks/scripts/convert_json_to_markdown.py +0 -183
  603. vllm_ascend-0.9.0rc2/benchmarks/scripts/patch_benchmark_dataset.py +0 -68
  604. vllm_ascend-0.9.0rc2/benchmarks/scripts/perf_result_template.md +0 -31
  605. vllm_ascend-0.9.0rc2/benchmarks/scripts/run-performance-benchmarks.sh +0 -323
  606. vllm_ascend-0.9.0rc2/benchmarks/scripts/run_accuracy.py +0 -226
  607. vllm_ascend-0.9.0rc2/benchmarks/tests/latency-tests.json +0 -13
  608. vllm_ascend-0.9.0rc2/benchmarks/tests/serving-tests.json +0 -53
  609. vllm_ascend-0.9.0rc2/benchmarks/tests/throughput-tests.json +0 -27
  610. vllm_ascend-0.9.0rc2/csrc/camem_allocator.cpp +0 -338
  611. vllm_ascend-0.9.0rc2/csrc/kernels/advance_step.cpp +0 -241
  612. vllm_ascend-0.9.0rc2/csrc/kernels/pos_encoding_kernels.cpp +0 -367
  613. vllm_ascend-0.9.0rc2/csrc/kernels/utils.h +0 -49
  614. vllm_ascend-0.9.0rc2/csrc/ops.h +0 -61
  615. vllm_ascend-0.9.0rc2/csrc/torch_binding.cpp +0 -204
  616. vllm_ascend-0.9.0rc2/csrc/utils.h +0 -43
  617. vllm_ascend-0.9.0rc2/docs/Makefile +0 -21
  618. vllm_ascend-0.9.0rc2/docs/README.md +0 -23
  619. vllm_ascend-0.9.0rc2/docs/requirements-docs.txt +0 -9
  620. vllm_ascend-0.9.0rc2/docs/requirements-test.txt +0 -2
  621. vllm_ascend-0.9.0rc2/docs/source/_templates/sections/header.html +0 -58
  622. vllm_ascend-0.9.0rc2/docs/source/community/contributors.md +0 -84
  623. vllm_ascend-0.9.0rc2/docs/source/community/governance.md +0 -48
  624. vllm_ascend-0.9.0rc2/docs/source/conf.py +0 -135
  625. vllm_ascend-0.9.0rc2/docs/source/developer_guide/contributing.md +0 -113
  626. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -6
  627. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/index.md +0 -17
  628. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/performance_benchmark.md +0 -187
  629. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/profile_execute_duration.md +0 -34
  630. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/using_evalscope.md +0 -173
  631. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -62
  632. vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/using_opencompass.md +0 -120
  633. vllm_ascend-0.9.0rc2/docs/source/developer_guide/versioning_policy.md +0 -106
  634. vllm_ascend-0.9.0rc2/docs/source/faqs.md +0 -125
  635. vllm_ascend-0.9.0rc2/docs/source/index.md +0 -76
  636. vllm_ascend-0.9.0rc2/docs/source/installation.md +0 -274
  637. vllm_ascend-0.9.0rc2/docs/source/quick_start.md +0 -169
  638. vllm_ascend-0.9.0rc2/docs/source/tutorials/index.md +0 -11
  639. vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_node.md +0 -195
  640. vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_npu.md +0 -107
  641. vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_npu_quantization.md +0 -134
  642. vllm_ascend-0.9.0rc2/docs/source/tutorials/single_npu.md +0 -133
  643. vllm_ascend-0.9.0rc2/docs/source/tutorials/single_npu_multimodal.md +0 -191
  644. vllm_ascend-0.9.0rc2/docs/source/user_guide/additional_config.md +0 -76
  645. vllm_ascend-0.9.0rc2/docs/source/user_guide/env_vars.md +0 -9
  646. vllm_ascend-0.9.0rc2/docs/source/user_guide/graph_mode.md +0 -82
  647. vllm_ascend-0.9.0rc2/docs/source/user_guide/release.template.md +0 -13
  648. vllm_ascend-0.9.0rc2/docs/source/user_guide/release_notes.md +0 -243
  649. vllm_ascend-0.9.0rc2/docs/source/user_guide/supported_models.md +0 -52
  650. vllm_ascend-0.9.0rc2/docs/source/user_guide/suppoted_features.md +0 -49
  651. vllm_ascend-0.9.0rc2/docs/source/user_stories/example.md +0 -15
  652. vllm_ascend-0.9.0rc2/docs/source/user_stories/index.md +0 -22
  653. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -138
  654. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/dp_proxy.py +0 -463
  655. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/find_device_ips.py +0 -67
  656. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -193
  657. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/run_decode_server.sh +0 -37
  658. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/run_prefill_server.sh +0 -37
  659. vllm_ascend-0.9.0rc2/examples/dp_offline/data_parallel.py +0 -85
  660. vllm_ascend-0.9.0rc2/examples/dp_offline/run_dp.sh +0 -19
  661. vllm_ascend-0.9.0rc2/examples/offline_disaggregated_prefill_npu.py +0 -140
  662. vllm_ascend-0.9.0rc2/examples/offline_distributed_inference_npu.py +0 -44
  663. vllm_ascend-0.9.0rc2/examples/offline_dualbatch_overlap_npu.py +0 -51
  664. vllm_ascend-0.9.0rc2/examples/offline_inference_audio_language.py +0 -126
  665. vllm_ascend-0.9.0rc2/examples/offline_inference_npu.py +0 -39
  666. vllm_ascend-0.9.0rc2/examples/offline_inference_npu_v1.py +0 -49
  667. vllm_ascend-0.9.0rc2/examples/offline_multi_step_custom_ops.py +0 -53
  668. vllm_ascend-0.9.0rc2/examples/prompt_embedding_inference.py +0 -83
  669. vllm_ascend-0.9.0rc2/examples/run_dp_server.sh +0 -30
  670. vllm_ascend-0.9.0rc2/format.sh +0 -343
  671. vllm_ascend-0.9.0rc2/mypy.ini +0 -13
  672. vllm_ascend-0.9.0rc2/pyproject.toml +0 -23
  673. vllm_ascend-0.9.0rc2/pytest.ini +0 -68
  674. vllm_ascend-0.9.0rc2/requirements-dev.txt +0 -12
  675. vllm_ascend-0.9.0rc2/requirements-lint.txt +0 -15
  676. vllm_ascend-0.9.0rc2/requirements.txt +0 -23
  677. vllm_ascend-0.9.0rc2/setup.py +0 -368
  678. vllm_ascend-0.9.0rc2/tests/conftest.py +0 -359
  679. vllm_ascend-0.9.0rc2/tests/e2e/common.sh +0 -51
  680. vllm_ascend-0.9.0rc2/tests/e2e/doctests/001-quickstart-test.sh +0 -55
  681. vllm_ascend-0.9.0rc2/tests/e2e/run_doctests.sh +0 -27
  682. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/__init__.py +0 -18
  683. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/conftest.py +0 -28
  684. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/conftest.py +0 -212
  685. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_medusa_correctness.py +0 -445
  686. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mlp_correctness.py +0 -560
  687. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mtp_correctness.py +0 -455
  688. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_ngram_correctness.py +0 -404
  689. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py +0 -92
  690. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_v1_spec_decode.py +0 -155
  691. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_dynamic_spec_decode.py +0 -105
  692. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_multi_step_worker.py +0 -846
  693. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_ngram_worker.py +0 -237
  694. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_spec_decode_worker.py +0 -958
  695. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_utils.py +0 -165
  696. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/utils.py +0 -317
  697. vllm_ascend-0.9.0rc2/tests/long_term/test_accuracy.py +0 -111
  698. vllm_ascend-0.9.0rc2/tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -71
  699. vllm_ascend-0.9.0rc2/tests/model_utils.py +0 -274
  700. vllm_ascend-0.9.0rc2/tests/multicard/test_dynamic_npugraph_batchsize.py +0 -57
  701. vllm_ascend-0.9.0rc2/tests/multicard/test_ilama_lora_tp2.py +0 -21
  702. vllm_ascend-0.9.0rc2/tests/multicard/test_offline_inference_distributed.py +0 -97
  703. vllm_ascend-0.9.0rc2/tests/multicard/test_pyhccl_distributed.py +0 -110
  704. vllm_ascend-0.9.0rc2/tests/singlecard/compile/test_simple.py +0 -134
  705. vllm_ascend-0.9.0rc2/tests/singlecard/ops/test_fused_moe.py +0 -100
  706. vllm_ascend-0.9.0rc2/tests/singlecard/ops/test_multi_step.py +0 -190
  707. vllm_ascend-0.9.0rc2/tests/singlecard/ops/test_rotary_embedding.py +0 -198
  708. vllm_ascend-0.9.0rc2/tests/singlecard/sample/test_rejection_sampler.py +0 -611
  709. vllm_ascend-0.9.0rc2/tests/singlecard/test_aclgraph.py +0 -95
  710. vllm_ascend-0.9.0rc2/tests/singlecard/test_ascend_config.py +0 -189
  711. vllm_ascend-0.9.0rc2/tests/singlecard/test_camem.py +0 -85
  712. vllm_ascend-0.9.0rc2/tests/singlecard/test_guided_decoding.py +0 -175
  713. vllm_ascend-0.9.0rc2/tests/singlecard/test_ilama_lora.py +0 -60
  714. vllm_ascend-0.9.0rc2/tests/singlecard/test_offline_inference.py +0 -129
  715. vllm_ascend-0.9.0rc2/tests/singlecard/test_profile_execute_duration.py +0 -62
  716. vllm_ascend-0.9.0rc2/tests/singlecard/test_prompt_embedding.py +0 -259
  717. vllm_ascend-0.9.0rc2/tests/singlecard/test_pyhccl.py +0 -29
  718. vllm_ascend-0.9.0rc2/tests/singlecard/test_sampler.py +0 -147
  719. vllm_ascend-0.9.0rc2/tests/singlecard/test_scheduler.py +0 -404
  720. vllm_ascend-0.9.0rc2/tests/utils.py +0 -199
  721. vllm_ascend-0.9.0rc2/tools/mypy.sh +0 -36
  722. vllm_ascend-0.9.0rc2/tools/shellcheck.sh +0 -41
  723. vllm_ascend-0.9.0rc2/vllm_ascend/__init__.py +0 -31
  724. vllm_ascend-0.9.0rc2/vllm_ascend/_version.py +0 -21
  725. vllm_ascend-0.9.0rc2/vllm_ascend/ascend_config.py +0 -163
  726. vllm_ascend-0.9.0rc2/vllm_ascend/attention/attention.py +0 -1301
  727. vllm_ascend-0.9.0rc2/vllm_ascend/attention/attention_v1.py +0 -409
  728. vllm_ascend-0.9.0rc2/vllm_ascend/attention/mla_v1.py +0 -937
  729. vllm_ascend-0.9.0rc2/vllm_ascend/compilation/piecewise_backend.py +0 -231
  730. vllm_ascend-0.9.0rc2/vllm_ascend/core/schedule_config.py +0 -74
  731. vllm_ascend-0.9.0rc2/vllm_ascend/core/scheduler.py +0 -407
  732. vllm_ascend-0.9.0rc2/vllm_ascend/device_allocator/camem.py +0 -278
  733. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/__init__.py +0 -27
  734. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -209
  735. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/simple_connector.py +0 -376
  736. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -209
  737. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/utils.py +0 -40
  738. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/llmdatadist_connector.py +0 -470
  739. vllm_ascend-0.9.0rc2/vllm_ascend/distributed/parallel_state.py +0 -77
  740. vllm_ascend-0.9.0rc2/vllm_ascend/envs.py +0 -149
  741. vllm_ascend-0.9.0rc2/vllm_ascend/lora/punica_wrapper/punica_npu.py +0 -346
  742. vllm_ascend-0.9.0rc2/vllm_ascend/models/__init__.py +0 -49
  743. vllm_ascend-0.9.0rc2/vllm_ascend/models/deepseek_dbo.py +0 -1118
  744. vllm_ascend-0.9.0rc2/vllm_ascend/models/deepseek_mtp.py +0 -200
  745. vllm_ascend-0.9.0rc2/vllm_ascend/models/deepseek_v2.py +0 -728
  746. vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen2_5_vl.py +0 -487
  747. vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -273
  748. vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen2_vl.py +0 -352
  749. vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen3_moe.py +0 -35
  750. vllm_ascend-0.9.0rc2/vllm_ascend/multistream/decorator.py +0 -26
  751. vllm_ascend-0.9.0rc2/vllm_ascend/multistream/ms_split.py +0 -245
  752. vllm_ascend-0.9.0rc2/vllm_ascend/ops/__init__.py +0 -49
  753. vllm_ascend-0.9.0rc2/vllm_ascend/ops/activation.py +0 -37
  754. vllm_ascend-0.9.0rc2/vllm_ascend/ops/attention.py +0 -305
  755. vllm_ascend-0.9.0rc2/vllm_ascend/ops/cache.py +0 -35
  756. vllm_ascend-0.9.0rc2/vllm_ascend/ops/common_fused_moe.py +0 -69
  757. vllm_ascend-0.9.0rc2/vllm_ascend/ops/expert_load_balancer.py +0 -99
  758. vllm_ascend-0.9.0rc2/vllm_ascend/ops/fused_moe.py +0 -1211
  759. vllm_ascend-0.9.0rc2/vllm_ascend/ops/layernorm.py +0 -40
  760. vllm_ascend-0.9.0rc2/vllm_ascend/ops/rotary_embedding.py +0 -279
  761. vllm_ascend-0.9.0rc2/vllm_ascend/ops/vocab_parallel_embedding.py +0 -67
  762. vllm_ascend-0.9.0rc2/vllm_ascend/patch/__init__.py +0 -213
  763. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/__init__.py +0 -25
  764. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/__init__.py +0 -17
  765. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/patch_distributed.py +0 -116
  766. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_common/__init__.py +0 -18
  767. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -99
  768. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_main/__init__.py +0 -16
  769. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/__init__.py +0 -26
  770. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_0_9_0/__init__.py +0 -16
  771. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/__init__.py +0 -26
  772. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -49
  773. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -70
  774. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -107
  775. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_sampler.py +0 -101
  776. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +0 -155
  777. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -38
  778. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_main/__init__.py +0 -16
  779. vllm_ascend-0.9.0rc2/vllm_ascend/platform.py +0 -309
  780. vllm_ascend-0.9.0rc2/vllm_ascend/quantization/func_wrapper.py +0 -151
  781. vllm_ascend-0.9.0rc2/vllm_ascend/quantization/quant_config.py +0 -339
  782. vllm_ascend-0.9.0rc2/vllm_ascend/quantization/quantizer.py +0 -287
  783. vllm_ascend-0.9.0rc2/vllm_ascend/quantization/w8a8.py +0 -115
  784. vllm_ascend-0.9.0rc2/vllm_ascend/quantization/w8a8_dynamic.py +0 -753
  785. vllm_ascend-0.9.0rc2/vllm_ascend/sample/rejection_sampler.py +0 -456
  786. vllm_ascend-0.9.0rc2/vllm_ascend/utils.py +0 -229
  787. vllm_ascend-0.9.0rc2/vllm_ascend/worker/__init__.py +0 -17
  788. vllm_ascend-0.9.0rc2/vllm_ascend/worker/cache_engine.py +0 -83
  789. vllm_ascend-0.9.0rc2/vllm_ascend/worker/draft_model_runner.py +0 -319
  790. vllm_ascend-0.9.0rc2/vllm_ascend/worker/model_runner.py +0 -1607
  791. vllm_ascend-0.9.0rc2/vllm_ascend/worker/model_runner_v1.py +0 -1916
  792. vllm_ascend-0.9.0rc2/vllm_ascend/worker/mtp_proposer_v1.py +0 -225
  793. vllm_ascend-0.9.0rc2/vllm_ascend/worker/multi_step_runner.py +0 -737
  794. vllm_ascend-0.9.0rc2/vllm_ascend/worker/multi_step_worker.py +0 -194
  795. vllm_ascend-0.9.0rc2/vllm_ascend/worker/pooling_model_runner.py +0 -186
  796. vllm_ascend-0.9.0rc2/vllm_ascend/worker/worker.py +0 -573
  797. vllm_ascend-0.9.0rc2/vllm_ascend/worker/worker_v1.py +0 -313
  798. vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/PKG-INFO +0 -100
  799. vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/SOURCES.txt +0 -286
  800. vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/entry_points.txt +0 -5
  801. vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/requires.txt +0 -18
  802. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  803. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  804. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  805. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  806. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  807. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  808. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  809. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  810. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  811. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/dependabot.yml +0 -0
  812. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/labeler.yml +0 -0
  813. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/workflows/matchers/actionlint.json +0 -0
  814. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/workflows/matchers/mypy.json +0 -0
  815. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/workflows/matchers/ruff.json +0 -0
  816. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.readthedocs.yaml +0 -0
  817. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/DCO +0 -0
  818. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/LICENSE +0 -0
  819. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/cmake/utils.cmake +0 -0
  820. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/collect_env.py +0 -0
  821. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/csrc/kernels/types.h +0 -0
  822. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  823. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  824. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/packages.txt +0 -0
  825. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/setup.cfg +0 -0
  826. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/__init__.py +0 -0
  827. {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode → vllm_ascend-0.11.0rc1/tests}/e2e/__init__.py +0 -0
  828. {vllm_ascend-0.9.0rc2/tests/singlecard → vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node}/__init__.py +0 -0
  829. {vllm_ascend-0.9.0rc2/tests/singlecard/compile → vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config}/__init__.py +0 -0
  830. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  831. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  832. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  833. {vllm_ascend-0.9.0rc2/tests/singlecard/ops → vllm_ascend-0.11.0rc1/tests/e2e/singlecard}/__init__.py +0 -0
  834. {vllm_ascend-0.9.0rc2/tests/singlecard/sample → vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
  835. {vllm_ascend-0.9.0rc2/vllm_ascend/attention → vllm_ascend-0.11.0rc1/tests/ut}/__init__.py +0 -0
  836. /vllm_ascend-0.9.0rc2/vllm_ascend/compilation/__init__.py → /vllm_ascend-0.11.0rc1/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  837. {vllm_ascend-0.9.0rc2/vllm_ascend/core → vllm_ascend-0.11.0rc1/tests/ut/models}/__init__.py +0 -0
  838. {vllm_ascend-0.9.0rc2/vllm_ascend/device_allocator → vllm_ascend-0.11.0rc1/tests/ut/torchair}/__init__.py +0 -0
  839. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/actionlint.sh +0 -0
  840. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/check_repo.sh +0 -0
  841. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/png-lint.sh +0 -0
  842. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/sphinx-lint.sh +0 -0
  843. {vllm_ascend-0.9.0rc2/vllm_ascend/distributed/device_communicators → vllm_ascend-0.11.0rc1/vllm_ascend/attention}/__init__.py +0 -0
  844. {vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.11.0rc1/vllm_ascend/compilation}/__init__.py +0 -0
  845. {vllm_ascend-0.9.0rc2/vllm_ascend/lora → vllm_ascend-0.11.0rc1/vllm_ascend/core}/__init__.py +0 -0
  846. {vllm_ascend-0.9.0rc2/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.11.0rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
  847. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/distributed/communicator.py +0 -0
  848. {vllm_ascend-0.9.0rc2/vllm_ascend/multistream → vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
  849. {vllm_ascend-0.9.0rc2/vllm_ascend/quantization → vllm_ascend-0.11.0rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  850. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  851. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  852. {vllm_ascend-0.9.0rc2/vllm_ascend/sample → vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake}/__init__.py +0 -0
  853. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/base.py +0 -0
  854. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/context.py +0 -0
  855. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/layers.py +0 -0
  856. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/metadata.py +0 -0
  857. {vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common → vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker}/patch_minicpm.py +0 -0
  858. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  859. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ # https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
2
+ have_fun: false # Just review the code
3
+ code_review:
4
+ comment_severity_threshold: HIGH # Reduce quantity of comments
5
+ pull_request_opened:
6
+ summary: false # Don't summarize the PR in a separate comment
@@ -0,0 +1,45 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+ ARG PY_VERSION=3.11
18
+ FROM quay.io/ascend/manylinux:8.2.rc1-910b-manylinux_2_28-py${PY_VERSION}
19
+
20
+ ARG COMPILE_CUSTOM_KERNELS=1
21
+
22
+ # Define environments
23
+ ENV DEBIAN_FRONTEND=noninteractive
24
+ ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
25
+ RUN yum update -y && \
26
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
+ rm -rf /var/cache/yum
28
+
29
+ WORKDIR /workspace
30
+
31
+ COPY . /workspace/vllm-ascend/
32
+
33
+ # Install req
34
+ RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
35
+ python3 -m pip install twine
36
+
37
+ # Install vllm-ascend
38
+ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
39
+ source /usr/local/Ascend/nnal/atb/set_env.sh && \
40
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
41
+ cd vllm-ascend && \
42
+ python3 setup.py bdist_wheel && \
43
+ ls -l dist
44
+
45
+ CMD ["/bin/bash"]
@@ -0,0 +1,37 @@
1
+ name: 📚 User Story
2
+ description: Apply for a user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
3
+ title: "[User Story]: "
4
+ labels: ["user-story"]
5
+
6
+ body:
7
+ - type: textarea
8
+ attributes:
9
+ label: 📚 Title
10
+ description: >
11
+ A clear title about what your user story is about.
12
+ validations:
13
+ required: true
14
+ - type: textarea
15
+ attributes:
16
+ label: About / Introduction
17
+ description: >
18
+ A brief introduction about the background of your use case, like your scenario, hardware size etc.
19
+ - type: textarea
20
+ attributes:
21
+ label: Business Challenges
22
+ description: >
23
+ Tell us what kind of challenges you faced in this user story.
24
+ - type: textarea
25
+ attributes:
26
+ label: Solving challenges with vLLM Ascend and benefits
27
+ description: >
28
+ Tell us how vLLM Ascend helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc. And what kind of benefit do you get from using vLLM Ascend
29
+ - type: textarea
30
+ attributes:
31
+ label: Extra Info
32
+ description: >
33
+ Any extra information you want to include in this story
34
+ - type: markdown
35
+ attributes:
36
+ value: >
37
+ Thanks for contributing 🎉!
@@ -0,0 +1,49 @@
1
+ name: 💬 Request for comments (RFC).
2
+ description: Ask for feedback on major architectural changes or design choices.
3
+ title: "[RFC]: "
4
+ labels: ["RFC"]
5
+
6
+ body:
7
+ - type: markdown
8
+ attributes:
9
+ value: >
10
+ #### Please take a look at previous [RFCs](https://github.com/vllm-project/vllm-ascend/issues?q=label%3ARFC+sort%3Aupdated-desc) for reference.
11
+ - type: textarea
12
+ attributes:
13
+ label: Motivation.
14
+ description: >
15
+ The motivation of the RFC.
16
+ validations:
17
+ required: true
18
+ - type: textarea
19
+ attributes:
20
+ label: Proposed Change.
21
+ description: >
22
+ The proposed change of the RFC.
23
+ validations:
24
+ required: true
25
+ - type: textarea
26
+ attributes:
27
+ label: Feedback Period.
28
+ description: >
29
+ The feedback period of the RFC. Usually at least one week.
30
+ validations:
31
+ required: false
32
+ - type: textarea
33
+ attributes:
34
+ label: CC List.
35
+ description: >
36
+ The list of people you want to CC.
37
+ validations:
38
+ required: false
39
+ - type: textarea
40
+ attributes:
41
+ label: Any Other Things.
42
+ description: >
43
+ Any other things you would like to mention, such as feature branch request.
44
+ validations:
45
+ required: false
46
+ - type: markdown
47
+ attributes:
48
+ value: >
49
+ Thanks for contributing 🎉!
@@ -0,0 +1,104 @@
1
+ name: Release Checklist
2
+ description: Generate a release checklist issue when preparing a new release. (Used by the release team)
3
+ title: "[Release]: Release checklist for v"
4
+
5
+ body:
6
+ - type: textarea
7
+ attributes:
8
+ description: >
9
+ Brief info for the new release.
10
+ label: Release Checklist
11
+ value: >
12
+ **Release Version**:
13
+
14
+ **Release Branch**:
15
+
16
+ **Release Date**:
17
+
18
+ **Release Manager**:
19
+ - type: textarea
20
+ attributes:
21
+ description: >
22
+ Release notes.
23
+ label: Prepare Release Note
24
+ value: >
25
+ - [ ] Create a new issue for release feedback
26
+
27
+ - [ ] Upgrade vllm version to the new version for CI and Dockerfile
28
+
29
+ - [ ] Write the release note PR.
30
+
31
+ - [ ] Update the feedback issue link in docs/source/faqs.md
32
+
33
+ - [ ] Add release note to docs/source/user_guide/release_notes.md
34
+
35
+ - [ ] Update release version in README.md and README.zh.md
36
+
37
+ - [ ] Update version info in docs/source/community/versioning_policy.md
38
+
39
+ - [ ] Update contributor info in docs/source/community/contributors.md
40
+
41
+ - [ ] Update package version in docs/conf.py
42
+ - type: textarea
43
+ attributes:
44
+ description: >
45
+ Make sure the code is merged.
46
+ label: PR need Merge
47
+ value: >
48
+ - [ ] PR link1
49
+
50
+ - [ ] PR link2
51
+
52
+ - [ ] ...
53
+ - type: textarea
54
+ attributes:
55
+ description: >
56
+ Make sure the new Feature/Function is tested
57
+ label: Functional Test
58
+ value: >
59
+ - [ ] Feature1
60
+
61
+ - [ ] Bug1
62
+
63
+ - [ ] ...
64
+ - type: textarea
65
+ attributes:
66
+ description: >
67
+ Make sure the doc is updated.
68
+ label: Doc Test
69
+ value: >
70
+ - [ ] Tutorial is updated.
71
+
72
+ - [ ] User Guide is updated.
73
+
74
+ - [ ] Developer Guide is updated.
75
+ - type: textarea
76
+ attributes:
77
+ description: >
78
+ Make sure the artifacts are ready
79
+ label: Prepare Artifacts
80
+ value: >
81
+ - [ ] Docker image is ready.
82
+
83
+ - [ ] Wheel package is ready.
84
+ - type: textarea
85
+ attributes:
86
+ description: >
87
+ Start to release.
88
+ label: Release Step
89
+ value: >
90
+ - [ ] Release note PR is merged.
91
+
92
+ - [ ] Post the release on GitHub release page.
93
+
94
+ - [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
95
+
96
+ - [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
97
+
98
+ - [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
99
+
100
+ - [ ] Upload 310p wheel to Github release page
101
+
102
+ - [ ] Broadcast the release news (by message, blog, etc.)
103
+
104
+ - [ ] Close this issue
@@ -0,0 +1,27 @@
1
+ <!-- Thanks for sending a pull request!
2
+
3
+ BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html
4
+
5
+ -->
6
+ ### What this PR does / why we need it?
7
+ <!--
8
+ - Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue.
9
+ If possible, please consider writing useful notes for better and faster reviews in your PR.
10
+
11
+ - Please clarify why the changes are needed. For instance, the use case and bug description.
12
+
13
+ - Fixes #
14
+ -->
15
+
16
+ ### Does this PR introduce _any_ user-facing change?
17
+ <!--
18
+ Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes.
19
+ Documentation-only updates are not considered user-facing changes.
20
+ -->
21
+
22
+ ### How was this patch tested?
23
+ <!--
24
+ CI passed with newly added/existing tests.
25
+ If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
26
+ If tests were not added, please describe why they were not added and/or why it was difficult to add.
27
+ -->
@@ -0,0 +1,21 @@
1
+ self-hosted-runner:
2
+ # Labels of self-hosted runner in array of strings.
3
+ labels:
4
+ - linux-aarch64-a2-0
5
+ - linux-aarch64-a2-1
6
+ - linux-aarch64-a2-2
7
+ - linux-aarch64-a2-4
8
+ - linux-aarch64-a2-8
9
+ - linux-arm64-npu-static-8
10
+ - linux-aarch64-310p-1
11
+ - linux-aarch64-310p-2
12
+ - linux-aarch64-310p-4
13
+ - ubuntu-24.04-arm
14
+ - linux-aarch64-a3-1
15
+ - linux-aarch64-a3-2
16
+ - linux-aarch64-a3-4
17
+ - linux-aarch64-a3-8
18
+ - linux-amd64-cpu-0
19
+ - linux-amd64-cpu-8
20
+ - linux-amd64-cpu-16
21
+ - linux-aarch64-a3-0
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ # Adapted from vllm/.github/scripts/cleanup_pr_body.sh
17
+
18
+ #!/bin/bash
19
+
20
+ set -eux
21
+
22
+ # ensure exactly 3 arguments are passed
23
+ if [ "$#" -ne 3 ]; then
24
+ echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
25
+ exit 1
26
+ fi
27
+
28
+ PR_NUMBER=$1
29
+ VLLM_VERSION=$2
30
+ VLLM_COMMIT=$3
31
+ OLD=/tmp/orig_pr_body.txt
32
+ NEW=/tmp/new_pr_body.txt
33
+ FINAL=/tmp/final_pr_body.txt
34
+
35
+ gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
36
+ cp "${OLD}" "${NEW}"
37
+
38
+ # Remove notes in pr description and add vLLM version and commit
39
+ sed -i '/<!--/,/-->/d' "${NEW}"
40
+ sed -i '/- vLLM .*$/d' "${NEW}"
41
+ {
42
+ echo ""
43
+ echo "- vLLM version: $VLLM_VERSION"
44
+ echo "- vLLM main: $VLLM_COMMIT"
45
+ } >> "${NEW}"
46
+
47
+ # Remove redundant empty lines
48
+ uniq "${NEW}" > "${FINAL}"
49
+
50
+ # Run this only if ${FINAL} is different from ${OLD}
51
+ if ! cmp -s "${OLD}" "${FINAL}"; then
52
+ echo
53
+ echo "Updating PR body:"
54
+ echo
55
+ cat "${NEW}"
56
+ gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
57
+ else
58
+ echo "No changes needed"
59
+ fi
@@ -0,0 +1,175 @@
1
+ name: 'accuracy test'
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
9
+ vllm-ascend:
10
+ required: false
11
+ type: string
12
+ default: main
13
+ runner:
14
+ required: true
15
+ type: string
16
+ image:
17
+ required: true
18
+ type: string
19
+ model_name:
20
+ required: true
21
+ type: string
22
+ upload:
23
+ required: false
24
+ type: boolean
25
+ default: false
26
+
27
+ jobs:
28
+ accuracy_tests:
29
+
30
+ runs-on: ${{ inputs.runner }}
31
+ name: ${{ inputs.model_name }} accuracy
32
+ container:
33
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
34
+ env:
35
+ VLLM_USE_MODELSCOPE: True
36
+ # 1. If a version is specified (workflow_dispatch), run the accuracy test on the specified branch
37
+ # 2. If no version (labeled PR), do accuracy test by default ref:
38
+ # The branch, tag or SHA to checkout. When checking out the repository that
39
+ # triggered a workflow, this defaults to the reference or SHA for that event.
40
+ # Otherwise, uses the default branch.
41
+ GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
42
+
43
+ steps:
44
+ - name: Checkout repository
45
+ uses: actions/checkout@v4
46
+
47
+ - name: Set model name as output
48
+ id: set_output
49
+ run: |
50
+ echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
51
+
52
+ - name: Config mirrors
53
+ run: |
54
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
55
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
56
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
57
+ apt-get update -y
58
+ apt install git -y
59
+
60
+ - name: Install system dependencies
61
+ run: |
62
+ apt-get -y install `cat packages.txt`
63
+ apt-get -y install gcc g++ cmake libnuma-dev
64
+
65
+ - name: Checkout vllm-project/vllm repo
66
+ uses: actions/checkout@v4
67
+ with:
68
+ repository: vllm-project/vllm
69
+ ref: ${{ inputs.vllm }}
70
+ path: ./vllm-empty
71
+
72
+ - name: Install vllm-project/vllm from source
73
+ working-directory: ./vllm-empty
74
+ run: |
75
+ VLLM_TARGET_DEVICE=empty pip install -e .
76
+
77
+ - name: Resolve vllm-ascend version
78
+ run: |
79
+ VERSION_INPUT="${{ inputs.vllm-ascend }}"
80
+
81
+ if [[ "$VERSION_INPUT" == "latest" ]]; then
82
+ TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
83
+ LATEST_TAG=$(echo "$TAGS" | head -n1)
84
+ if [[ -z "$LATEST_TAG" ]]; then
85
+ RESOLVED_VERSION="main"
86
+ else
87
+ RESOLVED_VERSION="$LATEST_TAG"
88
+ fi
89
+ else
90
+ RESOLVED_VERSION="$VERSION_INPUT"
91
+ fi
92
+ echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
93
+
94
+ - name: Checkout vllm-project/vllm-ascend repo
95
+ uses: actions/checkout@v4
96
+ with:
97
+ repository: vllm-project/vllm-ascend
98
+ path: ./vllm-ascend
99
+ ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
100
+
101
+ - name: Install vllm-project/vllm-ascend
102
+ working-directory: ./vllm-ascend
103
+ env:
104
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
105
+ run: |
106
+ pip install -r requirements-dev.txt
107
+ pip install -v -e .
108
+
109
+ - name: Get vLLM commit hash and URL
110
+ working-directory: ./vllm-empty
111
+ run: |
112
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
113
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
114
+
115
+ - name: Get vLLM-Ascend commit hash and URL
116
+ working-directory: ./vllm-ascend
117
+ run: |
118
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
119
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
120
+
121
+ - name: Collect version info
122
+ run: |
123
+ for dir in /usr/local/Ascend/ascend-toolkit/*; do
124
+ dname=$(basename "$dir")
125
+ if [ "$dname" != "latest" ]; then
126
+ TOOLKIT_DIR="$dname"
127
+ break
128
+ fi
129
+ done
130
+ INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
131
+ GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
132
+ | head -n1 \
133
+ | cut -d'=' -f2 \
134
+ | tr -d '"')
135
+ {
136
+ echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
137
+ pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
138
+ pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
139
+ pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
140
+ } >> "$GITHUB_ENV"
141
+
142
+ - name: Run accuracy test
143
+ id: report
144
+ env:
145
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
146
+ VLLM_USE_MODELSCOPE: True
147
+ VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
148
+ VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
149
+ VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
150
+ VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
151
+ CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
152
+ TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
153
+ TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
154
+ run: |
155
+ model_base_name=$(basename ${{ inputs.model_name }})
156
+ markdown_name="${model_base_name}"
157
+ echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
158
+ mkdir -p ./benchmarks/accuracy
159
+ pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
160
+ --config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
161
+
162
+ - name: Generate step summary
163
+ if: ${{ always() }}
164
+ run: |
165
+ cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
166
+
167
+ - name: Upload Report
168
+ if: ${{ inputs.upload == true }}
169
+ uses: actions/upload-artifact@v4
170
+ with:
171
+ name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
172
+ path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
173
+ if-no-files-found: warn
174
+ retention-days: 90
175
+ overwrite: true
@@ -0,0 +1,115 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: 'e2e nightly test'
19
+
20
+ on:
21
+ workflow_call:
22
+ inputs:
23
+ vllm:
24
+ required: true
25
+ type: string
26
+ runner:
27
+ required: true
28
+ type: string
29
+ image:
30
+ required: false
31
+ type: string
32
+ default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
33
+ tests:
34
+ required: true
35
+ type: string
36
+
37
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
38
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
39
+ # It's used to activate ascend-toolkit environment variables.
40
+ defaults:
41
+ run:
42
+ shell: bash -el {0}
43
+
44
+ # only cancel in-progress runs of the same workflow
45
+ # and ignore the lint / 1 card / 4 cards test type
46
+ concurrency:
47
+ group: ${{ github.workflow }}-${{ github.ref }}
48
+ cancel-in-progress: true
49
+
50
+ jobs:
51
+ e2e-nightly:
52
+ name: e2e-nightly
53
+ runs-on: ${{ inputs.runner }}
54
+ container:
55
+ image: ${{ inputs.image }}
56
+ env:
57
+ VLLM_USE_MODELSCOPE: True
58
+ steps:
59
+ - name: Check npu and CANN info
60
+ run: |
61
+ npu-smi info
62
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
63
+
64
+ - name: Config mirrors
65
+ run: |
66
+ sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
67
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
68
+ apt-get update -y
69
+ apt install git -y
70
+ git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
71
+
72
+ - name: Checkout vllm-project/vllm-ascend repo
73
+ uses: actions/checkout@v4
74
+
75
+ - name: Install system dependencies
76
+ run: |
77
+ apt-get -y install `cat packages.txt`
78
+ apt-get -y install gcc g++ cmake libnuma-dev
79
+
80
+ - name: Checkout vllm-project/vllm repo
81
+ uses: actions/checkout@v4
82
+ with:
83
+ repository: vllm-project/vllm
84
+ ref: ${{ inputs.vllm }}
85
+ path: ./vllm-empty
86
+
87
+ - name: Install vllm-project/vllm from source
88
+ working-directory: ./vllm-empty
89
+ run: |
90
+ VLLM_TARGET_DEVICE=empty pip install -e .
91
+
92
+ - name: Install vllm-project/vllm-ascend
93
+ env:
94
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
95
+ run: |
96
+ pip install -r requirements-dev.txt
97
+ pip install -v -e .
98
+
99
+ - name: Checkout aisbench repo and Install aisbench
100
+ run: |
101
+ git clone https://gitee.com/aisbench/benchmark.git
102
+ cd benchmark
103
+ git checkout v3.0-20250930-master
104
+ pip3 install -e ./
105
+ pip3 install -r requirements/api.txt
106
+ pip3 install -r requirements/extra.txt
107
+
108
+ - name: Run vllm-project/vllm-ascend test
109
+ env:
110
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
111
+ VLLM_USE_MODELSCOPE: True
112
+ VLLM_CI_RUNNER: ${{ inputs.runner }}
113
+ run: |
114
+ # TODO: enable more tests
115
+ pytest -sv ${{ inputs.tests }}