vllm-ascend 0.9.2rc1__tar.gz → 0.10.1rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (572)
  1. vllm_ascend-0.10.1rc1/.gemini/config.yaml +6 -0
  2. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/Dockerfile.buildwheel +1 -1
  3. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +1 -1
  4. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
  5. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -1
  6. vllm_ascend-0.10.1rc1/.github/actionlint.yaml +17 -0
  7. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/format_pr_body.sh +6 -3
  8. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/accuracy_test.yaml +74 -141
  9. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/format_pr_body.yaml +1 -1
  10. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_310p_openeuler.yml +16 -7
  11. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_310p_ubuntu.yml +13 -4
  12. vllm_ascend-0.10.1rc1/.github/workflows/image_a3_openeuler.yml +123 -0
  13. vllm_ascend-0.10.1rc1/.github/workflows/image_a3_ubuntu.yml +119 -0
  14. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_openeuler.yml +12 -4
  15. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_ubuntu.yml +10 -1
  16. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/nightly_benchmarks.yaml +8 -6
  17. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/pre-commit.yml +1 -1
  18. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/release_code.yml +2 -2
  19. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/release_whl.yml +3 -2
  20. vllm_ascend-0.10.1rc1/.github/workflows/reminder_comment.yml +26 -0
  21. vllm_ascend-0.9.2rc1/.github/workflows/vllm_ascend_test_long_term.yaml → vllm_ascend-0.10.1rc1/.github/workflows/vllm_ascend_dist.yaml +22 -25
  22. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/vllm_ascend_doctest.yaml +4 -2
  23. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/vllm_ascend_test.yaml +77 -107
  24. vllm_ascend-0.10.1rc1/.github/workflows/vllm_ascend_test_310p.yaml +117 -0
  25. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/vllm_ascend_test_pd.yaml +5 -5
  26. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.gitignore +4 -0
  27. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.pre-commit-config.yaml +11 -5
  28. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/CODE_OF_CONDUCT.md +0 -1
  29. vllm_ascend-0.10.1rc1/CONTRIBUTING.md +3 -0
  30. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile +3 -3
  31. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile.310p +3 -3
  32. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile.310p.openEuler +4 -3
  33. vllm_ascend-0.10.1rc1/Dockerfile.a3 +60 -0
  34. vllm_ascend-0.10.1rc1/Dockerfile.a3.openEuler +58 -0
  35. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile.openEuler +4 -3
  36. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/PKG-INFO +17 -6
  37. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/README.md +16 -5
  38. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/README.zh.md +15 -4
  39. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/README.md +44 -35
  40. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/requirements-bench.txt +0 -1
  41. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/scripts/perf_result_template.md +1 -1
  42. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/scripts/run-performance-benchmarks.sh +0 -1
  43. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/codecov.yml +2 -4
  44. vllm_ascend-0.10.1rc1/csrc/kernels/bgmv_expand.cpp +369 -0
  45. vllm_ascend-0.10.1rc1/csrc/kernels/bgmv_shrink.cpp +252 -0
  46. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -5
  47. vllm_ascend-0.10.1rc1/csrc/kernels/sgmv_expand.cpp +389 -0
  48. vllm_ascend-0.10.1rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
  49. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/ops.h +64 -12
  50. vllm_ascend-0.10.1rc1/csrc/torch_binding.cpp +428 -0
  51. vllm_ascend-0.10.1rc1/csrc/torch_binding_meta.cpp +102 -0
  52. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/utils.h +0 -12
  53. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/Makefile +4 -0
  54. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/README.md +6 -5
  55. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/requirements-docs.txt +1 -0
  56. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/_templates/sections/header.html +1 -1
  57. vllm_ascend-0.10.1rc1/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  58. vllm_ascend-0.10.1rc1/docs/source/assets/multi_node_dp_kimi.png +0 -0
  59. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/contributors.md +37 -1
  60. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/governance.md +2 -2
  61. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/user_stories/llamafactory.md +1 -1
  62. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/versioning_policy.md +21 -0
  63. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/conf.py +8 -8
  64. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/contribution/testing.md +5 -0
  65. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +1 -1
  66. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +3 -1
  67. vllm_ascend-0.10.1rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +300 -0
  68. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +4 -1
  69. vllm_ascend-0.10.1rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +237 -0
  70. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/feature_guide/index.md +1 -0
  71. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/feature_guide/patch.md +8 -5
  72. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/modeling/adding_a_new_model.md +1 -0
  73. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/performance/index.md +1 -0
  74. vllm_ascend-0.10.1rc1/docs/source/developer_guide/performance/optimization_and_tuning.md +183 -0
  75. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/performance/performance_benchmark.md +7 -0
  76. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/performance/profile_execute_duration.md +2 -1
  77. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/faqs.md +41 -8
  78. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/installation.md +15 -14
  79. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +1647 -0
  80. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +204 -0
  81. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +103 -0
  82. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +87 -0
  83. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +624 -0
  84. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +187 -0
  85. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
  86. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +26 -0
  87. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +26 -0
  88. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +112 -0
  89. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +65 -0
  90. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +83 -0
  91. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +33 -0
  92. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
  93. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +333 -0
  94. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +29 -0
  95. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +32 -0
  96. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +26 -0
  97. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +88 -0
  98. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +81 -0
  99. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +479 -0
  100. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +79 -0
  101. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +293 -0
  102. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +149 -0
  103. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +29 -0
  104. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +192 -0
  105. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +62 -0
  106. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +86 -0
  107. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +82 -0
  108. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +71 -0
  109. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +110 -0
  110. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +107 -0
  111. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +77 -0
  112. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +99 -0
  113. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +70 -0
  114. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +290 -0
  115. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +28 -0
  116. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +30 -0
  117. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +121 -0
  118. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +30 -0
  119. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +58 -0
  120. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +183 -0
  121. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +156 -0
  122. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +220 -0
  123. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +1660 -0
  124. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +30 -0
  125. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +264 -0
  126. vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +214 -0
  127. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/quick_start.md +14 -1
  128. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/index.md +2 -0
  129. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_node.md +29 -19
  130. vllm_ascend-0.10.1rc1/docs/source/tutorials/multi_node_kimi.md +153 -0
  131. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu_moge.md +109 -3
  132. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu_quantization.md +6 -3
  133. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_node_300i.md +78 -2
  134. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu.md +4 -0
  135. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu_audio.md +2 -2
  136. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu_multimodal.md +3 -5
  137. vllm_ascend-0.10.1rc1/docs/source/tutorials/single_npu_qwen3_quantization.md +133 -0
  138. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/configuration/additional_config.md +4 -2
  139. vllm_ascend-0.10.1rc1/docs/source/user_guide/feature_guide/lora.md +23 -0
  140. vllm_ascend-0.10.1rc1/docs/source/user_guide/feature_guide/quantization.md +125 -0
  141. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -1
  142. vllm_ascend-0.10.1rc1/docs/source/user_guide/release_notes.md +624 -0
  143. vllm_ascend-0.10.1rc1/docs/source/user_guide/support_matrix/supported_features.md +45 -0
  144. vllm_ascend-0.10.1rc1/docs/source/user_guide/support_matrix/supported_models.md +79 -0
  145. vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/README.md +246 -0
  146. vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/gen_ranktable.py +122 -0
  147. vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/gen_ranktable.sh +79 -0
  148. vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +546 -0
  149. vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
  150. vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/run_server.sh +32 -0
  151. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/eplb/eplb_strategy.py +3 -0
  152. vllm_ascend-0.10.1rc1/examples/external_online_dp/README.md +38 -0
  153. vllm_ascend-0.10.1rc1/examples/external_online_dp/launch_online_dp.py +97 -0
  154. vllm_ascend-0.10.1rc1/examples/external_online_dp/run_dp_template.sh +46 -0
  155. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_data_parallel.py +19 -3
  156. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_disaggregated_prefill_npu.py +18 -11
  157. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_dualbatch_overlap_npu.py +3 -2
  158. vllm_ascend-0.10.1rc1/examples/offline_embed.py +58 -0
  159. vllm_ascend-0.10.1rc1/examples/offline_external_launcher.py +287 -0
  160. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_inference_audio_language.py +33 -12
  161. vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v0.py → vllm_ascend-0.10.1rc1/examples/offline_inference_npu.py +28 -21
  162. vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v1.py → vllm_ascend-0.10.1rc1/examples/offline_inference_npu_tp2.py +6 -1
  163. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_inference_sleep_mode_npu.py +5 -2
  164. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/prompt_embedding_inference.py +5 -0
  165. vllm_ascend-0.10.1rc1/examples/run_dp_server.sh +32 -0
  166. vllm_ascend-0.10.1rc1/pyproject.toml +34 -0
  167. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/requirements-dev.txt +7 -2
  168. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/requirements-lint.txt +1 -0
  169. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/requirements.txt +3 -6
  170. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/setup.py +1 -1
  171. vllm_ascend-0.10.1rc1/tests/e2e/310p/test_offline_inference_310p.py +72 -0
  172. vllm_ascend-0.10.1rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +62 -0
  173. {vllm_ascend-0.9.2rc1/tests → vllm_ascend-0.10.1rc1/tests/e2e}/conftest.py +27 -114
  174. vllm_ascend-0.10.1rc1/tests/e2e/model_utils.py +74 -0
  175. vllm_ascend-0.10.1rc1/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +13 -0
  176. vllm_ascend-0.10.1rc1/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +8 -0
  177. vllm_ascend-0.10.1rc1/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +18 -0
  178. vllm_ascend-0.10.1rc1/tests/e2e/models/configs/Qwen3-8B-Base.yaml +13 -0
  179. vllm_ascend-0.10.1rc1/tests/e2e/models/configs/accuracy.txt +3 -0
  180. vllm_ascend-0.10.1rc1/tests/e2e/models/conftest.py +72 -0
  181. vllm_ascend-0.10.1rc1/tests/e2e/models/report_template.md +21 -0
  182. vllm_ascend-0.10.1rc1/tests/e2e/models/test_lm_eval_correctness.py +153 -0
  183. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_data_parallel.py +3 -2
  184. vllm_ascend-0.10.1rc1/tests/e2e/multicard/test_expert_parallel.py +32 -0
  185. vllm_ascend-0.10.1rc1/tests/e2e/multicard/test_external_launcher.py +187 -0
  186. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +19 -15
  187. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_ilama_lora_tp2.py +4 -3
  188. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_offline_inference_distributed.py +48 -65
  189. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_pipeline_parallel.py +7 -4
  190. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_prefix_caching.py +2 -8
  191. vllm_ascend-0.10.1rc1/tests/e2e/multicard/test_qwen3_moe.py +104 -0
  192. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_torchair_graph_mode.py +84 -21
  193. vllm_ascend-0.10.1rc1/tests/e2e/pd_disaggreate/run_edge_case_test.sh +141 -0
  194. vllm_ascend-0.10.1rc1/tests/e2e/pd_disaggreate/test_edge_cases.py +81 -0
  195. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_bgmv_expand.py +46 -0
  196. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_bgmv_shrink.py +45 -0
  197. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_fused_moe.py +284 -0
  198. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_moe_comm.py +175 -0
  199. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/ops/test_rotary_embedding.py +152 -1
  200. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +7 -0
  201. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +76 -0
  202. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +85 -0
  203. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +50 -59
  204. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_aclgraph.py +75 -0
  205. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_ascend_scheduler.py +88 -0
  206. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_camem.py +26 -15
  207. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_chunked.py +81 -0
  208. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_embedding.py +17 -36
  209. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_guided_decoding.py +6 -21
  210. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_ilama_lora.py +4 -2
  211. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +9 -0
  212. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_quantization.py +35 -0
  213. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_sampler.py +49 -0
  214. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_vlm.py +89 -0
  215. vllm_ascend-0.10.1rc1/tests/e2e/utils.py +106 -0
  216. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/attention/test_attention_mask.py +49 -72
  217. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/attention/test_attention_v1.py +159 -78
  218. vllm_ascend-0.10.1rc1/tests/ut/attention/test_mla_v1.py +631 -0
  219. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/base.py +18 -5
  220. vllm_ascend-0.10.1rc1/tests/ut/conftest.py +26 -0
  221. vllm_ascend-0.10.1rc1/tests/ut/core/test_schedule_config.py +167 -0
  222. vllm_ascend-0.10.1rc1/tests/ut/core/test_scheduler.py +898 -0
  223. vllm_ascend-0.10.1rc1/tests/ut/device_allocator/test_camem.py +188 -0
  224. vllm_ascend-0.10.1rc1/tests/ut/distributed/device_communicators/test_pyhccl.py +84 -0
  225. vllm_ascend-0.10.1rc1/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +173 -0
  226. vllm_ascend-0.10.1rc1/tests/ut/distributed/test_communicator.py +89 -0
  227. vllm_ascend-0.10.1rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +139 -0
  228. vllm_ascend-0.10.1rc1/tests/ut/distributed/test_parallel_state.py +44 -0
  229. vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_llmdatadist_connector.py +96 -0
  230. vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_mooncake_connector.py +998 -0
  231. vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_remote_decode_lifecycle.py +169 -0
  232. vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +239 -0
  233. vllm_ascend-0.10.1rc1/tests/ut/kv_connector/utils.py +233 -0
  234. vllm_ascend-0.10.1rc1/tests/ut/models/test_deepseek_mtp.py +195 -0
  235. vllm_ascend-0.10.1rc1/tests/ut/models/test_deepseek_v2.py +295 -0
  236. vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen2_5_vl.py +424 -0
  237. vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen2_5_vl_without_padding.py +422 -0
  238. vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen2_vl.py +200 -0
  239. vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen3_moe.py +98 -0
  240. vllm_ascend-0.10.1rc1/tests/ut/multistream/test_base.py +32 -0
  241. vllm_ascend-0.10.1rc1/tests/ut/multistream/test_decorator.py +47 -0
  242. vllm_ascend-0.10.1rc1/tests/ut/multistream/test_layers.py +198 -0
  243. vllm_ascend-0.10.1rc1/tests/ut/multistream/test_metadata.py +246 -0
  244. vllm_ascend-0.10.1rc1/tests/ut/multistream/test_ms_split.py +147 -0
  245. vllm_ascend-0.10.1rc1/tests/ut/ops/test_activation.py +61 -0
  246. vllm_ascend-0.10.1rc1/tests/ut/ops/test_common_fused_moe.py +69 -0
  247. vllm_ascend-0.10.1rc1/tests/ut/ops/test_fused_ops.py +741 -0
  248. vllm_ascend-0.10.1rc1/tests/ut/ops/test_layernorm.py +53 -0
  249. vllm_ascend-0.10.1rc1/tests/ut/ops/test_linear.py +363 -0
  250. vllm_ascend-0.10.1rc1/tests/ut/ops/test_rotary_embedding.py +318 -0
  251. vllm_ascend-0.10.1rc1/tests/ut/ops/test_token_dispatcher.py +606 -0
  252. vllm_ascend-0.10.1rc1/tests/ut/ops/test_vocab_parallel_embedding.py +232 -0
  253. vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +112 -0
  254. vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +167 -0
  255. vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +77 -0
  256. vllm_ascend-0.10.1rc1/tests/ut/quantization/test_func_wrapper.py +134 -0
  257. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/quantization/test_quant_config.py +6 -4
  258. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/quantization/test_quantizer.py +23 -0
  259. vllm_ascend-0.10.1rc1/tests/ut/quantization/test_w4a8_dynamic.py +166 -0
  260. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/quantization/test_w8a8.py +68 -44
  261. vllm_ascend-0.10.1rc1/tests/ut/sample/test_rejection_sampler.py +203 -0
  262. vllm_ascend-0.10.1rc1/tests/ut/sample/test_sampler.py +32 -0
  263. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/test_ascend_config.py +173 -79
  264. vllm_ascend-0.10.1rc1/tests/ut/test_envs.py +62 -0
  265. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/test_platform.py +177 -180
  266. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/test_utils.py +101 -105
  267. vllm_ascend-0.10.1rc1/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +195 -0
  268. vllm_ascend-0.10.1rc1/tests/ut/torchair/models/test_torchair_deepseek_v2.py +325 -0
  269. vllm_ascend-0.10.1rc1/tests/ut/torchair/ops/test_torchair_fused_moe.py +410 -0
  270. vllm_ascend-0.9.2rc1/tests/ut/ops/test_rotary_embedding.py → vllm_ascend-0.10.1rc1/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +332 -315
  271. vllm_ascend-0.10.1rc1/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +176 -0
  272. vllm_ascend-0.10.1rc1/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +75 -0
  273. vllm_ascend-0.10.1rc1/tests/ut/torchair/test_torchair_mla.py +817 -0
  274. vllm_ascend-0.10.1rc1/tests/ut/torchair/test_utils.py +149 -0
  275. vllm_ascend-0.10.1rc1/tests/ut/worker/test_input_batch.py +372 -0
  276. vllm_ascend-0.10.1rc1/tests/ut/worker/test_worker_v1.py +1143 -0
  277. vllm_ascend-0.10.1rc1/tools/check_python_src_init.py +76 -0
  278. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/__init__.py +0 -4
  279. vllm_ascend-0.10.1rc1/vllm_ascend/_version.py +34 -0
  280. vllm_ascend-0.10.1rc1/vllm_ascend/ascend_config.py +215 -0
  281. vllm_ascend-0.10.1rc1/vllm_ascend/ascend_forward_context.py +138 -0
  282. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/attention/attention_mask.py +35 -46
  283. vllm_ascend-0.10.1rc1/vllm_ascend/attention/attention_v1.py +604 -0
  284. vllm_ascend-0.10.1rc1/vllm_ascend/attention/mla_v1.py +1050 -0
  285. vllm_ascend-0.10.1rc1/vllm_ascend/attention/utils.py +95 -0
  286. vllm_ascend-0.10.1rc1/vllm_ascend/compilation/acl_graph.py +185 -0
  287. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/core/schedule_config.py +10 -0
  288. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/core/scheduler.py +100 -70
  289. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/device_allocator/camem.py +3 -2
  290. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/__init__.py +5 -4
  291. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +894 -0
  292. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/moe_comm_method.py +556 -0
  293. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/mooncake_connector.py +1070 -0
  294. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/parallel_state.py +119 -0
  295. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/tensor_parallel.py +248 -0
  296. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/envs.py +44 -33
  297. vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper/lora_ops.py +112 -0
  298. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/lora/punica_wrapper/punica_npu.py +43 -25
  299. vllm_ascend-0.10.1rc1/vllm_ascend/meta_registration.py +104 -0
  300. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/__init__.py +10 -7
  301. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/deepseek_dbo.py +12 -40
  302. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/deepseek_mtp.py +27 -10
  303. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/deepseek_v2.py +105 -97
  304. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_pyhccl.py → vllm_ascend-0.10.1rc1/vllm_ascend/models/deepseek_v3.py +10 -12
  305. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/pangu_moe.py +18 -35
  306. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/qwen2_5_vl.py +9 -5
  307. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +103 -3
  308. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/qwen2_vl.py +2 -2
  309. vllm_ascend-0.10.1rc1/vllm_ascend/models/qwen3.py +156 -0
  310. vllm_ascend-0.10.1rc1/vllm_ascend/models/qwen3_moe.py +393 -0
  311. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/decorator.py +0 -4
  312. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/ms_split.py +9 -7
  313. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/__init__.py +9 -2
  314. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/activation.py +14 -14
  315. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/attention.py +19 -15
  316. vllm_ascend-0.10.1rc1/vllm_ascend/ops/comm_utils.py +62 -0
  317. vllm_ascend-0.10.1rc1/vllm_ascend/ops/common_fused_moe.py +531 -0
  318. vllm_ascend-0.10.1rc1/vllm_ascend/ops/fused_moe.py +587 -0
  319. vllm_ascend-0.10.1rc1/vllm_ascend/ops/layernorm.py +85 -0
  320. vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers/experts_selector.py +283 -0
  321. vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers/moe_mlp.py +199 -0
  322. vllm_ascend-0.10.1rc1/vllm_ascend/ops/linear.py +309 -0
  323. vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher/token_dispatcher.py +809 -0
  324. vllm_ascend-0.10.1rc1/vllm_ascend/ops/rotary_embedding.py +339 -0
  325. vllm_ascend-0.10.1rc1/vllm_ascend/ops/sequence_parallel.py +120 -0
  326. vllm_ascend-0.10.1rc1/vllm_ascend/ops/vocab_parallel_embedding.py +254 -0
  327. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/__init__.py +19 -60
  328. {vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_0_9_2 → vllm_ascend-0.10.1rc1/vllm_ascend/patch/platform}/__init__.py +3 -1
  329. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +2 -24
  330. {vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_main → vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker}/__init__.py +3 -0
  331. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +3 -6
  332. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +147 -0
  333. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_logits.py +26 -0
  334. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_lora_embedding.py +29 -0
  335. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/platform.py +114 -76
  336. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/__init__.py +0 -0
  337. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/func_wrapper.py +33 -0
  338. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/quant_config.py +46 -10
  339. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/quantizer.py +31 -20
  340. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/w4a8_dynamic.py +394 -0
  341. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/w8a8.py +19 -130
  342. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/w8a8_dynamic.py +453 -0
  343. vllm_ascend-0.10.1rc1/vllm_ascend/sample/__init__.py +0 -0
  344. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/sample/rejection_sampler.py +100 -52
  345. vllm_ascend-0.10.1rc1/vllm_ascend/sample/sampler.py +86 -0
  346. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/__init__.py +0 -0
  347. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/__init__.py +0 -0
  348. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/qwen2.py +364 -0
  349. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/qwen3_moe.py +537 -0
  350. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
  351. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1049 -0
  352. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_deepseek_v3.py +28 -0
  353. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_pangu_moe.py +1119 -0
  354. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops/__init__.py +0 -0
  355. vllm_ascend-0.9.2rc1/vllm_ascend/ops/fused_moe.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops/torchair_fused_moe.py +1321 -1453
  356. vllm_ascend-0.9.2rc1/vllm_ascend/ops/rotary_embedding.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +372 -292
  357. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/__init__.py +0 -0
  358. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_quantizer.py +29 -0
  359. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +439 -0
  360. vllm_ascend-0.9.2rc1/vllm_ascend/quantization/w8a8_dynamic.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1035 -830
  361. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1_torchair.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_attention.py +97 -148
  362. vllm_ascend-0.9.2rc1/vllm_ascend/attention/mla_v1.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_mla.py +395 -301
  363. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_model_runner.py +446 -0
  364. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_worker.py +63 -0
  365. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/utils.py +205 -0
  366. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/utils.py +160 -165
  367. vllm_ascend-0.10.1rc1/vllm_ascend/worker/__init__.py +0 -0
  368. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/eagle_proposer_v1.py +31 -19
  369. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/model_runner_v1.py +1203 -691
  370. vllm_ascend-0.10.1rc1/vllm_ascend/worker/mtp_proposer_v1.py +439 -0
  371. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/npu_input_batch.py +156 -92
  372. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/worker_v1.py +56 -59
  373. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/PKG-INFO +17 -6
  374. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/SOURCES.txt +201 -67
  375. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/requires.txt +3 -4
  376. vllm_ascend-0.9.2rc1/.github/actionlint.yaml +0 -8
  377. vllm_ascend-0.9.2rc1/benchmarks/scripts/patch_benchmark_dataset.py +0 -79
  378. vllm_ascend-0.9.2rc1/benchmarks/scripts/run_accuracy.py +0 -313
  379. vllm_ascend-0.9.2rc1/csrc/kernels/advance_step.cpp +0 -241
  380. vllm_ascend-0.9.2rc1/csrc/torch_binding.cpp +0 -320
  381. vllm_ascend-0.9.2rc1/docs/source/assets/multi_node_dp.png +0 -0
  382. vllm_ascend-0.9.2rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -62
  383. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/lora.md +0 -8
  384. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/quantization.md +0 -106
  385. vllm_ascend-0.9.2rc1/docs/source/user_guide/release_notes.md +0 -310
  386. vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -49
  387. vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_models.md +0 -52
  388. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -138
  389. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/dp_proxy.py +0 -463
  390. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/find_device_ips.py +0 -69
  391. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -193
  392. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/run_decode_server.sh +0 -37
  393. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/run_prefill_server.sh +0 -37
  394. vllm_ascend-0.9.2rc1/examples/offline_distributed_inference_npu.py +0 -44
  395. vllm_ascend-0.9.2rc1/examples/offline_embed.py +0 -53
  396. vllm_ascend-0.9.2rc1/examples/offline_multi_step_custom_ops.py +0 -50
  397. vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16.sh +0 -23
  398. vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16_benmark.sh +0 -57
  399. vllm_ascend-0.9.2rc1/examples/run_dp_server.sh +0 -30
  400. vllm_ascend-0.9.2rc1/pyproject.toml +0 -25
  401. vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +0 -261
  402. vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +0 -115
  403. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_dynamic_npugraph_batchsize.py +0 -57
  404. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_pyhccl_distributed.py +0 -110
  405. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile/test_simple.py +0 -118
  406. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py +0 -728
  407. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py +0 -46
  408. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py +0 -60
  409. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_fused_moe.py +0 -100
  410. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_multi_step.py +0 -190
  411. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/sample/test_rejection_sampler.py +0 -608
  412. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -94
  413. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_aclgraph.py +0 -99
  414. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_chunked.py +0 -74
  415. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_offline_inference.py +0 -129
  416. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_prompt_embedding.py +0 -259
  417. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_sampler.py +0 -109
  418. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_scheduler.py +0 -390
  419. vllm_ascend-0.9.2rc1/tests/model_utils.py +0 -274
  420. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_buffer.py +0 -71
  421. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_connector.py +0 -146
  422. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_pipe.py +0 -145
  423. vllm_ascend-0.9.2rc1/tests/ut/distributed/test_parallel_state.py +0 -208
  424. vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -27
  425. vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_sampler.py +0 -46
  426. vllm_ascend-0.9.2rc1/tests/ut/worker/test_input_batch.py +0 -162
  427. vllm_ascend-0.9.2rc1/tests/ut/worker/test_pooling_model_runner.py +0 -355
  428. vllm_ascend-0.9.2rc1/tests/ut/worker/test_worker_v1.py +0 -1
  429. vllm_ascend-0.9.2rc1/tests/utils.py +0 -236
  430. vllm_ascend-0.9.2rc1/vllm_ascend/_version.py +0 -21
  431. vllm_ascend-0.9.2rc1/vllm_ascend/ascend_config.py +0 -171
  432. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention.py +0 -1228
  433. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1.py +0 -478
  434. vllm_ascend-0.9.2rc1/vllm_ascend/compilation/piecewise_backend.py +0 -225
  435. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -209
  436. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/simple_connector.py +0 -379
  437. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -209
  438. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/utils.py +0 -40
  439. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/llmdatadist_connector.py +0 -470
  440. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/parallel_state.py +0 -77
  441. vllm_ascend-0.9.2rc1/vllm_ascend/models/qwen3_moe.py +0 -35
  442. vllm_ascend-0.9.2rc1/vllm_ascend/ops/cache.py +0 -35
  443. vllm_ascend-0.9.2rc1/vllm_ascend/ops/common_fused_moe.py +0 -112
  444. vllm_ascend-0.9.2rc1/vllm_ascend/ops/layernorm.py +0 -49
  445. vllm_ascend-0.9.2rc1/vllm_ascend/ops/vocab_parallel_embedding.py +0 -67
  446. vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/__init__.py +0 -25
  447. vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/__init__.py +0 -26
  448. vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -91
  449. vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_sampler.py +0 -83
  450. vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +0 -157
  451. vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -38
  452. vllm_ascend-0.9.2rc1/vllm_ascend/pool/__init__.py +0 -16
  453. vllm_ascend-0.9.2rc1/vllm_ascend/pool/metadata.py +0 -32
  454. vllm_ascend-0.9.2rc1/vllm_ascend/worker/__init__.py +0 -17
  455. vllm_ascend-0.9.2rc1/vllm_ascend/worker/cache_engine.py +0 -83
  456. vllm_ascend-0.9.2rc1/vllm_ascend/worker/draft_model_runner.py +0 -320
  457. vllm_ascend-0.9.2rc1/vllm_ascend/worker/model_runner.py +0 -1607
  458. vllm_ascend-0.9.2rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -188
  459. vllm_ascend-0.9.2rc1/vllm_ascend/worker/multi_step_runner.py +0 -737
  460. vllm_ascend-0.9.2rc1/vllm_ascend/worker/multi_step_worker.py +0 -194
  461. vllm_ascend-0.9.2rc1/vllm_ascend/worker/pooling_model_runner.py +0 -186
  462. vllm_ascend-0.9.2rc1/vllm_ascend/worker/worker.py +0 -579
  463. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  464. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  465. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  466. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  467. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  468. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  469. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  470. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  471. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  472. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  473. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/dependabot.yml +0 -0
  474. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/labeler.yml +0 -0
  475. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/label_merge_conflict.yml +0 -0
  476. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/labeler.yml +0 -0
  477. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/matchers/actionlint.json +0 -0
  478. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/matchers/mypy.json +0 -0
  479. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/matchers/ruff.json +0 -0
  480. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.readthedocs.yaml +0 -0
  481. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/CMakeLists.txt +0 -0
  482. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/DCO +0 -0
  483. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/LICENSE +0 -0
  484. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  485. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  486. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/tests/latency-tests.json +0 -0
  487. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/tests/serving-tests.json +0 -0
  488. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/tests/throughput-tests.json +0 -0
  489. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/cmake/utils.cmake +0 -0
  490. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/collect_env.py +0 -0
  491. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/camem_allocator.cpp +0 -0
  492. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  493. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/types.h +0 -0
  494. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/utils.h +0 -0
  495. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/requirements-test.txt +0 -0
  496. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/user_stories/index.md +0 -0
  497. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/contribution/index.md +1 -1
  498. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
  499. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  500. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/modeling/index.md +0 -0
  501. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/index.md +0 -0
  502. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  503. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  504. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu.md +0 -0
  505. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  506. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  507. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
  508. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/configuration/index.md +0 -0
  509. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  510. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  511. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
  512. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  513. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
  514. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/eplb/eplb_deepseek.py +0 -0
  515. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/format.sh +0 -0
  516. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/mypy.ini +0 -0
  517. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/packages.txt +0 -0
  518. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/setup.cfg +0 -0
  519. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/__init__.py +0 -0
  520. {vllm_ascend-0.9.2rc1/tests/e2e/singlecard → vllm_ascend-0.10.1rc1/tests/e2e}/__init__.py +0 -0
  521. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/common.sh +0 -0
  522. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  523. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  524. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  525. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  526. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/prompts/example.txt +0 -0
  527. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  528. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/run_doctests.sh +0 -0
  529. {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile → vllm_ascend-0.10.1rc1/tests/e2e/singlecard}/__init__.py +0 -0
  530. {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core → vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
  531. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  532. {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops → vllm_ascend-0.10.1rc1/tests/ut}/__init__.py +0 -0
  533. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/fake_weight/config.json +0 -0
  534. {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/sample → vllm_ascend-0.10.1rc1/tests/ut/models}/__init__.py +0 -0
  535. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/ops/expert_map.json +0 -0
  536. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  537. {vllm_ascend-0.9.2rc1/vllm_ascend/attention → vllm_ascend-0.10.1rc1/tests/ut/torchair}/__init__.py +0 -0
  538. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/actionlint.sh +0 -0
  539. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/check_repo.sh +0 -0
  540. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/enforce_regex_import.py +0 -0
  541. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/mypy.sh +0 -0
  542. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/png-lint.sh +0 -0
  543. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/shellcheck.sh +0 -0
  544. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/sphinx-lint.sh +0 -0
  545. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/typos.toml +0 -0
  546. {vllm_ascend-0.9.2rc1/vllm_ascend/compilation → vllm_ascend-0.10.1rc1/vllm_ascend/attention}/__init__.py +0 -0
  547. {vllm_ascend-0.9.2rc1/vllm_ascend/core → vllm_ascend-0.10.1rc1/vllm_ascend/compilation}/__init__.py +0 -0
  548. {vllm_ascend-0.9.2rc1/vllm_ascend/device_allocator → vllm_ascend-0.10.1rc1/vllm_ascend/core}/__init__.py +0 -0
  549. {vllm_ascend-0.9.2rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.10.1rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
  550. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/communication_op.py +0 -0
  551. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/communicator.py +0 -0
  552. {vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.10.1rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  553. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  554. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  555. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/lora/__init__.py +0 -0
  556. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/lora/punica_wrapper/__init__.py +0 -0
  557. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/__init__.py +0 -0
  558. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/base.py +0 -0
  559. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/context.py +0 -0
  560. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/layers.py +0 -0
  561. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/metadata.py +0 -0
  562. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  563. {vllm_ascend-0.9.2rc1/vllm_ascend/quantization → vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers}/__init__.py +0 -0
  564. {vllm_ascend-0.9.2rc1/vllm_ascend/sample → vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher}/__init__.py +0 -0
  565. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
  566. {vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_0_9_2 → vllm_ascend-0.10.1rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
  567. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  568. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  569. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  570. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  571. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  572. {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ # https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
2
+ have_fun: false # Just review the code
3
+ code_review:
4
+ comment_severity_threshold: HIGH # Reduce quantity of comments
5
+ pull_request_opened:
6
+ summary: false # Don't summarize the PR in a separate comment
@@ -14,7 +14,7 @@
14
14
  # limitations under the License.
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
- ARG PY_VERSION=3.10
17
+ ARG PY_VERSION=3.11
18
18
  FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
19
19
 
20
20
  ARG COMPILE_CUSTOM_KERNELS=1
@@ -40,7 +40,7 @@ body:
40
40
  attributes:
41
41
  label: Any Other Things.
42
42
  description: >
43
- Any other things you would like to mention.
43
+ Any other things you would like to mention, such as feature branch request.
44
44
  validations:
45
45
  required: false
46
46
  - type: markdown
@@ -30,6 +30,8 @@ body:
30
30
 
31
31
  - [ ] Add release note to docs/source/user_guide/release_notes.md
32
32
 
33
+ - [ ] Update release version in README.md and README.zh.md
34
+
33
35
  - [ ] Update version info in docs/source/community/versioning_policy.md
34
36
 
35
37
  - [ ] Update contributor info in docs/source/community/contributors.md
@@ -25,4 +25,3 @@ CI passed with new added/existing test.
25
25
  If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
26
26
  If tests were not added, please describe why they were not added and/or why it was difficult to add.
27
27
  -->
28
-
@@ -0,0 +1,17 @@
1
+ self-hosted-runner:
2
+ # Labels of self-hosted runner in array of strings.
3
+ labels:
4
+ - linux-aarch64-a2-0
5
+ - linux-aarch64-a2-1
6
+ - linux-aarch64-a2-2
7
+ - linux-aarch64-a2-4
8
+ - linux-aarch64-a2-8
9
+ - linux-arm64-npu-static-8
10
+ - linux-aarch64-310p-1
11
+ - linux-aarch64-310p-2
12
+ - linux-aarch64-310p-4
13
+ - ubuntu-24.04-arm
14
+ - linux-aarch64-a3-1
15
+ - linux-aarch64-a3-2
16
+ - linux-aarch64-a3-4
17
+ - linux-aarch64-a3-8
@@ -30,6 +30,7 @@ VLLM_VERSION=$2
30
30
  VLLM_COMMIT=$3
31
31
  OLD=/tmp/orig_pr_body.txt
32
32
  NEW=/tmp/new_pr_body.txt
33
+ FINAL=/tmp/final_pr_body.txt
33
34
 
34
35
  gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
35
36
  cp "${OLD}" "${NEW}"
@@ -41,16 +42,18 @@ sed -i '/- vLLM .*$/d' "${NEW}"
41
42
  echo ""
42
43
  echo "- vLLM version: $VLLM_VERSION"
43
44
  echo "- vLLM main: $VLLM_COMMIT"
44
- echo ""
45
45
  } >> "${NEW}"
46
46
 
47
+ # Remove redundant empty lines
48
+ uniq "${NEW}" > "${FINAL}"
49
+
47
50
  # Run this only if ${NEW} is different than ${OLD}
48
- if ! cmp -s "${OLD}" "${NEW}"; then
51
+ if ! cmp -s "${OLD}" "${FINAL}"; then
49
52
  echo
50
53
  echo "Updating PR body:"
51
54
  echo
52
55
  cat "${NEW}"
53
- gh pr edit --body-file "${NEW}" "${PR_NUMBER}"
56
+ gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
54
57
  else
55
58
  echo "No changes needed"
56
59
  fi
@@ -29,35 +29,15 @@ on:
29
29
  types: [ labeled ]
30
30
  workflow_dispatch:
31
31
  inputs:
32
- vllm-version:
33
- description: 'vllm version:'
34
- required: true
35
- type: choice
36
- # Please also update this when bump matched version
37
- # Current supported vLLM versions
38
- options:
39
- - main
40
- - v0.9.2
41
- - v0.9.1
42
- - v0.7.3
43
32
  vllm-ascend-version:
44
- description: 'vllm-ascend version:'
33
+ description: 'vllm-ascend:'
45
34
  required: true
46
35
  type: choice
36
+ # Current supported vLLM versions
47
37
  options:
38
+ - latest
48
39
  - main
49
- - v0.9.1-dev
50
- - v0.7.3-dev
51
- models:
52
- description: 'model:'
53
- required: true
54
- type: choice
55
- options:
56
- - all
57
- - Qwen/Qwen2.5-VL-7B-Instruct
58
- - Qwen/Qwen3-8B-Base
59
- - Qwen/Qwen3-30B-A3B
60
- default: 'all'
40
+ default: main
61
41
 
62
42
  # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
63
43
  # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -76,58 +56,29 @@ jobs:
76
56
  # test will be triggered when tag '*-accuracy-test' & 'ready-for-test' or workflow_dispatch job
77
57
  if: >-
78
58
  ${{
79
- (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
80
- contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
81
- contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
82
- contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
59
+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
83
60
  contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
84
61
  github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
85
62
  }}
86
- runs-on: >-
87
- ${{
88
- (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
89
- 'linux-arm64-npu-2'
90
- }}
63
+ runs-on: ${{ matrix.runner }}
91
64
  strategy:
92
65
  matrix:
93
- # the accuracy test will run:
94
- # 1. workflow_dispatch with models input
95
- # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
96
- # - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
97
- # 2. PR labeled with "*-accuracy-test"
98
- # - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
99
- # - dense-accuracy-test: Qwen/Qwen3-8B-Base
100
- # - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
101
- # - moe-accuracy-test: Qwen/Qwen3-30B-A3B
102
- model_name: ${{ fromJSON(
103
- (github.event_name == 'schedule' &&
104
- '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
105
- (github.event.inputs.models == 'all' &&
106
- '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
107
- (github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
108
- '["Qwen/Qwen3-30B-A3B"]') ||
109
- (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
110
- '["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
111
- (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
112
- '["Qwen/Qwen3-8B-Base"]') ||
113
- contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
114
- '["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
115
- contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
116
- '["Qwen/Qwen3-8B-Base"]' ||
117
- contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
118
- '["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
119
- contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
120
- '["Qwen/Qwen3-30B-A3B"]'
121
- ) }}
122
-
66
+ include:
67
+ - model_name: Qwen3-8B-Base
68
+ runner: linux-aarch64-a2-1
69
+ - model_name: Qwen2.5-VL-7B-Instruct
70
+ runner: linux-aarch64-a2-1
71
+ - model_name: Qwen3-30B-A3B
72
+ runner: linux-aarch64-a2-2
73
+ - model_name: DeepSeek-V2-Lite
74
+ runner: linux-aarch64-a2-2
123
75
  fail-fast: false
76
+
124
77
  name: ${{ matrix.model_name }} accuracy
125
78
  container:
126
- image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
79
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
127
80
  env:
128
- DATASET_SOURCE: ModelScope
129
81
  VLLM_USE_MODELSCOPE: True
130
- USE_MODELSCOPE_HUB: 1
131
82
  # 1. If version specified (work_dispatch), do specified branch accuracy test
132
83
  # 2. If no version (labeled PR), do accuracy test by default ref:
133
84
  # The branch, tag or SHA to checkout. When checking out the repository that
@@ -139,18 +90,18 @@ jobs:
139
90
  - name: Checkout repository
140
91
  uses: actions/checkout@v4
141
92
 
142
- - name: Check npu and CANN info
93
+ - name: Set model name as output
94
+ id: set_output
143
95
  run: |
144
- npu-smi info
145
- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
96
+ echo "model_name=${{ matrix.model_name }}" >> $GITHUB_OUTPUT
146
97
 
147
98
  - name: Config mirrors
148
99
  run: |
149
- sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
150
- pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
100
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
101
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
102
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
151
103
  apt-get update -y
152
104
  apt install git -y
153
- git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
154
105
 
155
106
  - name: Install system dependencies
156
107
  run: |
@@ -161,13 +112,30 @@ jobs:
161
112
  uses: actions/checkout@v4
162
113
  with:
163
114
  repository: vllm-project/vllm
115
+ ref: v0.10.1.1
164
116
  path: ./vllm-empty
165
- # Please also update this when bump matched version
166
- ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
167
117
 
168
118
  - name: Install vllm-project/vllm from source
169
119
  working-directory: ./vllm-empty
170
- run: VLLM_TARGET_DEVICE=empty pip install -e .
120
+ run: |
121
+ VLLM_TARGET_DEVICE=empty pip install -e .
122
+
123
+ - name: Resolve vllm-ascend version
124
+ run: |
125
+ VERSION_INPUT="${{ github.event.inputs.vllm-ascend-version }}"
126
+
127
+ if [[ "$VERSION_INPUT" == "latest" ]]; then
128
+ TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
129
+ LATEST_TAG=$(echo "$TAGS" | head -n1)
130
+ if [[ -z "$LATEST_TAG" ]]; then
131
+ RESOLVED_VERSION="main"
132
+ else
133
+ RESOLVED_VERSION="$LATEST_TAG"
134
+ fi
135
+ else
136
+ RESOLVED_VERSION="$VERSION_INPUT"
137
+ fi
138
+ echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
171
139
 
172
140
  - name: Checkout vllm-project/vllm-ascend repo
173
141
  uses: actions/checkout@v4
@@ -182,8 +150,8 @@ jobs:
182
150
  PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
183
151
  run: |
184
152
  pip install -r requirements-dev.txt
185
- pip install -v -e .
186
-
153
+ pip install -v -e .
154
+
187
155
  - name: Get vLLM commit hash and URL
188
156
  working-directory: ./vllm-empty
189
157
  run: |
@@ -196,15 +164,6 @@ jobs:
196
164
  VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
197
165
  echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
198
166
 
199
- - name: Print resolved hashes
200
- run: |
201
- echo "vLLM : ${{ env.VLLM_COMMIT }}"
202
- echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
203
-
204
- - name: Install lm-eval, ray, and datasets
205
- run: |
206
- pip install lm-eval==0.4.8
207
-
208
167
  - name: Collect version info
209
168
  run: |
210
169
  for dir in /usr/local/Ascend/ascend-toolkit/*; do
@@ -224,39 +183,27 @@ jobs:
224
183
  pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
225
184
  pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
226
185
  pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
227
- echo "GHA_VLLM_ASCEND_VERSION=${{ github.event.inputs.vllm-ascend-version || github.ref }}"
228
186
  } >> "$GITHUB_ENV"
229
-
230
- - name: Print versions
231
- run: |
232
- echo "CANN: ${{ env.GHA_CANN_VERSION }}"
233
- echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
234
- echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
235
- echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
236
- echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
237
187
 
238
- - name: Run Accuracy Test
188
+ - name: Run accuracy test
239
189
  id: report
240
- working-directory: ./benchmarks
241
190
  env:
242
- PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
191
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
192
+ VLLM_USE_MODELSCOPE: True
193
+ VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
194
+ VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
195
+ VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
196
+ VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
197
+ CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
198
+ TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
199
+ TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
243
200
  run: |
244
201
  model_base_name=$(basename ${{ matrix.model_name }})
245
202
  markdown_name="${model_base_name}"
246
- echo "markdown_name=$markdown_name"
247
203
  echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
248
- mkdir -p ./accuracy
249
-
250
- python ./scripts/run_accuracy.py \
251
- --model "${{ matrix.model_name }}" \
252
- --output "./accuracy/${markdown_name}.md" \
253
- --vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
254
- --cann_version "${{ env.GHA_CANN_VERSION }}" \
255
- --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
256
- --torch_version "${{ env.GHA_TORCH_VERSION }}" \
257
- --vllm_version "${{ env.GHA_VLLM_VERSION }}" \
258
- --vllm_commit "${{ env.VLLM_COMMIT }}" \
259
- --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
204
+ mkdir -p ./benchmarks/accuracy
205
+ pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
206
+ --config ./tests/e2e/models/configs/${{ matrix.model_name }}.yaml
260
207
 
261
208
  - name: Generate step summary
262
209
  if: ${{ always() }}
@@ -268,19 +215,7 @@ jobs:
268
215
  SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
269
216
  echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
270
217
 
271
- - name: Check report first line for failure
272
- id: check_report
273
- run: |
274
- REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
275
- echo "Scanning $REPORT_PATH for ❌ …"
276
- if grep -q '❌' "$REPORT_PATH"; then
277
- echo "contains_fail=true" >> $GITHUB_OUTPUT
278
- else
279
- echo "contains_fail=false" >> $GITHUB_OUTPUT
280
- fi
281
-
282
218
  - name: Upload Report
283
- if: ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
284
219
  uses: actions/upload-artifact@v4
285
220
  with:
286
221
  name: "report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
@@ -289,10 +224,14 @@ jobs:
289
224
  retention-days: 90
290
225
  overwrite: true
291
226
 
227
+ outputs:
228
+ model_name: ${{ steps.set_output.outputs.model_name }}
229
+ vllm_ascend_version: ${{ env.GHA_VLLM_ASCEND_VERSION }}
230
+
292
231
  create_pr:
293
232
  runs-on: ubuntu-latest
294
233
  needs: accuracy_tests
295
- if: ${{ github.event_name == 'workflow_dispatch' }}
234
+ if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
296
235
  env:
297
236
  UPSTREAM_REPO: vllm-project/vllm-ascend
298
237
  steps:
@@ -302,7 +241,7 @@ jobs:
302
241
  repository: vllm-ascend-ci/vllm-ascend
303
242
  token: ${{ secrets.PAT_TOKEN }}
304
243
  ref: main
305
-
244
+
306
245
  - name: Add upstream remote
307
246
  run: |
308
247
  git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
@@ -319,10 +258,10 @@ jobs:
319
258
  TIMESTAMP=$(date +%Y%m%d%H%M%S)
320
259
  BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
321
260
  echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
322
- git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
261
+ git checkout -B "${BRANCH_NAME}" upstream/main
323
262
 
324
263
  - name: Download only current run reports
325
- uses: actions/download-artifact@v4
264
+ uses: actions/download-artifact@v5
326
265
  with:
327
266
  path: ./docs/source/developer_guide/evaluation/accuracy_report
328
267
  pattern: report-*
@@ -334,7 +273,7 @@ jobs:
334
273
  find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
335
274
  find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
336
275
  find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
337
-
276
+
338
277
  - name: Update accuracy_report/index.md
339
278
  run: |
340
279
  REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
@@ -360,7 +299,7 @@ jobs:
360
299
  GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
361
300
  run: |
362
301
  git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
363
- git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
302
+ git commit -s -m "[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}"
364
303
  git push -f origin "${{ env.BRANCH_NAME }}"
365
304
 
366
305
  - name: Create PR in upstream via API
@@ -372,18 +311,12 @@ jobs:
372
311
  owner: 'vllm-project',
373
312
  repo: 'vllm-ascend',
374
313
  head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
375
- base: '${{ github.event.inputs.vllm-ascend-version }}',
376
- title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
377
- body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
378
- ${{
379
- github.event.inputs.models == 'all'
380
- && 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
381
- || github.event.inputs.models
382
- }}
383
-
384
- - [Workflow run][1]
314
+ base: 'main',
315
+ title: `[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}`,
316
+ body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base, DeepSeek-V2-Lite)
385
317
 
386
- [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
318
+ - [Workflow run][1]
319
+
320
+ [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
387
321
  });
388
322
  core.info(`Created PR #${pr.data.number}`);
389
-
@@ -46,7 +46,7 @@ jobs:
46
46
  echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
47
 
48
48
  - name: Checkout repository
49
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
49
+ uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
50
50
 
51
51
  - name: Set up Python
52
52
  uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
@@ -6,10 +6,10 @@ name: 'image / openEuler / 310p'
6
6
  # - push: ${{ github.event_name != 'pull_request' }} ==> false
7
7
  # 2. branches push trigger image publish
8
8
  # - is for branch/dev/nightly image
9
- # - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
9
+ # - commits are merged into main/*-dev ==> vllm-ascend:main-310p-openeuler / vllm-ascend:*-dev-310p-openeuler
10
10
  # 3. tags push trigger image publish
11
11
  # - is for final release image
12
- # - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler|latest / vllm-ascend:v1.2.3rc1-openeuler
12
+ # - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p-openeuler / vllm-ascend:v1.2.3rc1-310p-openeuler
13
13
  on:
14
14
  pull_request:
15
15
  branches:
@@ -33,9 +33,15 @@ on:
33
33
  tags:
34
34
  - 'v*'
35
35
  paths:
36
- - '.github/workflows/image_310p.openeuler.yml'
36
+ - '.github/workflows/image_310p_openeuler.yml'
37
37
  - 'Dockerfile.310p.openEuler'
38
38
  - 'vllm_ascend/**'
39
+ - 'setup.py'
40
+ - 'pyproject.toml'
41
+ - 'requirements.txt'
42
+ - 'cmake/**'
43
+ - 'CMakeLists.txt'
44
+ - 'csrc/**'
39
45
 
40
46
  jobs:
41
47
  build:
@@ -63,16 +69,18 @@ jobs:
63
69
  # Note for test case
64
70
  # https://github.com/marketplace/actions/docker-metadata-action#typeref
65
71
  # 1. branch job publish per main/*-dev branch commits
66
- # 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
72
+ # 2. main and dev pull_request is build only, so the tag pr-N-310p-openeuler is fine
67
73
  # 3. only pep440 matched tag will be published:
68
- # - v0.7.1 --> v0.7.1-openeuler, latest
69
- # - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest
74
+ # - v0.7.1 --> v0.7.1-310p-openeuler
75
+ # - pre/post/dev: v0.7.1rc1-310p-openeuler/v0.7.1rc1-310p-openeuler/v0.7.1rc1.dev1-310p-openeuler/v0.7.1.post1-310p-openeuler, no latest
70
76
  # which follow the rule from vLLM with prefix v
71
77
  # TODO(yikun): the post release might be considered as latest release
72
78
  tags: |
73
79
  type=ref,event=branch,suffix=-310p-openeuler
74
- type=ref,event=pr,suffix=-openeuler
80
+ type=ref,event=pr,suffix=-310p-openeuler
75
81
  type=pep440,pattern={{raw}},suffix=-310p-openeuler
82
+ flavor:
83
+ latest=false
76
84
 
77
85
  - name: Free up disk space
78
86
  uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
@@ -112,3 +120,4 @@ jobs:
112
120
  file: Dockerfile.310p.openEuler
113
121
  build-args: |
114
122
  PIP_INDEX_URL=https://pypi.org/simple
123
+ provenance: false
@@ -6,10 +6,10 @@ name: 'image / Ubuntu / 310p'
6
6
  # - push: ${{ github.event_name != 'pull_request' }} ==> false
7
7
  # 2. branches push trigger image publish
8
8
  # - is for branch/dev/nightly image
9
- # - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
9
+ # - commits are merged into main/*-dev ==> vllm-ascend:main-310p / vllm-ascend:*-dev-310p
10
10
  # 3. tags push trigger image publish
11
11
  # - is for final release image
12
- # - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3|latest / vllm-ascend:v1.2.3rc1
12
+ # - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p / vllm-ascend:v1.2.3rc1-310p
13
13
  on:
14
14
  pull_request:
15
15
  branches:
@@ -36,6 +36,12 @@ on:
36
36
  - '.github/workflows/image_310p_ubuntu.yml'
37
37
  - 'Dockerfile.310p'
38
38
  - 'vllm_ascend/**'
39
+ - 'setup.py'
40
+ - 'pyproject.toml'
41
+ - 'requirements.txt'
42
+ - 'cmake/**'
43
+ - 'CMakeLists.txt'
44
+ - 'csrc/**'
39
45
  jobs:
40
46
 
41
47
  build:
@@ -61,14 +67,16 @@ jobs:
61
67
  # 1. branch job publish per main/*-dev branch commits
62
68
  # 2. main and dev pull_request is build only, so the tag pr-N is fine
63
69
  # 3. only pep440 matched tag will be published:
64
- # - v0.7.1 --> v0.7.1, latest
65
- # - pre/post/dev: v0.7.1rc1/v0.7.1rc1/v0.7.1rc1.dev1/v0.7.1.post1, no latest
70
+ # - v0.7.1 --> v0.7.1-310p
71
+ # - pre/post/dev: v0.7.1rc1-310p/v0.7.1rc1-310p/v0.7.1rc1.dev1-310p/v0.7.1.post1-310p, no latest
66
72
  # which follow the rule from vLLM with prefix v
67
73
  # TODO(yikun): the post release might be considered as latest release
68
74
  tags: |
69
75
  type=ref,event=branch,suffix=-310p
70
76
  type=ref,event=pr,suffix=-310p
71
77
  type=pep440,pattern={{raw}},suffix=-310p
78
+ flavor:
79
+ latest=false
72
80
 
73
81
  - name: Free up disk space
74
82
  uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
@@ -108,3 +116,4 @@ jobs:
108
116
  tags: ${{ steps.meta.outputs.tags }}
109
117
  build-args: |
110
118
  PIP_INDEX_URL=https://pypi.org/simple
119
+ provenance: false