vllm-ascend 0.9.1rc2__tar.gz → 0.10.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (569)
  1. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/Dockerfile.buildwheel +1 -1
  2. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +1 -1
  3. vllm_ascend-0.10.0rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml +100 -0
  4. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -1
  5. vllm_ascend-0.10.0rc1/.github/actionlint.yaml +13 -0
  6. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/dependabot.yml +3 -0
  7. vllm_ascend-0.10.0rc1/.github/format_pr_body.sh +59 -0
  8. vllm_ascend-0.10.0rc1/.github/workflows/accuracy_test.yaml +321 -0
  9. vllm_ascend-0.10.0rc1/.github/workflows/format_pr_body.yaml +63 -0
  10. vllm_ascend-0.10.0rc1/.github/workflows/image_310p_openeuler.yml +123 -0
  11. vllm_ascend-0.10.0rc1/.github/workflows/image_310p_ubuntu.yml +119 -0
  12. vllm_ascend-0.10.0rc1/.github/workflows/image_a3_openeuler.yml +123 -0
  13. vllm_ascend-0.10.0rc1/.github/workflows/image_a3_ubuntu.yml +119 -0
  14. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/image_openeuler.yml +27 -10
  15. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/image_ubuntu.yml +20 -5
  16. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/nightly_benchmarks.yaml +23 -12
  17. vllm_ascend-0.10.0rc1/.github/workflows/pre-commit.yml +37 -0
  18. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/release_code.yml +0 -12
  19. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/release_whl.yml +11 -16
  20. vllm_ascend-0.10.0rc1/.github/workflows/reminder_comment.yml +26 -0
  21. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/vllm_ascend_doctest.yaml +13 -25
  22. vllm_ascend-0.10.0rc1/.github/workflows/vllm_ascend_test.yaml +292 -0
  23. vllm_ascend-0.10.0rc1/.github/workflows/vllm_ascend_test_310p.yaml +117 -0
  24. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/vllm_ascend_test_long_term.yaml +13 -19
  25. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/vllm_ascend_test_pd.yaml +11 -7
  26. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.gitignore +6 -0
  27. vllm_ascend-0.10.0rc1/.pre-commit-config.yaml +147 -0
  28. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/CODE_OF_CONDUCT.md +0 -1
  29. vllm_ascend-0.10.0rc1/CONTRIBUTING.md +3 -0
  30. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/Dockerfile +4 -3
  31. vllm_ascend-0.10.0rc1/Dockerfile.310p +61 -0
  32. vllm_ascend-0.10.0rc1/Dockerfile.310p.openEuler +58 -0
  33. vllm_ascend-0.10.0rc1/Dockerfile.a3 +60 -0
  34. vllm_ascend-0.10.0rc1/Dockerfile.a3.openEuler +57 -0
  35. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/Dockerfile.openEuler +4 -3
  36. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/PKG-INFO +20 -9
  37. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/README.md +19 -8
  38. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/README.zh.md +17 -7
  39. vllm_ascend-0.10.0rc1/benchmarks/README.md +175 -0
  40. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/ops/ben_vocabparallelembedding.py +48 -34
  41. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/requirements-bench.txt +0 -1
  42. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/scripts/convert_json_to_markdown.py +54 -49
  43. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/scripts/perf_result_template.md +1 -1
  44. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/scripts/run-performance-benchmarks.sh +20 -22
  45. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/tests/serving-tests.json +3 -3
  46. vllm_ascend-0.10.0rc1/codecov.yml +30 -0
  47. vllm_ascend-0.10.0rc1/csrc/kernels/bgmv_expand.cpp +369 -0
  48. vllm_ascend-0.10.0rc1/csrc/kernels/bgmv_shrink.cpp +252 -0
  49. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +63 -30
  50. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/kernels/pos_encoding_kernels.cpp +15 -5
  51. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/kernels/utils.h +3 -1
  52. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/ops.h +28 -12
  53. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/torch_binding.cpp +90 -90
  54. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/Makefile +4 -0
  55. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/README.md +6 -5
  56. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/requirements-docs.txt +1 -0
  57. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/requirements-test.txt +1 -1
  58. vllm_ascend-0.10.0rc1/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  59. vllm_ascend-0.10.0rc1/docs/source/assets/multi_node_dp_kimi.png +0 -0
  60. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/community/contributors.md +23 -1
  61. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/community/versioning_policy.md +3 -0
  62. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/conf.py +8 -6
  63. vllm_ascend-0.10.0rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +300 -0
  64. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/faqs.md +6 -3
  65. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/installation.md +3 -2
  66. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +1647 -0
  67. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +204 -0
  68. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +103 -0
  69. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +87 -0
  70. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +624 -0
  71. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +187 -0
  72. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
  73. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +26 -0
  74. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +26 -0
  75. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +112 -0
  76. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +65 -0
  77. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +83 -0
  78. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +33 -0
  79. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
  80. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +333 -0
  81. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +29 -0
  82. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +32 -0
  83. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +26 -0
  84. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +88 -0
  85. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +81 -0
  86. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +479 -0
  87. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +79 -0
  88. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +293 -0
  89. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +149 -0
  90. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +29 -0
  91. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +192 -0
  92. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +62 -0
  93. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +86 -0
  94. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +82 -0
  95. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +71 -0
  96. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +110 -0
  97. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +107 -0
  98. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +77 -0
  99. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +99 -0
  100. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +70 -0
  101. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +290 -0
  102. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +28 -0
  103. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +30 -0
  104. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +121 -0
  105. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +30 -0
  106. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +58 -0
  107. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +183 -0
  108. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +156 -0
  109. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +220 -0
  110. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +1660 -0
  111. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +30 -0
  112. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +264 -0
  113. vllm_ascend-0.10.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +214 -0
  114. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/quick_start.md +4 -1
  115. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/index.md +6 -0
  116. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/multi_node.md +7 -6
  117. vllm_ascend-0.10.0rc1/docs/source/tutorials/multi_node_kimi.md +153 -0
  118. vllm_ascend-0.10.0rc1/docs/source/tutorials/multi_npu_moge.md +242 -0
  119. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/multi_npu_quantization.md +1 -1
  120. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/multi_npu_qwen3_moe.md +2 -1
  121. vllm_ascend-0.10.0rc1/docs/source/tutorials/single_node_300i.md +406 -0
  122. vllm_ascend-0.10.0rc1/docs/source/tutorials/single_npu_audio.md +122 -0
  123. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/single_npu_multimodal.md +2 -2
  124. vllm_ascend-0.10.0rc1/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
  125. vllm_ascend-0.10.0rc1/docs/source/tutorials/single_npu_qwen3_quantization.md +131 -0
  126. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/quantization.md +33 -33
  127. vllm_ascend-0.10.0rc1/docs/source/user_guide/release_notes.md +498 -0
  128. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/support_matrix/supported_features.md +1 -7
  129. vllm_ascend-0.10.0rc1/docs/source/user_guide/support_matrix/supported_models.md +79 -0
  130. {vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1 → vllm_ascend-0.10.0rc1/examples/disaggregated_prefill_v1}/gen_ranktable.py +6 -4
  131. {vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1 → vllm_ascend-0.10.0rc1/examples/disaggregated_prefill_v1}/gen_ranktable.sh +1 -1
  132. {vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1 → vllm_ascend-0.10.0rc1/examples/disaggregated_prefill_v1}/load_balance_proxy_server_example.py +83 -0
  133. vllm_ascend-0.10.0rc1/examples/eplb/eplb_deepseek.py +205 -0
  134. vllm_ascend-0.10.0rc1/examples/eplb/eplb_strategy.py +186 -0
  135. vllm_ascend-0.9.1rc2/examples/dp_offline/data_parallel.py → vllm_ascend-0.10.0rc1/examples/offline_data_parallel.py +64 -33
  136. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/examples/offline_disaggregated_prefill_npu.py +18 -11
  137. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/examples/offline_dualbatch_overlap_npu.py +2 -1
  138. vllm_ascend-0.10.0rc1/examples/offline_embed.py +58 -0
  139. vllm_ascend-0.10.0rc1/examples/offline_external_launcher.py +287 -0
  140. vllm_ascend-0.10.0rc1/examples/offline_inference_audio_language.py +105 -0
  141. vllm_ascend-0.9.1rc2/examples/offline_distributed_inference_npu.py → vllm_ascend-0.10.0rc1/examples/offline_inference_npu.py +32 -25
  142. vllm_ascend-0.9.1rc2/examples/offline_inference_npu_v1.py → vllm_ascend-0.10.0rc1/examples/offline_inference_npu_tp2.py +10 -4
  143. vllm_ascend-0.10.0rc1/examples/offline_inference_sleep_mode_npu.py +57 -0
  144. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/examples/prompt_embedding_inference.py +5 -0
  145. vllm_ascend-0.10.0rc1/examples/run_dp_server.sh +32 -0
  146. vllm_ascend-0.10.0rc1/format.sh +44 -0
  147. vllm_ascend-0.10.0rc1/pyproject.toml +36 -0
  148. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/requirements-dev.txt +10 -3
  149. vllm_ascend-0.10.0rc1/requirements-lint.txt +9 -0
  150. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/requirements.txt +7 -8
  151. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/setup.py +30 -1
  152. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/common.sh +24 -1
  153. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/conftest.py +159 -6
  154. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/doctests/001-quickstart-test.sh +11 -2
  155. vllm_ascend-0.10.0rc1/tests/e2e/doctests/002-pip-binary-installation-test.sh +62 -0
  156. vllm_ascend-0.10.0rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +167 -0
  157. vllm_ascend-0.9.1rc2/tests/long_term/test_accuracy.py → vllm_ascend-0.10.0rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +15 -11
  158. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_data_parallel.py +73 -0
  159. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/multicard/test_dynamic_npugraph_batchsize.py +19 -17
  160. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_expert_parallel.py +30 -0
  161. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_external_launcher.py +149 -0
  162. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +88 -0
  163. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/multicard/test_ilama_lora_tp2.py +5 -4
  164. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_offline_inference_310p.py +62 -0
  165. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/multicard/test_offline_inference_distributed.py +117 -87
  166. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_pipeline_parallel.py +47 -0
  167. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_prefix_caching.py +146 -0
  168. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/multicard/test_pyhccl_distributed.py +35 -24
  169. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_qwen3_moe.py +74 -0
  170. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_torchair_graph_mode.py +164 -0
  171. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +1 -1
  172. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +2 -0
  173. vllm_ascend-0.10.0rc1/tests/e2e/prompts/example.txt +8 -0
  174. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/run_doctests.sh +6 -0
  175. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/configs/Qwen2.5-VL-7B-Instruct.yaml +8 -0
  176. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/configs/Qwen3-30B-A3B.yaml +18 -0
  177. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/configs/Qwen3-8B-Base.yaml +13 -0
  178. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/configs/accuracy.txt +3 -0
  179. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/conftest.py +73 -0
  180. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/report_template.md +24 -0
  181. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/test_lm_eval_correctness.py +148 -0
  182. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/ops/test_bgmv_expand.py +41 -0
  183. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/ops/test_bgmv_shrink.py +40 -0
  184. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/ops/test_fused_moe.py +0 -3
  185. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +37 -0
  186. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/ops/test_rotary_embedding.py +0 -67
  187. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e/singlecard}/ops/test_vocabparallelembedding.py +5 -2
  188. vllm_ascend-0.9.1rc2/examples/offline_inference_npu.py → vllm_ascend-0.10.0rc1/tests/e2e/singlecard/quant/test_w8a8.py +20 -17
  189. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/sample/test_rejection_sampler.py +30 -42
  190. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +90 -0
  191. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +153 -0
  192. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_aclgraph.py +94 -0
  193. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_ascend_scheduler.py +88 -0
  194. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_camem.py +1 -1
  195. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_chunked.py +67 -0
  196. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_embedding.py +68 -0
  197. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_guided_decoding.py +12 -38
  198. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_ilama_lora.py +4 -4
  199. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_offline_inference.py +67 -34
  200. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_offline_inference_310p.py +72 -0
  201. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_sampler.py +109 -147
  202. vllm_ascend-0.10.0rc1/tests/e2e/utils.py +106 -0
  203. vllm_ascend-0.10.0rc1/tests/ut/attention/test_attention_mask.py +156 -0
  204. vllm_ascend-0.10.0rc1/tests/ut/attention/test_attention_v1.py +503 -0
  205. vllm_ascend-0.10.0rc1/tests/ut/attention/test_mla_v1.py +692 -0
  206. vllm_ascend-0.10.0rc1/tests/ut/base.py +44 -0
  207. vllm_ascend-0.10.0rc1/tests/ut/conftest.py +26 -0
  208. vllm_ascend-0.10.0rc1/tests/ut/core/test_schedule_config.py +117 -0
  209. vllm_ascend-0.10.0rc1/tests/ut/core/test_scheduler.py +718 -0
  210. vllm_ascend-0.10.0rc1/tests/ut/device_allocator/test_camem.py +188 -0
  211. vllm_ascend-0.10.0rc1/tests/ut/distributed/device_communicators/test_pyhccl.py +84 -0
  212. vllm_ascend-0.10.0rc1/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +173 -0
  213. vllm_ascend-0.10.0rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +139 -0
  214. vllm_ascend-0.10.0rc1/tests/ut/fake_weight/config.json +28 -0
  215. vllm_ascend-0.10.0rc1/tests/ut/kv_connector/test_llmdatadist_connector.py +96 -0
  216. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +64 -11
  217. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +19 -16
  218. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/ut/kv_connector/utils.py +22 -4
  219. vllm_ascend-0.10.0rc1/tests/ut/models/test_deepseek_mtp.py +180 -0
  220. vllm_ascend-0.10.0rc1/tests/ut/models/test_deepseek_v2.py +319 -0
  221. vllm_ascend-0.10.0rc1/tests/ut/models/test_qwen2_5_vl.py +424 -0
  222. vllm_ascend-0.10.0rc1/tests/ut/models/test_qwen2_5_vl_without_padding.py +398 -0
  223. vllm_ascend-0.10.0rc1/tests/ut/models/test_qwen2_vl.py +200 -0
  224. vllm_ascend-0.10.0rc1/tests/ut/models/test_qwen3_moe.py +46 -0
  225. vllm_ascend-0.10.0rc1/tests/ut/multistream/test_base.py +32 -0
  226. vllm_ascend-0.10.0rc1/tests/ut/multistream/test_decorator.py +47 -0
  227. vllm_ascend-0.10.0rc1/tests/ut/multistream/test_layers.py +198 -0
  228. vllm_ascend-0.10.0rc1/tests/ut/multistream/test_metadata.py +246 -0
  229. vllm_ascend-0.10.0rc1/tests/ut/multistream/test_ms_split.py +147 -0
  230. vllm_ascend-0.10.0rc1/tests/ut/ops/expert_map.json +17 -0
  231. vllm_ascend-0.10.0rc1/tests/ut/ops/test_activation.py +61 -0
  232. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/ut/ops/test_expert_load_balancer.py +25 -31
  233. vllm_ascend-0.10.0rc1/tests/ut/ops/test_fused_ops.py +377 -0
  234. vllm_ascend-0.10.0rc1/tests/ut/ops/test_rotary_embedding.py +314 -0
  235. {vllm_ascend-0.9.1rc2/tests/ut → vllm_ascend-0.10.0rc1/tests/ut/ops}/test_token_dispatcher.py +10 -14
  236. vllm_ascend-0.10.0rc1/tests/ut/ops/test_vocab_parallel_embedding.py +299 -0
  237. vllm_ascend-0.10.0rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +112 -0
  238. vllm_ascend-0.10.0rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +167 -0
  239. vllm_ascend-0.10.0rc1/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +77 -0
  240. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_func_wrapper.py +134 -0
  241. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_quant_config.py +232 -0
  242. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_quantizer.py +145 -0
  243. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +109 -0
  244. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w8a8.py +906 -0
  245. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +75 -0
  246. vllm_ascend-0.10.0rc1/tests/ut/sample/test_rejection_sampler.py +201 -0
  247. vllm_ascend-0.10.0rc1/tests/ut/sample/test_sampler.py +32 -0
  248. vllm_ascend-0.10.0rc1/tests/ut/test_ascend_config.py +306 -0
  249. vllm_ascend-0.10.0rc1/tests/ut/test_envs.py +61 -0
  250. vllm_ascend-0.10.0rc1/tests/ut/test_platform.py +594 -0
  251. vllm_ascend-0.10.0rc1/tests/ut/test_utils.py +416 -0
  252. vllm_ascend-0.10.0rc1/tests/ut/torchair/test_utils.py +28 -0
  253. vllm_ascend-0.10.0rc1/tests/ut/worker/test_input_batch.py +161 -0
  254. vllm_ascend-0.10.0rc1/tests/ut/worker/test_worker_v1.py +1 -0
  255. vllm_ascend-0.10.0rc1/tools/check_python_src_init.py +76 -0
  256. vllm_ascend-0.10.0rc1/tools/enforce_regex_import.py +104 -0
  257. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tools/mypy.sh +5 -1
  258. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tools/shellcheck.sh +4 -0
  259. vllm_ascend-0.10.0rc1/typos.toml +177 -0
  260. vllm_ascend-0.10.0rc1/vllm_ascend/__init__.py +27 -0
  261. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/_version.py +2 -2
  262. vllm_ascend-0.10.0rc1/vllm_ascend/ascend_config.py +183 -0
  263. vllm_ascend-0.10.0rc1/vllm_ascend/ascend_forward_context.py +114 -0
  264. vllm_ascend-0.10.0rc1/vllm_ascend/attention/attention_mask.py +104 -0
  265. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/attention/attention_v1.py +137 -152
  266. vllm_ascend-0.10.0rc1/vllm_ascend/attention/attention_v1_torchair.py +496 -0
  267. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/attention/mla_v1.py +91 -95
  268. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/compilation/piecewise_backend.py +0 -57
  269. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/core/scheduler.py +9 -62
  270. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/device_allocator/camem.py +4 -4
  271. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/__init__.py +0 -8
  272. vllm_ascend-0.10.0rc1/vllm_ascend/distributed/communication_op.py +25 -0
  273. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/communicator.py +21 -0
  274. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +20 -49
  275. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/parallel_state.py +6 -7
  276. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/envs.py +27 -37
  277. vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper/lora_ops.py +112 -0
  278. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/lora/punica_wrapper/punica_npu.py +32 -14
  279. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/__init__.py +9 -14
  280. vllm_ascend-0.10.0rc1/vllm_ascend/models/deepseek_dbo.py +1046 -0
  281. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/deepseek_mtp.py +11 -13
  282. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/deepseek_v2.py +294 -206
  283. vllm_ascend-0.10.0rc1/vllm_ascend/models/deepseek_v3.py +27 -0
  284. vllm_ascend-0.10.0rc1/vllm_ascend/models/pangu_moe.py +1117 -0
  285. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/qwen2_5_vl.py +21 -150
  286. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +28 -22
  287. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/qwen2_vl.py +2 -2
  288. vllm_ascend-0.10.0rc1/vllm_ascend/models/qwen3.py +156 -0
  289. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/models/qwen3_moe.py +139 -19
  290. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/multistream/base.py +0 -2
  291. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/multistream/decorator.py +0 -4
  292. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/multistream/metadata.py +0 -2
  293. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/multistream/ms_split.py +6 -128
  294. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/activation.py +15 -10
  295. vllm_ascend-0.10.0rc1/vllm_ascend/ops/comm_utils.py +62 -0
  296. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/common_fused_moe.py +41 -18
  297. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/fused_moe.py +353 -380
  298. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/layernorm.py +11 -2
  299. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/moe_dispatcher/token_dispatcher.py +0 -125
  300. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/rotary_embedding.py +48 -65
  301. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/sequence_parallel.py +3 -2
  302. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +2 -2
  303. vllm_ascend-0.10.0rc1/vllm_ascend/patch/__init__.py +104 -0
  304. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/patch/platform/__init__.py +2 -2
  305. vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_common/patch_distributed.py +115 -0
  306. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/patch/worker/__init__.py +2 -2
  307. {vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0 → vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_0_10_0}/__init__.py +1 -1
  308. vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_0_10_0/patch_sampler_gather_logprobs.py +87 -0
  309. vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_common/__init__.py +20 -0
  310. vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +145 -0
  311. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/platform.py +41 -52
  312. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/quantization/func_wrapper.py +1 -0
  313. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/quantization/quant_config.py +9 -34
  314. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/quantization/quantizer.py +24 -12
  315. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/quantization/w4a8_dynamic.py +18 -15
  316. vllm_ascend-0.10.0rc1/vllm_ascend/quantization/w8a8.py +767 -0
  317. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/quantization/w8a8_dynamic.py +201 -223
  318. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/sample/rejection_sampler.py +49 -103
  319. vllm_ascend-0.10.0rc1/vllm_ascend/sample/sampler.py +65 -0
  320. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/conftest.py → vllm_ascend-0.10.0rc1/vllm_ascend/torchair/torchair_model_runner.py +11 -10
  321. vllm_ascend-0.10.0rc1/vllm_ascend/torchair/torchair_worker.py +61 -0
  322. vllm_ascend-0.10.0rc1/vllm_ascend/torchair/utils.py +98 -0
  323. vllm_ascend-0.10.0rc1/vllm_ascend/utils.py +507 -0
  324. vllm_ascend-0.10.0rc1/vllm_ascend/worker/eagle_proposer_v1.py +384 -0
  325. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/worker/model_runner_v1.py +1063 -591
  326. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/worker/mtp_proposer_v1.py +40 -77
  327. vllm_ascend-0.10.0rc1/vllm_ascend/worker/npu_input_batch.py +758 -0
  328. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/worker/worker_v1.py +83 -68
  329. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend.egg-info/PKG-INFO +20 -9
  330. vllm_ascend-0.10.0rc1/vllm_ascend.egg-info/SOURCES.txt +447 -0
  331. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend.egg-info/requires.txt +4 -6
  332. vllm_ascend-0.9.1rc2/.github/actionlint.yaml +0 -8
  333. vllm_ascend-0.9.1rc2/.github/workflows/accuracy_report.yaml +0 -202
  334. vllm_ascend-0.9.1rc2/.github/workflows/accuracy_test.yaml +0 -254
  335. vllm_ascend-0.9.1rc2/.github/workflows/shellcheck.yml +0 -49
  336. vllm_ascend-0.9.1rc2/.github/workflows/vllm_ascend_test.yaml +0 -242
  337. vllm_ascend-0.9.1rc2/benchmarks/README.md +0 -57
  338. vllm_ascend-0.9.1rc2/benchmarks/scripts/patch_benchmark_dataset.py +0 -68
  339. vllm_ascend-0.9.1rc2/benchmarks/scripts/run_accuracy.py +0 -226
  340. vllm_ascend-0.9.1rc2/csrc/kernels/advance_step.cpp +0 -241
  341. vllm_ascend-0.9.1rc2/docs/source/assets/multi_node_dp.png +0 -0
  342. vllm_ascend-0.9.1rc2/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -63
  343. vllm_ascend-0.9.1rc2/docs/source/user_guide/release_notes.md +0 -306
  344. vllm_ascend-0.9.1rc2/docs/source/user_guide/support_matrix/supported_models.md +0 -53
  345. vllm_ascend-0.9.1rc2/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -138
  346. vllm_ascend-0.9.1rc2/examples/disaggregated_prefill/dp_proxy.py +0 -463
  347. vllm_ascend-0.9.1rc2/examples/disaggregated_prefill/find_device_ips.py +0 -69
  348. vllm_ascend-0.9.1rc2/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -193
  349. vllm_ascend-0.9.1rc2/examples/disaggregated_prefill/run_decode_server.sh +0 -37
  350. vllm_ascend-0.9.1rc2/examples/disaggregated_prefill/run_prefill_server.sh +0 -37
  351. vllm_ascend-0.9.1rc2/examples/dp_offline/run_dp.sh +0 -28
  352. vllm_ascend-0.9.1rc2/examples/eplb_generate_map.py +0 -77
  353. vllm_ascend-0.9.1rc2/examples/external_online_dp/README.md +0 -0
  354. vllm_ascend-0.9.1rc2/examples/external_online_dp/launch_dp_program.py +0 -34
  355. vllm_ascend-0.9.1rc2/examples/external_online_dp/run_dp_template.sh +0 -51
  356. vllm_ascend-0.9.1rc2/examples/offline_inference_audio_language.py +0 -126
  357. vllm_ascend-0.9.1rc2/examples/offline_multi_step_custom_ops.py +0 -50
  358. vllm_ascend-0.9.1rc2/examples/run_dp_attention_etp16_benmark.sh +0 -56
  359. vllm_ascend-0.9.1rc2/examples/run_dp_server.sh +0 -33
  360. vllm_ascend-0.9.1rc2/examples/run_dp_with_cached_graph_etp16.sh +0 -25
  361. vllm_ascend-0.9.1rc2/format.sh +0 -343
  362. vllm_ascend-0.9.1rc2/pyproject.toml +0 -25
  363. vllm_ascend-0.9.1rc2/pytest.ini +0 -68
  364. vllm_ascend-0.9.1rc2/requirements-lint.txt +0 -16
  365. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/conftest.py +0 -212
  366. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/test_eagle_correctness.py +0 -344
  367. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/test_medusa_correctness.py +0 -445
  368. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/test_mlp_correctness.py +0 -560
  369. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py +0 -455
  370. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/test_ngram_correctness.py +0 -404
  371. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/test_dynamic_spec_decode.py +0 -105
  372. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/test_multi_step_worker.py +0 -846
  373. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/test_ngram_worker.py +0 -237
  374. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/test_spec_decode_worker.py +0 -958
  375. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/test_utils.py +0 -165
  376. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/utils.py +0 -317
  377. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v1/test_v1_mtp_correctness.py +0 -157
  378. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v1/test_v1_spec_decode.py +0 -155
  379. vllm_ascend-0.9.1rc2/tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -71
  380. vllm_ascend-0.9.1rc2/tests/multicard/test_data_parallel.py +0 -66
  381. vllm_ascend-0.9.1rc2/tests/multicard/test_model_qwen3_w4a8.py +0 -65
  382. vllm_ascend-0.9.1rc2/tests/multicard/test_multimodal_context_parallel.py +0 -82
  383. vllm_ascend-0.9.1rc2/tests/multicard/test_torchair_graph_mode.py +0 -83
  384. vllm_ascend-0.9.1rc2/tests/multicard/test_w4a8_deepseek.py +0 -67
  385. vllm_ascend-0.9.1rc2/tests/singlecard/core/test_ascend_scheduler.py +0 -792
  386. vllm_ascend-0.9.1rc2/tests/singlecard/core/test_ascend_scheduler_e2e.py +0 -40
  387. vllm_ascend-0.9.1rc2/tests/singlecard/ops/test_multi_step.py +0 -190
  388. vllm_ascend-0.9.1rc2/tests/singlecard/test_aclgraph.py +0 -118
  389. vllm_ascend-0.9.1rc2/tests/singlecard/test_ascend_config.py +0 -233
  390. vllm_ascend-0.9.1rc2/tests/singlecard/test_chunked.py +0 -74
  391. vllm_ascend-0.9.1rc2/tests/singlecard/test_prompt_embedding.py +0 -259
  392. vllm_ascend-0.9.1rc2/tests/singlecard/test_scheduler.py +0 -379
  393. vllm_ascend-0.9.1rc2/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -42
  394. vllm_ascend-0.9.1rc2/tests/ut/patch/worker/patch_common/test_patch_sampler.py +0 -44
  395. vllm_ascend-0.9.1rc2/tests/ut/test_distributed_tensor_parallel.py +0 -139
  396. vllm_ascend-0.9.1rc2/tests/utils.py +0 -199
  397. vllm_ascend-0.9.1rc2/vllm_ascend/__init__.py +0 -38
  398. vllm_ascend-0.9.1rc2/vllm_ascend/ascend_config.py +0 -236
  399. vllm_ascend-0.9.1rc2/vllm_ascend/ascend_forward_context.py +0 -137
  400. vllm_ascend-0.9.1rc2/vllm_ascend/attention/attention.py +0 -1292
  401. vllm_ascend-0.9.1rc2/vllm_ascend/attention/utils.py +0 -23
  402. vllm_ascend-0.9.1rc2/vllm_ascend/cpu_binding.py +0 -329
  403. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/context_parallel_utils.py +0 -110
  404. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -209
  405. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/kv_transfer/simple_connector.py +0 -376
  406. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -209
  407. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/kv_transfer/utils.py +0 -40
  408. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/llmdatadist_connector.py +0 -470
  409. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -44
  410. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -212
  411. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -136
  412. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/eplb_utils.py +0 -75
  413. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/eplb_worker.py +0 -442
  414. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -41
  415. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -388
  416. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -770
  417. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_factory.py +0 -25
  418. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_random.py +0 -29
  419. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/eplb_updator.py +0 -222
  420. vllm_ascend-0.9.1rc2/vllm_ascend/models/deepseek_dbo.py +0 -1085
  421. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen2.py +0 -372
  422. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen3.py +0 -472
  423. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen3_dbo.py +0 -552
  424. vllm_ascend-0.9.1rc2/vllm_ascend/ops/comm_utils.py +0 -127
  425. vllm_ascend-0.9.1rc2/vllm_ascend/patch/__init__.py +0 -202
  426. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/__init__.py +0 -25
  427. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_cache_manager.py +0 -13
  428. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_configs.py +0 -77
  429. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_core.py +0 -132
  430. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_core_client.py +0 -26
  431. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_decorator.py +0 -154
  432. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -83
  433. vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_common/__init__.py +0 -26
  434. vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -70
  435. vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -91
  436. vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_common/patch_sampler.py +0 -106
  437. vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +0 -157
  438. vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -38
  439. vllm_ascend-0.9.1rc2/vllm_ascend/quantization/w8a8.py +0 -129
  440. vllm_ascend-0.9.1rc2/vllm_ascend/sample/__init__.py +0 -0
  441. vllm_ascend-0.9.1rc2/vllm_ascend/soc_info.py +0 -14
  442. vllm_ascend-0.9.1rc2/vllm_ascend/utils.py +0 -454
  443. vllm_ascend-0.9.1rc2/vllm_ascend/worker/__init__.py +0 -17
  444. vllm_ascend-0.9.1rc2/vllm_ascend/worker/cache_engine.py +0 -83
  445. vllm_ascend-0.9.1rc2/vllm_ascend/worker/draft_model_runner.py +0 -320
  446. vllm_ascend-0.9.1rc2/vllm_ascend/worker/model_runner.py +0 -1611
  447. vllm_ascend-0.9.1rc2/vllm_ascend/worker/multi_step_runner.py +0 -737
  448. vllm_ascend-0.9.1rc2/vllm_ascend/worker/multi_step_worker.py +0 -194
  449. vllm_ascend-0.9.1rc2/vllm_ascend/worker/pooling_model_runner.py +0 -186
  450. vllm_ascend-0.9.1rc2/vllm_ascend/worker/worker.py +0 -570
  451. vllm_ascend-0.9.1rc2/vllm_ascend.egg-info/SOURCES.txt +0 -370
  452. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  453. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  454. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  455. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  456. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  457. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  458. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  459. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  460. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  461. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  462. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/labeler.yml +0 -0
  463. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/label_merge_conflict.yml +0 -0
  464. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/labeler.yml +0 -0
  465. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/matchers/actionlint.json +0 -0
  466. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/matchers/mypy.json +0 -0
  467. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.github/workflows/matchers/ruff.json +0 -0
  468. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/.readthedocs.yaml +0 -0
  469. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/CMakeLists.txt +0 -0
  470. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/DCO +0 -0
  471. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/LICENSE +0 -0
  472. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/tests/latency-tests.json +0 -0
  473. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/benchmarks/tests/throughput-tests.json +0 -0
  474. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/cmake/utils.cmake +0 -0
  475. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/collect_env.py +0 -0
  476. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/camem_allocator.cpp +0 -0
  477. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/kernels/types.h +0 -0
  478. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/csrc/utils.h +0 -0
  479. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/_templates/sections/header.html +0 -0
  480. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/community/governance.md +0 -0
  481. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/community/user_stories/index.md +0 -0
  482. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
  483. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/contribution/index.md +0 -0
  484. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/contribution/testing.md +0 -0
  485. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  486. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
  487. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  488. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  489. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/feature_guide/index.md +0 -0
  490. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  491. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
  492. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  493. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/modeling/index.md +0 -0
  494. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/performance/index.md +0 -0
  495. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  496. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  497. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  498. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/index.md +0 -0
  499. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  500. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  501. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/multi_npu.md +0 -0
  502. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/tutorials/single_npu.md +0 -0
  503. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/configuration/additional_config.md +0 -0
  504. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
  505. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/configuration/index.md +0 -0
  506. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  507. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  508. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
  509. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/lora.md +0 -0
  510. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  511. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  512. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
  513. {vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1 → vllm_ascend-0.10.0rc1/examples/disaggregated_prefill_v1}/README.md +0 -0
  514. {vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1 → vllm_ascend-0.10.0rc1/examples/disaggregated_prefill_v1}/run_server.sh +0 -0
  515. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/mypy.ini +0 -0
  516. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/packages.txt +0 -0
  517. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/setup.cfg +0 -0
  518. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/__init__.py +0 -0
  519. {vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0 → vllm_ascend-0.10.0rc1/tests}/e2e/__init__.py +0 -0
  520. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/model_utils.py +0 -0
  521. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  522. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  523. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  524. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/__init__.py +0 -0
  525. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/compile/__init__.py +0 -0
  526. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/compile/test_simple.py +0 -0
  527. {vllm_ascend-0.9.1rc2/tests/singlecard/core → vllm_ascend-0.10.0rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
  528. {vllm_ascend-0.9.1rc2/tests/singlecard/ops → vllm_ascend-0.10.0rc1/tests/e2e/singlecard/sample}/__init__.py +0 -0
  529. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_profile_execute_duration.py +0 -0
  530. {vllm_ascend-0.9.1rc2/tests → vllm_ascend-0.10.0rc1/tests/e2e}/singlecard/test_pyhccl.py +0 -0
  531. {vllm_ascend-0.9.1rc2/tests/singlecard/sample → vllm_ascend-0.10.0rc1/tests/ut}/__init__.py +0 -0
  532. {vllm_ascend-0.9.1rc2/vllm_ascend/attention → vllm_ascend-0.10.0rc1/tests/ut/models}/__init__.py +0 -0
  533. {vllm_ascend-0.9.1rc2/vllm_ascend/compilation → vllm_ascend-0.10.0rc1/tests/ut/torchair}/__init__.py +0 -0
  534. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tools/actionlint.sh +0 -0
  535. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tools/check_repo.sh +0 -0
  536. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tools/png-lint.sh +0 -0
  537. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/tools/sphinx-lint.sh +0 -0
  538. {vllm_ascend-0.9.1rc2/vllm_ascend/core → vllm_ascend-0.10.0rc1/vllm_ascend/attention}/__init__.py +0 -0
  539. {vllm_ascend-0.9.1rc2/vllm_ascend/device_allocator → vllm_ascend-0.10.0rc1/vllm_ascend/compilation}/__init__.py +0 -0
  540. {vllm_ascend-0.9.1rc2/vllm_ascend/distributed/device_communicators → vllm_ascend-0.10.0rc1/vllm_ascend/core}/__init__.py +0 -0
  541. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/core/schedule_config.py +0 -0
  542. {vllm_ascend-0.9.1rc2/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.10.0rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
  543. {vllm_ascend-0.9.1rc2/vllm_ascend/eplb → vllm_ascend-0.10.0rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  544. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  545. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  546. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/distributed/tensor_parallel.py +0 -0
  547. {vllm_ascend-0.9.1rc2/vllm_ascend/eplb/adaptor → vllm_ascend-0.10.0rc1/vllm_ascend/lora}/__init__.py +0 -0
  548. {vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core → vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper}/__init__.py +0 -0
  549. {vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy → vllm_ascend-0.10.0rc1/vllm_ascend/multistream}/__init__.py +0 -0
  550. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/multistream/context.py +0 -0
  551. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/multistream/layers.py +0 -0
  552. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/__init__.py +0 -0
  553. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/attention.py +0 -0
  554. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/cache.py +0 -0
  555. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  556. {vllm_ascend-0.9.1rc2/vllm_ascend/lora → vllm_ascend-0.10.0rc1/vllm_ascend/ops/moe_dispatcher}/__init__.py +0 -0
  557. {vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_main → vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_0_10_0}/__init__.py +0 -0
  558. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
  559. {vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_0_9_1 → vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
  560. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  561. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  562. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  563. {vllm_ascend-0.9.1rc2/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.0rc1/vllm_ascend/quantization}/__init__.py +0 -0
  564. {vllm_ascend-0.9.1rc2/vllm_ascend/multistream → vllm_ascend-0.10.0rc1/vllm_ascend/sample}/__init__.py +0 -0
  565. {vllm_ascend-0.9.1rc2/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.10.0rc1/vllm_ascend/torchair}/__init__.py +0 -0
  566. {vllm_ascend-0.9.1rc2/vllm_ascend/quantization → vllm_ascend-0.10.0rc1/vllm_ascend/worker}/__init__.py +0 -0
  567. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  568. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  569. {vllm_ascend-0.9.1rc2 → vllm_ascend-0.10.0rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -40,6 +40,6 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
40
40
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
41
41
  cd vllm-ascend && \
42
42
  python3 setup.py bdist_wheel && \
43
- ls -l dist
43
+ ls -l dist
44
44
 
45
45
  CMD ["/bin/bash"]
@@ -1,5 +1,5 @@
1
1
  name: 📚 User Story
2
- description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.org/user_stories/index.html
2
+ description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
3
3
  title: "[User Story]: "
4
4
  labels: ["user-story"]
5
5
 
@@ -0,0 +1,100 @@
1
+ name: Release Checklist
2
+ description: Generate a release checklist issue when prepare a new release.(Used for release team)
3
+ title: "[Release]: Release checklist for v"
4
+
5
+ body:
6
+ - type: textarea
7
+ attributes:
8
+ description: >
9
+ Brief info for the new release.
10
+ label: Release Checklist
11
+ value: >
12
+ **Release Version**:
13
+
14
+ **Release Branch**:
15
+
16
+ **Release Date**:
17
+
18
+ **Release Manager**:
19
+ - type: textarea
20
+ attributes:
21
+ description: >
22
+ Release notes.
23
+ label: Prepare Release Note
24
+ value: >
25
+ - [ ] Create a new issue for release feedback
26
+
27
+ - [ ] Write the release note PR.
28
+
29
+ - [ ] Update the feedback issue link in docs/source/faqs.md
30
+
31
+ - [ ] Add release note to docs/source/user_guide/release_notes.md
32
+
33
+ - [ ] Update version info in docs/source/community/versioning_policy.md
34
+
35
+ - [ ] Update contributor info in docs/source/community/contributors.md
36
+
37
+ - [ ] Update package version in docs/conf.py
38
+ - type: textarea
39
+ attributes:
40
+ description: >
41
+ Make sure the code is merged.
42
+ label: PR need Merge
43
+ value: >
44
+ - [ ] PR link1
45
+
46
+ - [ ] PR link2
47
+
48
+ - [ ] ...
49
+ - type: textarea
50
+ attributes:
51
+ description: >
52
+ Make sure the new Feature/Function is tested
53
+ label: Functional Test
54
+ value: >
55
+ - [ ] Feature1
56
+
57
+ - [ ] Bug1
58
+
59
+ - [ ] ...
60
+ - type: textarea
61
+ attributes:
62
+ description: >
63
+ Make sure the doc is updated.
64
+ label: Doc Test
65
+ value: >
66
+ - [ ] Tutorial is updated.
67
+
68
+ - [ ] User Guide is updated.
69
+
70
+ - [ ] Developer Guide is updated.
71
+ - type: textarea
72
+ attributes:
73
+ description: >
74
+ Make sure the artifacts is ready
75
+ label: Prepare Artifacts
76
+ value: >
77
+ - [ ] Docker image is ready.
78
+
79
+ - [ ] Wheel package is ready.
80
+ - type: textarea
81
+ attributes:
82
+ description: >
83
+ Start to release.
84
+ label: Release Step
85
+ value: >
86
+ - [ ] Release note PR is merged.
87
+
88
+ - [ ] Post the release on GitHub release page.
89
+
90
+ - [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
91
+
92
+ - [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
93
+
94
+ - [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
95
+
96
+ - [ ] Upload 310p wheel to Github release page
97
+
98
+ - [ ] Broadcast the release news (By message, blog , etc)
99
+
100
+ - [ ] Close this issue
@@ -25,4 +25,3 @@ CI passed with new added/existing test.
25
25
  If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
26
26
  If tests were not added, please describe why they were not added and/or why it was difficult to add.
27
27
  -->
28
-
@@ -0,0 +1,13 @@
1
+ self-hosted-runner:
2
+ # Labels of self-hosted runner in array of strings.
3
+ labels:
4
+ - linux-aarch64-a2-0
5
+ - linux-aarch64-a2-1
6
+ - linux-aarch64-a2-2
7
+ - linux-aarch64-a2-4
8
+ - linux-aarch64-a2-8
9
+ - linux-arm64-npu-static-8
10
+ - linux-aarch64-310p-1
11
+ - linux-aarch64-310p-2
12
+ - linux-aarch64-310p-4
13
+ - ubuntu-24.04-arm
@@ -2,6 +2,9 @@ version: 2
2
2
  updates:
3
3
  - package-ecosystem: "github-actions"
4
4
  directory: "/"
5
+ schedule:
6
+ # Check for updates to GitHub Actions every week
7
+ interval: "weekly"
5
8
  open-pull-requests-limit: 2
6
9
  reviewers:
7
10
  - "Yikun"
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ # Adapted from vllm/.github/scripts/cleanup_pr_body.sh
17
+
18
+ #!/bin/bash
19
+
20
+ set -eux
21
+
22
+ # ensure exactly 3 arguments are passed
23
+ if [ "$#" -ne 3 ]; then
24
+ echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
25
+ exit 1
26
+ fi
27
+
28
+ PR_NUMBER=$1
29
+ VLLM_VERSION=$2
30
+ VLLM_COMMIT=$3
31
+ OLD=/tmp/orig_pr_body.txt
32
+ NEW=/tmp/new_pr_body.txt
33
+ FINAL=/tmp/final_pr_body.txt
34
+
35
+ gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
36
+ cp "${OLD}" "${NEW}"
37
+
38
+ # Remove notes in pr description and add vLLM version and commit
39
+ sed -i '/<!--/,/-->/d' "${NEW}"
40
+ sed -i '/- vLLM .*$/d' "${NEW}"
41
+ {
42
+ echo ""
43
+ echo "- vLLM version: $VLLM_VERSION"
44
+ echo "- vLLM main: $VLLM_COMMIT"
45
+ } >> "${NEW}"
46
+
47
+ # Remove redundant empty lines
48
+ uniq "${NEW}" > "${FINAL}"
49
+
50
+ # Run this only if ${FINAL} is different from ${OLD}
51
+ if ! cmp -s "${OLD}" "${FINAL}"; then
52
+ echo
53
+ echo "Updating PR body:"
54
+ echo
55
+ cat "${NEW}"
56
+ gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
57
+ else
58
+ echo "No changes needed"
59
+ fi
@@ -0,0 +1,321 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ # This test will be triggered:
19
+ # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
20
+ # 2. workflow_dispatch with vllm-ascend-version input
21
+ # See detail rule in strategy.matrix note
22
+ name: Benchmarks / accuracy
23
+
24
+ on:
25
+ schedule:
26
+ # Runs every 6 hours
27
+ - cron: '0 */6 * * *'
28
+ pull_request:
29
+ types: [ labeled ]
30
+ workflow_dispatch:
31
+ inputs:
32
+ vllm-ascend-version:
33
+ description: 'vllm-ascend:'
34
+ required: true
35
+ type: choice
36
+ # Currently supported vllm-ascend versions
37
+ options:
38
+ - latest
39
+ - main
40
+ default: main
41
+
42
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
43
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
44
+ # It's used to activate ascend-toolkit environment variables.
45
+ defaults:
46
+ run:
47
+ shell: bash -el {0}
48
+
49
+ # only cancel in-progress runs of the same workflow
50
+ concurrency:
51
+ group: ${{ github.workflow }}-${{ github.ref }}
52
+ cancel-in-progress: true
53
+
54
+ jobs:
55
+ accuracy_tests:
56
+ # test will be triggered when the PR is labeled 'accuracy-test' & 'ready-for-test', or on a workflow_dispatch or schedule event
57
+ if: >-
58
+ ${{
59
+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
60
+ contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
61
+ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
62
+ }}
63
+ runs-on: ${{ matrix.runner }}
64
+ strategy:
65
+ matrix:
66
+ include:
67
+ - model_name: Qwen3-8B-Base
68
+ runner: linux-aarch64-a2-1
69
+ - model_name: Qwen2.5-VL-7B-Instruct
70
+ runner: linux-aarch64-a2-1
71
+ - model_name: Qwen3-30B-A3B
72
+ runner: linux-aarch64-a2-2
73
+ fail-fast: false
74
+
75
+ name: ${{ matrix.model_name }} accuracy
76
+ container:
77
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
78
+ env:
79
+ VLLM_USE_MODELSCOPE: True
80
+ # 1. If version specified (workflow_dispatch), do specified branch accuracy test
81
+ # 2. If no version (labeled PR), do accuracy test by default ref:
82
+ # The branch, tag or SHA to checkout. When checking out the repository that
83
+ # triggered a workflow, this defaults to the reference or SHA for that event.
84
+ # Otherwise, uses the default branch.
85
+ GHA_VLLM_ASCEND_VERSION: ${{ github.event.inputs.vllm-ascend-version }}
86
+
87
+ steps:
88
+ - name: Checkout repository
89
+ uses: actions/checkout@v4
90
+
91
+ - name: Set model name as output
92
+ id: set_output
93
+ run: |
94
+ echo "model_name=${{ matrix.model_name }}" >> $GITHUB_OUTPUT
95
+
96
+ - name: Config mirrors
97
+ run: |
98
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
99
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
100
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
101
+ apt-get update -y
102
+ apt install git -y
103
+
104
+ - name: Install system dependencies
105
+ run: |
106
+ apt-get -y install `cat packages.txt`
107
+ apt-get -y install gcc g++ cmake libnuma-dev
108
+
109
+ - name: Checkout vllm-project/vllm repo
110
+ uses: actions/checkout@v4
111
+ with:
112
+ repository: vllm-project/vllm
113
+ ref: v0.10.0
114
+ path: ./vllm-empty
115
+
116
+ - name: Install vllm-project/vllm from source
117
+ working-directory: ./vllm-empty
118
+ run: |
119
+ VLLM_TARGET_DEVICE=empty pip install -e .
120
+
121
+ - name: Resolve vllm-ascend version
122
+ run: |
123
+ VERSION_INPUT="${{ github.event.inputs.vllm-ascend-version }}"
124
+
125
+ if [[ "$VERSION_INPUT" == "latest" ]]; then
126
+ TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
127
+ LATEST_TAG=$(echo "$TAGS" | head -n1)
128
+ if [[ -z "$LATEST_TAG" ]]; then
129
+ RESOLVED_VERSION="main"
130
+ else
131
+ RESOLVED_VERSION="$LATEST_TAG"
132
+ fi
133
+ else
134
+ RESOLVED_VERSION="$VERSION_INPUT"
135
+ fi
136
+ echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
137
+
138
+ - name: Checkout vllm-project/vllm-ascend repo
139
+ uses: actions/checkout@v4
140
+ with:
141
+ repository: vllm-project/vllm-ascend
142
+ path: ./vllm-ascend
143
+ ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
144
+
145
+ - name: Install vllm-project/vllm-ascend
146
+ working-directory: ./vllm-ascend
147
+ env:
148
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
149
+ run: |
150
+ pip install -r requirements-dev.txt
151
+ pip install -v -e .
152
+
153
+ - name: Get vLLM commit hash and URL
154
+ working-directory: ./vllm-empty
155
+ run: |
156
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
157
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
158
+
159
+ - name: Get vLLM-Ascend commit hash and URL
160
+ working-directory: ./vllm-ascend
161
+ run: |
162
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
163
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
164
+
165
+ # Export the versions of the software stack for later steps:
+ #   - GHA_CANN_VERSION: value of the first `version=` line (double quotes removed) in
+ #     ascend_toolkit_install.info, read from the first entry under
+ #     /usr/local/Ascend/ascend-toolkit whose name is not "latest";
+ #   - GHA_TORCH_VERSION / GHA_TORCH_NPU_VERSION: the `Version:` field of `pip show`;
+ #   - GHA_VLLM_VERSION: same, with everything from the first "+" onwards removed.
+ # NOTE(review): TOOLKIT_DIR stays unset if the directory holds nothing but "latest";
+ # the grep would then read a non-existent path. Confirm the runner image always
+ # ships a versioned toolkit directory.
+ - name: Collect version info
166
+ run: |
167
+ for dir in /usr/local/Ascend/ascend-toolkit/*; do
168
+ dname=$(basename "$dir")
169
+ if [ "$dname" != "latest" ]; then
170
+ TOOLKIT_DIR="$dname"
171
+ break
172
+ fi
173
+ done
174
+ INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
175
+ GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
176
+ | head -n1 \
177
+ | cut -d'=' -f2 \
178
+ | tr -d '"')
179
+ {
180
+ echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
181
+ pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
182
+ pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
183
+ pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
184
+ } >> "$GITHUB_ENV"
185
+
186
+ # Run the lm-eval correctness test for the matrix model and write its markdown
+ # report to ./benchmarks/accuracy/<basename of matrix.model_name>.md.
+ # That basename is also published as the step output `markdown_name` (step id: report).
+ # The env block forwards the version/commit values exported by earlier steps;
+ # VLLM_ASCEND_VERSION falls back to github.ref when GHA_VLLM_ASCEND_VERSION is empty.
+ - name: Run accuracy test
187
+ id: report
188
+ env:
189
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
+ # Quoted so the process receives the literal string "True" instead of a
+ # value that has gone through YAML boolean coercion.
190
+ VLLM_USE_MODELSCOPE: "True"
191
+ VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
192
+ VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
193
+ VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
194
+ VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
195
+ CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
196
+ TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
197
+ TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
198
+ run: |
199
+ model_base_name=$(basename ${{ matrix.model_name }})
200
+ markdown_name="${model_base_name}"
201
+ echo "markdown_name=$markdown_name" >> "$GITHUB_OUTPUT"
202
+ mkdir -p ./benchmarks/accuracy
203
+ pytest -sv ./tests/e2e/singlecard/models/test_lm_eval_correctness.py \
204
+ --config ./tests/e2e/singlecard/models/configs/${{ matrix.model_name }}.yaml \
205
+ --report_output ./benchmarks/accuracy/${model_base_name}.md
206
+
207
+ # Append the markdown report to the job's step summary. `always()` makes this run
+ # even when an earlier step failed.
+ # NOTE(review): if the `report` step failed before setting `markdown_name`, the path
+ # collapses to ./benchmarks/accuracy/.md and `cat` fails as well.
+ - name: Generate step summary
208
+ if: ${{ always() }}
209
+ run: |
210
+ cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
211
+
212
+ # Replace every "/" in GHA_VLLM_ASCEND_VERSION with "-" and export the result as
+ # SAFE_VLLM_ASCEND_VERSION, which the upload step uses in the artifact name.
+ - name: Sanitize version string for artifact naming
213
+ run: |
214
+ SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
215
+ echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
216
+
217
+ # Upload the model's markdown report as artifact
+ # "report-<sanitized version>-<markdown_name>". A missing file only produces a
+ # warning, an existing artifact of the same name is overwritten, and the artifact
+ # is kept for 90 days.
+ - name: Upload Report
218
+ uses: actions/upload-artifact@v4
219
+ with:
220
+ name: "report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
221
+ path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
222
+ if-no-files-found: warn
223
+ retention-days: 90
224
+ overwrite: true
225
+
226
+ # Job-level output exposing the `model_name` output of the step with id `set_output`.
+ # NOTE(review): among the steps visible in this part of the job the only step id is
+ # `report`; confirm a step with id `set_output` exists earlier, otherwise this
+ # output is always empty.
+ outputs:
227
+ model_name: ${{ steps.set_output.outputs.model_name }}
228
+
229
+ # Job that publishes the accuracy reports as a pull request. It runs after
+ # `accuracy_tests`, and only for manual (workflow_dispatch) runs whose
+ # `vllm-ascend-version` input is exactly "latest".
+ create_pr:
230
+ runs-on: ubuntu-latest
231
+ needs: accuracy_tests
232
+ if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
233
+ env:
+ # Repository that is added as the `upstream` remote in a later step.
234
+ UPSTREAM_REPO: vllm-project/vllm-ascend
235
+
236
+ steps:
237
+ # Check out the `main` branch of vllm-ascend-ci/vllm-ascend using PAT_TOKEN
+ # (presumably the CI account's fork that the report branch is pushed to; confirm).
+ - name: Checkout repository
238
+ uses: actions/checkout@v4
239
+ with:
240
+ repository: vllm-ascend-ci/vllm-ascend
241
+ token: ${{ secrets.PAT_TOKEN }}
242
+ ref: main
243
+
244
+ # Register UPSTREAM_REPO as the `upstream` remote, fetch it, and print the
+ # configured remotes to the log.
+ - name: Add upstream remote
245
+ run: |
246
+ git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
247
+ git fetch upstream
248
+ git remote -v
249
+
250
+ # Set the commit identity to the account that triggered the run, using its
+ # users.noreply.github.com address as the e-mail.
+ - name: Set Git user info dynamically
251
+ run: |
252
+ git config user.name "${{ github.actor }}"
253
+ git config user.email "${{ github.actor }}@users.noreply.github.com"
254
+
255
+ # Create (or reset, via -B) a timestamped branch auto-pr/accuracy-report-<YYYYmmddHHMMSS>
+ # and export its name as BRANCH_NAME for the push and PR steps.
+ # NOTE(review): the job-level `if` only lets this job run when the input is "latest",
+ # so the start point below always resolves to `upstream/latest`. Confirm that ref
+ # exists upstream; otherwise the checkout fails.
+ - name: Create or switch to branch
256
+ run: |
257
+ TIMESTAMP=$(date +%Y%m%d%H%M%S)
258
+ BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
259
+ echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
260
+ git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
261
+
262
+ # Download every artifact of this workflow run whose name matches `report-*`
+ # into the accuracy_report docs directory.
+ - name: Download only current run reports
263
+ uses: actions/download-artifact@v4
264
+ with:
265
+ path: ./docs/source/developer_guide/evaluation/accuracy_report
266
+ pattern: report-*
267
+ github-token: ${{ secrets.GITHUB_TOKEN }}
268
+ run-id: ${{ github.run_id }}
269
+
270
+ # Replace the previous reports with the freshly downloaded ones, in three passes:
+ #   1. delete every top-level *.md in the report directory except index.md;
+ #   2. move every *.md found in a subdirectory up into the report directory;
+ #   3. remove the subdirectories that are now empty.
+ # Pass 2 relies on the downloaded reports sitting in subdirectories (presumably one
+ # per artifact, as download-artifact does without merge-multiple; confirm).
+ - name: Delete old report
271
+ run: |
272
+ find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
273
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
274
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
275
+
276
+ # Regenerate index.md from scratch: a title followed by a toctree directive that
+ # lists the base name of every *.md file in the report directory except index itself.
+ - name: Update accuracy_report/index.md
277
+ run: |
278
+ REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
279
+ INDEX_MD="$REPORT_DIR/index.md"
280
+ {
281
+ echo "# Accuracy Report"
282
+ echo ""
283
+ echo ":::{toctree}"
284
+ echo ":caption: Accuracy Report"
285
+ echo ":maxdepth: 1"
286
+
287
+ for report in "$REPORT_DIR"/*.md; do
288
+ filename="$(basename "$report" .md)"
289
+ if [ "$filename" != "index" ]; then
290
+ echo "$filename"
291
+ fi
292
+ done
293
+ echo ":::"
294
+ } > "$INDEX_MD"
295
+
296
+ # Stage the report markdown files, create a signed-off (-s) commit and force-push
+ # the branch named by BRANCH_NAME to `origin`. PAT_TOKEN is exposed to the step
+ # as GITHUB_TOKEN.
+ # NOTE(review): `git commit` exits non-zero when nothing changed, which would
+ # fail this step.
+ - name: push accuracy report
297
+ env:
298
+ GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
299
+ run: |
300
+ git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
301
+ git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
302
+ git push -f origin "${{ env.BRANCH_NAME }}"
303
+
304
+ # Open a pull request on vllm-project/vllm-ascend from the pushed branch
+ # (head `vllm-ascend-ci:<BRANCH_NAME>`), authenticated with PAT_TOKEN, and log the
+ # number of the created PR. The body links back to this workflow run.
+ # NOTE(review): `base` is the raw `vllm-ascend-version` input, which the job-level
+ # `if` pins to "latest"; confirm a branch of that name exists upstream.
+ - name: Create PR in upstream via API
305
+ uses: actions/github-script@v7
306
+ with:
307
+ github-token: ${{ secrets.PAT_TOKEN }}
308
+ script: |
309
+ const pr = await github.rest.pulls.create({
310
+ owner: 'vllm-project',
311
+ repo: 'vllm-ascend',
312
+ head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
313
+ base: '${{ github.event.inputs.vllm-ascend-version }}',
314
+ title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
315
+ body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for: All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)
316
+
317
+ - [Workflow run][1]
318
+
319
+ [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
320
+ });
321
+ core.info(`Created PR #${pr.data.number}`);
@@ -0,0 +1,63 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ # Workflow that rewrites a pull request's description with vLLM version information.
+ name: format / pr body
19
+
20
+ on:
21
+ # The PR updated when PR opened and push new commits
+ # i.e. run when a PR targeting `main` is opened or receives new commits.
22
+ pull_request_target:
23
+ types: [opened, synchronize]
24
+ branches:
25
+ - 'main'
26
+
+ # The only permission granted to the workflow token: write access to pull requests.
27
+ permissions:
28
+ pull-requests: write
29
+
30
+ jobs:
+ # Single job; its steps collect the vLLM commit and version and pass them to
+ # .github/format_pr_body.sh.
31
+ update-description:
32
+ name: update vLLM version
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+ # Check out vllm-project/vllm into ./vllm-empty. No `ref` is given, so the action's
+ # default ref for that repository is used.
+ # The action is pinned to the same commit SHA as the other checkout step in this
+ # job, so every third-party action in this pull_request_target workflow is
+ # referenced immutably.
+ - name: Checkout vllm-project/vllm repo
37
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
38
+ with:
39
+ repository: vllm-project/vllm
40
+ path: ./vllm-empty
41
+
42
+ # Read the full HEAD commit hash of the ./vllm-empty checkout and export
+ # VLLM_COMMIT as the URL of that commit on github.com (not the bare hash).
+ - name: Get vLLM version
43
+ working-directory: ./vllm-empty
44
+ run: |
45
+ VLLM_COMMIT=$(git rev-parse HEAD)
46
+ echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
+
48
+ # Check out this repository with the action's default inputs (pinned to a commit SHA).
+ - name: Checkout repository
49
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
50
+
+ # Set up Python with the action's default inputs (pinned to a commit SHA).
51
+ - name: Set up Python
52
+ uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
53
+
54
+ # Run docs/source/conf.py, take the `ci_vllm_version` field of its output with jq,
+ # strip the double quotes and export the result as VLLM_VERSION.
+ # Presumably conf.py prints a JSON object when run as a script; confirm against that file.
+ - name: Get vLLM release version
55
+ run: |
56
+ VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
57
+ echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
58
+
59
+ # Invoke .github/format_pr_body.sh with three arguments: the pull request number,
+ # the vLLM version and the vLLM commit URL exported by earlier steps.
+ # The workflow token is exposed to the script as GITHUB_TOKEN.
+ - name: Update PR description
60
+ env:
61
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
62
+ run: |
63
+ bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"