vllm-ascend 0.9.0rc2__tar.gz → 0.9.2rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (388)
  1. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/Dockerfile.buildwheel +5 -8
  2. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +1 -1
  3. vllm_ascend-0.9.2rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml +100 -0
  4. vllm_ascend-0.9.2rc1/.github/format_pr_body.sh +56 -0
  5. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/accuracy_test.yaml +167 -33
  6. vllm_ascend-0.9.2rc1/.github/workflows/format_pr_body.yaml +63 -0
  7. vllm_ascend-0.9.2rc1/.github/workflows/image_310p_openeuler.yml +114 -0
  8. vllm_ascend-0.9.2rc1/.github/workflows/image_310p_ubuntu.yml +110 -0
  9. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/image_openeuler.yml +21 -6
  10. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/image_ubuntu.yml +16 -4
  11. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/nightly_benchmarks.yaml +30 -19
  12. vllm_ascend-0.9.2rc1/.github/workflows/pre-commit.yml +37 -0
  13. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/release_code.yml +0 -12
  14. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/release_whl.yml +43 -20
  15. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_doctest.yaml +8 -25
  16. vllm_ascend-0.9.2rc1/.github/workflows/vllm_ascend_test.yaml +326 -0
  17. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test_long_term.yaml +7 -10
  18. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test_pd.yaml +8 -3
  19. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.gitignore +2 -0
  20. vllm_ascend-0.9.2rc1/.pre-commit-config.yaml +141 -0
  21. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/CMakeLists.txt +0 -2
  22. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/Dockerfile +3 -2
  23. vllm_ascend-0.9.2rc1/Dockerfile.310p +61 -0
  24. vllm_ascend-0.9.2rc1/Dockerfile.310p.openEuler +58 -0
  25. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/Dockerfile.openEuler +3 -2
  26. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/PKG-INFO +6 -5
  27. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/README.md +5 -4
  28. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/README.zh.md +5 -4
  29. vllm_ascend-0.9.2rc1/benchmarks/README.md +166 -0
  30. vllm_ascend-0.9.2rc1/benchmarks/ops/ben_vocabparallelembedding.py +158 -0
  31. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/convert_json_to_markdown.py +54 -49
  32. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/patch_benchmark_dataset.py +22 -11
  33. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/run-performance-benchmarks.sh +21 -22
  34. vllm_ascend-0.9.2rc1/benchmarks/scripts/run_accuracy.py +313 -0
  35. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/tests/latency-tests.json +10 -0
  36. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/tests/serving-tests.json +26 -2
  37. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/tests/throughput-tests.json +11 -0
  38. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/__init__.py → vllm_ascend-0.9.2rc1/codecov.yml +14 -2
  39. vllm_ascend-0.9.2rc1/csrc/kernels/get_masked_input_and_mask_kernel.cpp +378 -0
  40. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/pos_encoding_kernels.cpp +15 -5
  41. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/utils.h +3 -1
  42. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/ops.h +14 -0
  43. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/torch_binding.cpp +116 -0
  44. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/requirements-test.txt +1 -1
  45. vllm_ascend-0.9.2rc1/docs/source/assets/multi_node_dp.png +0 -0
  46. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/community/contributors.md +18 -0
  47. vllm_ascend-0.9.2rc1/docs/source/community/user_stories/index.md +19 -0
  48. vllm_ascend-0.9.2rc1/docs/source/community/user_stories/llamafactory.md +19 -0
  49. {vllm_ascend-0.9.0rc2/docs/source/developer_guide → vllm_ascend-0.9.2rc1/docs/source/community}/versioning_policy.md +6 -2
  50. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/conf.py +11 -4
  51. vllm_ascend-0.9.0rc2/docs/source/developer_guide/contributing.md → vllm_ascend-0.9.2rc1/docs/source/developer_guide/contribution/index.md +44 -46
  52. vllm_ascend-0.9.2rc1/docs/source/developer_guide/contribution/testing.md +280 -0
  53. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/index.md +2 -9
  54. vllm_ascend-0.9.2rc1/docs/source/developer_guide/feature_guide/index.md +9 -0
  55. vllm_ascend-0.9.2rc1/docs/source/developer_guide/feature_guide/patch.md +82 -0
  56. vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/adding_a_new_model.md +258 -0
  57. vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
  58. vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/index.md +10 -0
  59. vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance/index.md +8 -0
  60. {vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation → vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance}/profile_execute_duration.md +5 -0
  61. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/faqs.md +51 -11
  62. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/index.md +9 -14
  63. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/installation.md +21 -14
  64. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/quick_start.md +4 -0
  65. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/index.md +5 -0
  66. vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_node.md +197 -0
  67. vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_npu_moge.md +136 -0
  68. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu_quantization.md +1 -1
  69. vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_npu_qwen3_moe.md +109 -0
  70. vllm_ascend-0.9.2rc1/docs/source/tutorials/single_node_300i.md +330 -0
  71. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_npu.md +68 -3
  72. vllm_ascend-0.9.2rc1/docs/source/tutorials/single_npu_audio.md +122 -0
  73. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_npu_multimodal.md +6 -3
  74. vllm_ascend-0.9.2rc1/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
  75. {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration}/additional_config.md +20 -15
  76. {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration}/env_vars.md +1 -1
  77. vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration/index.md +10 -0
  78. {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide}/graph_mode.md +10 -14
  79. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  80. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/index.md +13 -0
  81. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/lora.md +8 -0
  82. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/quantization.md +106 -0
  83. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/sleep_mode.md +115 -0
  84. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/structured_output.md +163 -0
  85. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/user_guide/release_notes.md +68 -1
  86. vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/index.md +10 -0
  87. {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix}/supported_models.md +1 -1
  88. vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/find_device_ips.py +69 -0
  89. vllm_ascend-0.9.2rc1/examples/eplb/eplb_deepseek.py +205 -0
  90. vllm_ascend-0.9.2rc1/examples/eplb/eplb_strategy.py +183 -0
  91. vllm_ascend-0.9.2rc1/examples/offline_data_parallel.py +241 -0
  92. vllm_ascend-0.9.2rc1/examples/offline_embed.py +53 -0
  93. vllm_ascend-0.9.2rc1/examples/offline_inference_audio_language.py +84 -0
  94. vllm_ascend-0.9.0rc2/examples/offline_inference_npu.py → vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v0.py +5 -0
  95. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_inference_npu_v1.py +5 -4
  96. vllm_ascend-0.9.2rc1/examples/offline_inference_sleep_mode_npu.py +54 -0
  97. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_multi_step_custom_ops.py +0 -3
  98. vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16.sh +23 -0
  99. vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16_benmark.sh +57 -0
  100. vllm_ascend-0.9.2rc1/format.sh +44 -0
  101. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/mypy.ini +3 -0
  102. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/pyproject.toml +3 -1
  103. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/requirements-dev.txt +3 -0
  104. vllm_ascend-0.9.2rc1/requirements-lint.txt +8 -0
  105. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/requirements.txt +8 -1
  106. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/setup.py +31 -2
  107. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/conftest.py +164 -5
  108. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/common.sh +24 -1
  109. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/doctests/001-quickstart-test.sh +11 -2
  110. vllm_ascend-0.9.2rc1/tests/e2e/doctests/002-pip-binary-installation-test.sh +62 -0
  111. vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +261 -0
  112. vllm_ascend-0.9.0rc2/tests/long_term/test_accuracy.py → vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +15 -11
  113. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_data_parallel.py +72 -0
  114. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +82 -0
  115. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_ilama_lora_tp2.py +4 -3
  116. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_offline_inference_distributed.py +88 -16
  117. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_pipeline_parallel.py +43 -0
  118. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_prefix_caching.py +152 -0
  119. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_torchair_graph_mode.py +161 -0
  120. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +2 -0
  121. vllm_ascend-0.9.2rc1/tests/e2e/prompts/example.txt +8 -0
  122. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/run_doctests.sh +6 -0
  123. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/compile/test_simple.py +8 -24
  124. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py +728 -0
  125. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py +46 -0
  126. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py +60 -0
  127. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +37 -0
  128. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/test_rotary_embedding.py +3 -1
  129. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +91 -0
  130. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/sample/test_rejection_sampler.py +19 -22
  131. {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1}/test_v1_mtp_correctness.py +2 -0
  132. {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1}/test_v1_spec_decode.py +12 -6
  133. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_aclgraph.py +5 -1
  134. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_chunked.py +74 -0
  135. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_embedding.py +68 -0
  136. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_guided_decoding.py +21 -31
  137. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_ilama_lora.py +3 -3
  138. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_offline_inference.py +2 -2
  139. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_sampler.py +109 -147
  140. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_scheduler.py +40 -54
  141. vllm_ascend-0.9.2rc1/tests/ut/attention/test_attention_mask.py +156 -0
  142. vllm_ascend-0.9.2rc1/tests/ut/attention/test_attention_v1.py +497 -0
  143. vllm_ascend-0.9.2rc1/tests/ut/base.py +31 -0
  144. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_buffer.py +71 -0
  145. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_connector.py +146 -0
  146. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_pipe.py +145 -0
  147. vllm_ascend-0.9.2rc1/tests/ut/distributed/test_parallel_state.py +208 -0
  148. vllm_ascend-0.9.2rc1/tests/ut/fake_weight/config.json +28 -0
  149. vllm_ascend-0.9.2rc1/tests/ut/ops/expert_map.json +17 -0
  150. vllm_ascend-0.9.2rc1/tests/ut/ops/test_expert_load_balancer.py +141 -0
  151. vllm_ascend-0.9.2rc1/tests/ut/ops/test_rotary_embedding.py +315 -0
  152. vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +27 -0
  153. vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_sampler.py +46 -0
  154. vllm_ascend-0.9.2rc1/tests/ut/quantization/test_quant_config.py +230 -0
  155. vllm_ascend-0.9.2rc1/tests/ut/quantization/test_quantizer.py +122 -0
  156. vllm_ascend-0.9.2rc1/tests/ut/quantization/test_w8a8.py +906 -0
  157. vllm_ascend-0.9.2rc1/tests/ut/test_ascend_config.py +267 -0
  158. vllm_ascend-0.9.2rc1/tests/ut/test_platform.py +717 -0
  159. vllm_ascend-0.9.2rc1/tests/ut/test_utils.py +355 -0
  160. vllm_ascend-0.9.2rc1/tests/ut/worker/test_input_batch.py +162 -0
  161. vllm_ascend-0.9.2rc1/tests/ut/worker/test_pooling_model_runner.py +355 -0
  162. vllm_ascend-0.9.2rc1/tests/ut/worker/test_worker_v1.py +1 -0
  163. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/utils.py +37 -0
  164. vllm_ascend-0.9.2rc1/tools/enforce_regex_import.py +104 -0
  165. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/mypy.sh +5 -1
  166. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/shellcheck.sh +4 -0
  167. vllm_ascend-0.9.2rc1/typos.toml +177 -0
  168. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/_version.py +2 -2
  169. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ascend_config.py +20 -12
  170. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/attention.py +34 -107
  171. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_mask.py +104 -0
  172. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/attention_v1.py +74 -5
  173. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1_torchair.py +503 -0
  174. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/mla_v1.py +375 -85
  175. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/compilation/piecewise_backend.py +1 -7
  176. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/scheduler.py +177 -76
  177. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/device_allocator/camem.py +1 -2
  178. vllm_ascend-0.9.2rc1/vllm_ascend/distributed/communication_op.py +25 -0
  179. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_connector.py +5 -2
  180. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/envs.py +21 -21
  181. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/__init__.py +12 -3
  182. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_dbo.py +120 -164
  183. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_mtp.py +2 -1
  184. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_v2.py +399 -138
  185. vllm_ascend-0.9.2rc1/vllm_ascend/models/pangu_moe.py +1123 -0
  186. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/activation.py +6 -1
  187. vllm_ascend-0.9.2rc1/vllm_ascend/ops/common_fused_moe.py +112 -0
  188. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/fused_moe.py +346 -104
  189. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/layernorm.py +11 -2
  190. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/rotary_embedding.py +17 -4
  191. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +2 -2
  192. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/__init__.py +17 -85
  193. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/__init__.py +2 -2
  194. vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_common/patch_distributed.py +137 -0
  195. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/__init__.py +2 -2
  196. vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_0_9_2/__init__.py +16 -0
  197. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +0 -1
  198. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -16
  199. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_sampler.py +83 -101
  200. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +7 -5
  201. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/platform.py +17 -19
  202. vllm_ascend-0.9.2rc1/vllm_ascend/pool/__init__.py +16 -0
  203. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/conftest.py → vllm_ascend-0.9.2rc1/vllm_ascend/pool/metadata.py +14 -10
  204. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/quant_config.py +8 -26
  205. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/quantizer.py +14 -1
  206. vllm_ascend-0.9.2rc1/vllm_ascend/quantization/w8a8.py +758 -0
  207. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/w8a8_dynamic.py +176 -99
  208. vllm_ascend-0.9.2rc1/vllm_ascend/utils.py +563 -0
  209. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/draft_model_runner.py +7 -6
  210. vllm_ascend-0.9.2rc1/vllm_ascend/worker/eagle_proposer_v1.py +386 -0
  211. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/model_runner_v1.py +890 -435
  212. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/mtp_proposer_v1.py +1 -38
  213. vllm_ascend-0.9.2rc1/vllm_ascend/worker/npu_input_batch.py +757 -0
  214. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/worker.py +13 -7
  215. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/worker_v1.py +80 -36
  216. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/PKG-INFO +6 -5
  217. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/SOURCES.txt +128 -65
  218. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/requires.txt +2 -1
  219. vllm_ascend-0.9.0rc2/.github/workflows/accuracy_report.yaml +0 -202
  220. vllm_ascend-0.9.0rc2/.github/workflows/actionlint.yml +0 -53
  221. vllm_ascend-0.9.0rc2/.github/workflows/shellcheck.yml +0 -49
  222. vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test.yaml +0 -222
  223. vllm_ascend-0.9.0rc2/benchmarks/README.md +0 -57
  224. vllm_ascend-0.9.0rc2/benchmarks/scripts/run_accuracy.py +0 -226
  225. vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_node.md +0 -195
  226. vllm_ascend-0.9.0rc2/docs/source/user_guide/release.template.md +0 -13
  227. vllm_ascend-0.9.0rc2/docs/source/user_stories/example.md +0 -15
  228. vllm_ascend-0.9.0rc2/docs/source/user_stories/index.md +0 -22
  229. vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/find_device_ips.py +0 -67
  230. vllm_ascend-0.9.0rc2/examples/dp_offline/data_parallel.py +0 -85
  231. vllm_ascend-0.9.0rc2/examples/dp_offline/run_dp.sh +0 -19
  232. vllm_ascend-0.9.0rc2/examples/offline_inference_audio_language.py +0 -126
  233. vllm_ascend-0.9.0rc2/format.sh +0 -343
  234. vllm_ascend-0.9.0rc2/pytest.ini +0 -68
  235. vllm_ascend-0.9.0rc2/requirements-lint.txt +0 -15
  236. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/conftest.py +0 -212
  237. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_medusa_correctness.py +0 -445
  238. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mlp_correctness.py +0 -560
  239. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mtp_correctness.py +0 -455
  240. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_ngram_correctness.py +0 -404
  241. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_dynamic_spec_decode.py +0 -105
  242. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_multi_step_worker.py +0 -846
  243. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_ngram_worker.py +0 -237
  244. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_spec_decode_worker.py +0 -958
  245. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_utils.py +0 -165
  246. vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/utils.py +0 -317
  247. vllm_ascend-0.9.0rc2/tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -71
  248. vllm_ascend-0.9.0rc2/tests/singlecard/test_ascend_config.py +0 -189
  249. vllm_ascend-0.9.0rc2/vllm_ascend/ops/common_fused_moe.py +0 -69
  250. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/__init__.py +0 -17
  251. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/patch_distributed.py +0 -116
  252. vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -99
  253. vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -70
  254. vllm_ascend-0.9.0rc2/vllm_ascend/quantization/w8a8.py +0 -115
  255. vllm_ascend-0.9.0rc2/vllm_ascend/utils.py +0 -229
  256. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  257. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  258. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  259. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  260. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  261. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  262. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  263. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  264. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  265. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  266. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  267. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/actionlint.yaml +0 -0
  268. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/dependabot.yml +0 -0
  269. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/labeler.yml +0 -0
  270. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/label_merge_conflict.yml +0 -0
  271. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/labeler.yml +0 -0
  272. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/actionlint.json +0 -0
  273. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/mypy.json +0 -0
  274. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/ruff.json +0 -0
  275. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.readthedocs.yaml +0 -0
  276. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/CODE_OF_CONDUCT.md +0 -0
  277. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/DCO +0 -0
  278. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/LICENSE +0 -0
  279. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/requirements-bench.txt +0 -0
  280. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/perf_result_template.md +0 -0
  281. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/cmake/utils.cmake +0 -0
  282. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/collect_env.py +0 -0
  283. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/camem_allocator.cpp +0 -0
  284. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/advance_step.cpp +0 -0
  285. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/types.h +0 -0
  286. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/utils.h +0 -0
  287. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/Makefile +0 -0
  288. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/README.md +0 -0
  289. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/requirements-docs.txt +0 -0
  290. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/_templates/sections/header.html +0 -0
  291. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/community/governance.md +0 -0
  292. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  293. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  294. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  295. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  296. {vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation → vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance}/performance_benchmark.md +0 -0
  297. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  298. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  299. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu.md +0 -0
  300. /vllm_ascend-0.9.0rc2/docs/source/user_guide/suppoted_features.md → /vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  301. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -0
  302. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/dp_proxy.py +0 -0
  303. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -0
  304. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/run_decode_server.sh +0 -0
  305. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/run_prefill_server.sh +0 -0
  306. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_disaggregated_prefill_npu.py +0 -0
  307. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_distributed_inference_npu.py +0 -0
  308. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_dualbatch_overlap_npu.py +0 -0
  309. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/prompt_embedding_inference.py +0 -0
  310. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/run_dp_server.sh +0 -0
  311. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/packages.txt +0 -0
  312. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/setup.cfg +0 -0
  313. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/__init__.py +0 -0
  314. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_dynamic_npugraph_batchsize.py +0 -0
  315. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_pyhccl_distributed.py +0 -0
  316. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  317. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  318. {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard}/__init__.py +0 -0
  319. {vllm_ascend-0.9.0rc2/tests/singlecard → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile}/__init__.py +0 -0
  320. {vllm_ascend-0.9.0rc2/tests/singlecard/compile → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core}/__init__.py +0 -0
  321. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/__init__.py +0 -0
  322. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/test_fused_moe.py +0 -0
  323. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/test_multi_step.py +0 -0
  324. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/sample/__init__.py +0 -0
  325. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_camem.py +0 -0
  326. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_profile_execute_duration.py +0 -0
  327. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_prompt_embedding.py +0 -0
  328. {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_pyhccl.py +0 -0
  329. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/model_utils.py +0 -0
  330. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/actionlint.sh +0 -0
  331. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/check_repo.sh +0 -0
  332. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/png-lint.sh +0 -0
  333. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/sphinx-lint.sh +0 -0
  334. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/__init__.py +0 -0
  335. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/__init__.py +0 -0
  336. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/compilation/__init__.py +0 -0
  337. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/__init__.py +0 -0
  338. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/schedule_config.py +0 -0
  339. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/device_allocator/__init__.py +0 -0
  340. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/__init__.py +0 -0
  341. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/communicator.py +0 -0
  342. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
  343. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  344. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  345. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/__init__.py +0 -0
  346. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -0
  347. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -0
  348. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/utils.py +0 -0
  349. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/llmdatadist_connector.py +0 -0
  350. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/parallel_state.py +0 -0
  351. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/__init__.py +0 -0
  352. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/punica_wrapper/__init__.py +0 -0
  353. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/punica_wrapper/punica_npu.py +0 -0
  354. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_5_vl.py +0 -0
  355. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -0
  356. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_vl.py +0 -0
  357. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen3_moe.py +0 -0
  358. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/__init__.py +0 -0
  359. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/base.py +0 -0
  360. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/context.py +0 -0
  361. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/decorator.py +0 -0
  362. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/layers.py +0 -0
  363. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/metadata.py +0 -0
  364. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/ms_split.py +0 -0
  365. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/__init__.py +0 -0
  366. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/attention.py +0 -0
  367. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/cache.py +0 -0
  368. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  369. {vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_main → vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_0_9_2}/__init__.py +0 -0
  370. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
  371. {vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_0_9_0 → vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
  372. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  373. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  374. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -0
  375. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  376. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/__init__.py +0 -0
  377. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/func_wrapper.py +0 -0
  378. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/sample/__init__.py +0 -0
  379. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/sample/rejection_sampler.py +0 -0
  380. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/__init__.py +0 -0
  381. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/cache_engine.py +0 -0
  382. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/model_runner.py +0 -0
  383. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/multi_step_runner.py +0 -0
  384. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/multi_step_worker.py +0 -0
  385. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/pooling_model_runner.py +0 -0
  386. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  387. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  388. {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -15,17 +15,16 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
  ARG PY_VERSION=3.10
18
- FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py${PY_VERSION}
18
+ FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
19
19
 
20
20
  ARG COMPILE_CUSTOM_KERNELS=1
21
21
 
22
22
  # Define environments
23
23
  ENV DEBIAN_FRONTEND=noninteractive
24
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
25
- RUN apt-get update -y && \
26
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
27
- rm -rf /var/cache/apt/* && \
28
- rm -rf /var/lib/apt/lists/*
25
+ RUN yum update -y && \
26
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
+ rm -rf /var/cache/yum
29
28
 
30
29
  WORKDIR /workspace
31
30
 
@@ -41,8 +40,6 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
41
40
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
42
41
  cd vllm-ascend && \
43
42
  python3 setup.py bdist_wheel && \
44
- ls -l dist && \
45
- for f in dist/*.whl; do mv "$f" "$(echo "$f" | sed -e 's/-linux_x86_64\.whl$/-manylinux1_x86_64.whl/' -e 's/-linux_aarch64\.whl$/-manylinux2014_aarch64.whl/')"; done && \
46
- ls -l dist
43
+ ls -l dist
47
44
 
48
45
  CMD ["/bin/bash"]
@@ -1,5 +1,5 @@
1
1
  name: 📚 User Story
2
- description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.org/user_stories/index.html
2
+ description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
3
3
  title: "[User Story]: "
4
4
  labels: ["user-story"]
5
5
 
@@ -0,0 +1,100 @@
1
+ name: Release Checklist
2
+ description: Generate a release checklist issue when prepare a new release.(Used for release team)
3
+ title: "[Release]: Release checklist for v"
4
+
5
+ body:
6
+ - type: textarea
7
+ attributes:
8
+ description: >
9
+ Brief info for the new release.
10
+ label: Release Checklist
11
+ value: >
12
+ **Release Version**:
13
+
14
+ **Release Branch**:
15
+
16
+ **Release Date**:
17
+
18
+ **Release Manager**:
19
+ - type: textarea
20
+ attributes:
21
+ description: >
22
+ Release notes.
23
+ label: Prepare Release Note
24
+ value: >
25
+ - [ ] Create a new issue for release feedback
26
+
27
+ - [ ] Write the release note PR.
28
+
29
+ - [ ] Update the feedback issue link in docs/source/faqs.md
30
+
31
+ - [ ] Add release note to docs/source/user_guide/release_notes.md
32
+
33
+ - [ ] Update version info in docs/source/community/versioning_policy.md
34
+
35
+ - [ ] Update contributor info in docs/source/community/contributors.md
36
+
37
+ - [ ] Update package version in docs/conf.py
38
+ - type: textarea
39
+ attributes:
40
+ description: >
41
+ Make sure the code is merged.
42
+ label: PR need Merge
43
+ value: >
44
+ - [ ] PR link1
45
+
46
+ - [ ] PR link2
47
+
48
+ - [ ] ...
49
+ - type: textarea
50
+ attributes:
51
+ description: >
52
+ Make sure the new Feature/Function is tested
53
+ label: Functional Test
54
+ value: >
55
+ - [ ] Feature1
56
+
57
+ - [ ] Bug1
58
+
59
+ - [ ] ...
60
+ - type: textarea
61
+ attributes:
62
+ description: >
63
+ Make sure the doc is updated.
64
+ label: Doc Test
65
+ value: >
66
+ - [ ] Tutorial is updated.
67
+
68
+ - [ ] User Guide is updated.
69
+
70
+ - [ ] Developer Guide is updated.
71
+ - type: textarea
72
+ attributes:
73
+ description: >
74
+ Make sure the artifacts is ready
75
+ label: Prepare Artifacts
76
+ value: >
77
+ - [ ] Docker image is ready.
78
+
79
+ - [ ] Wheel package is ready.
80
+ - type: textarea
81
+ attributes:
82
+ description: >
83
+ Start to release.
84
+ label: Release Step
85
+ value: >
86
+ - [ ] Release note PR is merged.
87
+
88
+ - [ ] Post the release on GitHub release page.
89
+
90
+ - [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
91
+
92
+ - [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
93
+
94
+ - [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
95
+
96
+ - [ ] Upload 310p wheel to Github release page
97
+
98
+ - [ ] Broadcast the release news (By message, blog , etc)
99
+
100
+ - [ ] Close this issue
@@ -0,0 +1,56 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ # Adapted from vllm/.github/scripts/cleanup_pr_body.sh
17
+
18
+ #!/bin/bash
19
+
20
+ set -eux
21
+
22
+ # ensure 3 arguments are passed
23
+ if [ "$#" -ne 3 ]; then
24
+ echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
25
+ exit 1
26
+ fi
27
+
28
+ PR_NUMBER=$1
29
+ VLLM_VERSION=$2
30
+ VLLM_COMMIT=$3
31
+ OLD=/tmp/orig_pr_body.txt
32
+ NEW=/tmp/new_pr_body.txt
33
+
34
+ gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
35
+ cp "${OLD}" "${NEW}"
36
+
37
+ # Remove notes in pr description and add vLLM version and commit
38
+ sed -i '/<!--/,/-->/d' "${NEW}"
39
+ sed -i '/- vLLM .*$/d' "${NEW}"
40
+ {
41
+ echo ""
42
+ echo "- vLLM version: $VLLM_VERSION"
43
+ echo "- vLLM main: $VLLM_COMMIT"
44
+ echo ""
45
+ } >> "${NEW}"
46
+
47
+ # Run this only if ${NEW} is different than ${OLD}
48
+ if ! cmp -s "${OLD}" "${NEW}"; then
49
+ echo
50
+ echo "Updating PR body:"
51
+ echo
52
+ cat "${NEW}"
53
+ gh pr edit --body-file "${NEW}" "${PR_NUMBER}"
54
+ else
55
+ echo "No changes needed"
56
+ fi
@@ -22,6 +22,9 @@
22
22
  name: Benchmarks / accuracy
23
23
 
24
24
  on:
25
+ schedule:
26
+ # Runs every 6 hours
27
+ - cron: '0 */6 * * *'
25
28
  pull_request:
26
29
  types: [ labeled ]
27
30
  workflow_dispatch:
@@ -34,8 +37,8 @@ on:
34
37
  # Current supported vLLM versions
35
38
  options:
36
39
  - main
37
- - v0.9.0.1
38
- - v0.9.0
40
+ - v0.9.2
41
+ - v0.9.1
39
42
  - v0.7.3
40
43
  vllm-ascend-version:
41
44
  description: 'vllm-ascend version:'
@@ -43,6 +46,7 @@ on:
43
46
  type: choice
44
47
  options:
45
48
  - main
49
+ - v0.9.1-dev
46
50
  - v0.7.3-dev
47
51
  models:
48
52
  description: 'model:'
@@ -50,9 +54,9 @@ on:
50
54
  type: choice
51
55
  options:
52
56
  - all
53
- - Qwen/Qwen2.5-7B-Instruct
54
57
  - Qwen/Qwen2.5-VL-7B-Instruct
55
58
  - Qwen/Qwen3-8B-Base
59
+ - Qwen/Qwen3-30B-A3B
56
60
  default: 'all'
57
61
 
58
62
  # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -74,56 +78,56 @@ jobs:
74
78
  ${{
75
79
  (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
76
80
  contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
81
+ contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
77
82
  contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
78
83
  contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
79
- github.event_name == 'workflow_dispatch'
84
+ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
80
85
  }}
81
86
  runs-on: >-
82
87
  ${{
83
- (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
88
+ (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
84
89
  'linux-arm64-npu-2'
85
90
  }}
86
91
  strategy:
87
92
  matrix:
88
- vllm_use_version: [0, 1]
89
93
  # the accuracy test will run:
90
94
  # 1. workflow_dispatch with models input
91
- # - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
92
- # - specified but not all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
95
+ # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
96
+ # - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
93
97
  # 2. PR labeled with "*-accuracy-test"
94
- # - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
95
- # - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
98
+ # - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
99
+ # - dense-accuracy-test: Qwen/Qwen3-8B-Base
96
100
  # - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
101
+ # - moe-accuracy-test: Qwen/Qwen3-30B-A3B
97
102
  model_name: ${{ fromJSON(
103
+ (github.event_name == 'schedule' &&
104
+ '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
98
105
  (github.event.inputs.models == 'all' &&
99
- '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
100
- (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
101
- '["Qwen/Qwen2.5-7B-Instruct"]') ||
106
+ '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
107
+ (github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
108
+ '["Qwen/Qwen3-30B-A3B"]') ||
102
109
  (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
103
110
  '["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
104
111
  (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
105
112
  '["Qwen/Qwen3-8B-Base"]') ||
106
113
  contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
107
- '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
114
+ '["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
108
115
  contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
109
- '["Qwen/Qwen2.5-7B-Instruct"]' ||
116
+ '["Qwen/Qwen3-8B-Base"]' ||
110
117
  contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
111
- '["Qwen/Qwen2.5-VL-7B-Instruct"]'
118
+ '["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
119
+ contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
120
+ '["Qwen/Qwen3-30B-A3B"]'
112
121
  ) }}
113
- # Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
114
- exclude:
115
- - model_name: Qwen/Qwen2.5-VL-7B-Instruct
116
- vllm_use_version: 1
117
122
 
118
123
  fail-fast: false
119
- name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
124
+ name: ${{ matrix.model_name }} accuracy
120
125
  container:
121
126
  image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
122
127
  env:
123
- HF_ENDPOINT: https://hf-mirror.com
124
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
125
128
  DATASET_SOURCE: ModelScope
126
129
  VLLM_USE_MODELSCOPE: True
130
+ USE_MODELSCOPE_HUB: 1
127
131
  # 1. If version specified (workflow_dispatch), do specified branch accuracy test
128
132
  # 2. If no version (labeled PR), do accuracy test by default ref:
129
133
  # The branch, tag or SHA to checkout. When checking out the repository that
@@ -159,7 +163,7 @@ jobs:
159
163
  repository: vllm-project/vllm
160
164
  path: ./vllm-empty
161
165
  # Please also update this when bump matched version
162
- ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
166
+ ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
163
167
 
164
168
  - name: Install vllm-project/vllm from source
165
169
  working-directory: ./vllm-empty
@@ -174,13 +178,32 @@ jobs:
174
178
 
175
179
  - name: Install vllm-project/vllm-ascend
176
180
  working-directory: ./vllm-ascend
181
+ env:
182
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
177
183
  run: |
178
184
  pip install -r requirements-dev.txt
179
- pip install -e .
185
+ pip install -v -e .
186
+
187
+ - name: Get vLLM commit hash and URL
188
+ working-directory: ./vllm-empty
189
+ run: |
190
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
191
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
192
+
193
+ - name: Get vLLM-Ascend commit hash and URL
194
+ working-directory: ./vllm-ascend
195
+ run: |
196
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
197
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
198
+
199
+ - name: Print resolved hashes
200
+ run: |
201
+ echo "vLLM : ${{ env.VLLM_COMMIT }}"
202
+ echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
180
203
 
181
204
  - name: Install lm-eval, ray, and datasets
182
205
  run: |
183
- pip install lm-eval
206
+ pip install lm-eval==0.4.8
184
207
 
185
208
  - name: Collect version info
186
209
  run: |
@@ -212,15 +235,14 @@ jobs:
212
235
  echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
213
236
  echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
214
237
 
215
- - name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
238
+ - name: Run Accuracy Test
216
239
  id: report
217
240
  working-directory: ./benchmarks
218
241
  env:
219
242
  PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
220
- VLLM_USE_V1: ${{ matrix.vllm_use_version }}
221
243
  run: |
222
244
  model_base_name=$(basename ${{ matrix.model_name }})
223
- markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
245
+ markdown_name="${model_base_name}"
224
246
  echo "markdown_name=$markdown_name"
225
247
  echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
226
248
  mkdir -p ./accuracy
@@ -232,7 +254,9 @@ jobs:
232
254
  --cann_version "${{ env.GHA_CANN_VERSION }}" \
233
255
  --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
234
256
  --torch_version "${{ env.GHA_TORCH_VERSION }}" \
235
- --vllm_version "${{ env.GHA_VLLM_VERSION }}"
257
+ --vllm_version "${{ env.GHA_VLLM_VERSION }}" \
258
+ --vllm_commit "${{ env.VLLM_COMMIT }}" \
259
+ --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
236
260
 
237
261
  - name: Generate step summary
238
262
  if: ${{ always() }}
@@ -244,12 +268,122 @@ jobs:
244
268
  SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
245
269
  echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
246
270
 
247
- - name: Upload Report for V${{ matrix.vllm_use_version }}
248
- if: ${{ github.event_name == 'workflow_dispatch' }}
271
+ - name: Check report first line for failure
272
+ id: check_report
273
+ run: |
274
+ REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
275
+ echo "Scanning $REPORT_PATH for ❌ …"
276
+ if grep -q '❌' "$REPORT_PATH"; then
277
+ echo "contains_fail=true" >> $GITHUB_OUTPUT
278
+ else
279
+ echo "contains_fail=false" >> $GITHUB_OUTPUT
280
+ fi
281
+
282
+ - name: Upload Report
283
+ if: ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
249
284
  uses: actions/upload-artifact@v4
250
285
  with:
251
- name: "${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
286
+ name: "report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
252
287
  path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
253
288
  if-no-files-found: warn
254
289
  retention-days: 90
255
290
  overwrite: true
291
+
292
+ create_pr:
293
+ runs-on: ubuntu-latest
294
+ needs: accuracy_tests
295
+ if: ${{ github.event_name == 'workflow_dispatch' }}
296
+ env:
297
+ UPSTREAM_REPO: vllm-project/vllm-ascend
298
+ steps:
299
+ - name: Checkout repository
300
+ uses: actions/checkout@v4
301
+ with:
302
+ repository: vllm-ascend-ci/vllm-ascend
303
+ token: ${{ secrets.PAT_TOKEN }}
304
+ ref: main
305
+
306
+ - name: Add upstream remote
307
+ run: |
308
+ git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
309
+ git fetch upstream
310
+ git remote -v
311
+
312
+ - name: Set Git user info dynamically
313
+ run: |
314
+ git config user.name "${{ github.actor }}"
315
+ git config user.email "${{ github.actor }}@users.noreply.github.com"
316
+
317
+ - name: Create or switch to branch
318
+ run: |
319
+ TIMESTAMP=$(date +%Y%m%d%H%M%S)
320
+ BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
321
+ echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
322
+ git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
323
+
324
+ - name: Download only current run reports
325
+ uses: actions/download-artifact@v4
326
+ with:
327
+ path: ./docs/source/developer_guide/evaluation/accuracy_report
328
+ pattern: report-*
329
+ github-token: ${{ secrets.GITHUB_TOKEN }}
330
+ run-id: ${{ github.run_id }}
331
+
332
+ - name: Delete old report
333
+ run: |
334
+ find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
335
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
336
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
337
+
338
+ - name: Update accuracy_report/index.md
339
+ run: |
340
+ REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
341
+ INDEX_MD="$REPORT_DIR/index.md"
342
+ {
343
+ echo "# Accuracy Report"
344
+ echo ""
345
+ echo ":::{toctree}"
346
+ echo ":caption: Accuracy Report"
347
+ echo ":maxdepth: 1"
348
+
349
+ for report in "$REPORT_DIR"/*.md; do
350
+ filename="$(basename "$report" .md)"
351
+ if [ "$filename" != "index" ]; then
352
+ echo "$filename"
353
+ fi
354
+ done
355
+ echo ":::"
356
+ } > "$INDEX_MD"
357
+
358
+ - name: push accuracy report
359
+ env:
360
+ GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
361
+ run: |
362
+ git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
363
+ git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
364
+ git push -f origin "${{ env.BRANCH_NAME }}"
365
+
366
+ - name: Create PR in upstream via API
367
+ uses: actions/github-script@v7
368
+ with:
369
+ github-token: ${{ secrets.PAT_TOKEN }}
370
+ script: |
371
+ const pr = await github.rest.pulls.create({
372
+ owner: 'vllm-project',
373
+ repo: 'vllm-ascend',
374
+ head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
375
+ base: '${{ github.event.inputs.vllm-ascend-version }}',
376
+ title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
377
+ body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
378
+ ${{
379
+ github.event.inputs.models == 'all'
380
+ && 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
381
+ || github.event.inputs.models
382
+ }}
383
+
384
+ - [Workflow run][1]
385
+
386
+ [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
387
+ });
388
+ core.info(`Created PR #${pr.data.number}`);
389
+
@@ -0,0 +1,63 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: format / pr body
19
+
20
+ on:
21
+ # Update the PR body when a PR is opened or new commits are pushed
22
+ pull_request_target:
23
+ types: [opened, synchronize]
24
+ branches:
25
+ - 'main'
26
+
27
+ permissions:
28
+ pull-requests: write
29
+
30
+ jobs:
31
+ update-description:
32
+ name: update vLLM version
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+ - name: Checkout vllm-project/vllm repo
37
+ uses: actions/checkout@v4
38
+ with:
39
+ repository: vllm-project/vllm
40
+ path: ./vllm-empty
41
+
42
+ - name: Get vLLM version
43
+ working-directory: ./vllm-empty
44
+ run: |
45
+ VLLM_COMMIT=$(git rev-parse HEAD)
46
+ echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
+
48
+ - name: Checkout repository
49
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
50
+
51
+ - name: Set up Python
52
+ uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
53
+
54
+ - name: Get vLLM release version
55
+ run: |
56
+ VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
57
+ echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
58
+
59
+ - name: Update PR description
60
+ env:
61
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
62
+ run: |
63
+ bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"