vllm-ascend 0.9.1rc1__tar.gz → 0.9.2rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (382)
  1. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/Dockerfile.buildwheel +5 -8
  2. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +1 -1
  3. vllm_ascend-0.9.2rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml +100 -0
  4. vllm_ascend-0.9.2rc1/.github/format_pr_body.sh +56 -0
  5. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/accuracy_test.yaml +164 -31
  6. vllm_ascend-0.9.2rc1/.github/workflows/format_pr_body.yaml +63 -0
  7. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/image_310p_openeuler.yml +1 -1
  8. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/image_310p_ubuntu.yml +2 -2
  9. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/nightly_benchmarks.yaml +12 -8
  10. vllm_ascend-0.9.2rc1/.github/workflows/pre-commit.yml +37 -0
  11. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/release_code.yml +0 -12
  12. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/release_whl.yml +43 -20
  13. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_doctest.yaml +3 -22
  14. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test.yaml +65 -118
  15. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test_long_term.yaml +5 -11
  16. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test_pd.yaml +6 -3
  17. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.gitignore +2 -0
  18. vllm_ascend-0.9.2rc1/.pre-commit-config.yaml +141 -0
  19. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/Dockerfile +1 -1
  20. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/Dockerfile.310p +1 -1
  21. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/Dockerfile.310p.openEuler +1 -1
  22. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/Dockerfile.openEuler +1 -1
  23. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/PKG-INFO +6 -5
  24. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/README.md +5 -4
  25. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/README.zh.md +5 -4
  26. vllm_ascend-0.9.2rc1/benchmarks/README.md +166 -0
  27. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/ops/ben_vocabparallelembedding.py +48 -34
  28. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/convert_json_to_markdown.py +54 -49
  29. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/patch_benchmark_dataset.py +22 -11
  30. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/run-performance-benchmarks.sh +21 -22
  31. vllm_ascend-0.9.2rc1/benchmarks/scripts/run_accuracy.py +313 -0
  32. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/tests/serving-tests.json +3 -3
  33. vllm_ascend-0.9.2rc1/docs/source/assets/multi_node_dp.png +0 -0
  34. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/community/contributors.md +17 -0
  35. {vllm_ascend-0.9.1rc1/docs/source/developer_guide → vllm_ascend-0.9.2rc1/docs/source/community}/versioning_policy.md +4 -2
  36. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/conf.py +6 -4
  37. vllm_ascend-0.9.1rc1/docs/source/developer_guide/contributing.md → vllm_ascend-0.9.2rc1/docs/source/developer_guide/contribution/index.md +44 -46
  38. vllm_ascend-0.9.2rc1/docs/source/developer_guide/contribution/testing.md +280 -0
  39. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/index.md +2 -9
  40. vllm_ascend-0.9.2rc1/docs/source/developer_guide/feature_guide/index.md +9 -0
  41. vllm_ascend-0.9.2rc1/docs/source/developer_guide/feature_guide/patch.md +82 -0
  42. vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/adding_a_new_model.md +258 -0
  43. vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
  44. vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/index.md +10 -0
  45. vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance/index.md +8 -0
  46. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/faqs.md +51 -11
  47. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/index.md +8 -8
  48. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/installation.md +12 -7
  49. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/quick_start.md +4 -0
  50. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/index.md +3 -0
  51. vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_node.md +197 -0
  52. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu_moge.md +31 -12
  53. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu_quantization.md +1 -1
  54. vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_npu_qwen3_moe.md +109 -0
  55. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_node_300i.md +126 -100
  56. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_npu.md +68 -3
  57. vllm_ascend-0.9.2rc1/docs/source/tutorials/single_npu_audio.md +122 -0
  58. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_npu_multimodal.md +6 -3
  59. vllm_ascend-0.9.2rc1/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
  60. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration}/additional_config.md +11 -10
  61. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration}/env_vars.md +1 -1
  62. vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration/index.md +10 -0
  63. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide}/graph_mode.md +4 -9
  64. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  65. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/index.md +13 -0
  66. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/lora.md +8 -0
  67. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/sleep_mode.md +115 -0
  68. vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/structured_output.md +163 -0
  69. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/user_guide/release_notes.md +31 -3
  70. vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/index.md +10 -0
  71. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix}/supported_models.md +1 -1
  72. vllm_ascend-0.9.2rc1/examples/eplb/eplb_deepseek.py +205 -0
  73. vllm_ascend-0.9.2rc1/examples/eplb/eplb_strategy.py +183 -0
  74. vllm_ascend-0.9.2rc1/examples/offline_data_parallel.py +241 -0
  75. vllm_ascend-0.9.2rc1/examples/offline_embed.py +53 -0
  76. vllm_ascend-0.9.2rc1/examples/offline_inference_audio_language.py +84 -0
  77. vllm_ascend-0.9.1rc1/examples/offline_inference_npu.py → vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v0.py +5 -0
  78. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/offline_inference_npu_v1.py +5 -4
  79. vllm_ascend-0.9.2rc1/examples/offline_inference_sleep_mode_npu.py +54 -0
  80. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/run_dp_attention_etp16.sh +2 -2
  81. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/run_dp_attention_etp16_benmark.sh +2 -1
  82. vllm_ascend-0.9.2rc1/format.sh +44 -0
  83. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/pyproject.toml +3 -1
  84. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/requirements-dev.txt +1 -0
  85. vllm_ascend-0.9.2rc1/requirements-lint.txt +8 -0
  86. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/requirements.txt +4 -1
  87. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/setup.py +30 -1
  88. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/conftest.py +156 -4
  89. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/doctests/001-quickstart-test.sh +11 -0
  90. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +20 -0
  91. vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +261 -0
  92. vllm_ascend-0.9.1rc1/tests/e2e/long_term/test_accuracy.py → vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +15 -11
  93. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_data_parallel.py +72 -0
  94. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +82 -0
  95. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/multicard/test_ilama_lora_tp2.py +2 -1
  96. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/multicard/test_offline_inference_distributed.py +71 -16
  97. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_pipeline_parallel.py +43 -0
  98. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_prefix_caching.py +152 -0
  99. vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_torchair_graph_mode.py +161 -0
  100. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +2 -0
  101. vllm_ascend-0.9.2rc1/tests/e2e/prompts/example.txt +8 -0
  102. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/core → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler}/test_ascend_scheduler.py +34 -122
  103. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/core → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler}/test_ascend_scheduler_e2e.py +9 -3
  104. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py +60 -0
  105. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +37 -0
  106. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +2 -2
  107. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/sample/test_rejection_sampler.py +19 -22
  108. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1}/test_v1_mtp_correctness.py +2 -0
  109. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_aclgraph.py +5 -1
  110. vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_embedding.py +68 -0
  111. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_guided_decoding.py +21 -31
  112. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_ilama_lora.py +3 -3
  113. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_offline_inference.py +1 -1
  114. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_sampler.py +109 -147
  115. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_scheduler.py +18 -23
  116. vllm_ascend-0.9.2rc1/tests/ut/attention/test_attention_mask.py +156 -0
  117. vllm_ascend-0.9.2rc1/tests/ut/attention/test_attention_v1.py +497 -0
  118. vllm_ascend-0.9.2rc1/tests/ut/base.py +31 -0
  119. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_buffer.py +71 -0
  120. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_connector.py +146 -0
  121. vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_pipe.py +145 -0
  122. vllm_ascend-0.9.2rc1/tests/ut/distributed/test_parallel_state.py +208 -0
  123. vllm_ascend-0.9.2rc1/tests/ut/ops/expert_map.json +17 -0
  124. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/ut/ops/test_expert_load_balancer.py +25 -31
  125. vllm_ascend-0.9.2rc1/tests/ut/ops/test_rotary_embedding.py +315 -0
  126. vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +27 -0
  127. vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_sampler.py +46 -0
  128. vllm_ascend-0.9.2rc1/tests/ut/quantization/test_quant_config.py +230 -0
  129. vllm_ascend-0.9.2rc1/tests/ut/quantization/test_quantizer.py +122 -0
  130. vllm_ascend-0.9.2rc1/tests/ut/quantization/test_w8a8.py +906 -0
  131. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/ut/test_ascend_config.py +26 -3
  132. vllm_ascend-0.9.2rc1/tests/ut/test_platform.py +717 -0
  133. vllm_ascend-0.9.2rc1/tests/ut/test_utils.py +355 -0
  134. vllm_ascend-0.9.2rc1/tests/ut/worker/test_input_batch.py +162 -0
  135. vllm_ascend-0.9.2rc1/tests/ut/worker/test_pooling_model_runner.py +355 -0
  136. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/utils.py +37 -0
  137. vllm_ascend-0.9.2rc1/tools/enforce_regex_import.py +104 -0
  138. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tools/mypy.sh +5 -1
  139. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tools/shellcheck.sh +4 -0
  140. vllm_ascend-0.9.2rc1/typos.toml +177 -0
  141. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/_version.py +2 -2
  142. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ascend_config.py +13 -4
  143. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/attention.py +4 -104
  144. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_mask.py +104 -0
  145. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/attention_v1.py +30 -5
  146. vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1_torchair.py +503 -0
  147. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/mla_v1.py +58 -38
  148. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/scheduler.py +26 -29
  149. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/device_allocator/camem.py +0 -1
  150. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/__init__.py → vllm_ascend-0.9.2rc1/vllm_ascend/distributed/communication_op.py +9 -2
  151. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_connector.py +5 -2
  152. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/envs.py +21 -4
  153. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/__init__.py +8 -3
  154. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_dbo.py +115 -18
  155. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_mtp.py +2 -1
  156. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_v2.py +272 -43
  157. vllm_ascend-0.9.2rc1/vllm_ascend/models/pangu_moe.py +1123 -0
  158. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/common_fused_moe.py +45 -18
  159. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/fused_moe.py +176 -65
  160. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/rotary_embedding.py +9 -0
  161. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +2 -2
  162. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/__init__.py +16 -64
  163. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/__init__.py +2 -2
  164. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +31 -47
  165. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/__init__.py +2 -2
  166. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +0 -1
  167. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -16
  168. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_sampler.py +83 -101
  169. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +7 -5
  170. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/platform.py +9 -9
  171. vllm_ascend-0.9.2rc1/vllm_ascend/pool/__init__.py +16 -0
  172. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/conftest.py → vllm_ascend-0.9.2rc1/vllm_ascend/pool/metadata.py +14 -10
  173. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/quant_config.py +8 -26
  174. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/quantizer.py +14 -1
  175. vllm_ascend-0.9.2rc1/vllm_ascend/quantization/w8a8.py +758 -0
  176. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/w8a8_dynamic.py +116 -9
  177. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/utils.py +220 -64
  178. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/draft_model_runner.py +7 -6
  179. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/eagle_proposer_v1.py +3 -46
  180. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/model_runner_v1.py +677 -508
  181. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/mtp_proposer_v1.py +1 -38
  182. vllm_ascend-0.9.2rc1/vllm_ascend/worker/npu_input_batch.py +757 -0
  183. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/worker_v1.py +69 -23
  184. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/PKG-INFO +6 -5
  185. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/SOURCES.txt +81 -42
  186. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/requires.txt +2 -1
  187. vllm_ascend-0.9.1rc1/.github/workflows/accuracy_report.yaml +0 -202
  188. vllm_ascend-0.9.1rc1/.github/workflows/doc_codespell.yaml +0 -33
  189. vllm_ascend-0.9.1rc1/.github/workflows/shellcheck.yml +0 -49
  190. vllm_ascend-0.9.1rc1/benchmarks/README.md +0 -57
  191. vllm_ascend-0.9.1rc1/benchmarks/scripts/run_accuracy.py +0 -226
  192. vllm_ascend-0.9.1rc1/docs/source/tutorials/multi_node.md +0 -195
  193. vllm_ascend-0.9.1rc1/docs/source/user_guide/release.template.md +0 -13
  194. vllm_ascend-0.9.1rc1/examples/dp_offline/data_parallel.py +0 -85
  195. vllm_ascend-0.9.1rc1/examples/dp_offline/run_dp.sh +0 -19
  196. vllm_ascend-0.9.1rc1/examples/offline_inference_audio_language.py +0 -126
  197. vllm_ascend-0.9.1rc1/format.sh +0 -343
  198. vllm_ascend-0.9.1rc1/pytest.ini +0 -68
  199. vllm_ascend-0.9.1rc1/requirements-lint.txt +0 -15
  200. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e/conftest.py +0 -212
  201. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e/test_medusa_correctness.py +0 -445
  202. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e/test_mlp_correctness.py +0 -560
  203. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py +0 -455
  204. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e/test_ngram_correctness.py +0 -404
  205. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/test_dynamic_spec_decode.py +0 -105
  206. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/test_multi_step_worker.py +0 -846
  207. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/test_ngram_worker.py +0 -237
  208. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/test_spec_decode_worker.py +0 -958
  209. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/test_utils.py +0 -165
  210. vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/utils.py +0 -317
  211. vllm_ascend-0.9.1rc1/tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -71
  212. vllm_ascend-0.9.1rc1/tests/e2e/multicard/test_torchair_graph_mode.py +0 -80
  213. vllm_ascend-0.9.1rc1/tests/multicard/test_data_parallel.py +0 -66
  214. vllm_ascend-0.9.1rc1/vllm_ascend/models/pangu_moe.py +0 -639
  215. vllm_ascend-0.9.1rc1/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -70
  216. vllm_ascend-0.9.1rc1/vllm_ascend/quantization/w8a8.py +0 -115
  217. vllm_ascend-0.9.1rc1/vllm_ascend/sample/__init__.py +0 -0
  218. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  219. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  220. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  221. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  222. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  223. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  224. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  225. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  226. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  227. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  228. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  229. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/actionlint.yaml +0 -0
  230. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/dependabot.yml +0 -0
  231. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/labeler.yml +0 -0
  232. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/image_openeuler.yml +0 -0
  233. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/image_ubuntu.yml +0 -0
  234. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/label_merge_conflict.yml +0 -0
  235. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/labeler.yml +0 -0
  236. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/actionlint.json +0 -0
  237. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/mypy.json +0 -0
  238. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/ruff.json +0 -0
  239. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/.readthedocs.yaml +0 -0
  240. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/CMakeLists.txt +0 -0
  241. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/CODE_OF_CONDUCT.md +0 -0
  242. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/DCO +0 -0
  243. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/LICENSE +0 -0
  244. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/requirements-bench.txt +0 -0
  245. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/perf_result_template.md +0 -0
  246. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/tests/latency-tests.json +0 -0
  247. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/benchmarks/tests/throughput-tests.json +0 -0
  248. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/cmake/utils.cmake +0 -0
  249. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/codecov.yml +0 -0
  250. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/collect_env.py +0 -0
  251. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/camem_allocator.cpp +0 -0
  252. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/kernels/advance_step.cpp +0 -0
  253. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  254. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  255. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/kernels/types.h +0 -0
  256. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/kernels/utils.h +0 -0
  257. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/ops.h +0 -0
  258. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/torch_binding.cpp +0 -0
  259. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/csrc/utils.h +0 -0
  260. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/Makefile +0 -0
  261. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/README.md +0 -0
  262. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/requirements-docs.txt +0 -0
  263. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/requirements-test.txt +0 -0
  264. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/_templates/sections/header.html +0 -0
  265. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/community/governance.md +0 -0
  266. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/community/user_stories/index.md +0 -0
  267. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
  268. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  269. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  270. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  271. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  272. {vllm_ascend-0.9.1rc1/docs/source/developer_guide/evaluation → vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance}/performance_benchmark.md +0 -0
  273. {vllm_ascend-0.9.1rc1/docs/source/developer_guide/evaluation → vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance}/profile_execute_duration.md +0 -0
  274. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  275. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  276. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu.md +0 -0
  277. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide}/quantization.md +0 -0
  278. vllm_ascend-0.9.1rc1/docs/source/user_guide/suppoted_features.md → vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  279. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -0
  280. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/dp_proxy.py +0 -0
  281. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/find_device_ips.py +0 -0
  282. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -0
  283. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/run_decode_server.sh +0 -0
  284. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/run_prefill_server.sh +0 -0
  285. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/offline_disaggregated_prefill_npu.py +0 -0
  286. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/offline_distributed_inference_npu.py +0 -0
  287. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/offline_dualbatch_overlap_npu.py +0 -0
  288. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/offline_multi_step_custom_ops.py +0 -0
  289. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/prompt_embedding_inference.py +0 -0
  290. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/examples/run_dp_server.sh +0 -0
  291. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/mypy.ini +0 -0
  292. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/packages.txt +0 -0
  293. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/setup.cfg +0 -0
  294. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/__init__.py +0 -0
  295. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/common.sh +0 -0
  296. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/multicard/test_dynamic_npugraph_batchsize.py +0 -0
  297. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/multicard/test_pyhccl_distributed.py +0 -0
  298. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  299. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  300. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/run_doctests.sh +0 -0
  301. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard}/__init__.py +0 -0
  302. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile}/__init__.py +0 -0
  303. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/compile/test_simple.py +0 -0
  304. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/compile → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core}/__init__.py +0 -0
  305. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/core → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
  306. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/ops/test_fused_moe.py +0 -0
  307. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/ops/test_multi_step.py +0 -0
  308. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
  309. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/ops → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/sample}/__init__.py +0 -0
  310. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1}/test_v1_spec_decode.py +0 -0
  311. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_camem.py +0 -0
  312. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_chunked.py +0 -0
  313. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  314. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_prompt_embedding.py +0 -0
  315. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/e2e/singlecard/test_pyhccl.py +0 -0
  316. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/model_utils.py +0 -0
  317. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/ut/fake_weight/config.json +0 -0
  318. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tests/ut/worker/test_worker_v1.py +0 -0
  319. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tools/actionlint.sh +0 -0
  320. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tools/check_repo.sh +0 -0
  321. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tools/png-lint.sh +0 -0
  322. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/tools/sphinx-lint.sh +0 -0
  323. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/__init__.py +0 -0
  324. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/sample → vllm_ascend-0.9.2rc1/vllm_ascend/attention}/__init__.py +0 -0
  325. {vllm_ascend-0.9.1rc1/vllm_ascend/attention → vllm_ascend-0.9.2rc1/vllm_ascend/compilation}/__init__.py +0 -0
  326. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/compilation/piecewise_backend.py +0 -0
  327. {vllm_ascend-0.9.1rc1/vllm_ascend/compilation → vllm_ascend-0.9.2rc1/vllm_ascend/core}/__init__.py +0 -0
  328. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/schedule_config.py +0 -0
  329. {vllm_ascend-0.9.1rc1/vllm_ascend/core → vllm_ascend-0.9.2rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
  330. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/__init__.py +0 -0
  331. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/communicator.py +0 -0
  332. {vllm_ascend-0.9.1rc1/vllm_ascend/device_allocator → vllm_ascend-0.9.2rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  333. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  334. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  335. {vllm_ascend-0.9.1rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer}/__init__.py +0 -0
  336. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -0
  337. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -0
  338. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/utils.py +0 -0
  339. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/llmdatadist_connector.py +0 -0
  340. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/parallel_state.py +0 -0
  341. {vllm_ascend-0.9.1rc1/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.9.2rc1/vllm_ascend/lora}/__init__.py +0 -0
  342. {vllm_ascend-0.9.1rc1/vllm_ascend/lora → vllm_ascend-0.9.2rc1/vllm_ascend/lora/punica_wrapper}/__init__.py +0 -0
  343. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/punica_wrapper/punica_npu.py +0 -0
  344. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_5_vl.py +0 -0
  345. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -0
  346. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_vl.py +0 -0
  347. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen3_moe.py +0 -0
  348. {vllm_ascend-0.9.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.9.2rc1/vllm_ascend/multistream}/__init__.py +0 -0
  349. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/base.py +0 -0
  350. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/context.py +0 -0
  351. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/decorator.py +0 -0
  352. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/layers.py +0 -0
  353. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/metadata.py +0 -0
  354. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/ms_split.py +0 -0
  355. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/__init__.py +0 -0
  356. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/activation.py +0 -0
  357. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/attention.py +0 -0
  358. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/cache.py +0 -0
  359. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  360. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/layernorm.py +0 -0
  361. {vllm_ascend-0.9.1rc1/vllm_ascend/patch/platform/patch_0_9_1 → vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_0_9_2}/__init__.py +0 -0
  362. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
  363. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/patch_main/__init__.py +0 -0
  364. {vllm_ascend-0.9.1rc1/vllm_ascend/patch/worker/patch_0_9_1 → vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_0_9_2}/__init__.py +0 -0
  365. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  366. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  367. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -0
  368. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  369. {vllm_ascend-0.9.1rc1/vllm_ascend/multistream → vllm_ascend-0.9.2rc1/vllm_ascend/quantization}/__init__.py +0 -0
  370. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/func_wrapper.py +0 -0
  371. {vllm_ascend-0.9.1rc1/vllm_ascend/quantization → vllm_ascend-0.9.2rc1/vllm_ascend/sample}/__init__.py +0 -0
  372. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/sample/rejection_sampler.py +0 -0
  373. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/__init__.py +0 -0
  374. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/cache_engine.py +0 -0
  375. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/model_runner.py +0 -0
  376. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/multi_step_runner.py +0 -0
  377. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/multi_step_worker.py +0 -0
  378. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/pooling_model_runner.py +0 -0
  379. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/worker.py +0 -0
  380. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  381. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  382. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -15,17 +15,16 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
  ARG PY_VERSION=3.10
18
- FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py${PY_VERSION}
18
+ FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
19
19
 
20
20
  ARG COMPILE_CUSTOM_KERNELS=1
21
21
 
22
22
  # Define environments
23
23
  ENV DEBIAN_FRONTEND=noninteractive
24
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
25
- RUN apt-get update -y && \
26
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
27
- rm -rf /var/cache/apt/* && \
28
- rm -rf /var/lib/apt/lists/*
25
+ RUN yum update -y && \
26
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
+ rm -rf /var/cache/yum
29
28
 
30
29
  WORKDIR /workspace
31
30
 
@@ -41,8 +40,6 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
41
40
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
42
41
  cd vllm-ascend && \
43
42
  python3 setup.py bdist_wheel && \
44
- ls -l dist && \
45
- for f in dist/*.whl; do mv "$f" "$(echo "$f" | sed -e 's/-linux_x86_64\.whl$/-manylinux1_x86_64.whl/' -e 's/-linux_aarch64\.whl$/-manylinux2014_aarch64.whl/')"; done && \
46
- ls -l dist
43
+ ls -l dist
47
44
 
48
45
  CMD ["/bin/bash"]
@@ -1,5 +1,5 @@
1
1
  name: 📚 User Story
2
- description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.org/user_stories/index.html
2
+ description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
3
3
  title: "[User Story]: "
4
4
  labels: ["user-story"]
5
5
 
@@ -0,0 +1,100 @@
1
+ name: Release Checklist
2
+ description: Generate a release checklist issue when prepare a new release.(Used for release team)
3
+ title: "[Release]: Release checklist for v"
4
+
5
+ body:
6
+ - type: textarea
7
+ attributes:
8
+ description: >
9
+ Brief info for the new release.
10
+ label: Release Checklist
11
+ value: >
12
+ **Release Version**:
13
+
14
+ **Release Branch**:
15
+
16
+ **Release Date**:
17
+
18
+ **Release Manager**:
19
+ - type: textarea
20
+ attributes:
21
+ description: >
22
+ Release notes.
23
+ label: Prepare Release Note
24
+ value: >
25
+ - [ ] Create a new issue for release feedback
26
+
27
+ - [ ] Write the release note PR.
28
+
29
+ - [ ] Update the feedback issue link in docs/source/faqs.md
30
+
31
+ - [ ] Add release note to docs/source/user_guide/release_notes.md
32
+
33
+ - [ ] Update version info in docs/source/community/versioning_policy.md
34
+
35
+ - [ ] Update contributor info in docs/source/community/contributors.md
36
+
37
+ - [ ] Update package version in docs/conf.py
38
+ - type: textarea
39
+ attributes:
40
+ description: >
41
+ Make sure the code is merged.
42
+ label: PR need Merge
43
+ value: >
44
+ - [ ] PR link1
45
+
46
+ - [ ] PR link2
47
+
48
+ - [ ] ...
49
+ - type: textarea
50
+ attributes:
51
+ description: >
52
+ Make sure the new Feature/Function is tested
53
+ label: Functional Test
54
+ value: >
55
+ - [ ] Feature1
56
+
57
+ - [ ] Bug1
58
+
59
+ - [ ] ...
60
+ - type: textarea
61
+ attributes:
62
+ description: >
63
+ Make sure the doc is updated.
64
+ label: Doc Test
65
+ value: >
66
+ - [ ] Tutorial is updated.
67
+
68
+ - [ ] User Guide is updated.
69
+
70
+ - [ ] Developer Guide is updated.
71
+ - type: textarea
72
+ attributes:
73
+ description: >
74
+ Make sure the artifacts is ready
75
+ label: Prepare Artifacts
76
+ value: >
77
+ - [ ] Docker image is ready.
78
+
79
+ - [ ] Wheel package is ready.
80
+ - type: textarea
81
+ attributes:
82
+ description: >
83
+ Start to release.
84
+ label: Release Step
85
+ value: >
86
+ - [ ] Release note PR is merged.
87
+
88
+ - [ ] Post the release on GitHub release page.
89
+
90
+ - [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
91
+
92
+ - [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
93
+
94
+ - [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
95
+
96
+ - [ ] Upload 310p wheel to Github release page
97
+
98
+ - [ ] Broadcast the release news (By message, blog , etc)
99
+
100
+ - [ ] Close this issue
@@ -0,0 +1,56 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ # Adapted from vllm/.github/scripts/cleanup_pr_body.sh
17
+
18
+ #!/bin/bash
19
+
20
+ set -eux
21
+
22
+ # ensure 3 arguments are passed
23
+ if [ "$#" -ne 3 ]; then
24
+ echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
25
+ exit 1
26
+ fi
27
+
28
+ PR_NUMBER=$1
29
+ VLLM_VERSION=$2
30
+ VLLM_COMMIT=$3
31
+ OLD=/tmp/orig_pr_body.txt
32
+ NEW=/tmp/new_pr_body.txt
33
+
34
+ gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
35
+ cp "${OLD}" "${NEW}"
36
+
37
+ # Remove notes in pr description and add vLLM version and commit
38
+ sed -i '/<!--/,/-->/d' "${NEW}"
39
+ sed -i '/- vLLM .*$/d' "${NEW}"
40
+ {
41
+ echo ""
42
+ echo "- vLLM version: $VLLM_VERSION"
43
+ echo "- vLLM main: $VLLM_COMMIT"
44
+ echo ""
45
+ } >> "${NEW}"
46
+
47
+ # Run this only if ${NEW} is different than ${OLD}
48
+ if ! cmp -s "${OLD}" "${NEW}"; then
49
+ echo
50
+ echo "Updating PR body:"
51
+ echo
52
+ cat "${NEW}"
53
+ gh pr edit --body-file "${NEW}" "${PR_NUMBER}"
54
+ else
55
+ echo "No changes needed"
56
+ fi
@@ -22,6 +22,9 @@
22
22
  name: Benchmarks / accuracy
23
23
 
24
24
  on:
25
+ schedule:
26
+ # Runs every 6 hours
27
+ - cron: '0 */6 * * *'
25
28
  pull_request:
26
29
  types: [ labeled ]
27
30
  workflow_dispatch:
@@ -34,6 +37,7 @@ on:
34
37
  # Current supported vLLM versions
35
38
  options:
36
39
  - main
40
+ - v0.9.2
37
41
  - v0.9.1
38
42
  - v0.7.3
39
43
  vllm-ascend-version:
@@ -42,6 +46,7 @@ on:
42
46
  type: choice
43
47
  options:
44
48
  - main
49
+ - v0.9.1-dev
45
50
  - v0.7.3-dev
46
51
  models:
47
52
  description: 'model:'
@@ -49,9 +54,9 @@ on:
49
54
  type: choice
50
55
  options:
51
56
  - all
52
- - Qwen/Qwen2.5-7B-Instruct
53
57
  - Qwen/Qwen2.5-VL-7B-Instruct
54
58
  - Qwen/Qwen3-8B-Base
59
+ - Qwen/Qwen3-30B-A3B
55
60
  default: 'all'
56
61
 
57
62
  # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -73,56 +78,56 @@ jobs:
73
78
  ${{
74
79
  (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
75
80
  contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
81
+ contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
76
82
  contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
77
83
  contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
78
- github.event_name == 'workflow_dispatch'
84
+ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
79
85
  }}
80
86
  runs-on: >-
81
87
  ${{
82
- (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
88
+ (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
83
89
  'linux-arm64-npu-2'
84
90
  }}
85
91
  strategy:
86
92
  matrix:
87
- vllm_use_version: [0, 1]
88
93
  # the accuracy test will run:
89
94
  # 1. workflow_dispatch with models input
90
- # - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
91
- # - specified but not all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
95
+ # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
96
+ # - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
92
97
  # 2. PR labeled with "*-accuracy-test"
93
- # - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
94
- # - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
98
+ # - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
99
+ # - dense-accuracy-test: Qwen/Qwen3-8B-Base
95
100
  # - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
101
+ # - moe-accuracy-test: Qwen/Qwen3-30B-A3B
96
102
  model_name: ${{ fromJSON(
103
+ (github.event_name == 'schedule' &&
104
+ '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
97
105
  (github.event.inputs.models == 'all' &&
98
- '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
99
- (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
100
- '["Qwen/Qwen2.5-7B-Instruct"]') ||
106
+ '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
107
+ (github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
108
+ '["Qwen/Qwen3-30B-A3B"]') ||
101
109
  (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
102
110
  '["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
103
111
  (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
104
112
  '["Qwen/Qwen3-8B-Base"]') ||
105
113
  contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
106
- '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
114
+ '["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
107
115
  contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
108
- '["Qwen/Qwen2.5-7B-Instruct"]' ||
116
+ '["Qwen/Qwen3-8B-Base"]' ||
109
117
  contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
110
- '["Qwen/Qwen2.5-VL-7B-Instruct"]'
118
+ '["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
119
+ contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
120
+ '["Qwen/Qwen3-30B-A3B"]'
111
121
  ) }}
112
- # Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
113
- exclude:
114
- - model_name: Qwen/Qwen2.5-VL-7B-Instruct
115
- vllm_use_version: 1
116
122
 
117
123
  fail-fast: false
118
- name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
124
+ name: ${{ matrix.model_name }} accuracy
119
125
  container:
120
126
  image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
121
127
  env:
122
- HF_ENDPOINT: https://hf-mirror.com
123
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
124
128
  DATASET_SOURCE: ModelScope
125
129
  VLLM_USE_MODELSCOPE: True
130
+ USE_MODELSCOPE_HUB: 1
126
131
  # 1. If version specified (workflow_dispatch), do specified branch accuracy test
127
132
  # 2. If no version (labeled PR), do accuracy test by default ref:
128
133
  # The branch, tag or SHA to checkout. When checking out the repository that
@@ -158,7 +163,7 @@ jobs:
158
163
  repository: vllm-project/vllm
159
164
  path: ./vllm-empty
160
165
  # Please also update this when bump matched version
161
- ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}
166
+ ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
162
167
 
163
168
  - name: Install vllm-project/vllm from source
164
169
  working-directory: ./vllm-empty
@@ -177,11 +182,28 @@ jobs:
177
182
  PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
178
183
  run: |
179
184
  pip install -r requirements-dev.txt
180
- pip install -e .
185
+ pip install -v -e .
186
+
187
+ - name: Get vLLM commit hash and URL
188
+ working-directory: ./vllm-empty
189
+ run: |
190
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
191
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
192
+
193
+ - name: Get vLLM-Ascend commit hash and URL
194
+ working-directory: ./vllm-ascend
195
+ run: |
196
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
197
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
198
+
199
+ - name: Print resolved hashes
200
+ run: |
201
+ echo "vLLM : ${{ env.VLLM_COMMIT }}"
202
+ echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
181
203
 
182
204
  - name: Install lm-eval, ray, and datasets
183
205
  run: |
184
- pip install lm-eval
206
+ pip install lm-eval==0.4.8
185
207
 
186
208
  - name: Collect version info
187
209
  run: |
@@ -213,15 +235,14 @@ jobs:
213
235
  echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
214
236
  echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
215
237
 
216
- - name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
238
+ - name: Run Accuracy Test
217
239
  id: report
218
240
  working-directory: ./benchmarks
219
241
  env:
220
242
  PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
221
- VLLM_USE_V1: ${{ matrix.vllm_use_version }}
222
243
  run: |
223
244
  model_base_name=$(basename ${{ matrix.model_name }})
224
- markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
245
+ markdown_name="${model_base_name}"
225
246
  echo "markdown_name=$markdown_name"
226
247
  echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
227
248
  mkdir -p ./accuracy
@@ -233,7 +254,9 @@ jobs:
233
254
  --cann_version "${{ env.GHA_CANN_VERSION }}" \
234
255
  --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
235
256
  --torch_version "${{ env.GHA_TORCH_VERSION }}" \
236
- --vllm_version "${{ env.GHA_VLLM_VERSION }}"
257
+ --vllm_version "${{ env.GHA_VLLM_VERSION }}" \
258
+ --vllm_commit "${{ env.VLLM_COMMIT }}" \
259
+ --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
237
260
 
238
261
  - name: Generate step summary
239
262
  if: ${{ always() }}
@@ -245,12 +268,122 @@ jobs:
245
268
  SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
246
269
  echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
247
270
 
248
- - name: Upload Report for V${{ matrix.vllm_use_version }}
249
- if: ${{ github.event_name == 'workflow_dispatch' }}
271
+ - name: Check report first line for failure
272
+ id: check_report
273
+ run: |
274
+ REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
275
+ echo "Scanning $REPORT_PATH for ❌ …"
276
+ if grep -q '❌' "$REPORT_PATH"; then
277
+ echo "contains_fail=true" >> $GITHUB_OUTPUT
278
+ else
279
+ echo "contains_fail=false" >> $GITHUB_OUTPUT
280
+ fi
281
+
282
+ - name: Upload Report
283
+ if: ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
250
284
  uses: actions/upload-artifact@v4
251
285
  with:
252
- name: "${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
286
+ name: "report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
253
287
  path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
254
288
  if-no-files-found: warn
255
289
  retention-days: 90
256
290
  overwrite: true
291
+
292
+ create_pr:
293
+ runs-on: ubuntu-latest
294
+ needs: accuracy_tests
295
+ if: ${{ github.event_name == 'workflow_dispatch' }}
296
+ env:
297
+ UPSTREAM_REPO: vllm-project/vllm-ascend
298
+ steps:
299
+ - name: Checkout repository
300
+ uses: actions/checkout@v4
301
+ with:
302
+ repository: vllm-ascend-ci/vllm-ascend
303
+ token: ${{ secrets.PAT_TOKEN }}
304
+ ref: main
305
+
306
+ - name: Add upstream remote
307
+ run: |
308
+ git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
309
+ git fetch upstream
310
+ git remote -v
311
+
312
+ - name: Set Git user info dynamically
313
+ run: |
314
+ git config user.name "${{ github.actor }}"
315
+ git config user.email "${{ github.actor }}@users.noreply.github.com"
316
+
317
+ - name: Create or switch to branch
318
+ run: |
319
+ TIMESTAMP=$(date +%Y%m%d%H%M%S)
320
+ BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
321
+ echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
322
+ git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
323
+
324
+ - name: Download only current run reports
325
+ uses: actions/download-artifact@v4
326
+ with:
327
+ path: ./docs/source/developer_guide/evaluation/accuracy_report
328
+ pattern: report-*
329
+ github-token: ${{ secrets.GITHUB_TOKEN }}
330
+ run-id: ${{ github.run_id }}
331
+
332
+ - name: Delete old report
333
+ run: |
334
+ find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
335
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
336
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
337
+
338
+ - name: Update accuracy_report/index.md
339
+ run: |
340
+ REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
341
+ INDEX_MD="$REPORT_DIR/index.md"
342
+ {
343
+ echo "# Accuracy Report"
344
+ echo ""
345
+ echo ":::{toctree}"
346
+ echo ":caption: Accuracy Report"
347
+ echo ":maxdepth: 1"
348
+
349
+ for report in "$REPORT_DIR"/*.md; do
350
+ filename="$(basename "$report" .md)"
351
+ if [ "$filename" != "index" ]; then
352
+ echo "$filename"
353
+ fi
354
+ done
355
+ echo ":::"
356
+ } > "$INDEX_MD"
357
+
358
+ - name: push accuracy report
359
+ env:
360
+ GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
361
+ run: |
362
+ git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
363
+ git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
364
+ git push -f origin "${{ env.BRANCH_NAME }}"
365
+
366
+ - name: Create PR in upstream via API
367
+ uses: actions/github-script@v7
368
+ with:
369
+ github-token: ${{ secrets.PAT_TOKEN }}
370
+ script: |
371
+ const pr = await github.rest.pulls.create({
372
+ owner: 'vllm-project',
373
+ repo: 'vllm-ascend',
374
+ head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
375
+ base: '${{ github.event.inputs.vllm-ascend-version }}',
376
+ title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
377
+ body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
378
+ ${{
379
+ github.event.inputs.models == 'all'
380
+ && 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
381
+ || github.event.inputs.models
382
+ }}
383
+
384
+ - [Workflow run][1]
385
+
386
+ [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
387
+ });
388
+ core.info(`Created PR #${pr.data.number}`);
389
+
@@ -0,0 +1,63 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: format / pr body
19
+
20
+ on:
21
+ # Update the PR description when a PR is opened or new commits are pushed
22
+ pull_request_target:
23
+ types: [opened, synchronize]
24
+ branches:
25
+ - 'main'
26
+
27
+ permissions:
28
+ pull-requests: write
29
+
30
+ jobs:
31
+ update-description:
32
+ name: update vLLM version
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+ - name: Checkout vllm-project/vllm repo
37
+ uses: actions/checkout@v4
38
+ with:
39
+ repository: vllm-project/vllm
40
+ path: ./vllm-empty
41
+
42
+ - name: Get vLLM version
43
+ working-directory: ./vllm-empty
44
+ run: |
45
+ VLLM_COMMIT=$(git rev-parse HEAD)
46
+ echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
+
48
+ - name: Checkout repository
49
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
50
+
51
+ - name: Set up Python
52
+ uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
53
+
54
+ - name: Get vLLM release version
55
+ run: |
56
+ VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
57
+ echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
58
+
59
+ - name: Update PR description
60
+ env:
61
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
62
+ run: |
63
+ bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"
@@ -1,4 +1,4 @@
1
- name: 'image / openEuler'
1
+ name: 'image / openEuler / 310p'
2
2
  # This is a docker build check and publish job:
3
3
  # 1. PR Triggered docker image build check
4
4
  # - is for image build check
@@ -1,4 +1,4 @@
1
- name: 'image / Ubuntu'
1
+ name: 'image / Ubuntu / 310p'
2
2
  # This is a docker build check and publish job:
3
3
  # 1. PR Triggered docker image build check
4
4
  # - is for image build check
@@ -16,7 +16,7 @@ on:
16
16
  - 'main'
17
17
  - '*-dev'
18
18
  paths:
19
- - '.github/workflows/image_310p.ubuntu.yml'
19
+ - '.github/workflows/image_310p_ubuntu.yml'
20
20
  - 'Dockerfile.310p'
21
21
  - 'vllm_ascend/**'
22
22
  - 'setup.py'
@@ -50,7 +50,7 @@ jobs:
50
50
  strategy:
51
51
  matrix:
52
52
  include:
53
- - vllm_branch: v0.9.1
53
+ - vllm_branch: v0.9.2
54
54
  vllm_ascend_branch: main
55
55
  vllm_use_v1: 1
56
56
  max-parallel: 1
@@ -69,8 +69,7 @@ jobs:
69
69
  --device /dev/devmm_svm
70
70
  --device /dev/hisi_hdc
71
71
  env:
72
- HF_ENDPOINT: https://hf-mirror.com
73
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
72
+ VLLM_USE_MODELSCOPE: True
74
73
  ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
75
74
  ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
76
75
  VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
@@ -115,6 +114,7 @@ jobs:
115
114
  env:
116
115
  PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
117
116
  run: |
117
+ pip install "transformers<=4.52.4"
118
118
  pip install -e .
119
119
  pip install -r benchmarks/requirements-bench.txt
120
120
 
@@ -145,8 +145,8 @@ jobs:
145
145
  - name: Install elastic_tool
146
146
  if: github.event_name != 'pull_request'
147
147
  run: |
148
- pip install escli-tool==0.2.2
149
-
148
+ pip install escli-tool==0.2.3
149
+
150
150
  - name: Collect pr info from vllm-project/vllm-ascend
151
151
  if: github.event_name != 'pull_request'
152
152
  run: |
@@ -176,7 +176,7 @@ jobs:
176
176
  commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict)
177
177
  commit_time_no_tz=${commit_time::19}
178
178
  pip install -e .
179
-
179
+
180
180
  echo "------------------------"
181
181
  echo "commit_id: $commit_id"
182
182
  echo "commit_title: $commit_title"
@@ -184,9 +184,12 @@ jobs:
184
184
  echo "vllm branch: ${{ matrix.vllm_branch }}"
185
185
  echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
186
186
  echo "------------------------"
187
-
187
+
188
188
  cd /github/home
189
- bash benchmarks/scripts/run-performance-benchmarks.sh
189
+ ERROR_MSG=""
190
+ if ! bash benchmarks/scripts/run-performance-benchmarks.sh; then
191
+ ERROR_MSG="Benchmark failed to run"
192
+ fi
190
193
  # send the result to es
191
194
  escli add --vllm_branch ${{ matrix.vllm_branch }} \
192
195
  --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
@@ -194,6 +197,7 @@ jobs:
194
197
  --commit_title "$commit_title" \
195
198
  --created_at "$commit_time_no_tz" \
196
199
  --res_dir ./benchmarks/results \
200
+ --error "$ERROR_MSG" \
197
201
  --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
198
202
  rm -rf ./benchmarks/results
199
203
  cd -