vllm-ascend 0.9.1rc1__tar.gz → 0.9.1rc2__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403)
  1. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/Dockerfile.buildwheel +5 -8
  2. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/dependabot.yml +0 -3
  3. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/accuracy_test.yaml +1 -3
  4. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/image_openeuler.yml +6 -15
  5. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/image_ubuntu.yml +4 -10
  6. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/nightly_benchmarks.yaml +5 -10
  7. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/release_code.yml +1 -1
  8. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/release_whl.yml +35 -6
  9. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_doctest.yaml +8 -13
  10. vllm_ascend-0.9.1rc2/.github/workflows/vllm_ascend_test.yaml +242 -0
  11. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_test_long_term.yaml +13 -14
  12. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_test_pd.yaml +6 -7
  13. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/Dockerfile +2 -3
  14. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/Dockerfile.openEuler +2 -3
  15. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/PKG-INFO +3 -3
  16. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/README.md +2 -2
  17. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/README.zh.md +2 -2
  18. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +30 -63
  19. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/kernels/pos_encoding_kernels.cpp +5 -15
  20. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/kernels/utils.h +1 -3
  21. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/requirements-test.txt +1 -1
  22. vllm_ascend-0.9.1rc2/docs/source/assets/multi_node_dp.png +0 -0
  23. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/community/contributors.md +18 -1
  24. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/community/governance.md +2 -2
  25. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/community/user_stories/llamafactory.md +1 -1
  26. {vllm_ascend-0.9.1rc1/docs/source/developer_guide → vllm_ascend-0.9.1rc2/docs/source/community}/versioning_policy.md +4 -2
  27. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/conf.py +5 -5
  28. vllm_ascend-0.9.1rc1/docs/source/developer_guide/contributing.md → vllm_ascend-0.9.1rc2/docs/source/developer_guide/contribution/index.md +44 -46
  29. vllm_ascend-0.9.1rc2/docs/source/developer_guide/contribution/testing.md +285 -0
  30. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/developer_guide/evaluation/accuracy_report/index.md +1 -1
  31. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/developer_guide/evaluation/index.md +2 -9
  32. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/developer_guide/evaluation/using_evalscope.md +3 -1
  33. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/developer_guide/evaluation/using_lm_eval.md +2 -1
  34. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/developer_guide/evaluation/using_opencompass.md +4 -1
  35. vllm_ascend-0.9.1rc2/docs/source/developer_guide/feature_guide/index.md +9 -0
  36. vllm_ascend-0.9.1rc2/docs/source/developer_guide/feature_guide/patch.md +85 -0
  37. vllm_ascend-0.9.1rc2/docs/source/developer_guide/modeling/adding_a_new_model.md +259 -0
  38. vllm_ascend-0.9.1rc2/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
  39. vllm_ascend-0.9.1rc2/docs/source/developer_guide/modeling/index.md +10 -0
  40. vllm_ascend-0.9.1rc2/docs/source/developer_guide/performance/index.md +9 -0
  41. vllm_ascend-0.9.1rc2/docs/source/developer_guide/performance/optimization_and_tuning.md +183 -0
  42. {vllm_ascend-0.9.1rc1/docs/source/developer_guide/evaluation → vllm_ascend-0.9.1rc2/docs/source/developer_guide/performance}/performance_benchmark.md +7 -0
  43. {vllm_ascend-0.9.1rc1/docs/source/developer_guide/evaluation → vllm_ascend-0.9.1rc2/docs/source/developer_guide/performance}/profile_execute_duration.md +2 -1
  44. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/faqs.md +53 -9
  45. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/index.md +8 -8
  46. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/installation.md +23 -18
  47. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/quick_start.md +14 -0
  48. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/tutorials/index.md +1 -2
  49. vllm_ascend-0.9.1rc2/docs/source/tutorials/multi_node.md +203 -0
  50. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/tutorials/multi_npu_quantization.md +6 -3
  51. vllm_ascend-0.9.1rc2/docs/source/tutorials/multi_npu_qwen3_moe.md +108 -0
  52. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/tutorials/single_npu.md +72 -3
  53. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/tutorials/single_npu_multimodal.md +4 -3
  54. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.1rc2/docs/source/user_guide/configuration}/additional_config.md +10 -11
  55. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.1rc2/docs/source/user_guide/configuration}/env_vars.md +1 -1
  56. vllm_ascend-0.9.1rc2/docs/source/user_guide/configuration/index.md +10 -0
  57. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide}/graph_mode.md +4 -9
  58. vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  59. vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide/index.md +13 -0
  60. vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide/lora.md +8 -0
  61. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide}/quantization.md +21 -2
  62. vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide/sleep_mode.md +114 -0
  63. vllm_ascend-0.9.1rc2/docs/source/user_guide/feature_guide/structured_output.md +163 -0
  64. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/user_guide/release_notes.md +31 -7
  65. vllm_ascend-0.9.1rc2/docs/source/user_guide/support_matrix/index.md +10 -0
  66. vllm_ascend-0.9.1rc2/docs/source/user_guide/support_matrix/supported_features.md +51 -0
  67. {vllm_ascend-0.9.1rc1/docs/source/user_guide → vllm_ascend-0.9.1rc2/docs/source/user_guide/support_matrix}/supported_models.md +3 -2
  68. vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1/README.md +246 -0
  69. vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1/gen_ranktable.py +120 -0
  70. vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1/gen_ranktable.sh +79 -0
  71. vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1/load_balance_proxy_server_example.py +435 -0
  72. vllm_ascend-0.9.1rc2/examples/disaggregate_prefill_v1/run_server.sh +32 -0
  73. vllm_ascend-0.9.1rc2/examples/dp_offline/data_parallel.py +226 -0
  74. vllm_ascend-0.9.1rc2/examples/dp_offline/run_dp.sh +28 -0
  75. vllm_ascend-0.9.1rc2/examples/eplb_generate_map.py +77 -0
  76. vllm_ascend-0.9.1rc2/examples/external_online_dp/launch_dp_program.py +34 -0
  77. vllm_ascend-0.9.1rc2/examples/external_online_dp/run_dp_template.sh +51 -0
  78. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_dualbatch_overlap_npu.py +1 -1
  79. vllm_ascend-0.9.1rc2/examples/run_dp_server.sh +33 -0
  80. vllm_ascend-0.9.1rc1/examples/run_dp_attention_etp16.sh → vllm_ascend-0.9.1rc2/examples/run_dp_with_cached_graph_etp16.sh +18 -16
  81. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/format.sh +1 -1
  82. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/pyproject.toml +3 -1
  83. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/requirements-dev.txt +1 -2
  84. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/requirements-lint.txt +1 -0
  85. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/requirements.txt +6 -3
  86. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/e2e/common.sh +1 -24
  87. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/e2e/doctests/001-quickstart-test.sh +2 -0
  88. vllm_ascend-0.9.1rc2/tests/e2e/pd_disaggreate/run_edge_case_test.sh +141 -0
  89. vllm_ascend-0.9.1rc2/tests/e2e/pd_disaggreate/test_edge_cases.py +81 -0
  90. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/e2e/run_doctests.sh +0 -6
  91. vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e/test_eagle_correctness.py +344 -0
  92. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/e2e/test_medusa_correctness.py +2 -2
  93. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/e2e/test_mlp_correctness.py +2 -2
  94. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/e2e/test_ngram_correctness.py +2 -2
  95. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/test_dynamic_spec_decode.py +2 -2
  96. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/test_multi_step_worker.py +1 -1
  97. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/test_ngram_worker.py +1 -1
  98. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/test_spec_decode_worker.py +4 -4
  99. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v1}/test_v1_mtp_correctness.py +66 -1
  100. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v1}/test_v1_spec_decode.py +6 -12
  101. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/multicard/test_data_parallel.py +1 -1
  102. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/multicard/test_ilama_lora_tp2.py +2 -2
  103. vllm_ascend-0.9.1rc2/tests/multicard/test_model_qwen3_w4a8.py +65 -0
  104. vllm_ascend-0.9.1rc2/tests/multicard/test_multimodal_context_parallel.py +82 -0
  105. vllm_ascend-0.9.1rc2/tests/multicard/test_offline_inference_distributed.py +230 -0
  106. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/multicard/test_torchair_graph_mode.py +8 -5
  107. vllm_ascend-0.9.1rc2/tests/multicard/test_w4a8_deepseek.py +67 -0
  108. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/core/test_ascend_scheduler.py +26 -50
  109. vllm_ascend-0.9.1rc2/tests/singlecard/ops/test_fused_moe.py +196 -0
  110. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/ops/test_rotary_embedding.py +67 -0
  111. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/sample/test_rejection_sampler.py +20 -11
  112. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_aclgraph.py +25 -2
  113. vllm_ascend-0.9.1rc2/tests/singlecard/test_ascend_config.py +233 -0
  114. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_offline_inference.py +4 -0
  115. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_scheduler.py +4 -20
  116. vllm_ascend-0.9.1rc2/tests/ut/kv_connector/test_llmdatadist_connector.py +42 -0
  117. vllm_ascend-0.9.1rc2/tests/ut/kv_connector/test_remote_decode_lifecycle.py +123 -0
  118. vllm_ascend-0.9.1rc2/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +242 -0
  119. vllm_ascend-0.9.1rc2/tests/ut/kv_connector/utils.py +194 -0
  120. vllm_ascend-0.9.1rc2/tests/ut/patch/worker/patch_common/test_patch_sampler.py +44 -0
  121. vllm_ascend-0.9.1rc2/tests/ut/test_distributed_tensor_parallel.py +139 -0
  122. vllm_ascend-0.9.1rc2/tests/ut/test_token_dispatcher.py +69 -0
  123. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/__init__.py +7 -0
  124. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/_version.py +2 -2
  125. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ascend_config.py +89 -15
  126. vllm_ascend-0.9.1rc2/vllm_ascend/ascend_forward_context.py +137 -0
  127. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/attention/attention.py +4 -40
  128. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/attention/attention_v1.py +121 -82
  129. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/attention/mla_v1.py +291 -215
  130. vllm_ascend-0.9.1rc2/vllm_ascend/attention/utils.py +23 -0
  131. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/compilation/piecewise_backend.py +57 -0
  132. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/core/scheduler.py +50 -21
  133. vllm_ascend-0.9.1rc2/vllm_ascend/cpu_binding.py +329 -0
  134. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/device_allocator/camem.py +1 -1
  135. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/__init__.py +5 -0
  136. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/context_parallel_utils.py +110 -0
  137. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +923 -0
  138. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/parallel_state.py +49 -0
  139. vllm_ascend-0.9.1rc2/vllm_ascend/distributed/tensor_parallel.py +248 -0
  140. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/envs.py +56 -3
  141. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/adaptor/abstract_adaptor.py +44 -0
  142. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/adaptor/vllm_adaptor.py +212 -0
  143. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +136 -0
  144. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/eplb_utils.py +75 -0
  145. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/eplb_worker.py +442 -0
  146. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_abstract.py +41 -0
  147. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +388 -0
  148. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +770 -0
  149. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_factory.py +25 -0
  150. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy/policy_random.py +29 -0
  151. vllm_ascend-0.9.1rc2/vllm_ascend/eplb/eplb_updator.py +222 -0
  152. vllm_ascend-0.9.1rc2/vllm_ascend/lora/__init__.py +0 -0
  153. vllm_ascend-0.9.1rc2/vllm_ascend/lora/punica_wrapper/__init__.py +0 -0
  154. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/models/__init__.py +21 -8
  155. vllm_ascend-0.9.1rc2/vllm_ascend/models/deepseek_dbo.py +1085 -0
  156. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/models/deepseek_mtp.py +26 -6
  157. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/models/deepseek_v2.py +194 -52
  158. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen2.py +372 -0
  159. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/models/qwen2_5_vl.py +146 -5
  160. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/models/qwen2_5_vl_without_padding.py +98 -0
  161. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen3.py +472 -0
  162. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen3_dbo.py +552 -0
  163. vllm_ascend-0.9.1rc2/vllm_ascend/models/qwen3_moe.py +268 -0
  164. vllm_ascend-0.9.1rc2/vllm_ascend/multistream/__init__.py +0 -0
  165. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/multistream/base.py +2 -0
  166. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/multistream/metadata.py +2 -0
  167. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/multistream/ms_split.py +136 -12
  168. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/activation.py +1 -6
  169. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/attention.py +19 -15
  170. vllm_ascend-0.9.1rc2/vllm_ascend/ops/comm_utils.py +127 -0
  171. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/common_fused_moe.py +24 -17
  172. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/fused_moe.py +531 -289
  173. vllm_ascend-0.9.1rc2/vllm_ascend/ops/layernorm.py +77 -0
  174. vllm_ascend-0.9.1rc2/vllm_ascend/ops/moe_dispatcher/__init__.py +0 -0
  175. vllm_ascend-0.9.1rc2/vllm_ascend/ops/moe_dispatcher/token_dispatcher.py +578 -0
  176. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/rotary_embedding.py +65 -39
  177. vllm_ascend-0.9.1rc2/vllm_ascend/ops/sequence_parallel.py +119 -0
  178. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/vocab_parallel_embedding.py +14 -7
  179. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/__init__.py +24 -15
  180. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/__init__.py +25 -0
  181. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_cache_manager.py +13 -0
  182. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_configs.py +77 -0
  183. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_core.py +132 -0
  184. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_core_client.py +26 -0
  185. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_0_9_1/patch_decorator.py +154 -0
  186. vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_common/patch_distributed.py +83 -0
  187. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -16
  188. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_sampler.py +8 -3
  189. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +7 -5
  190. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/platform.py +16 -24
  191. vllm_ascend-0.9.1rc2/vllm_ascend/quantization/__init__.py +0 -0
  192. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/quantization/func_wrapper.py +32 -0
  193. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/quantization/quant_config.py +40 -0
  194. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/quantization/quantizer.py +21 -9
  195. vllm_ascend-0.9.1rc2/vllm_ascend/quantization/w4a8_dynamic.py +393 -0
  196. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/quantization/w8a8.py +20 -6
  197. vllm_ascend-0.9.1rc2/vllm_ascend/quantization/w8a8_dynamic.py +1055 -0
  198. vllm_ascend-0.9.1rc2/vllm_ascend/sample/__init__.py +0 -0
  199. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/sample/rejection_sampler.py +101 -50
  200. vllm_ascend-0.9.1rc2/vllm_ascend/soc_info.py +14 -0
  201. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/utils.py +170 -123
  202. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/draft_model_runner.py +10 -9
  203. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/model_runner.py +7 -3
  204. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/model_runner_v1.py +589 -472
  205. vllm_ascend-0.9.1rc2/vllm_ascend/worker/mtp_proposer_v1.py +437 -0
  206. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/pooling_model_runner.py +3 -3
  207. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/worker.py +9 -18
  208. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/worker_v1.py +51 -22
  209. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend.egg-info/PKG-INFO +3 -3
  210. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend.egg-info/SOURCES.txt +133 -73
  211. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend.egg-info/requires.txt +4 -1
  212. vllm_ascend-0.9.1rc1/.github/workflows/doc_codespell.yaml +0 -33
  213. vllm_ascend-0.9.1rc1/.github/workflows/image_310p_openeuler.yml +0 -114
  214. vllm_ascend-0.9.1rc1/.github/workflows/image_310p_ubuntu.yml +0 -110
  215. vllm_ascend-0.9.1rc1/.github/workflows/vllm_ascend_test.yaml +0 -379
  216. vllm_ascend-0.9.1rc1/Dockerfile.310p +0 -61
  217. vllm_ascend-0.9.1rc1/Dockerfile.310p.openEuler +0 -58
  218. vllm_ascend-0.9.1rc1/codecov.yml +0 -30
  219. vllm_ascend-0.9.1rc1/docs/source/tutorials/multi_node.md +0 -195
  220. vllm_ascend-0.9.1rc1/docs/source/tutorials/multi_npu_moge.md +0 -117
  221. vllm_ascend-0.9.1rc1/docs/source/tutorials/single_node_300i.md +0 -304
  222. vllm_ascend-0.9.1rc1/docs/source/user_guide/release.template.md +0 -13
  223. vllm_ascend-0.9.1rc1/docs/source/user_guide/suppoted_features.md +0 -49
  224. vllm_ascend-0.9.1rc1/examples/dp_offline/data_parallel.py +0 -85
  225. vllm_ascend-0.9.1rc1/examples/dp_offline/run_dp.sh +0 -19
  226. vllm_ascend-0.9.1rc1/examples/run_dp_server.sh +0 -30
  227. vllm_ascend-0.9.1rc1/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -42
  228. vllm_ascend-0.9.1rc1/tests/e2e/multicard/test_offline_inference_distributed.py +0 -114
  229. vllm_ascend-0.9.1rc1/tests/e2e/singlecard/ops/test_fused_moe.py +0 -100
  230. vllm_ascend-0.9.1rc1/tests/ut/fake_weight/config.json +0 -28
  231. vllm_ascend-0.9.1rc1/tests/ut/test_ascend_config.py +0 -244
  232. vllm_ascend-0.9.1rc1/tests/ut/worker/test_worker_v1.py +0 -1
  233. vllm_ascend-0.9.1rc1/vllm_ascend/distributed/parallel_state.py +0 -77
  234. vllm_ascend-0.9.1rc1/vllm_ascend/models/deepseek_dbo.py +0 -977
  235. vllm_ascend-0.9.1rc1/vllm_ascend/models/pangu_moe.py +0 -639
  236. vllm_ascend-0.9.1rc1/vllm_ascend/models/qwen3_moe.py +0 -35
  237. vllm_ascend-0.9.1rc1/vllm_ascend/ops/layernorm.py +0 -49
  238. vllm_ascend-0.9.1rc1/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -153
  239. vllm_ascend-0.9.1rc1/vllm_ascend/patch/worker/patch_0_9_1/__init__.py +0 -16
  240. vllm_ascend-0.9.1rc1/vllm_ascend/quantization/w8a8_dynamic.py +0 -723
  241. vllm_ascend-0.9.1rc1/vllm_ascend/worker/eagle_proposer_v1.py +0 -429
  242. vllm_ascend-0.9.1rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -225
  243. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  244. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  245. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  246. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  247. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  248. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  249. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  250. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  251. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  252. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  253. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  254. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  255. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/actionlint.yaml +0 -0
  256. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/labeler.yml +0 -0
  257. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/accuracy_report.yaml +0 -0
  258. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/label_merge_conflict.yml +0 -0
  259. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/labeler.yml +0 -0
  260. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/matchers/actionlint.json +0 -0
  261. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/matchers/mypy.json +0 -0
  262. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/matchers/ruff.json +0 -0
  263. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.github/workflows/shellcheck.yml +0 -0
  264. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.gitignore +0 -0
  265. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/.readthedocs.yaml +0 -0
  266. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/CMakeLists.txt +0 -0
  267. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/CODE_OF_CONDUCT.md +0 -0
  268. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/DCO +0 -0
  269. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/LICENSE +0 -0
  270. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/README.md +0 -0
  271. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  272. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/requirements-bench.txt +0 -0
  273. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  274. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/scripts/patch_benchmark_dataset.py +0 -0
  275. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/scripts/perf_result_template.md +0 -0
  276. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  277. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/scripts/run_accuracy.py +0 -0
  278. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/tests/latency-tests.json +0 -0
  279. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/tests/serving-tests.json +0 -0
  280. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/benchmarks/tests/throughput-tests.json +0 -0
  281. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/cmake/utils.cmake +0 -0
  282. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/collect_env.py +0 -0
  283. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/camem_allocator.cpp +0 -0
  284. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/kernels/advance_step.cpp +0 -0
  285. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/kernels/types.h +0 -0
  286. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/ops.h +0 -0
  287. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/torch_binding.cpp +0 -0
  288. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/csrc/utils.h +0 -0
  289. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/Makefile +0 -0
  290. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/README.md +0 -0
  291. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/requirements-docs.txt +0 -0
  292. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/_templates/sections/header.html +0 -0
  293. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/community/user_stories/index.md +0 -0
  294. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  295. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  296. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/docs/source/tutorials/multi_npu.md +0 -0
  297. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -0
  298. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/disaggregated_prefill/dp_proxy.py +0 -0
  299. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/disaggregated_prefill/find_device_ips.py +0 -0
  300. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -0
  301. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/disaggregated_prefill/run_decode_server.sh +0 -0
  302. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/disaggregated_prefill/run_prefill_server.sh +0 -0
  303. vllm_ascend-0.9.1rc1/tests/__init__.py → vllm_ascend-0.9.1rc2/examples/external_online_dp/README.md +0 -0
  304. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_disaggregated_prefill_npu.py +0 -0
  305. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_distributed_inference_npu.py +0 -0
  306. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_inference_audio_language.py +0 -0
  307. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_inference_npu.py +0 -0
  308. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_inference_npu_v1.py +0 -0
  309. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/offline_multi_step_custom_ops.py +0 -0
  310. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/prompt_embedding_inference.py +0 -0
  311. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/examples/run_dp_attention_etp16_benmark.sh +0 -0
  312. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/mypy.ini +0 -0
  313. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/packages.txt +0 -0
  314. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/pytest.ini +0 -0
  315. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/setup.cfg +0 -0
  316. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/setup.py +0 -0
  317. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode/e2e → vllm_ascend-0.9.1rc2/tests}/__init__.py +0 -0
  318. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/conftest.py +0 -0
  319. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  320. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  321. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/e2e/run_disagg_pd.sh +0 -0
  322. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/__init__.py +0 -0
  323. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/conftest.py +0 -0
  324. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0/e2e}/__init__.py +0 -0
  325. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/e2e/conftest.py +0 -0
  326. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/e2e/test_mtp_correctness.py +0 -0
  327. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/test_utils.py +0 -0
  328. {vllm_ascend-0.9.1rc1/tests/e2e/long_term/spec_decode → vllm_ascend-0.9.1rc2/tests/long_term/spec_decode_v0}/utils.py +0 -0
  329. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/long_term/test_accuracy.py +0 -0
  330. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -0
  331. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/model_utils.py +0 -0
  332. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/multicard/test_dynamic_npugraph_batchsize.py +0 -0
  333. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/multicard/test_pyhccl_distributed.py +0 -0
  334. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard → vllm_ascend-0.9.1rc2/tests}/ops/test_vocabparallelembedding.py +0 -0
  335. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/compile → vllm_ascend-0.9.1rc2/tests/singlecard}/__init__.py +0 -0
  336. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/core → vllm_ascend-0.9.1rc2/tests/singlecard/compile}/__init__.py +0 -0
  337. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/compile/test_simple.py +0 -0
  338. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/ops → vllm_ascend-0.9.1rc2/tests/singlecard/core}/__init__.py +0 -0
  339. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/core/test_ascend_scheduler_e2e.py +0 -0
  340. {vllm_ascend-0.9.1rc1/tests/e2e/singlecard/sample → vllm_ascend-0.9.1rc2/tests/singlecard/ops}/__init__.py +0 -0
  341. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/ops/test_multi_step.py +0 -0
  342. {vllm_ascend-0.9.1rc1/vllm_ascend/attention → vllm_ascend-0.9.1rc2/tests/singlecard/sample}/__init__.py +0 -0
  343. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_camem.py +0 -0
  344. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_chunked.py +0 -0
  345. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_guided_decoding.py +0 -0
  346. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_ilama_lora.py +0 -0
  347. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_profile_execute_duration.py +0 -0
  348. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_prompt_embedding.py +0 -0
  349. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_pyhccl.py +0 -0
  350. {vllm_ascend-0.9.1rc1/tests/e2e → vllm_ascend-0.9.1rc2/tests}/singlecard/test_sampler.py +0 -0
  351. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  352. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tests/utils.py +0 -0
  353. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tools/actionlint.sh +0 -0
  354. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tools/check_repo.sh +0 -0
  355. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tools/mypy.sh +0 -0
  356. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tools/png-lint.sh +0 -0
  357. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tools/shellcheck.sh +0 -0
  358. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/tools/sphinx-lint.sh +0 -0
  359. {vllm_ascend-0.9.1rc1/vllm_ascend/compilation → vllm_ascend-0.9.1rc2/vllm_ascend/attention}/__init__.py +0 -0
  360. {vllm_ascend-0.9.1rc1/vllm_ascend/core → vllm_ascend-0.9.1rc2/vllm_ascend/compilation}/__init__.py +0 -0
  361. {vllm_ascend-0.9.1rc1/vllm_ascend/device_allocator → vllm_ascend-0.9.1rc2/vllm_ascend/core}/__init__.py +0 -0
  362. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/core/schedule_config.py +0 -0
  363. {vllm_ascend-0.9.1rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.9.1rc2/vllm_ascend/device_allocator}/__init__.py +0 -0
  364. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/communicator.py +0 -0
  365. {vllm_ascend-0.9.1rc1/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.9.1rc2/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  366. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  367. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  368. {vllm_ascend-0.9.1rc1/vllm_ascend/lora → vllm_ascend-0.9.1rc2/vllm_ascend/distributed/kv_transfer}/__init__.py +0 -0
  369. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -0
  370. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/kv_transfer/simple_connector.py +0 -0
  371. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -0
  372. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/kv_transfer/utils.py +0 -0
  373. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/distributed/llmdatadist_connector.py +0 -0
  374. {vllm_ascend-0.9.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.9.1rc2/vllm_ascend/eplb}/__init__.py +0 -0
  375. {vllm_ascend-0.9.1rc1/vllm_ascend/multistream → vllm_ascend-0.9.1rc2/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
  376. {vllm_ascend-0.9.1rc1/vllm_ascend/quantization → vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core}/__init__.py +0 -0
  377. {vllm_ascend-0.9.1rc1/vllm_ascend/sample → vllm_ascend-0.9.1rc2/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
  378. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/lora/punica_wrapper/punica_npu.py +0 -0
  379. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/models/qwen2_vl.py +0 -0
  380. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/multistream/context.py +0 -0
  381. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/multistream/decorator.py +0 -0
  382. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/multistream/layers.py +0 -0
  383. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/__init__.py +0 -0
  384. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/cache.py +0 -0
  385. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  386. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/platform/__init__.py +0 -0
  387. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
  388. {vllm_ascend-0.9.1rc1/vllm_ascend/patch/platform/patch_0_9_1 → vllm_ascend-0.9.1rc2/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
  389. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/__init__.py +0 -0
  390. {vllm_ascend-0.9.1rc1/vllm_ascend/patch/platform/patch_main → vllm_ascend-0.9.1rc2/vllm_ascend/patch/worker/patch_0_9_1}/__init__.py +0 -0
  391. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/__init__.py +0 -0
  392. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  393. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -0
  394. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  395. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -0
  396. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  397. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/__init__.py +0 -0
  398. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/cache_engine.py +0 -0
  399. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/multi_step_runner.py +0 -0
  400. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend/worker/multi_step_worker.py +0 -0
  401. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  402. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend.egg-info/entry_points.txt +0 -0
  403. {vllm_ascend-0.9.1rc1 → vllm_ascend-0.9.1rc2}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -14,18 +14,17 @@
14
14
  # limitations under the License.
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
- ARG PY_VERSION=3.10
18
- FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py${PY_VERSION}
17
+ ARG PY_VERSION=3.11
18
+ FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
19
19
 
20
20
  ARG COMPILE_CUSTOM_KERNELS=1
21
21
 
22
22
  # Define environments
23
23
  ENV DEBIAN_FRONTEND=noninteractive
24
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
25
- RUN apt-get update -y && \
26
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
27
- rm -rf /var/cache/apt/* && \
28
- rm -rf /var/lib/apt/lists/*
25
+ RUN yum update -y && \
26
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
+ rm -rf /var/cache/yum
29
28
 
30
29
  WORKDIR /workspace
31
30
 
@@ -41,8 +40,6 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
41
40
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
42
41
  cd vllm-ascend && \
43
42
  python3 setup.py bdist_wheel && \
44
- ls -l dist && \
45
- for f in dist/*.whl; do mv "$f" "$(echo "$f" | sed -e 's/-linux_x86_64\.whl$/-manylinux1_x86_64.whl/' -e 's/-linux_aarch64\.whl$/-manylinux2014_aarch64.whl/')"; done && \
46
43
  ls -l dist
47
44
 
48
45
  CMD ["/bin/bash"]
@@ -2,9 +2,6 @@ version: 2
2
2
  updates:
3
3
  - package-ecosystem: "github-actions"
4
4
  directory: "/"
5
- schedule:
6
- # Check for updates to GitHub Actions every week
7
- interval: "weekly"
8
5
  open-pull-requests-limit: 2
9
6
  reviewers:
10
7
  - "Yikun"
@@ -117,7 +117,7 @@ jobs:
117
117
  fail-fast: false
118
118
  name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
119
119
  container:
120
- image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
120
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
121
121
  env:
122
122
  HF_ENDPOINT: https://hf-mirror.com
123
123
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -173,8 +173,6 @@ jobs:
173
173
 
174
174
  - name: Install vllm-project/vllm-ascend
175
175
  working-directory: ./vllm-ascend
176
- env:
177
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
178
176
  run: |
179
177
  pip install -r requirements-dev.txt
180
178
  pip install -e .
@@ -1,4 +1,4 @@
1
- name: 'image / openEuler'
1
+ name: 'image'
2
2
  # This is a docker build check and publish job:
3
3
  # 1. PR Triggered docker image build check
4
4
  # - is for image build check
@@ -39,13 +39,9 @@ on:
39
39
 
40
40
  jobs:
41
41
  build:
42
- name: vllm-ascend image build
43
- runs-on: >-
44
- ${{
45
- github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
46
- 'ubuntu-latest' ||
47
- 'ubuntu-24.04-arm'
48
- }}
42
+ name: vllm-ascend openEuler image
43
+ runs-on: ubuntu-latest
44
+
49
45
  steps:
50
46
  - uses: actions/checkout@v4
51
47
 
@@ -94,15 +90,10 @@ jobs:
94
90
  username: ${{ vars.QUAY_USERNAME }}
95
91
  password: ${{ secrets.QUAY_PASSWORD }}
96
92
 
97
- - name: Build and push 910b
93
+ - name: Build and push
98
94
  uses: docker/build-push-action@v6
99
95
  with:
100
- platforms: >-
101
- ${{
102
- github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
103
- 'linux/amd64,linux/arm64' ||
104
- 'linux/arm64'
105
- }}
96
+ platforms: linux/amd64,linux/arm64
106
97
  # use the current repo path as the build context, ensure .git is contained
107
98
  context: .
108
99
  # only trigger when tag, branch/main push
@@ -1,4 +1,4 @@
1
- name: 'image / Ubuntu'
1
+ name: 'image'
2
2
  # This is a docker build check and publish job:
3
3
  # 1. PR Triggered docker image build check
4
4
  # - is for image build check
@@ -39,7 +39,7 @@ on:
39
39
  jobs:
40
40
 
41
41
  build:
42
- name: vllm-ascend image build
42
+ name: vllm-ascend Ubuntu image
43
43
  runs-on: ubuntu-latest
44
44
 
45
45
  steps:
@@ -90,18 +90,12 @@ jobs:
90
90
  username: ${{ vars.QUAY_USERNAME }}
91
91
  password: ${{ secrets.QUAY_PASSWORD }}
92
92
 
93
- - name: Build and push 910b
93
+ - name: Build and push
94
94
  uses: docker/build-push-action@v6
95
95
  with:
96
- platforms: >-
97
- ${{
98
- github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
99
- 'linux/amd64,linux/arm64' ||
100
- 'linux/amd64'
101
- }}
96
+ platforms: linux/amd64,linux/arm64
102
97
  # use the current repo path as the build context, ensure .git is contained
103
98
  context: .
104
- file: Dockerfile
105
99
  # only trigger when tag, branch/main push
106
100
  push: ${{ github.event_name == 'push' && github.repository_owner == 'vllm-project' }}
107
101
  labels: ${{ steps.meta.outputs.labels }}
@@ -18,11 +18,7 @@
18
18
  name: 'Benchmarks / Performance'
19
19
  # This workflow runs nightly benchmarks for vllm-ascend.
20
20
 
21
- on:
22
- schedule:
23
- # Run at 02:00 everyday
24
- - cron: '00 18 * * *'
25
-
21
+ on:
26
22
  workflow_dispatch:
27
23
  # Allow manual triggering of the workflow
28
24
 
@@ -51,11 +47,14 @@ jobs:
51
47
  matrix:
52
48
  include:
53
49
  - vllm_branch: v0.9.1
50
+ vllm_ascend_branch: main
51
+ vllm_use_v1: 0
52
+ - vllm_branch: v0.9.0
54
53
  vllm_ascend_branch: main
55
54
  vllm_use_v1: 1
56
55
  max-parallel: 1
57
56
  container:
58
- image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
57
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
59
58
  volumes:
60
59
  - /usr/local/dcmi:/usr/local/dcmi
61
60
  - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -112,8 +111,6 @@ jobs:
112
111
  VLLM_TARGET_DEVICE=empty pip install -e .
113
112
 
114
113
  - name: Install vllm-project/vllm-ascend
115
- env:
116
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
117
114
  run: |
118
115
  pip install -e .
119
116
  pip install -r benchmarks/requirements-bench.txt
@@ -164,8 +161,6 @@ jobs:
164
161
  cp -r benchmarks/* /github/home/benchmarks/
165
162
 
166
163
  - name: Run benchmark iteration
167
- env:
168
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
169
164
  if: github.event_name != 'pull_request'
170
165
  run: |
171
166
  while IFS= read -r line || [[ -n "$line" ]]; do
@@ -53,7 +53,7 @@ jobs:
53
53
  runs-on: ubuntu-latest
54
54
  strategy:
55
55
  matrix:
56
- python-version: ["3.10"]
56
+ python-version: ["3.11"]
57
57
  steps:
58
58
  - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
59
59
 
@@ -71,16 +71,11 @@ jobs:
71
71
  --build-arg PY_VERSION=${{ matrix.python-version }} \
72
72
  -t wheel:v1 .
73
73
  docker run --rm \
74
+ -u $(id -u):$(id -g) \
74
75
  -v $(pwd):/outpwd \
75
76
  wheel:v1 \
76
77
  bash -c "cp -r /workspace/vllm-ascend/dist /outpwd"
77
78
  ls dist
78
-
79
- - name: Archive wheel
80
- uses: actions/upload-artifact@v4
81
- with:
82
- name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
83
- path: dist/*
84
79
 
85
80
  - name: Set up Python ${{ matrix.python-version }}
86
81
  if: startsWith(github.ref, 'refs/tags/')
@@ -88,6 +83,40 @@ jobs:
88
83
  with:
89
84
  python-version: ${{ matrix.python-version }}
90
85
 
86
+ - name: Repair wheels with auditwheel
87
+ run: |
88
+ python3 -m pip install auditwheel
89
+ python3 -m pip install patchelf
90
+ mkdir -p dist/repaired
91
+ for whl in dist/*.whl; do
92
+ auditwheel repair "$whl" -w dist/repaired/ \
93
+ --exclude libplatform.so \
94
+ --exclude libregister.so \
95
+ --exclude libge_common_base.so \
96
+ --exclude libc10.so \
97
+ --exclude libc_sec.so \
98
+ --exclude "libascend*.so" \
99
+ --exclude "libtorch*.so"
100
+ done
101
+ rm -f dist/*.whl
102
+ mv dist/repaired/*.whl dist/
103
+ rmdir dist/repaired
104
+ ls dist
105
+
106
+ - name: Verify automatic platform tags
107
+ run: |
108
+ cd dist
109
+ for wheel in *.whl; do
110
+ echo "verification file: $wheel"
111
+ auditwheel show "$wheel"
112
+ done
113
+
114
+ - name: Archive wheel
115
+ uses: actions/upload-artifact@v4
116
+ with:
117
+ name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
118
+ path: dist/*
119
+
91
120
  - name: Release
92
121
  if: startsWith(github.ref, 'refs/tags/')
93
122
  run: |
@@ -29,9 +29,6 @@ on:
29
29
  - 'tests/e2e/doctests/**'
30
30
  - 'tests/e2e/common.sh'
31
31
  - 'tests/e2e/run_doctests.sh'
32
- schedule:
33
- # Runs every 4 hours
34
- - cron: '0 */4 * * *'
35
32
 
36
33
  # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
37
34
  # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -46,7 +43,7 @@ jobs:
46
43
  # Each version should be tested
47
44
  fail-fast: false
48
45
  matrix:
49
- vllm_verison: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
46
+ vllm_verison: [main, v0.7.3-dev, main-openeuler, v0.7.3-dev-openeuler]
50
47
  name: vLLM Ascend test
51
48
  runs-on: linux-arm64-npu-1
52
49
  container:
@@ -70,13 +67,13 @@ jobs:
70
67
  run: |
71
68
  sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
72
69
  apt-get update -y
73
- apt install -y gcc g++ libnuma-dev git curl jq
70
+ apt install git curl -y
74
71
 
75
72
  - name: Config OS mirrors - openEuler
76
73
  if: ${{ endsWith(matrix.vllm_verison, '-openeuler') }}
77
74
  run: |
78
75
  yum update -y
79
- yum install -y gcc g++ numactl-devel git curl jq
76
+ yum install git curl -y
80
77
 
81
78
  - name: Config pip mirrors
82
79
  run: |
@@ -88,13 +85,11 @@ jobs:
88
85
  - name: Run vllm-ascend/tests/e2e/run_doctests.sh
89
86
  run: |
90
87
  # PWD: /__w/vllm-ascend/vllm-ascend
91
- echo "Replacing /vllm-workspace/vllm-ascend/tests/e2e ..."
92
- rm -rf /vllm-workspace/vllm-ascend/tests/e2e
93
- mkdir -p /vllm-workspace/vllm-ascend/tests
94
- cp -r tests/e2e /vllm-workspace/vllm-ascend/tests/
95
-
96
- # TODO(yikun): Remove this after conf.py merged
97
- cp docs/source/conf.py /vllm-workspace/vllm-ascend/docs/source/
88
+ # Address old branch like v0.7.3:
89
+ if [ ! -d /vllm-workspace/vllm-ascend/tests/e2e ]; then
90
+ echo "Warning: the doctest path doesn't exists, copy now"
91
+ cp -r tests/e2e /vllm-workspace/vllm-ascend/tests/
92
+ fi
98
93
 
99
94
  # Simulate container to enter directory
100
95
  cd /workspace
@@ -0,0 +1,242 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: 'test'
19
+
20
+ on:
21
+ pull_request:
22
+ branches:
23
+ - 'main'
24
+ - '*-dev'
25
+ paths:
26
+ - '*.txt'
27
+ - '**/*.py'
28
+ - '.github/workflows/vllm_ascend_test.yaml'
29
+ - '!docs/**'
30
+ - 'pytest.ini'
31
+ - '!benchmarks/**'
32
+ - 'tools/mypy.sh'
33
+ - 'mypy.ini'
34
+ - '.github/workflows/*.ya?ml'
35
+ - '.github/workflows/actionlint.*'
36
+ - '.github/workflows/matchers/actionlint.json'
37
+
38
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
39
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
40
+ # It's used to activate ascend-toolkit environment variables.
41
+ defaults:
42
+ run:
43
+ shell: bash -el {0}
44
+
45
+ jobs:
46
+ lint:
47
+ runs-on: ubuntu-latest
48
+ strategy:
49
+ matrix:
50
+ python-version: ["3.11"]
51
+ vllm_version: [v0.9.1]
52
+ steps:
53
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
54
+ - name: Set up Python ${{ matrix.python-version }}
55
+ uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
56
+ with:
57
+ python-version: ${{ matrix.python-version }}
58
+ - name: Install dependencies
59
+ run: |
60
+ python -m pip install --upgrade pip
61
+ pip install -r requirements-lint.txt
62
+ - name: Run codespell check
63
+ run: |
64
+ CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
65
+ CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn')
66
+
67
+ codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
68
+ - name: Analysing the code with ruff
69
+ run: |
70
+ echo "::add-matcher::.github/workflows/matchers/ruff.json"
71
+ ruff check --output-format github .
72
+ - name: Run isort
73
+ run: |
74
+ isort . --check-only
75
+ - name: Running yapf
76
+ run: |
77
+ python -m pip install --upgrade pip
78
+ pip install toml
79
+ pip install yapf==0.32.0
80
+ yapf --diff --recursive .
81
+
82
+ - name: Install dependencies
83
+ run: |
84
+ pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
85
+
86
+ - name: Checkout vllm-project/vllm repo
87
+ uses: actions/checkout@v4
88
+ with:
89
+ repository: vllm-project/vllm
90
+ ref: ${{ matrix.vllm_version }}
91
+ path: vllm-empty
92
+
93
+ - name: Actionlint Check
94
+ env:
95
+ SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
96
+ run: |
97
+ echo "::add-matcher::.github/workflows/matchers/actionlint.json"
98
+ tools/actionlint.sh -color
99
+
100
+ - name: Install vllm-project/vllm from source
101
+ working-directory: vllm-empty
102
+ run: |
103
+ pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
104
+ VLLM_TARGET_DEVICE=empty pip install .
105
+
106
+ - name: Mypy Check
107
+ run: |
108
+ echo "::add-matcher::.github/workflows/matchers/mypy.json"
109
+ tools/mypy.sh 1 ${{ matrix.python-version }}
110
+
111
+ e2e:
112
+ needs: [lint]
113
+ if: ${{ needs.lint.result == 'success' }}
114
+ strategy:
115
+ max-parallel: 2
116
+ matrix:
117
+ os: [linux-arm64-npu-1, linux-arm64-npu-4]
118
+ vllm_version: [v0.9.1]
119
+ concurrency:
120
+ group: >
121
+ ${{
122
+ matrix.os == 'linux-arm64-npu-4'
123
+ && github.event.pull_request.number
124
+ && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
125
+ || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_version, github.event.pull_request.number)
126
+ }}
127
+ cancel-in-progress: false
128
+ name: vLLM Ascend test
129
+ runs-on: ${{ matrix.os }}
130
+ container:
131
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
132
+ env:
133
+ HF_ENDPOINT: https://hf-mirror.com
134
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
135
+ VLLM_LOGGING_LEVEL: ERROR
136
+ steps:
137
+ - name: Check npu and CANN info
138
+ run: |
139
+ npu-smi info
140
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
141
+
142
+ - name: Config mirrors
143
+ run: |
144
+ sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
145
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
146
+ apt-get update -y
147
+ apt install git -y
148
+ git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
149
+
150
+ - name: Checkout vllm-project/vllm-ascend repo
151
+ uses: actions/checkout@v4
152
+
153
+ - name: Install system dependencies
154
+ run: |
155
+ apt-get -y install `cat packages.txt`
156
+ apt-get -y install gcc g++ cmake libnuma-dev
157
+
158
+ - name: Checkout vllm-project/vllm repo
159
+ uses: actions/checkout@v4
160
+ with:
161
+ repository: vllm-project/vllm
162
+ ref: ${{ matrix.vllm_version }}
163
+ path: ./vllm-empty
164
+
165
+ - name: Install vllm-project/vllm from source
166
+ working-directory: ./vllm-empty
167
+ run: |
168
+ VLLM_TARGET_DEVICE=empty pip install -e .
169
+
170
+ - name: Install vllm-project/vllm-ascend
171
+ run: |
172
+ pip install -r requirements-dev.txt
173
+ pip install -v -e .
174
+
175
+ - name: Run vllm-project/vllm-ascend test for V1 Engine
176
+ env:
177
+ VLLM_USE_V1: 1
178
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
179
+ run: |
180
+ if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
181
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
182
+ # guided decoding doesn't work, fix it later
183
+ # pytest -sv tests/singlecard/test_guided_decoding.py.py
184
+ # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
185
+ pytest -sv tests/singlecard/test_ascend_config.py
186
+ pytest -sv tests/singlecard/test_camem.py
187
+ pytest -sv tests/singlecard/core/test_ascend_scheduler.py
188
+ pytest -sv tests/singlecard/core/test_ascend_scheduler_e2e.py
189
+ pytest -sv tests/singlecard/ \
190
+ --ignore=tests/singlecard/test_offline_inference.py \
191
+ --ignore=tests/singlecard/test_guided_decoding.py \
192
+ --ignore=tests/singlecard/test_ascend_config.py \
193
+ --ignore=tests/singlecard/test_camem.py \
194
+ --ignore=tests/singlecard/core/test_ascend_scheduler.py \
195
+ --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
196
+ else
197
+ pytest -sv tests/multicard/test_ilama_lora_tp2.py
198
+ # To avoid oom, we need to run the test in a single process.
199
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_w4a8_deepseek.py::test_deepseek_W4A8
200
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
201
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
202
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
203
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
204
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
205
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_w8a8_ep_dbo
206
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
207
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ_with_flashcomm_v1
208
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_with_flashcomm_v2
209
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py --ignore=tests/multicard/test_w4a8_deepseek.py
210
+ fi
211
+
212
+ - name: Run vllm-project/vllm-ascend test on V0 engine
213
+ if: ${{ github.event_name == 'schedule' }}
214
+ env:
215
+ VLLM_USE_V1: 0
216
+ run: |
217
+ if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
218
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
219
+ # guided decoding doesn't work, fix it later
220
+ # pytest -sv tests/singlecard/test_guided_decoding.py.py
221
+ pytest -sv tests/singlecard/test_camem.py
222
+ # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
223
+ pytest -sv tests/singlecard/test_ascend_config.py
224
+ pytest -sv tests/singlecard/test_prompt_embedding.py
225
+ pytest -sv tests/singlecard/ \
226
+ --ignore=tests/singlecard/test_offline_inference.py \
227
+ --ignore=tests/singlecard/test_guided_decoding.py \
228
+ --ignore=tests/singlecard/test_camem.py \
229
+ --ignore=tests/singlecard/test_ascend_config.py \
230
+ --ignore=tests/singlecard/test_prompt_embedding.py \
231
+ --ignore=tests/singlecard/core/test_ascend_scheduler.py \
232
+ --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
233
+ else
234
+ pytest -sv tests/multicard/test_ilama_lora_tp2.py
235
+ # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
236
+ # To avoid oom, we need to run the test in a single process.
237
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
238
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
239
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
240
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
241
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
242
+ fi
@@ -17,9 +17,6 @@
17
17
  name: 'e2e test / long-term-test'
18
18
 
19
19
  on:
20
- schedule:
21
- # Runs at 23:00 UTC (7:00 AM Beijing) every day
22
- - cron: '0 23 * * *'
23
20
  pull_request:
24
21
  types: [ labeled ]
25
22
 
@@ -43,12 +40,12 @@ jobs:
43
40
  max-parallel: 2
44
41
  matrix:
45
42
  os: [linux-arm64-npu-1, linux-arm64-npu-4]
46
- vllm_version: [main, v0.9.1]
43
+ vllm_version: [v0.9.1]
47
44
  name: vLLM Ascend long term test
48
45
  runs-on: ${{ matrix.os }}
49
46
  container:
50
47
  # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
51
- image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
48
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
52
49
  env:
53
50
  HF_ENDPOINT: https://hf-mirror.com
54
51
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -88,8 +85,6 @@ jobs:
88
85
  VLLM_TARGET_DEVICE=empty pip install -e .
89
86
 
90
87
  - name: Install vllm-project/vllm-ascend
91
- env:
92
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
93
88
  run: |
94
89
  pip install -r requirements-dev.txt
95
90
  pip install -v -e .
@@ -97,13 +92,17 @@ jobs:
97
92
  - name: Run vllm-project/vllm-ascend long term test
98
93
  run: |
99
94
  if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
100
- # spec decode test
101
- VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
95
+ # v0 spec decode test
96
+ # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py # it needs a clean process
97
+ # pytest -sv tests/long_term/spec_decode_v0 --ignore=tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py
98
+ # v1 spec decode test
99
+ # TODO: revert me when test_v1_mtp_correctness.py is fixed
100
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v1/test_v1_mtp_correctness.py
102
101
  # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
103
- VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py
104
- VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
105
- pytest -sv tests/e2e/long_term/spec_decode --ignore=tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
106
- pytest -sv tests/e2e/long_term/test_accuracy.py
102
+ # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v1/test_v1_spec_decode.py
103
+ # accuracy test single card
104
+ pytest -sv tests/long_term/test_accuracy.py
107
105
  else
108
- VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py
106
+ # accuracy test multi card
107
+ VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
109
108
  fi
@@ -17,9 +17,6 @@
17
17
  name: 'e2e test / pd-disaggregation'
18
18
 
19
19
  on:
20
- schedule:
21
- # Runs at 23:00 UTC (7:00 AM Beijing) every day
22
- - cron: '0 23 * * *'
23
20
  pull_request:
24
21
  types: [ labeled ]
25
22
 
@@ -41,12 +38,12 @@ jobs:
41
38
  if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
42
39
  strategy:
43
40
  matrix:
44
- vllm_verison: [main, v0.9.1]
41
+ vllm_verison: [v0.9.1]
45
42
  name: vLLM Ascend prefilling decoding disaggregation test
46
43
  runs-on: linux-arm64-npu-static-8
47
44
 
48
45
  container:
49
- image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
46
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
50
47
  volumes:
51
48
  - /usr/local/dcmi:/usr/local/dcmi
52
49
  - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -97,8 +94,6 @@ jobs:
97
94
  VLLM_TARGET_DEVICE=empty pip install -e .
98
95
 
99
96
  - name: Install vllm-project/vllm-ascend
100
- env:
101
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
102
97
  run: |
103
98
  pip install -r requirements-dev.txt
104
99
  pip install -v -e .
@@ -106,3 +101,7 @@ jobs:
106
101
  - name: Run vllm-project/vllm-ascend PD Disaggregation test
107
102
  run: |
108
103
  pytest -sv tests/e2e/pd_disaggreate/test_pd_e2e.py
104
+
105
+ - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
106
+ run: |
107
+ bash tests/e2e/pd_disaggreate/run_edge_case_test.sh