vllm-ascend 0.10.1rc1__tar.gz → 0.11.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
  2. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/actionlint.yaml +3 -0
  3. vllm_ascend-0.10.1rc1/.github/workflows/vllm_ascend_test.yaml → vllm_ascend-0.11.0rc0/.github/workflows/_e2e_test.yaml +52 -153
  4. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/accuracy_test.yaml +3 -3
  5. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/format_pr_body.yaml +2 -8
  6. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_310p_openeuler.yml +12 -0
  7. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_310p_ubuntu.yml +13 -1
  8. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_a3_openeuler.yml +12 -0
  9. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_a3_ubuntu.yml +13 -1
  10. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_openeuler.yml +12 -0
  11. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_ubuntu.yml +13 -1
  12. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/label_merge_conflict.yml +0 -1
  13. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/labeler.yml +1 -1
  14. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/nightly_benchmarks.yaml +2 -2
  15. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/pre-commit.yml +7 -1
  16. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/release_code.yml +1 -1
  17. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/release_whl.yml +1 -1
  18. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/reminder_comment.yml +1 -1
  19. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_dist.yaml +1 -1
  20. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_doctest.yaml +1 -1
  21. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test.yaml +158 -0
  22. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test_310p.yaml +1 -1
  23. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full.yaml +79 -0
  24. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml +51 -0
  25. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +45 -0
  26. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test_pd.yaml +1 -0
  27. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/Dockerfile +1 -1
  28. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.310p +1 -1
  29. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.310p.openEuler +1 -1
  30. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.a3 +1 -1
  31. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.a3.openEuler +1 -1
  32. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.openEuler +1 -1
  33. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/PKG-INFO +4 -4
  34. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/README.md +3 -3
  35. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/README.zh.md +3 -3
  36. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/ops/ben_vocabparallelembedding.py +1 -1
  37. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/run-performance-benchmarks.sh +3 -1
  38. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/serving-tests.json +2 -1
  39. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/torch_binding.cpp +14 -17
  40. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/torch_binding_meta.cpp +4 -4
  41. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/versioning_policy.md +4 -0
  42. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/conf.py +5 -5
  43. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +20 -0
  44. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +19 -0
  45. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +21 -0
  46. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +21 -0
  47. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/index.md +4 -0
  48. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -1
  49. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/faqs.md +19 -1
  50. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/installation.md +1 -0
  51. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +2 -6
  52. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/index.md +4 -0
  53. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_pd_disaggregation.md +244 -0
  54. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_qwen3vl.md +156 -0
  55. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_ray.md +182 -0
  56. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
  57. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/additional_config.md +16 -3
  58. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +94 -0
  59. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  60. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/index.md +1 -0
  61. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/quantization.md +3 -2
  62. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/release_notes.md +65 -0
  63. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/README.md +6 -10
  64. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/gen_ranktable.py +43 -29
  65. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/gen_ranktable.sh +10 -1
  66. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +1 -0
  67. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +272 -0
  68. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/run_dp_template.sh +1 -1
  69. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_disaggregated_prefill_npu.py +1 -1
  70. vllm_ascend-0.11.0rc0/examples/offline_weight_load.py +326 -0
  71. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/run_dp_server.sh +1 -1
  72. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/requirements-dev.txt +1 -1
  73. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/common.sh +1 -1
  74. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/conftest.py +8 -1
  75. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/doctests/001-quickstart-test.sh +3 -3
  76. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/doctests/002-pip-binary-installation-test.sh +1 -1
  77. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/model_utils.py +6 -1
  78. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +6 -2
  79. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +2 -0
  80. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +2 -0
  81. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +2 -0
  82. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/accuracy.txt +1 -0
  83. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/report_template.md +15 -3
  84. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/test_lm_eval_correctness.py +7 -3
  85. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_expert_parallel.py +16 -6
  86. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_offline_inference_distributed.py +57 -3
  87. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_prefix_caching.py +22 -20
  88. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_qwen3_moe.py +0 -1
  89. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_torchair_graph_mode.py +3 -0
  90. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_weight_loader.py +188 -0
  91. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +2 -2
  92. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/run_doctests.sh +0 -1
  93. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_bgmv_expand.py +2 -2
  94. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +1 -1
  95. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_fused_moe.py +86 -18
  96. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_rotary_embedding.py +3 -3
  97. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +1 -1
  98. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +20 -8
  99. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -4
  100. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -4
  101. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_ascend_scheduler.py +23 -0
  102. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_guided_decoding.py +54 -23
  103. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +103 -0
  104. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_vlm.py +10 -9
  105. vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/singlecard/test_sampler.py +36 -0
  106. vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
  107. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_attention_v1.py +61 -47
  108. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_mla_v1.py +39 -3
  109. vllm_ascend-0.11.0rc0/tests/ut/compilation/test_acl_graph.py +720 -0
  110. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/core/test_schedule_config.py +22 -41
  111. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/core/test_scheduler.py +259 -351
  112. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/test_parallel_state.py +7 -3
  113. vllm_ascend-0.11.0rc0/tests/ut/eplb/adaptor/test_abstract_adaptor.py +73 -0
  114. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_abstract.py +31 -0
  115. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +98 -0
  116. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +99 -0
  117. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_factor.py +23 -0
  118. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +122 -0
  119. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_utils.py +79 -0
  120. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_mooncake_connector.py +68 -3
  121. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/utils.py +21 -46
  122. vllm_ascend-0.11.0rc0/tests/ut/models/conftest.py +114 -0
  123. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_deepseek_mtp.py +7 -6
  124. vllm_ascend-0.11.0rc0/tests/ut/models/test_deepseek_v2.py +107 -0
  125. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_5_vl.py +56 -0
  126. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen3_moe.py +0 -30
  127. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_activation.py +12 -1
  128. vllm_ascend-0.11.0rc0/tests/ut/ops/test_comm_utils.py +98 -0
  129. vllm_ascend-0.11.0rc0/tests/ut/ops/test_common_fused_moe.py +56 -0
  130. vllm_ascend-0.11.0rc0/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +289 -0
  131. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_fused_ops.py +185 -138
  132. vllm_ascend-0.11.0rc0/tests/ut/ops/test_layernorm.py +161 -0
  133. vllm_ascend-0.11.0rc0/tests/ut/ops/test_linear.py +96 -0
  134. vllm_ascend-0.11.0rc0/tests/ut/ops/test_moe_comm_method.py +232 -0
  135. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_rotary_embedding.py +96 -36
  136. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_token_dispatcher.py +72 -156
  137. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_vocab_parallel_embedding.py +13 -1
  138. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_quant_config.py +23 -22
  139. vllm_ascend-0.11.0rc0/tests/ut/quantization/test_utils.py +62 -0
  140. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w4a8_dynamic.py +108 -48
  141. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w8a8.py +3 -3
  142. vllm_ascend-0.11.0rc0/tests/ut/quantization/test_w8a8_dynamic.py +69 -0
  143. vllm_ascend-0.11.0rc0/tests/ut/sample/logits_processor/test_builtin.py +40 -0
  144. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_ascend_config.py +29 -28
  145. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_platform.py +4 -33
  146. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_utils.py +20 -5
  147. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -2
  148. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +11 -5
  149. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/ops/test_torchair_fused_moe.py +16 -10
  150. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +26 -27
  151. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +78 -45
  152. vllm_ascend-0.11.0rc0/tests/ut/torchair/test_torchair_attention.py +95 -0
  153. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/test_torchair_mla.py +37 -9
  154. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/test_utils.py +0 -13
  155. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_input_batch.py +1 -1
  156. vllm_ascend-0.11.0rc0/tests/ut/worker/test_model_runner_v1.py +107 -0
  157. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_worker_v1.py +91 -9
  158. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/__init__.py +2 -0
  159. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/_version.py +3 -3
  160. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ascend_config.py +36 -13
  161. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ascend_forward_context.py +71 -22
  162. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/attention_mask.py +33 -18
  163. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/attention_v1.py +152 -90
  164. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/mla_v1.py +81 -98
  165. vllm_ascend-0.11.0rc0/vllm_ascend/attention/sfa_v1.py +986 -0
  166. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/utils.py +44 -2
  167. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/compilation/acl_graph.py +81 -6
  168. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/core/schedule_config.py +33 -9
  169. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/core/scheduler.py +133 -84
  170. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/__init__.py +5 -0
  171. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_connector.py +457 -0
  172. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +202 -0
  173. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager/metadata.py +269 -0
  174. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +141 -48
  175. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/config_data.py +447 -0
  176. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/kv_transfer.py +251 -0
  177. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_engine.py +489 -0
  178. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store.py +88 -0
  179. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +484 -0
  180. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/mooncake_connector.py +63 -21
  181. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/parallel_state.py +26 -1
  182. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/envs.py +26 -1
  183. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/abstract_adaptor.py +44 -0
  184. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/vllm_adaptor.py +289 -0
  185. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +137 -0
  186. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_utils.py +135 -0
  187. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_worker.py +436 -0
  188. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_abstract.py +42 -0
  189. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +389 -0
  190. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +771 -0
  191. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_factory.py +33 -0
  192. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_flashlb.py +651 -0
  193. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_random.py +30 -0
  194. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/eplb_updator.py +205 -0
  195. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/utils.py +77 -0
  196. {vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.11.0rc0/vllm_ascend/lora}/lora_ops.py +13 -12
  197. {vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.11.0rc0/vllm_ascend/lora}/punica_npu.py +9 -17
  198. vllm_ascend-0.11.0rc0/vllm_ascend/lora/utils.py +110 -0
  199. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/meta_registration.py +7 -6
  200. vllm_ascend-0.11.0rc0/vllm_ascend/models/__init__.py +60 -0
  201. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/deepseek_mtp.py +8 -23
  202. vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v2.py +666 -0
  203. vllm_ascend-0.11.0rc0/vllm_ascend/models/layers/mla.py +180 -0
  204. vllm_ascend-0.11.0rc0/vllm_ascend/models/layers/sfa.py +233 -0
  205. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_5_vl.py +65 -9
  206. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_5_vl_without_padding.py +277 -19
  207. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_vl.py +17 -7
  208. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen3_moe.py +12 -135
  209. vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen3_next.py +676 -0
  210. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/__init__.py +10 -8
  211. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/activation.py +2 -0
  212. vllm_ascend-0.11.0rc0/vllm_ascend/ops/casual_conv1d.py +539 -0
  213. vllm_ascend-0.11.0rc0/vllm_ascend/ops/common_fused_moe.py +368 -0
  214. vllm_ascend-0.11.0rc0/vllm_ascend/ops/fla.py +218 -0
  215. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/fused_moe.py +123 -243
  216. vllm_ascend-0.11.0rc0/vllm_ascend/ops/layernorm.py +159 -0
  217. vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear.py +367 -0
  218. vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear_op.py +459 -0
  219. {vllm_ascend-0.10.1rc1/vllm_ascend/ops → vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe}/comm_utils.py +52 -1
  220. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +459 -0
  221. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/moe_comm_method.py +273 -0
  222. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers → vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe}/moe_mlp.py +113 -60
  223. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe}/token_dispatcher.py +86 -175
  224. vllm_ascend-0.11.0rc0/vllm_ascend/ops/register_custom_ops.py +201 -0
  225. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/rotary_embedding.py +60 -44
  226. vllm_ascend-0.11.0rc0/vllm_ascend/ops/sigmoid_gating.py +384 -0
  227. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/vocab_parallel_embedding.py +14 -0
  228. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/__init__.py +28 -11
  229. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/__init__.py +6 -0
  230. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_config.py +313 -0
  231. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +100 -0
  232. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_multimodal_merge.py +58 -0
  233. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_transformers_utils.py +200 -0
  234. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/__init__.py +13 -3
  235. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_layer.py +202 -0
  236. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_selector.py +181 -0
  237. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attentionspec.py +110 -0
  238. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_triton.py +16 -0
  239. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_weight_loader.py +44 -0
  240. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/platform.py +101 -37
  241. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/__init__.py +0 -0
  242. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/quant_config.py +108 -32
  243. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/utils.py +83 -0
  244. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w4a8_dynamic.py +70 -63
  245. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w8a8.py +1 -1
  246. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w8a8_dynamic.py +19 -193
  247. vllm_ascend-0.11.0rc0/vllm_ascend/sample/__init__.py +0 -0
  248. vllm_ascend-0.11.0rc0/vllm_ascend/sample/logits_processor/__init__.py +50 -0
  249. vllm_ascend-0.11.0rc0/vllm_ascend/sample/logits_processor/builtin.py +35 -0
  250. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/sampler.py +12 -12
  251. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/__init__.py +33 -0
  252. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/eagle_proposer.py +674 -0
  253. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/interface.py +51 -0
  254. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/mtp_proposer.py +657 -0
  255. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/ngram_proposer.py +65 -0
  256. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/__init__.py +0 -0
  257. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models/__init__.py +0 -0
  258. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/qwen2.py +3 -4
  259. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/qwen3_moe.py +13 -6
  260. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +1 -5
  261. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +278 -37
  262. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_pangu_moe.py +9 -10
  263. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/__init__.py +0 -0
  264. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/shared_weight_layer.py +245 -0
  265. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/communication_op.py → vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/torchair_activation.py +37 -25
  266. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_fused_moe.py +124 -65
  267. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/torchair_layernorm.py +51 -0
  268. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +8 -15
  269. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/quantization/__init__.py +0 -0
  270. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +58 -49
  271. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +23 -12
  272. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_attention.py +28 -17
  273. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_mla.py +104 -120
  274. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_model_runner.py +115 -58
  275. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_sfa.py +1330 -0
  276. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_worker.py +22 -22
  277. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/utils.py +28 -9
  278. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/utils.py +143 -49
  279. vllm_ascend-0.11.0rc0/vllm_ascend/worker/__init__.py +0 -0
  280. vllm_ascend-0.11.0rc0/vllm_ascend/worker/block_table.py +312 -0
  281. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/model_runner_v1.py +1834 -1008
  282. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/npu_input_batch.py +64 -36
  283. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/worker_v1.py +97 -33
  284. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/PKG-INFO +4 -4
  285. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/SOURCES.txt +108 -28
  286. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_moe_comm.py +0 -175
  287. vllm_ascend-0.10.1rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +0 -139
  288. vllm_ascend-0.10.1rc1/tests/ut/models/test_deepseek_v2.py +0 -295
  289. vllm_ascend-0.10.1rc1/tests/ut/ops/test_common_fused_moe.py +0 -69
  290. vllm_ascend-0.10.1rc1/tests/ut/ops/test_layernorm.py +0 -53
  291. vllm_ascend-0.10.1rc1/tests/ut/ops/test_linear.py +0 -363
  292. vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +0 -167
  293. vllm_ascend-0.10.1rc1/tests/ut/quantization/test_func_wrapper.py +0 -134
  294. vllm_ascend-0.10.1rc1/tests/ut/quantization/test_quantizer.py +0 -145
  295. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/moe_comm_method.py +0 -556
  296. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/tensor_parallel.py +0 -248
  297. vllm_ascend-0.10.1rc1/vllm_ascend/models/__init__.py +0 -61
  298. vllm_ascend-0.10.1rc1/vllm_ascend/models/deepseek_dbo.py +0 -1046
  299. vllm_ascend-0.10.1rc1/vllm_ascend/models/deepseek_v2.py +0 -997
  300. vllm_ascend-0.10.1rc1/vllm_ascend/models/deepseek_v3.py +0 -27
  301. vllm_ascend-0.10.1rc1/vllm_ascend/models/pangu_moe.py +0 -1106
  302. vllm_ascend-0.10.1rc1/vllm_ascend/models/qwen3.py +0 -156
  303. vllm_ascend-0.10.1rc1/vllm_ascend/ops/common_fused_moe.py +0 -531
  304. vllm_ascend-0.10.1rc1/vllm_ascend/ops/layernorm.py +0 -85
  305. vllm_ascend-0.10.1rc1/vllm_ascend/ops/linear.py +0 -309
  306. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +0 -147
  307. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_lora_embedding.py +0 -29
  308. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/func_wrapper.py +0 -184
  309. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/quantizer.py +0 -311
  310. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_quantizer.py +0 -29
  311. vllm_ascend-0.10.1rc1/vllm_ascend/worker/eagle_proposer_v1.py +0 -398
  312. vllm_ascend-0.10.1rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -439
  313. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.gemini/config.yaml +0 -0
  314. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/Dockerfile.buildwheel +0 -0
  315. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  316. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  317. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  318. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  319. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  320. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  321. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  322. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  323. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  324. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  325. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  326. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  327. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/dependabot.yml +0 -0
  328. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/format_pr_body.sh +0 -0
  329. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/labeler.yml +0 -0
  330. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/actionlint.json +0 -0
  331. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/mypy.json +0 -0
  332. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/ruff.json +0 -0
  333. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.gitignore +0 -0
  334. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.pre-commit-config.yaml +0 -0
  335. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/.readthedocs.yaml +0 -0
  336. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/CMakeLists.txt +0 -0
  337. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/CODE_OF_CONDUCT.md +0 -0
  338. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/CONTRIBUTING.md +0 -0
  339. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/DCO +0 -0
  340. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/LICENSE +0 -0
  341. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/README.md +0 -0
  342. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/requirements-bench.txt +0 -0
  343. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  344. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/perf_result_template.md +0 -0
  345. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/latency-tests.json +0 -0
  346. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/throughput-tests.json +0 -0
  347. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/cmake/utils.cmake +0 -0
  348. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/codecov.yml +0 -0
  349. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/collect_env.py +0 -0
  350. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/camem_allocator.cpp +0 -0
  351. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/bgmv_expand.cpp +0 -0
  352. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/bgmv_shrink.cpp +0 -0
  353. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  354. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  355. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/sgmv_expand.cpp +0 -0
  356. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/sgmv_shrink.cpp +0 -0
  357. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/types.h +0 -0
  358. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/utils.h +0 -0
  359. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/ops.h +0 -0
  360. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/csrc/utils.h +0 -0
  361. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/Makefile +0 -0
  362. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/README.md +0 -0
  363. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/requirements-docs.txt +0 -0
  364. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/requirements-test.txt +0 -0
  365. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/_templates/sections/header.html +0 -0
  366. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  367. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  368. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/contributors.md +0 -0
  369. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/governance.md +0 -0
  370. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/user_stories/index.md +0 -0
  371. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/user_stories/llamafactory.md +0 -0
  372. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/contribution/index.md +0 -0
  373. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/contribution/testing.md +0 -0
  374. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/index.md +0 -0
  375. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  376. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  377. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  378. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  379. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/index.md +0 -0
  380. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  381. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  382. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/index.md +0 -0
  383. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/index.md +0 -0
  384. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  385. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  386. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  387. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/index.md +0 -0
  388. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  389. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  390. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  391. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  392. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  393. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  394. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  395. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  396. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  397. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  398. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  399. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  400. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  401. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  402. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  403. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  404. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  405. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  406. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  407. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  408. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  409. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  410. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  411. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  412. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  413. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  414. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  415. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  416. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  417. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  418. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  419. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  420. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  421. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  422. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  423. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  424. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  425. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  426. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  427. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  428. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  429. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  430. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  431. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  432. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  433. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  434. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  435. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  436. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  437. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/quick_start.md +0 -0
  438. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node.md +0 -0
  439. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node_kimi.md +0 -0
  440. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu.md +0 -0
  441. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_moge.md +0 -0
  442. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_quantization.md +0 -0
  443. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  444. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_node_300i.md +0 -0
  445. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu.md +0 -0
  446. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_audio.md +0 -0
  447. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_multimodal.md +0 -0
  448. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  449. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
  450. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/env_vars.md +0 -0
  451. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/index.md +0 -0
  452. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  453. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  454. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/lora.md +0 -0
  455. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  456. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  457. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/index.md +0 -0
  458. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  459. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  460. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  461. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  462. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/eplb/eplb_deepseek.py +0 -0
  463. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/eplb/eplb_strategy.py +0 -0
  464. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/README.md +0 -0
  465. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/launch_online_dp.py +0 -0
  466. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_data_parallel.py +0 -0
  467. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_dualbatch_overlap_npu.py +0 -0
  468. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_embed.py +0 -0
  469. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_external_launcher.py +0 -0
  470. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_audio_language.py +0 -0
  471. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_npu.py +0 -0
  472. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_npu_tp2.py +0 -0
  473. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_sleep_mode_npu.py +0 -0
  474. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/examples/prompt_embedding_inference.py +0 -0
  475. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/format.sh +0 -0
  476. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/mypy.ini +0 -0
  477. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/packages.txt +0 -0
  478. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/pyproject.toml +0 -0
  479. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/requirements-lint.txt +0 -0
  480. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/requirements.txt +0 -0
  481. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/setup.cfg +0 -0
  482. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/setup.py +0 -0
  483. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/__init__.py +0 -0
  484. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  485. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  486. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/__init__.py +0 -0
  487. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/conftest.py +0 -0
  488. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_data_parallel.py +0 -0
  489. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_external_launcher.py +0 -0
  490. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
  491. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
  492. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
  493. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  494. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  495. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  496. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/prompts/example.txt +0 -0
  497. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/run_disagg_pd.sh +0 -0
  498. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/__init__.py +0 -0
  499. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/__init__.py +0 -0
  500. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  501. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_aclgraph.py +0 -0
  502. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_camem.py +0 -0
  503. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_chunked.py +0 -0
  504. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_embedding.py +0 -0
  505. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  506. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  507. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_quantization.py +0 -0
  508. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_sampler.py +0 -0
  509. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/utils.py +0 -0
  510. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/__init__.py +0 -0
  511. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_attention_mask.py +0 -0
  512. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/base.py +0 -0
  513. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/conftest.py +0 -0
  514. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/device_allocator/test_camem.py +0 -0
  515. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  516. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  517. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/test_communicator.py +0 -0
  518. /vllm_ascend-0.10.1rc1/tests/ut/models/__init__.py → /vllm_ascend-0.11.0rc0/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  519. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/fake_weight/config.json +0 -0
  520. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  521. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  522. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  523. {vllm_ascend-0.10.1rc1/tests/ut/torchair → vllm_ascend-0.11.0rc0/tests/ut/models}/__init__.py +0 -0
  524. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
  525. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_vl.py +0 -0
  526. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_base.py +0 -0
  527. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_decorator.py +0 -0
  528. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_layers.py +0 -0
  529. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_metadata.py +0 -0
  530. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_ms_split.py +0 -0
  531. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/expert_map.json +0 -0
  532. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  533. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  534. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  535. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/test_rejection_sampler.py +0 -0
  536. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/test_sampler.py +0 -0
  537. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_envs.py +0 -0
  538. {vllm_ascend-0.10.1rc1/vllm_ascend/attention → vllm_ascend-0.11.0rc0/tests/ut/torchair}/__init__.py +0 -0
  539. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
  540. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/actionlint.sh +0 -0
  541. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/check_python_src_init.py +0 -0
  542. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/check_repo.sh +0 -0
  543. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/enforce_regex_import.py +0 -0
  544. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/mypy.sh +0 -0
  545. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/png-lint.sh +0 -0
  546. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/shellcheck.sh +0 -0
  547. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/tools/sphinx-lint.sh +0 -0
  548. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/typos.toml +0 -0
  549. {vllm_ascend-0.10.1rc1/vllm_ascend/compilation → vllm_ascend-0.11.0rc0/vllm_ascend/attention}/__init__.py +0 -0
  550. {vllm_ascend-0.10.1rc1/vllm_ascend/core → vllm_ascend-0.11.0rc0/vllm_ascend/compilation}/__init__.py +0 -0
  551. {vllm_ascend-0.10.1rc1/vllm_ascend/device_allocator → vllm_ascend-0.11.0rc0/vllm_ascend/core}/__init__.py +0 -0
  552. {vllm_ascend-0.10.1rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.11.0rc0/vllm_ascend/device_allocator}/__init__.py +0 -0
  553. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/device_allocator/camem.py +0 -0
  554. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/communicator.py +0 -0
  555. {vllm_ascend-0.10.1rc1/vllm_ascend/lora → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
  556. {vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  557. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  558. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  559. {vllm_ascend-0.10.1rc1/vllm_ascend/multistream → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake}/__init__.py +0 -0
  560. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers → vllm_ascend-0.11.0rc0/vllm_ascend/eplb}/__init__.py +0 -0
  561. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
  562. {vllm_ascend-0.10.1rc1/vllm_ascend/quantization → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core}/__init__.py +0 -0
  563. {vllm_ascend-0.10.1rc1/vllm_ascend/sample → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
  564. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair → vllm_ascend-0.11.0rc0/vllm_ascend/lora}/__init__.py +0 -0
  565. /vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/__init__.py → /vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v3.py +0 -0
  566. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops → vllm_ascend-0.11.0rc0/vllm_ascend/models/layers}/__init__.py +0 -0
  567. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization → vllm_ascend-0.11.0rc0/vllm_ascend/multistream}/__init__.py +0 -0
  568. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/base.py +0 -0
  569. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/context.py +0 -0
  570. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/decorator.py +0 -0
  571. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/layers.py +0 -0
  572. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/metadata.py +0 -0
  573. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/ms_split.py +0 -0
  574. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/attention.py +0 -0
  575. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  576. {vllm_ascend-0.10.1rc1/vllm_ascend/worker → vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe}/__init__.py +0 -0
  577. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers → vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe}/experts_selector.py +0 -0
  578. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/__init__.py +0 -0
  579. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -0
  580. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_main/__init__.py +0 -0
  581. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/__init__.py +0 -0
  582. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  583. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_logits.py +0 -0
  584. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  585. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  586. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/rejection_sampler.py +0 -0
  587. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
  588. {vllm_ascend-0.10.1rc1/vllm_ascend → vllm_ascend-0.11.0rc0/vllm_ascend/torchair}/ops/sequence_parallel.py +0 -0
  589. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  590. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/entry_points.txt +0 -0
  591. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/requires.txt +0 -0
  592. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -24,6 +24,8 @@ body:
24
24
  value: >
25
25
  - [ ] Create a new issue for release feedback
26
26
 
27
+ - [ ] Upgrade vllm version to the new version for CI and Dockerfile
28
+
27
29
  - [ ] Write the release note PR.
28
30
 
29
31
  - [ ] Update the feedback issue link in docs/source/faqs.md
@@ -15,3 +15,6 @@ self-hosted-runner:
15
15
  - linux-aarch64-a3-2
16
16
  - linux-aarch64-a3-4
17
17
  - linux-aarch64-a3-8
18
+ - linux-amd64-cpu-0
19
+ - linux-amd64-cpu-8
20
+ - linux-amd64-cpu-16
@@ -1,147 +1,27 @@
1
- #
2
- # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- # This file is a part of the vllm-ascend project.
16
- #
17
-
18
- name: 'test'
1
+ name: 'e2e test'
19
2
 
20
3
  on:
21
- push:
22
- branches:
23
- - 'main'
24
- pull_request:
25
- branches:
26
- - 'main'
27
- - '*-dev'
28
-
29
- # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
30
- # declared as "shell: bash -el {0}" on steps that need to be properly activated.
31
- # It's used to activate ascend-toolkit environment variables.
32
- defaults:
33
- run:
34
- shell: bash -el {0}
35
-
36
- # only cancel in-progress runs of the same workflow
37
- # and ignore the lint / 1 card / 4 cards test type
38
- concurrency:
39
- group: ${{ github.workflow }}-${{ github.ref }}
40
- cancel-in-progress: true
4
+ workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
9
+ runner:
10
+ required: true
11
+ type: string
12
+ image:
13
+ required: true
14
+ type: string
15
+ type:
16
+ required: true
17
+ type: string
41
18
 
42
19
  jobs:
43
- lint:
44
- uses: ./.github/workflows/pre-commit.yml
45
-
46
- changes:
47
- runs-on: ubuntu-latest
48
- outputs:
49
- e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
50
- ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
51
- steps:
52
- - uses: actions/checkout@v4
53
- - uses: dorny/paths-filter@v3
54
- id: filter
55
- with:
56
- filters: |
57
- e2e_tracker:
58
- - '.github/workflows/vllm_ascend_test.yaml'
59
- - 'vllm_ascend/**'
60
- - 'csrc/**'
61
- - 'cmake/**'
62
- - 'tests/e2e/**'
63
- - 'CMakeLists.txt'
64
- - 'setup.py'
65
- - 'requirements.txt'
66
- - 'requirements-dev.txt'
67
- - 'requirements-lint.txt'
68
- - 'packages.txt'
69
- ut_tracker:
70
- - 'tests/ut/**'
71
- ut:
72
- needs: [lint, changes]
73
- name: unit test
74
- # only trigger unit test after lint passed and the change is e2e and ut related.
75
- if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
76
- runs-on: ubuntu-latest
77
- container:
78
- image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
79
- env:
80
- VLLM_LOGGING_LEVEL: ERROR
81
- VLLM_USE_MODELSCOPE: True
82
- strategy:
83
- matrix:
84
- vllm_version: [v0.10.1.1, main]
85
- steps:
86
- - name: Install packages
87
- run: |
88
- apt-get update -y
89
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
90
-
91
- - name: Checkout vllm-project/vllm repo
92
- uses: actions/checkout@v4
93
- with:
94
- repository: vllm-project/vllm
95
- ref: ${{ matrix.vllm_version }}
96
- path: ./vllm-empty
97
-
98
- - name: Install vllm-project/vllm from source
99
- working-directory: ./vllm-empty
100
- run: |
101
- VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
102
- python3 -m pip uninstall -y triton
103
-
104
- - name: Checkout vllm-project/vllm-ascend repo
105
- uses: actions/checkout@v4
106
-
107
- - name: Install vllm-project/vllm-ascend
108
- run: |
109
- export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
110
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
111
- python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
112
- python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
113
-
114
- - name: Run unit test
115
- env:
116
- VLLM_WORKER_MULTIPROC_METHOD: spawn
117
- TORCH_DEVICE_BACKEND_AUTOLOAD: 0
118
- run: |
119
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
120
- pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut --ignore=tests/ut/test_platform.py --ignore=tests/ut/ops/test_vocab_parallel_embedding.py
121
-
122
- - name: Upload coverage to Codecov
123
- if: ${{ matrix.vllm_version == 'main' }}
124
- uses: codecov/codecov-action@v5
125
- env:
126
- CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
127
- with:
128
- flags: unittests
129
- name: vllm-ascend
130
- verbose: true
131
-
132
20
  e2e:
133
- needs: [lint, changes]
134
- # only trigger e2e test after lint passed and the change is e2e related with pull request.
135
- if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
136
- strategy:
137
- max-parallel: 2
138
- matrix:
139
- os: [linux-aarch64-a2-1]
140
- vllm_version: [v0.10.1.1, main]
141
- name: singlecard e2e test
142
- runs-on: ${{ matrix.os }}
21
+ name: singlecard
22
+ runs-on: ${{ inputs.runner }}-1
143
23
  container:
144
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
24
+ image: ${{ inputs.image }}
145
25
  env:
146
26
  VLLM_LOGGING_LEVEL: ERROR
147
27
  VLLM_USE_MODELSCOPE: True
@@ -171,8 +51,9 @@ jobs:
171
51
  uses: actions/checkout@v4
172
52
  with:
173
53
  repository: vllm-project/vllm
174
- ref: ${{ matrix.vllm_version }}
54
+ ref: ${{ inputs.vllm }}
175
55
  path: ./vllm-empty
56
+ fetch-depth: 1
176
57
 
177
58
  - name: Install vllm-project/vllm from source
178
59
  working-directory: ./vllm-empty
@@ -186,10 +67,23 @@ jobs:
186
67
  pip install -r requirements-dev.txt
187
68
  pip install -v -e .
188
69
 
70
+ - name: Run vllm-project/vllm-ascend test
71
+ env:
72
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
73
+ VLLM_USE_MODELSCOPE: True
74
+ PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
75
+ if: ${{ inputs.type == 'light' }}
76
+ run: |
77
+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
78
+ pytest -sv tests/e2e/singlecard/test_quantization.py
79
+ pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
80
+
189
81
  - name: Run e2e test
190
82
  env:
191
83
  VLLM_WORKER_MULTIPROC_METHOD: spawn
192
84
  VLLM_USE_MODELSCOPE: True
85
+ PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
86
+ if: ${{ inputs.type == 'full' }}
193
87
  run: |
194
88
  # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
195
89
  # the test separately.
@@ -200,7 +94,6 @@ jobs:
200
94
  pytest -sv tests/e2e/singlecard/test_chunked.py
201
95
  pytest -sv tests/e2e/singlecard/test_embedding.py
202
96
  pytest -sv tests/e2e/singlecard/test_guided_decoding.py
203
- # TODO: Fix lora accuracy error
204
97
  pytest -sv tests/e2e/singlecard/test_ilama_lora.py
205
98
  pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
206
99
  pytest -sv tests/e2e/singlecard/test_quantization.py
@@ -210,22 +103,16 @@ jobs:
210
103
  # ------------------------------------ v1 spec decode test ------------------------------------ #
211
104
  pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
212
105
  pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
213
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
106
+ # Fix me: OOM error
107
+ #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
214
108
 
215
109
  pytest -sv tests/e2e/singlecard/ops/
216
110
 
217
111
  e2e-2-cards:
218
- needs: [e2e]
219
- if: ${{ needs.e2e.result == 'success' }}
220
- strategy:
221
- max-parallel: 2
222
- matrix:
223
- os: [linux-aarch64-a2-2]
224
- vllm_version: [v0.10.1.1, main]
225
- name: multicard e2e test
226
- runs-on: ${{ matrix.os }}
112
+ name: multicard
113
+ runs-on: ${{ inputs.runner }}-2
227
114
  container:
228
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
115
+ image: ${{ inputs.image }}
229
116
  env:
230
117
  VLLM_LOGGING_LEVEL: ERROR
231
118
  VLLM_USE_MODELSCOPE: True
@@ -255,8 +142,9 @@ jobs:
255
142
  uses: actions/checkout@v4
256
143
  with:
257
144
  repository: vllm-project/vllm
258
- ref: ${{ matrix.vllm_version }}
145
+ ref: ${{ inputs.vllm }}
259
146
  path: ./vllm-empty
147
+ fetch-depth: 1
260
148
 
261
149
  - name: Install vllm-project/vllm from source
262
150
  working-directory: ./vllm-empty
@@ -270,10 +158,19 @@ jobs:
270
158
  pip install -r requirements-dev.txt
271
159
  pip install -v -e .
272
160
 
273
- - name: Run vllm-project/vllm-ascend test
161
+ - name: Run vllm-project/vllm-ascend test (light)
162
+ env:
163
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
164
+ VLLM_USE_MODELSCOPE: True
165
+ if: ${{ inputs.type == 'light' }}
166
+ run: |
167
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
168
+
169
+ - name: Run vllm-project/vllm-ascend test (full)
274
170
  env:
275
171
  VLLM_WORKER_MULTIPROC_METHOD: spawn
276
172
  VLLM_USE_MODELSCOPE: True
173
+ if: ${{ inputs.type == 'full' }}
277
174
  run: |
278
175
  pytest -sv tests/e2e/multicard/test_data_parallel.py
279
176
  pytest -sv tests/e2e/multicard/test_expert_parallel.py
@@ -289,6 +186,8 @@ jobs:
289
186
  pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
290
187
  pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
291
188
  pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
189
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
190
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
292
191
 
293
192
  #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
294
193
  pytest -sv tests/e2e/multicard/test_prefix_caching.py
@@ -19,7 +19,7 @@
19
19
  # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
20
20
  # 2. workflow_dispatch with models input
21
21
  # See detail rule in strategy.matrix note
22
- name: Benchmarks / accuracy
22
+ name: ascend test / accuracy
23
23
 
24
24
  on:
25
25
  schedule:
@@ -112,7 +112,7 @@ jobs:
112
112
  uses: actions/checkout@v4
113
113
  with:
114
114
  repository: vllm-project/vllm
115
- ref: v0.10.1.1
115
+ ref: v0.11.0rc3
116
116
  path: ./vllm-empty
117
117
 
118
118
  - name: Install vllm-project/vllm from source
@@ -303,7 +303,7 @@ jobs:
303
303
  git push -f origin "${{ env.BRANCH_NAME }}"
304
304
 
305
305
  - name: Create PR in upstream via API
306
- uses: actions/github-script@v7
306
+ uses: actions/github-script@v8
307
307
  with:
308
308
  github-token: ${{ secrets.PAT_TOKEN }}
309
309
  script: |
@@ -33,23 +33,17 @@ jobs:
33
33
  runs-on: ubuntu-latest
34
34
 
35
35
  steps:
36
- - name: Checkout vllm-project/vllm repo
37
- uses: actions/checkout@v4
38
- with:
39
- repository: vllm-project/vllm
40
- path: ./vllm-empty
41
36
 
42
37
  - name: Get vLLM version
43
- working-directory: ./vllm-empty
44
38
  run: |
45
- VLLM_COMMIT=$(git rev-parse HEAD)
39
+ VLLM_COMMIT=releases/v0.11.0
46
40
  echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
41
 
48
42
  - name: Checkout repository
49
43
  uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
50
44
 
51
45
  - name: Set up Python
52
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
46
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
53
47
 
54
48
  - name: Get vLLM release version
55
49
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -43,17 +44,28 @@ on:
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
45
46
 
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
46
52
  jobs:
47
53
  build:
48
54
  name: vllm-ascend image build
55
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
56
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
57
  runs-on: >-
50
58
  ${{
51
59
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
52
60
  'ubuntu-latest' ||
53
61
  'ubuntu-24.04-arm'
54
62
  }}
63
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
55
64
  steps:
56
65
  - uses: actions/checkout@v4
66
+ with:
67
+ fetch-depth: 0
68
+ persist-credentials: false
57
69
 
58
70
  - name: Print
59
71
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -43,17 +44,28 @@ on:
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
45
46
 
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
46
52
  jobs:
47
53
  build:
48
54
  name: vllm-ascend image build
55
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
56
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
57
  runs-on: >-
50
58
  ${{
51
59
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
52
60
  'ubuntu-latest' ||
53
61
  'ubuntu-24.04-arm'
54
62
  }}
63
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
55
64
  steps:
56
65
  - uses: actions/checkout@v4
66
+ with:
67
+ fetch-depth: 0
68
+ persist-credentials: false
57
69
 
58
70
  - name: Print
59
71
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -24,6 +24,7 @@ on:
24
24
  - 'cmake/**'
25
25
  - 'CMakeLists.txt'
26
26
  - 'csrc/**'
27
+ types: [ labeled ]
27
28
  push:
28
29
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
29
30
  branches:
@@ -42,17 +43,28 @@ on:
42
43
  - 'CMakeLists.txt'
43
44
  - 'csrc/**'
44
45
 
46
+ # only cancel in-progress runs of the same workflow
47
+ concurrency:
48
+ group: ${{ github.workflow }}-${{ github.ref }}
49
+ cancel-in-progress: true
50
+
45
51
  jobs:
46
52
  build:
47
53
  name: vllm-ascend image build
54
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
55
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
48
56
  runs-on: >-
49
57
  ${{
50
58
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
51
59
  'ubuntu-latest' ||
52
60
  'ubuntu-24.04-arm'
53
61
  }}
62
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
54
63
  steps:
55
64
  - uses: actions/checkout@v4
65
+ with:
66
+ fetch-depth: 0
67
+ persist-credentials: false
56
68
 
57
69
  - name: Print
58
70
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -16,6 +16,5 @@ jobs:
16
16
  uses: eps1lon/actions-label-merge-conflict@v3
17
17
  with:
18
18
  dirtyLabel: "merge-conflicts"
19
- removeOnDirtyLabel: "ready"
20
19
  repoToken: "${{ secrets.GITHUB_TOKEN }}"
21
20
  commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."
@@ -11,7 +11,7 @@ jobs:
11
11
  pull-requests: write
12
12
  steps:
13
13
  - name: Label the PR
14
- uses: actions/labeler@v5
14
+ uses: actions/labeler@v6
15
15
  with:
16
16
  repo-token: ${{ secrets.GITHUB_TOKEN }}
17
17
  configuration-path: .github/labeler.yml
@@ -15,7 +15,7 @@
15
15
  # limitations under the License.
16
16
  #
17
17
 
18
- name: 'Benchmarks / Performance'
18
+ name: 'ascend test / performance'
19
19
  # This workflow runs nightly benchmarks for vllm-ascend.
20
20
 
21
21
  on:
@@ -51,7 +51,7 @@ jobs:
51
51
  strategy:
52
52
  matrix:
53
53
  include:
54
- - vllm_branch: v0.10.1.1
54
+ - vllm_branch: v0.11.0rc3
55
55
  vllm_ascend_branch: main
56
56
  vllm_use_v1: 1
57
57
  max-parallel: 1
@@ -2,6 +2,10 @@ name: pre-commit
2
2
 
3
3
  on:
4
4
  workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
5
9
 
6
10
  permissions:
7
11
  contents: read
@@ -12,7 +16,7 @@ jobs:
12
16
  steps:
13
17
  - name: Checkout vllm-project/vllm-ascend repo
14
18
  uses: actions/checkout@v4
15
- - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
19
+ - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
16
20
  with:
17
21
  python-version: "3.11"
18
22
  - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
@@ -22,6 +26,7 @@ jobs:
22
26
  with:
23
27
  repository: vllm-project/vllm
24
28
  path: ./vllm-empty
29
+ ref: ${{ inputs.vllm }}
25
30
  - name: Install vllm
26
31
  working-directory: vllm-empty
27
32
  run: |
@@ -35,3 +40,4 @@ jobs:
35
40
  SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
36
41
  with:
37
42
  extra_args: --all-files --hook-stage manual
43
+
@@ -50,7 +50,7 @@ jobs:
50
50
  lscpu
51
51
 
52
52
  - name: Set up Python ${{ matrix.python-version }}
53
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
53
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
54
54
  with:
55
55
  python-version: ${{ matrix.python-version }}
56
56
 
@@ -73,7 +73,7 @@ jobs:
73
73
 
74
74
  - name: Set up Python ${{ matrix.python-version }}
75
75
  if: startsWith(github.ref, 'refs/tags/')
76
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
76
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
77
77
  with:
78
78
  python-version: ${{ matrix.python-version }}
79
79
 
@@ -9,7 +9,7 @@ jobs:
9
9
  runs-on: ubuntu-latest
10
10
  steps:
11
11
  - name: Remind to run full CI on PR
12
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
12
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
13
13
  with:
14
14
  script: |
15
15
  github.rest.issues.createComment({
@@ -43,7 +43,7 @@ jobs:
43
43
  strategy:
44
44
  matrix:
45
45
  os: [linux-aarch64-a3-8]
46
- vllm_version: [v0.10.1.1, main]
46
+ vllm_version: [v0.11.0rc3]
47
47
  name: vLLM Ascend test
48
48
  runs-on: ${{ matrix.os }}
49
49
  container: