vllm-ascend 0.10.2rc1__tar.gz → 0.11.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (578)
  1. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
  2. vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_test_full.yaml → vllm_ascend-0.11.0rc0/.github/workflows/_e2e_test.yaml +54 -92
  3. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/accuracy_test.yaml +2 -2
  4. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/format_pr_body.yaml +2 -8
  5. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/label_merge_conflict.yml +0 -1
  6. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/labeler.yml +1 -1
  7. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/nightly_benchmarks.yaml +2 -2
  8. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/pre-commit.yml +6 -1
  9. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/release_code.yml +1 -1
  10. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/release_whl.yml +1 -1
  11. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_dist.yaml +1 -1
  12. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_doctest.yaml +1 -1
  13. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test.yaml +26 -128
  14. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test_310p.yaml +1 -1
  15. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full.yaml +79 -0
  16. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml +51 -0
  17. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +45 -0
  18. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile +1 -1
  19. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.310p +1 -1
  20. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.310p.openEuler +1 -1
  21. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.a3 +1 -1
  22. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.a3.openEuler +1 -1
  23. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.openEuler +1 -1
  24. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/PKG-INFO +3 -3
  25. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/README.md +2 -2
  26. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/README.zh.md +2 -2
  27. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/versioning_policy.md +2 -0
  28. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/conf.py +5 -5
  29. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -1
  30. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/faqs.md +19 -1
  31. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +2 -6
  32. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/index.md +2 -0
  33. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_qwen3vl.md +156 -0
  34. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_ray.md +182 -0
  35. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_qwen3_next.md +2 -2
  36. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/additional_config.md +13 -3
  37. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +94 -0
  38. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  39. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/index.md +1 -0
  40. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/quantization.md +3 -2
  41. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/release_notes.md +24 -0
  42. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/README.md +2 -6
  43. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +272 -0
  44. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/run_dp_template.sh +1 -1
  45. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_disaggregated_prefill_npu.py +1 -1
  46. vllm_ascend-0.11.0rc0/examples/offline_weight_load.py +326 -0
  47. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/run_dp_server.sh +1 -1
  48. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/common.sh +1 -1
  49. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/conftest.py +8 -1
  50. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/doctests/001-quickstart-test.sh +3 -3
  51. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/doctests/002-pip-binary-installation-test.sh +1 -1
  52. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/model_utils.py +6 -1
  53. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +6 -2
  54. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +2 -0
  55. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +2 -0
  56. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +2 -0
  57. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/accuracy.txt +1 -0
  58. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/report_template.md +15 -3
  59. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/test_lm_eval_correctness.py +7 -3
  60. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_expert_parallel.py +16 -6
  61. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_offline_inference_distributed.py +9 -3
  62. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_torchair_graph_mode.py +3 -0
  63. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_weight_loader.py +188 -0
  64. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +2 -2
  65. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/run_doctests.sh +0 -1
  66. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +2 -6
  67. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -4
  68. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -4
  69. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_guided_decoding.py +54 -23
  70. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +103 -0
  71. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_vlm.py +0 -4
  72. vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/singlecard/test_sampler.py +36 -0
  73. vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
  74. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_attention_v1.py +47 -10
  75. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_mla_v1.py +35 -1
  76. vllm_ascend-0.11.0rc0/tests/ut/compilation/test_acl_graph.py +720 -0
  77. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/core/test_schedule_config.py +4 -16
  78. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/core/test_scheduler.py +22 -1
  79. vllm_ascend-0.11.0rc0/tests/ut/eplb/adaptor/test_abstract_adaptor.py +73 -0
  80. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_abstract.py +31 -0
  81. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +98 -0
  82. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +99 -0
  83. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_factor.py +23 -0
  84. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +122 -0
  85. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_utils.py +79 -0
  86. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_mooncake_connector.py +28 -3
  87. vllm_ascend-0.11.0rc0/tests/ut/models/conftest.py +114 -0
  88. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_deepseek_mtp.py +7 -6
  89. vllm_ascend-0.11.0rc0/tests/ut/models/test_deepseek_v2.py +107 -0
  90. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_5_vl.py +5 -1
  91. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen3_moe.py +0 -30
  92. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_common_fused_moe.py +2 -51
  93. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +72 -1
  94. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_fused_ops.py +75 -129
  95. vllm_ascend-0.11.0rc0/tests/ut/ops/test_layernorm.py +161 -0
  96. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_linear.py +11 -20
  97. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_moe_comm_method.py +24 -4
  98. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_token_dispatcher.py +2 -99
  99. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_vocab_parallel_embedding.py +4 -0
  100. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_quant_config.py +15 -3
  101. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w4a8_dynamic.py +95 -46
  102. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_ascend_config.py +6 -29
  103. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_platform.py +2 -0
  104. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_utils.py +2 -2
  105. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -2
  106. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +1 -1
  107. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/ops/test_torchair_fused_moe.py +15 -3
  108. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +78 -45
  109. vllm_ascend-0.11.0rc0/tests/ut/torchair/test_torchair_attention.py +95 -0
  110. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/test_torchair_mla.py +17 -1
  111. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_model_runner_v1.py +39 -26
  112. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_worker_v1.py +80 -7
  113. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/__init__.py +2 -0
  114. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/_version.py +3 -3
  115. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ascend_config.py +18 -13
  116. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ascend_forward_context.py +34 -24
  117. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/attention_mask.py +33 -18
  118. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/attention_v1.py +125 -73
  119. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/mla_v1.py +78 -97
  120. vllm_ascend-0.11.0rc0/vllm_ascend/attention/sfa_v1.py +986 -0
  121. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/utils.py +37 -2
  122. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/compilation/acl_graph.py +74 -0
  123. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/core/schedule_config.py +29 -1
  124. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/core/scheduler.py +25 -9
  125. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/__init__.py +5 -0
  126. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_connector.py +457 -0
  127. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +202 -0
  128. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager/metadata.py +269 -0
  129. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +117 -4
  130. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/config_data.py +447 -0
  131. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/kv_transfer.py +251 -0
  132. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_engine.py +489 -0
  133. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store.py +88 -0
  134. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +484 -0
  135. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/mooncake_connector.py +57 -17
  136. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/envs.py +6 -1
  137. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/abstract_adaptor.py +44 -0
  138. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/vllm_adaptor.py +289 -0
  139. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +137 -0
  140. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_utils.py +135 -0
  141. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_worker.py +436 -0
  142. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_abstract.py +42 -0
  143. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +389 -0
  144. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +771 -0
  145. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_factory.py +33 -0
  146. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_flashlb.py +651 -0
  147. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_random.py +30 -0
  148. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/eplb_updator.py +205 -0
  149. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/utils.py +77 -0
  150. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/lora/punica_npu.py +3 -14
  151. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/lora/utils.py +33 -0
  152. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/__init__.py +24 -22
  153. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/deepseek_mtp.py +8 -23
  154. vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v2.py +666 -0
  155. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/layers/mla.py +49 -13
  156. vllm_ascend-0.11.0rc0/vllm_ascend/models/layers/sfa.py +233 -0
  157. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_5_vl.py +16 -17
  158. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_5_vl_without_padding.py +277 -19
  159. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_vl.py +17 -7
  160. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen3_moe.py +11 -130
  161. vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen3_next.py +676 -0
  162. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/casual_conv1d.py +539 -597
  163. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/common_fused_moe.py +111 -187
  164. vllm_ascend-0.11.0rc0/vllm_ascend/ops/fla.py +218 -0
  165. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/fused_moe.py +122 -240
  166. vllm_ascend-0.11.0rc0/vllm_ascend/ops/layernorm.py +159 -0
  167. vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear.py +367 -0
  168. vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear_op.py +459 -0
  169. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/__init__.py +0 -0
  170. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +459 -0
  171. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/moe_comm_method.py +77 -102
  172. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/moe_mlp.py +4 -4
  173. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/token_dispatcher.py +55 -61
  174. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/register_custom_ops.py +24 -15
  175. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/sigmoid_gating.py +1 -20
  176. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/vocab_parallel_embedding.py +13 -0
  177. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/__init__.py +12 -0
  178. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/__init__.py +5 -1
  179. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_config.py +313 -0
  180. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +4 -1
  181. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_multimodal_merge.py +58 -0
  182. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_transformers_utils.py +200 -0
  183. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/__init__.py +13 -1
  184. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_layer.py +202 -0
  185. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_selector.py +181 -0
  186. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attentionspec.py +110 -0
  187. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_triton.py +16 -0
  188. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_weight_loader.py +44 -0
  189. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/platform.py +70 -38
  190. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/__init__.py +0 -0
  191. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/quant_config.py +79 -4
  192. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w4a8_dynamic.py +69 -62
  193. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w8a8_dynamic.py +8 -17
  194. vllm_ascend-0.11.0rc0/vllm_ascend/sample/__init__.py +0 -0
  195. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/sampler.py +20 -8
  196. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/eagle_proposer.py +56 -26
  197. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/mtp_proposer.py +39 -12
  198. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/__init__.py +0 -0
  199. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models/__init__.py +0 -0
  200. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/qwen2.py +3 -4
  201. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/qwen3_moe.py +11 -5
  202. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +1 -5
  203. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +266 -32
  204. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_pangu_moe.py +9 -10
  205. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/__init__.py +0 -0
  206. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/shared_weight_layer.py +245 -0
  207. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_fused_moe.py +122 -61
  208. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/quantization/__init__.py +0 -0
  209. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +58 -49
  210. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +22 -9
  211. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_attention.py +18 -15
  212. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_mla.py +99 -113
  213. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_model_runner.py +68 -40
  214. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_sfa.py +1330 -0
  215. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_worker.py +22 -22
  216. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/utils.py +5 -0
  217. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/utils.py +46 -13
  218. vllm_ascend-0.11.0rc0/vllm_ascend/worker/__init__.py +0 -0
  219. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/block_table.py +1 -2
  220. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/model_runner_v1.py +918 -253
  221. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/npu_input_batch.py +22 -7
  222. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/worker_v1.py +71 -24
  223. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/PKG-INFO +3 -3
  224. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/SOURCES.txt +65 -5
  225. vllm_ascend-0.10.2rc1/tests/ut/models/test_deepseek_v2.py +0 -294
  226. vllm_ascend-0.10.2rc1/tests/ut/ops/test_ascend_forwad_context.py +0 -22
  227. vllm_ascend-0.10.2rc1/tests/ut/ops/test_layernorm.py +0 -93
  228. vllm_ascend-0.10.2rc1/vllm_ascend/models/deepseek_dbo.py +0 -1046
  229. vllm_ascend-0.10.2rc1/vllm_ascend/models/deepseek_v2.py +0 -965
  230. vllm_ascend-0.10.2rc1/vllm_ascend/models/deepseek_v3.py +0 -27
  231. vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3.py +0 -156
  232. vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3_next.py +0 -1361
  233. vllm_ascend-0.10.2rc1/vllm_ascend/ops/fla.py +0 -381
  234. vllm_ascend-0.10.2rc1/vllm_ascend/ops/layernorm.py +0 -116
  235. vllm_ascend-0.10.2rc1/vllm_ascend/ops/linear.py +0 -626
  236. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +0 -240
  237. vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_shared_fused_moe.py +0 -21
  238. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.gemini/config.yaml +0 -0
  239. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/Dockerfile.buildwheel +0 -0
  240. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  241. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  242. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  243. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  244. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  245. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  246. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  247. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  248. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  249. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  250. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  251. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  252. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/actionlint.yaml +0 -0
  253. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/dependabot.yml +0 -0
  254. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/format_pr_body.sh +0 -0
  255. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/labeler.yml +0 -0
  256. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_310p_openeuler.yml +0 -0
  257. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_310p_ubuntu.yml +0 -0
  258. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_a3_openeuler.yml +0 -0
  259. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_a3_ubuntu.yml +0 -0
  260. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_openeuler.yml +0 -0
  261. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_ubuntu.yml +0 -0
  262. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/actionlint.json +0 -0
  263. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/mypy.json +0 -0
  264. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/ruff.json +0 -0
  265. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/reminder_comment.yml +0 -0
  266. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test_pd.yaml +0 -0
  267. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.gitignore +0 -0
  268. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.pre-commit-config.yaml +0 -0
  269. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.readthedocs.yaml +0 -0
  270. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/CMakeLists.txt +0 -0
  271. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/CODE_OF_CONDUCT.md +0 -0
  272. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/CONTRIBUTING.md +0 -0
  273. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/DCO +0 -0
  274. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/LICENSE +0 -0
  275. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/README.md +0 -0
  276. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  277. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/requirements-bench.txt +0 -0
  278. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  279. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/perf_result_template.md +0 -0
  280. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  281. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/latency-tests.json +0 -0
  282. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/serving-tests.json +0 -0
  283. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/throughput-tests.json +0 -0
  284. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/cmake/utils.cmake +0 -0
  285. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/codecov.yml +0 -0
  286. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/collect_env.py +0 -0
  287. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/camem_allocator.cpp +0 -0
  288. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/bgmv_expand.cpp +0 -0
  289. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/bgmv_shrink.cpp +0 -0
  290. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  291. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  292. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/sgmv_expand.cpp +0 -0
  293. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/sgmv_shrink.cpp +0 -0
  294. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/types.h +0 -0
  295. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/utils.h +0 -0
  296. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/ops.h +0 -0
  297. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/torch_binding.cpp +0 -0
  298. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/torch_binding_meta.cpp +0 -0
  299. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/utils.h +0 -0
  300. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/Makefile +0 -0
  301. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/README.md +0 -0
  302. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/requirements-docs.txt +0 -0
  303. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/requirements-test.txt +0 -0
  304. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/_templates/sections/header.html +0 -0
  305. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  306. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  307. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/contributors.md +0 -0
  308. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/governance.md +0 -0
  309. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/user_stories/index.md +0 -0
  310. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/user_stories/llamafactory.md +0 -0
  311. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/contribution/index.md +0 -0
  312. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/contribution/testing.md +0 -0
  313. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -0
  314. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -0
  315. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -0
  316. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -0
  317. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  318. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/index.md +0 -0
  319. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  320. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  321. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  322. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  323. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/index.md +0 -0
  324. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  325. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  326. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/index.md +0 -0
  327. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/index.md +0 -0
  328. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  329. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  330. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  331. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/index.md +0 -0
  332. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/installation.md +0 -0
  333. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  334. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  335. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  336. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  337. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  338. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  339. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  340. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  341. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  342. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  343. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  344. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  345. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  346. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  347. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  348. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  349. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  350. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  351. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  352. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  353. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  354. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  355. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  356. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  357. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  358. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  359. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  360. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  361. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  362. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  363. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  364. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  365. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  366. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  367. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  368. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  369. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  370. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  371. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  372. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  373. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  374. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  375. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  376. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  377. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  378. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  379. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  380. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  381. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  382. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/quick_start.md +0 -0
  383. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node.md +0 -0
  384. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node_kimi.md +0 -0
  385. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node_pd_disaggregation.md +0 -0
  386. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu.md +0 -0
  387. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_moge.md +0 -0
  388. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_quantization.md +0 -0
  389. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  390. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_node_300i.md +0 -0
  391. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu.md +0 -0
  392. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_audio.md +0 -0
  393. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_multimodal.md +0 -0
  394. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  395. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
  396. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/env_vars.md +0 -0
  397. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/index.md +0 -0
  398. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  399. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  400. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/lora.md +0 -0
  401. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  402. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  403. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/index.md +0 -0
  404. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  405. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  406. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/gen_ranktable.py +0 -0
  407. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/gen_ranktable.sh +0 -0
  408. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -0
  409. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  410. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  411. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/eplb/eplb_deepseek.py +0 -0
  412. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/eplb/eplb_strategy.py +0 -0
  413. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/README.md +0 -0
  414. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/launch_online_dp.py +0 -0
  415. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_data_parallel.py +0 -0
  416. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_dualbatch_overlap_npu.py +0 -0
  417. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_embed.py +0 -0
  418. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_external_launcher.py +0 -0
  419. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_audio_language.py +0 -0
  420. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_npu.py +0 -0
  421. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_npu_tp2.py +0 -0
  422. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_sleep_mode_npu.py +0 -0
  423. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/prompt_embedding_inference.py +0 -0
  424. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/format.sh +0 -0
  425. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/mypy.ini +0 -0
  426. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/packages.txt +0 -0
  427. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/pyproject.toml +0 -0
  428. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/requirements-dev.txt +0 -0
  429. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/requirements-lint.txt +0 -0
  430. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/requirements.txt +0 -0
  431. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/setup.cfg +0 -0
  432. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/setup.py +0 -0
  433. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/__init__.py +0 -0
  434. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  435. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  436. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/__init__.py +0 -0
  437. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/conftest.py +0 -0
  438. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_data_parallel.py +0 -0
  439. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_external_launcher.py +0 -0
  440. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
  441. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
  442. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
  443. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_prefix_caching.py +0 -0
  444. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_qwen3_moe.py +0 -0
  445. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  446. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  447. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  448. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/prompts/example.txt +0 -0
  449. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/run_disagg_pd.sh +0 -0
  450. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/__init__.py +0 -0
  451. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/__init__.py +0 -0
  452. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_bgmv_expand.py +0 -0
  453. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +0 -0
  454. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_fused_moe.py +0 -0
  455. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  456. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
  457. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +0 -0
  458. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_aclgraph.py +0 -0
  459. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_ascend_scheduler.py +0 -0
  460. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_camem.py +0 -0
  461. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_chunked.py +0 -0
  462. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_embedding.py +0 -0
  463. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  464. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  465. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_quantization.py +0 -0
  466. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_sampler.py +0 -0
  467. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/utils.py +0 -0
  468. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/__init__.py +0 -0
  469. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_attention_mask.py +0 -0
  470. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/base.py +0 -0
  471. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/conftest.py +0 -0
  472. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/device_allocator/test_camem.py +0 -0
  473. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  474. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  475. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/test_communicator.py +0 -0
  476. /vllm_ascend-0.10.2rc1/tests/ut/models/__init__.py → /vllm_ascend-0.11.0rc0/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  477. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/test_parallel_state.py +0 -0
  478. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/fake_weight/config.json +0 -0
  479. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  480. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  481. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  482. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/utils.py +0 -0
  483. {vllm_ascend-0.10.2rc1/tests/ut/torchair → vllm_ascend-0.11.0rc0/tests/ut/models}/__init__.py +0 -0
  484. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
  485. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_vl.py +0 -0
  486. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_base.py +0 -0
  487. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_decorator.py +0 -0
  488. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_layers.py +0 -0
  489. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_metadata.py +0 -0
  490. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_ms_split.py +0 -0
  491. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/expert_map.json +0 -0
  492. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_activation.py +0 -0
  493. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_comm_utils.py +0 -0
  494. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  495. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_rotary_embedding.py +0 -0
  496. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  497. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  498. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_utils.py +0 -0
  499. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w8a8.py +0 -0
  500. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
  501. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
  502. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/test_rejection_sampler.py +0 -0
  503. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/test_sampler.py +0 -0
  504. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_envs.py +0 -0
  505. {vllm_ascend-0.10.2rc1/vllm_ascend/attention → vllm_ascend-0.11.0rc0/tests/ut/torchair}/__init__.py +0 -0
  506. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +0 -0
  507. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
  508. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/test_utils.py +0 -0
  509. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_input_batch.py +0 -0
  510. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/actionlint.sh +0 -0
  511. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/check_python_src_init.py +0 -0
  512. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/check_repo.sh +0 -0
  513. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/enforce_regex_import.py +0 -0
  514. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/mypy.sh +0 -0
  515. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/png-lint.sh +0 -0
  516. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/shellcheck.sh +0 -0
  517. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/sphinx-lint.sh +0 -0
  518. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/typos.toml +0 -0
  519. {vllm_ascend-0.10.2rc1/vllm_ascend/compilation → vllm_ascend-0.11.0rc0/vllm_ascend/attention}/__init__.py +0 -0
  520. {vllm_ascend-0.10.2rc1/vllm_ascend/core → vllm_ascend-0.11.0rc0/vllm_ascend/compilation}/__init__.py +0 -0
  521. {vllm_ascend-0.10.2rc1/vllm_ascend/device_allocator → vllm_ascend-0.11.0rc0/vllm_ascend/core}/__init__.py +0 -0
  522. {vllm_ascend-0.10.2rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.11.0rc0/vllm_ascend/device_allocator}/__init__.py +0 -0
  523. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/device_allocator/camem.py +0 -0
  524. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/communicator.py +0 -0
  525. {vllm_ascend-0.10.2rc1/vllm_ascend/lora → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
  526. {vllm_ascend-0.10.2rc1/vllm_ascend/models/layers → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  527. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  528. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  529. {vllm_ascend-0.10.2rc1/vllm_ascend/multistream → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake}/__init__.py +0 -0
  530. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/parallel_state.py +0 -0
  531. {vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe → vllm_ascend-0.11.0rc0/vllm_ascend/eplb}/__init__.py +0 -0
  532. {vllm_ascend-0.10.2rc1/vllm_ascend/quantization → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
  533. {vllm_ascend-0.10.2rc1/vllm_ascend/sample → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core}/__init__.py +0 -0
  534. {vllm_ascend-0.10.2rc1/vllm_ascend/torchair → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
  535. {vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models → vllm_ascend-0.11.0rc0/vllm_ascend/lora}/__init__.py +0 -0
  536. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/lora/lora_ops.py +0 -0
  537. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/meta_registration.py +0 -0
  538. /vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/__init__.py → /vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v3.py +0 -0
  539. {vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization → vllm_ascend-0.11.0rc0/vllm_ascend/models/layers}/__init__.py +0 -0
  540. {vllm_ascend-0.10.2rc1/vllm_ascend/worker → vllm_ascend-0.11.0rc0/vllm_ascend/multistream}/__init__.py +0 -0
  541. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/base.py +0 -0
  542. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/context.py +0 -0
  543. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/decorator.py +0 -0
  544. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/layers.py +0 -0
  545. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/metadata.py +0 -0
  546. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/ms_split.py +0 -0
  547. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/__init__.py +0 -0
  548. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/activation.py +0 -0
  549. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/attention.py +0 -0
  550. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  551. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/comm_utils.py +0 -0
  552. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/experts_selector.py +0 -0
  553. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/rotary_embedding.py +0 -0
  554. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/__init__.py +0 -0
  555. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -0
  556. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_main/__init__.py +0 -0
  557. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/__init__.py +0 -0
  558. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  559. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_logits.py +0 -0
  560. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  561. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  562. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/utils.py +0 -0
  563. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w8a8.py +0 -0
  564. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
  565. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
  566. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/rejection_sampler.py +0 -0
  567. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/__init__.py +0 -0
  568. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/interface.py +0 -0
  569. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/ngram_proposer.py +0 -0
  570. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
  571. {vllm_ascend-0.10.2rc1/vllm_ascend → vllm_ascend-0.11.0rc0/vllm_ascend/torchair}/ops/sequence_parallel.py +0 -0
  572. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_activation.py +0 -0
  573. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_layernorm.py +0 -0
  574. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +0 -0
  575. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  576. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/entry_points.txt +0 -0
  577. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/requires.txt +0 -0
  578. {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -24,6 +24,8 @@ body:
24
24
  value: >
25
25
  - [ ] Create a new issue for release feedback
26
26
 
27
+ - [ ] Upgrade vllm version to the new version for CI and Dockerfile
28
+
27
29
  - [ ] Write the release note PR.
28
30
 
29
31
  - [ ] Update the feedback issue link in docs/source/faqs.md
@@ -1,82 +1,27 @@
1
- #
2
- # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- # This file is a part of the vllm-ascend project.
16
- #
17
- name: 'test-full'
1
+ name: 'e2e test'
18
2
 
19
3
  on:
20
- pull_request:
21
- branches:
22
- - 'main'
23
- - '*-dev'
24
- types: [ labeled, synchronize ]
25
-
26
- # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
27
- # declared as "shell: bash -el {0}" on steps that need to be properly activated.
28
- # It's used to activate ascend-toolkit environment variables.
29
- defaults:
30
- run:
31
- shell: bash -el {0}
32
-
33
- # only cancel in-progress runs of the same workflow
34
- # and ignore the lint / 1 card / 4 cards test type
35
- concurrency:
36
- group: ${{ github.workflow }}-${{ github.ref }}
37
- cancel-in-progress: true
4
+ workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
9
+ runner:
10
+ required: true
11
+ type: string
12
+ image:
13
+ required: true
14
+ type: string
15
+ type:
16
+ required: true
17
+ type: string
38
18
 
39
19
  jobs:
40
- changes:
41
- runs-on: ubuntu-latest
42
- if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
43
- outputs:
44
- e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
45
- ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
46
- steps:
47
- - uses: actions/checkout@v4
48
- - uses: dorny/paths-filter@v3
49
- id: filter
50
- with:
51
- filters: |
52
- e2e_tracker:
53
- - '.github/workflows/vllm_ascend_test.yaml'
54
- - 'vllm_ascend/**'
55
- - 'csrc/**'
56
- - 'cmake/**'
57
- - 'tests/e2e/**'
58
- - 'CMakeLists.txt'
59
- - 'setup.py'
60
- - 'requirements.txt'
61
- - 'requirements-dev.txt'
62
- - 'requirements-lint.txt'
63
- - 'packages.txt'
64
- ut_tracker:
65
- - 'tests/ut/**'
66
-
67
- e2e-full:
68
- # only trigger full test when pull request is approved
69
- needs: [changes]
70
- if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
71
- strategy:
72
- max-parallel: 2
73
- matrix:
74
- os: [linux-aarch64-a2-1]
75
- vllm_version: [v0.10.2]
76
- name: singlecard e2e test - full
77
- runs-on: ${{ matrix.os }}
20
+ e2e:
21
+ name: singlecard
22
+ runs-on: ${{ inputs.runner }}-1
78
23
  container:
79
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
24
+ image: ${{ inputs.image }}
80
25
  env:
81
26
  VLLM_LOGGING_LEVEL: ERROR
82
27
  VLLM_USE_MODELSCOPE: True
@@ -106,8 +51,9 @@ jobs:
106
51
  uses: actions/checkout@v4
107
52
  with:
108
53
  repository: vllm-project/vllm
109
- ref: ${{ matrix.vllm_version }}
54
+ ref: ${{ inputs.vllm }}
110
55
  path: ./vllm-empty
56
+ fetch-depth: 1
111
57
 
112
58
  - name: Install vllm-project/vllm from source
113
59
  working-directory: ./vllm-empty
@@ -121,10 +67,23 @@ jobs:
121
67
  pip install -r requirements-dev.txt
122
68
  pip install -v -e .
123
69
 
70
+ - name: Run vllm-project/vllm-ascend test
71
+ env:
72
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
73
+ VLLM_USE_MODELSCOPE: True
74
+ PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
75
+ if: ${{ inputs.type == 'light' }}
76
+ run: |
77
+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
78
+ pytest -sv tests/e2e/singlecard/test_quantization.py
79
+ pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
80
+
124
81
  - name: Run e2e test
125
82
  env:
126
83
  VLLM_WORKER_MULTIPROC_METHOD: spawn
127
84
  VLLM_USE_MODELSCOPE: True
85
+ PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
86
+ if: ${{ inputs.type == 'full' }}
128
87
  run: |
129
88
  # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
130
89
  # the test separately.
@@ -135,7 +94,7 @@ jobs:
135
94
  pytest -sv tests/e2e/singlecard/test_chunked.py
136
95
  pytest -sv tests/e2e/singlecard/test_embedding.py
137
96
  pytest -sv tests/e2e/singlecard/test_guided_decoding.py
138
- #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
97
+ pytest -sv tests/e2e/singlecard/test_ilama_lora.py
139
98
  pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
140
99
  pytest -sv tests/e2e/singlecard/test_quantization.py
141
100
  pytest -sv tests/e2e/singlecard/test_sampler.py
@@ -144,23 +103,16 @@ jobs:
144
103
  # ------------------------------------ v1 spec decode test ------------------------------------ #
145
104
  pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
146
105
  pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
147
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
106
+ # Fix me: OOM error
107
+ #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
148
108
 
149
109
  pytest -sv tests/e2e/singlecard/ops/
150
110
 
151
- e2e-2-cards-full:
152
- # only trigger full test when pull request is approved
153
- needs: [changes]
154
- if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
155
- strategy:
156
- max-parallel: 2
157
- matrix:
158
- os: [linux-aarch64-a2-2]
159
- vllm_version: [v0.10.2]
160
- name: multicard e2e test - full
161
- runs-on: ${{ matrix.os }}
111
+ e2e-2-cards:
112
+ name: multicard
113
+ runs-on: ${{ inputs.runner }}-2
162
114
  container:
163
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
115
+ image: ${{ inputs.image }}
164
116
  env:
165
117
  VLLM_LOGGING_LEVEL: ERROR
166
118
  VLLM_USE_MODELSCOPE: True
@@ -190,8 +142,9 @@ jobs:
190
142
  uses: actions/checkout@v4
191
143
  with:
192
144
  repository: vllm-project/vllm
193
- ref: ${{ matrix.vllm_version }}
145
+ ref: ${{ inputs.vllm }}
194
146
  path: ./vllm-empty
147
+ fetch-depth: 1
195
148
 
196
149
  - name: Install vllm-project/vllm from source
197
150
  working-directory: ./vllm-empty
@@ -205,17 +158,26 @@ jobs:
205
158
  pip install -r requirements-dev.txt
206
159
  pip install -v -e .
207
160
 
208
- - name: Run vllm-project/vllm-ascend test
161
+ - name: Run vllm-project/vllm-ascend test (light)
162
+ env:
163
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
164
+ VLLM_USE_MODELSCOPE: True
165
+ if: ${{ inputs.type == 'light' }}
166
+ run: |
167
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
168
+
169
+ - name: Run vllm-project/vllm-ascend test (full)
209
170
  env:
210
171
  VLLM_WORKER_MULTIPROC_METHOD: spawn
211
172
  VLLM_USE_MODELSCOPE: True
173
+ if: ${{ inputs.type == 'full' }}
212
174
  run: |
213
175
  pytest -sv tests/e2e/multicard/test_data_parallel.py
214
176
  pytest -sv tests/e2e/multicard/test_expert_parallel.py
215
177
  # external_launcher test is not stable enough. Fix it later
216
178
  # pytest -sv tests/e2e/multicard/test_external_launcher.py
217
179
  pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
218
- #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
180
+ pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
219
181
 
220
182
  # To avoid oom, we need to run the test in a single process.
221
183
  pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
@@ -19,7 +19,7 @@
19
19
  # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
20
20
  # 2. workflow_dispatch with models input
21
21
  # See detail rule in strategy.matrix note
22
- name: Benchmarks / accuracy
22
+ name: ascend test / accuracy
23
23
 
24
24
  on:
25
25
  schedule:
@@ -112,7 +112,7 @@ jobs:
112
112
  uses: actions/checkout@v4
113
113
  with:
114
114
  repository: vllm-project/vllm
115
- ref: v0.10.2
115
+ ref: v0.11.0rc3
116
116
  path: ./vllm-empty
117
117
 
118
118
  - name: Install vllm-project/vllm from source
@@ -33,23 +33,17 @@ jobs:
33
33
  runs-on: ubuntu-latest
34
34
 
35
35
  steps:
36
- - name: Checkout vllm-project/vllm repo
37
- uses: actions/checkout@v4
38
- with:
39
- repository: vllm-project/vllm
40
- path: ./vllm-empty
41
36
 
42
37
  - name: Get vLLM version
43
- working-directory: ./vllm-empty
44
38
  run: |
45
- VLLM_COMMIT=$(git rev-parse HEAD)
39
+ VLLM_COMMIT=releases/v0.11.0
46
40
  echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
41
 
48
42
  - name: Checkout repository
49
43
  uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
50
44
 
51
45
  - name: Set up Python
52
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
46
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
53
47
 
54
48
  - name: Get vLLM release version
55
49
  run: |
@@ -16,6 +16,5 @@ jobs:
16
16
  uses: eps1lon/actions-label-merge-conflict@v3
17
17
  with:
18
18
  dirtyLabel: "merge-conflicts"
19
- removeOnDirtyLabel: "ready"
20
19
  repoToken: "${{ secrets.GITHUB_TOKEN }}"
21
20
  commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."
@@ -11,7 +11,7 @@ jobs:
11
11
  pull-requests: write
12
12
  steps:
13
13
  - name: Label the PR
14
- uses: actions/labeler@v5
14
+ uses: actions/labeler@v6
15
15
  with:
16
16
  repo-token: ${{ secrets.GITHUB_TOKEN }}
17
17
  configuration-path: .github/labeler.yml
@@ -15,7 +15,7 @@
15
15
  # limitations under the License.
16
16
  #
17
17
 
18
- name: 'Benchmarks / Performance'
18
+ name: 'ascend test / performance'
19
19
  # This workflow runs nightly benchmarks for vllm-ascend.
20
20
 
21
21
  on:
@@ -51,7 +51,7 @@ jobs:
51
51
  strategy:
52
52
  matrix:
53
53
  include:
54
- - vllm_branch: v0.10.2
54
+ - vllm_branch: v0.11.0rc3
55
55
  vllm_ascend_branch: main
56
56
  vllm_use_v1: 1
57
57
  max-parallel: 1
@@ -2,6 +2,10 @@ name: pre-commit
2
2
 
3
3
  on:
4
4
  workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
5
9
 
6
10
  permissions:
7
11
  contents: read
@@ -12,7 +16,7 @@ jobs:
12
16
  steps:
13
17
  - name: Checkout vllm-project/vllm-ascend repo
14
18
  uses: actions/checkout@v4
15
- - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
19
+ - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
16
20
  with:
17
21
  python-version: "3.11"
18
22
  - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
@@ -22,6 +26,7 @@ jobs:
22
26
  with:
23
27
  repository: vllm-project/vllm
24
28
  path: ./vllm-empty
29
+ ref: ${{ inputs.vllm }}
25
30
  - name: Install vllm
26
31
  working-directory: vllm-empty
27
32
  run: |
@@ -50,7 +50,7 @@ jobs:
50
50
  lscpu
51
51
 
52
52
  - name: Set up Python ${{ matrix.python-version }}
53
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
53
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
54
54
  with:
55
55
  python-version: ${{ matrix.python-version }}
56
56
 
@@ -73,7 +73,7 @@ jobs:
73
73
 
74
74
  - name: Set up Python ${{ matrix.python-version }}
75
75
  if: startsWith(github.ref, 'refs/tags/')
76
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
76
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
77
77
  with:
78
78
  python-version: ${{ matrix.python-version }}
79
79
 
@@ -43,7 +43,7 @@ jobs:
43
43
  strategy:
44
44
  matrix:
45
45
  os: [linux-aarch64-a3-8]
46
- vllm_version: [v0.10.2]
46
+ vllm_version: [v0.11.0rc3]
47
47
  name: vLLM Ascend test
48
48
  runs-on: ${{ matrix.os }}
49
49
  container:
@@ -15,7 +15,7 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- name: 'e2e test / doctest'
18
+ name: 'ascend test / doctest'
19
19
 
20
20
  on:
21
21
  workflow_dispatch:
@@ -15,7 +15,7 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- name: 'test'
18
+ name: 'ascend test'
19
19
 
20
20
  on:
21
21
  push:
@@ -41,9 +41,10 @@ concurrency:
41
41
  jobs:
42
42
  lint:
43
43
  uses: ./.github/workflows/pre-commit.yml
44
+ with:
45
+ vllm: releases/v0.11.0
44
46
 
45
47
  changes:
46
- if: github.event_name == 'pull_request'
47
48
  runs-on: ubuntu-latest
48
49
  outputs:
49
50
  e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
@@ -82,7 +83,7 @@ jobs:
82
83
  VLLM_USE_MODELSCOPE: True
83
84
  strategy:
84
85
  matrix:
85
- vllm_version: [v0.10.2]
86
+ vllm_version: [releases/v0.11.0, v0.11.0rc3]
86
87
  steps:
87
88
  - name: Install packages
88
89
  run: |
@@ -118,10 +119,20 @@ jobs:
118
119
  TORCH_DEVICE_BACKEND_AUTOLOAD: 0
119
120
  run: |
120
121
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
121
- pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut --ignore=tests/ut/test_platform.py --ignore=tests/ut/ops/test_vocab_parallel_embedding.py
122
+ pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
123
+ --ignore=tests/ut/test_platform.py \
124
+ --ignore=tests/ut/patch/worker/patch_common/test_patch_minicpm.py \
125
+ --ignore=tests/ut/core/test_scheduler.py \
126
+ --ignore=tests/ut/kv_connector/test_llmdatadist_connector.py \
127
+ --ignore=tests/ut/kv_connector/test_mooncake_connector.py \
128
+ --ignore=tests/ut/kv_connector/test_remote_decode_lifecycle.py \
129
+ --ignore=tests/ut/kv_connector/test_remote_prefill_lifecycle.py \
130
+ --ignore=tests/ut/torchair/models/test_torchair_deepseek_v2.py \
131
+ --ignore=tests/ut/torchair/test_utils.py
122
132
 
123
133
  - name: Upload coverage to Codecov
124
- if: ${{ matrix.vllm_version == 'main' }}
134
+ # only upload coverage when commits are merged into main
135
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
125
136
  uses: codecov/codecov-action@v5
126
137
  env:
127
138
  CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -131,130 +142,17 @@ jobs:
131
142
  verbose: true
132
143
 
133
144
  e2e-light:
145
+ name: e2e-light
146
+ strategy:
147
+ matrix:
148
+ vllm_version: [releases/v0.11.0, v0.11.0rc3]
149
+ # Note (yikun): If CI resources are limited, we can split this job into two chained jobs
134
150
  needs: [lint, changes]
135
151
  # only trigger the e2e test on pull requests, after lint has passed and when the change is e2e-related.
136
152
  if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
137
- strategy:
138
- max-parallel: 2
139
- matrix:
140
- os: [linux-aarch64-a2-1]
141
- vllm_version: [v0.10.2]
142
- name: singlecard e2e test - light
143
- runs-on: ${{ matrix.os }}
144
- container:
145
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
146
- env:
147
- VLLM_LOGGING_LEVEL: ERROR
148
- VLLM_USE_MODELSCOPE: True
149
- steps:
150
- - name: Check npu and CANN info
151
- run: |
152
- npu-smi info
153
- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
154
-
155
- - name: Config mirrors
156
- run: |
157
- sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
158
- pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
159
- pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
160
- apt-get update -y
161
- apt install git -y
162
-
163
- - name: Checkout vllm-project/vllm-ascend repo
164
- uses: actions/checkout@v4
165
-
166
- - name: Install system dependencies
167
- run: |
168
- apt-get -y install `cat packages.txt`
169
- apt-get -y install gcc g++ cmake libnuma-dev
170
-
171
- - name: Checkout vllm-project/vllm repo
172
- uses: actions/checkout@v4
173
- with:
174
- repository: vllm-project/vllm
175
- ref: ${{ matrix.vllm_version }}
176
- path: ./vllm-empty
177
-
178
- - name: Install vllm-project/vllm from source
179
- working-directory: ./vllm-empty
180
- run: |
181
- VLLM_TARGET_DEVICE=empty pip install -e .
182
-
183
- - name: Install vllm-project/vllm-ascend
184
- env:
185
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
186
- run: |
187
- pip install -r requirements-dev.txt
188
- pip install -v -e .
189
-
190
- - name: Run e2e test
191
- env:
192
- VLLM_WORKER_MULTIPROC_METHOD: spawn
193
- VLLM_USE_MODELSCOPE: True
194
- run: |
195
- pytest -sv tests/e2e/singlecard/test_aclgraph.py
196
- pytest -sv tests/e2e/singlecard/test_quantization.py
197
- pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
198
-
199
- e2e-2-cards-light:
200
- needs: [e2e-light]
201
- if: ${{ needs.e2e-light.result == 'success' }}
202
- strategy:
203
- max-parallel: 2
204
- matrix:
205
- os: [linux-aarch64-a2-2]
206
- vllm_version: [v0.10.2]
207
- name: multicard e2e test - light
208
- runs-on: ${{ matrix.os }}
209
- container:
153
+ uses: ./.github/workflows/_e2e_test.yaml
154
+ with:
155
+ vllm: ${{ matrix.vllm_version }}
156
+ runner: linux-aarch64-a2
210
157
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
211
- env:
212
- VLLM_LOGGING_LEVEL: ERROR
213
- VLLM_USE_MODELSCOPE: True
214
- steps:
215
- - name: Check npu and CANN info
216
- run: |
217
- npu-smi info
218
- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
219
-
220
- - name: Config mirrors
221
- run: |
222
- sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
223
- pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
224
- pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
225
- apt-get update -y
226
- apt install git -y
227
-
228
- - name: Checkout vllm-project/vllm-ascend repo
229
- uses: actions/checkout@v4
230
-
231
- - name: Install system dependencies
232
- run: |
233
- apt-get -y install `cat packages.txt`
234
- apt-get -y install gcc g++ cmake libnuma-dev
235
-
236
- - name: Checkout vllm-project/vllm repo
237
- uses: actions/checkout@v4
238
- with:
239
- repository: vllm-project/vllm
240
- ref: ${{ matrix.vllm_version }}
241
- path: ./vllm-empty
242
-
243
- - name: Install vllm-project/vllm from source
244
- working-directory: ./vllm-empty
245
- run: |
246
- VLLM_TARGET_DEVICE=empty pip install -e .
247
-
248
- - name: Install vllm-project/vllm-ascend
249
- env:
250
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
251
- run: |
252
- pip install -r requirements-dev.txt
253
- pip install -v -e .
254
-
255
- - name: Run vllm-project/vllm-ascend test
256
- env:
257
- VLLM_WORKER_MULTIPROC_METHOD: spawn
258
- VLLM_USE_MODELSCOPE: True
259
- run: |
260
- pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
158
+ type: light
@@ -53,7 +53,7 @@ jobs:
53
53
  max-parallel: 2
54
54
  matrix:
55
55
  os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
56
- vllm_version: [v0.10.2]
56
+ vllm_version: [v0.11.0rc3]
57
57
  name: 310p e2e test
58
58
  runs-on: ${{ matrix.os }}
59
59
  container:
@@ -0,0 +1,79 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+ name: 'ascend test / full'
18
+
19
+ on:
20
+ pull_request:
21
+ branches:
22
+ - 'main'
23
+ - '*-dev'
24
+ types: [ labeled, synchronize ]
25
+
26
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
27
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
28
+ # It's used to activate ascend-toolkit environment variables.
29
+ defaults:
30
+ run:
31
+ shell: bash -el {0}
32
+
33
+ # only cancel in-progress runs of the same workflow
34
+ # and ignore the lint / 1 card / 4 cards test type
35
+ concurrency:
36
+ group: ${{ github.workflow }}-${{ github.ref }}
37
+ cancel-in-progress: true
38
+
39
+ jobs:
40
+ changes:
41
+ runs-on: ubuntu-latest
42
+ if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
43
+ outputs:
44
+ e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
45
+ ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ - uses: dorny/paths-filter@v3
49
+ id: filter
50
+ with:
51
+ filters: |
52
+ e2e_tracker:
53
+ - '.github/workflows/vllm_ascend_test.yaml'
54
+ - 'vllm_ascend/**'
55
+ - 'csrc/**'
56
+ - 'cmake/**'
57
+ - 'tests/e2e/**'
58
+ - 'CMakeLists.txt'
59
+ - 'setup.py'
60
+ - 'requirements.txt'
61
+ - 'requirements-dev.txt'
62
+ - 'requirements-lint.txt'
63
+ - 'packages.txt'
64
+ ut_tracker:
65
+ - 'tests/ut/**'
66
+
67
+ e2e-test:
68
+ name: e2e-full
69
+ strategy:
70
+ matrix:
71
+ vllm_version: [releases/v0.11.0, v0.11.0rc3]
72
+ needs: [changes]
73
+ if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
74
+ uses: ./.github/workflows/_e2e_test.yaml
75
+ with:
76
+ vllm: ${{ matrix.vllm_version }}
77
+ runner: linux-aarch64-a2
78
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
79
+ type: full