vllm-ascend 0.11.0rc0__tar.gz → 0.11.0rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (887) hide show
  1. vllm_ascend-0.11.0rc2/.github/Dockerfile.buildwheel +45 -0
  2. vllm_ascend-0.11.0rc2/.github/actionlint.yaml +21 -0
  3. vllm_ascend-0.11.0rc2/.github/workflows/_accuracy_test.yaml +175 -0
  4. vllm_ascend-0.11.0rc2/.github/workflows/_e2e_test.yaml +199 -0
  5. vllm_ascend-0.11.0rc2/.github/workflows/accuracy_test.yaml +72 -0
  6. vllm_ascend-0.11.0rc2/.github/workflows/format_pr_body.yaml +57 -0
  7. vllm_ascend-0.11.0rc2/.github/workflows/multi_node_test.yaml +118 -0
  8. vllm_ascend-0.11.0rc2/.github/workflows/nightly_benchmarks.yaml +206 -0
  9. vllm_ascend-0.11.0rc2/.github/workflows/release_whl.yml +125 -0
  10. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_dist.yaml +100 -0
  11. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_test.yaml +149 -0
  12. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_test_310p.yaml +117 -0
  13. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_test_full.yaml +80 -0
  14. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +45 -0
  15. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_test_models.yaml +177 -0
  16. vllm_ascend-0.11.0rc2/.github/workflows/vllm_ascend_test_pd.yaml +112 -0
  17. vllm_ascend-0.11.0rc2/.pre-commit-config.yaml +151 -0
  18. vllm_ascend-0.11.0rc2/CMakeLists.txt +111 -0
  19. vllm_ascend-0.11.0rc2/Dockerfile +60 -0
  20. vllm_ascend-0.11.0rc2/Dockerfile.310p +61 -0
  21. vllm_ascend-0.11.0rc2/Dockerfile.310p.openEuler +59 -0
  22. vllm_ascend-0.11.0rc2/Dockerfile.a3 +60 -0
  23. vllm_ascend-0.11.0rc2/Dockerfile.a3.openEuler +58 -0
  24. vllm_ascend-0.11.0rc2/Dockerfile.openEuler +58 -0
  25. vllm_ascend-0.11.0rc2/PKG-INFO +142 -0
  26. vllm_ascend-0.11.0rc2/README.md +91 -0
  27. vllm_ascend-0.11.0rc2/README.zh.md +90 -0
  28. vllm_ascend-0.11.0rc2/csrc/camem_allocator.cpp +347 -0
  29. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_host/mla_preprocess.h +698 -0
  30. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +95 -0
  31. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/common.h +25 -0
  32. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/common_func.h +121 -0
  33. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/hardware.h +36 -0
  34. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterator.h +92 -0
  35. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +162 -0
  36. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +89 -0
  37. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +228 -0
  38. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +42 -0
  39. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +71 -0
  40. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +39 -0
  41. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +36 -0
  42. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +310 -0
  43. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +44 -0
  44. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +395 -0
  45. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/layout.h +18 -0
  46. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/mem.h +82 -0
  47. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/mma.h +67 -0
  48. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +38 -0
  49. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/simd.h +274 -0
  50. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/kernel/utils.h +69 -0
  51. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/mla_preprocess.h +114 -0
  52. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +295 -0
  53. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +2914 -0
  54. vllm_ascend-0.11.0rc2/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +2503 -0
  55. vllm_ascend-0.11.0rc2/csrc/ops.h +161 -0
  56. vllm_ascend-0.11.0rc2/csrc/torch_binding.cpp +514 -0
  57. vllm_ascend-0.11.0rc2/csrc/torch_binding_meta.cpp +136 -0
  58. vllm_ascend-0.11.0rc2/docs/source/community/contributors.md +171 -0
  59. vllm_ascend-0.11.0rc2/docs/source/community/governance.md +48 -0
  60. vllm_ascend-0.11.0rc2/docs/source/community/user_stories/index.md +19 -0
  61. vllm_ascend-0.11.0rc2/docs/source/community/user_stories/llamafactory.md +19 -0
  62. vllm_ascend-0.11.0rc2/docs/source/community/versioning_policy.md +135 -0
  63. vllm_ascend-0.11.0rc2/docs/source/conf.py +142 -0
  64. vllm_ascend-0.11.0rc2/docs/source/developer_guide/contribution/index.md +111 -0
  65. vllm_ascend-0.11.0rc2/docs/source/developer_guide/contribution/testing.md +285 -0
  66. vllm_ascend-0.11.0rc2/docs/source/developer_guide/evaluation/using_evalscope.md +175 -0
  67. vllm_ascend-0.11.0rc2/docs/source/developer_guide/evaluation/using_lm_eval.md +300 -0
  68. vllm_ascend-0.11.0rc2/docs/source/developer_guide/evaluation/using_opencompass.md +123 -0
  69. vllm_ascend-0.11.0rc2/docs/source/developer_guide/feature_guide/ACL_Graph.md +102 -0
  70. vllm_ascend-0.11.0rc2/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +260 -0
  71. vllm_ascend-0.11.0rc2/docs/source/developer_guide/feature_guide/index.md +11 -0
  72. vllm_ascend-0.11.0rc2/docs/source/developer_guide/feature_guide/patch.md +75 -0
  73. vllm_ascend-0.11.0rc2/docs/source/developer_guide/modeling/adding_a_new_model.md +258 -0
  74. vllm_ascend-0.11.0rc2/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
  75. vllm_ascend-0.11.0rc2/docs/source/developer_guide/performance/optimization_and_tuning.md +183 -0
  76. vllm_ascend-0.11.0rc2/docs/source/developer_guide/performance/performance_benchmark.md +194 -0
  77. vllm_ascend-0.11.0rc2/docs/source/developer_guide/performance/profile_execute_duration.md +40 -0
  78. vllm_ascend-0.11.0rc2/docs/source/faqs.md +216 -0
  79. vllm_ascend-0.11.0rc2/docs/source/installation.md +287 -0
  80. vllm_ascend-0.11.0rc2/docs/source/quick_start.md +185 -0
  81. vllm_ascend-0.11.0rc2/docs/source/tutorials/index.md +24 -0
  82. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi-node_dsv3.2.md +405 -0
  83. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_node.md +212 -0
  84. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_node_kimi.md +158 -0
  85. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md +244 -0
  86. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md +616 -0
  87. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_node_qwen3vl.md +165 -0
  88. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_node_ray.md +182 -0
  89. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_npu.md +107 -0
  90. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_npu_moge.md +242 -0
  91. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_npu_quantization.md +137 -0
  92. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_npu_qwen3_moe.md +109 -0
  93. vllm_ascend-0.11.0rc2/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
  94. vllm_ascend-0.11.0rc2/docs/source/tutorials/single_node_300i.md +408 -0
  95. vllm_ascend-0.11.0rc2/docs/source/tutorials/single_npu.md +202 -0
  96. vllm_ascend-0.11.0rc2/docs/source/tutorials/single_npu_audio.md +122 -0
  97. vllm_ascend-0.11.0rc2/docs/source/tutorials/single_npu_multimodal.md +192 -0
  98. vllm_ascend-0.11.0rc2/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
  99. vllm_ascend-0.11.0rc2/docs/source/tutorials/single_npu_qwen3_quantization.md +133 -0
  100. vllm_ascend-0.11.0rc2/docs/source/user_guide/configuration/additional_config.md +116 -0
  101. vllm_ascend-0.11.0rc2/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +98 -0
  102. vllm_ascend-0.11.0rc2/docs/source/user_guide/feature_guide/graph_mode.md +78 -0
  103. vllm_ascend-0.11.0rc2/docs/source/user_guide/feature_guide/lora.md +23 -0
  104. vllm_ascend-0.11.0rc2/docs/source/user_guide/feature_guide/quantization.md +125 -0
  105. vllm_ascend-0.11.0rc2/docs/source/user_guide/feature_guide/sleep_mode.md +114 -0
  106. vllm_ascend-0.11.0rc2/docs/source/user_guide/feature_guide/structured_output.md +163 -0
  107. vllm_ascend-0.11.0rc2/docs/source/user_guide/release_notes.md +689 -0
  108. vllm_ascend-0.11.0rc2/docs/source/user_guide/support_matrix/index.md +10 -0
  109. vllm_ascend-0.11.0rc2/docs/source/user_guide/support_matrix/supported_features.md +45 -0
  110. vllm_ascend-0.11.0rc2/docs/source/user_guide/support_matrix/supported_models.md +83 -0
  111. vllm_ascend-0.11.0rc2/examples/disaggregated_prefill_v1/README.md +242 -0
  112. vllm_ascend-0.11.0rc2/examples/disaggregated_prefill_v1/gen_ranktable.py +141 -0
  113. vllm_ascend-0.11.0rc2/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +586 -0
  114. vllm_ascend-0.11.0rc2/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +666 -0
  115. vllm_ascend-0.11.0rc2/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
  116. vllm_ascend-0.11.0rc2/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +278 -0
  117. vllm_ascend-0.11.0rc2/examples/offline_data_parallel.py +257 -0
  118. vllm_ascend-0.11.0rc2/examples/offline_external_launcher.py +330 -0
  119. vllm_ascend-0.11.0rc2/pyproject.toml +35 -0
  120. vllm_ascend-0.11.0rc2/requirements-dev.txt +20 -0
  121. vllm_ascend-0.11.0rc2/requirements.txt +28 -0
  122. vllm_ascend-0.11.0rc2/setup.py +399 -0
  123. vllm_ascend-0.11.0rc2/tests/e2e/conftest.py +589 -0
  124. vllm_ascend-0.11.0rc2/tests/e2e/doctests/002-pip-binary-installation-test.sh +74 -0
  125. vllm_ascend-0.11.0rc2/tests/e2e/model_utils.py +74 -0
  126. vllm_ascend-0.11.0rc2/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +17 -0
  127. vllm_ascend-0.11.0rc2/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +11 -0
  128. vllm_ascend-0.11.0rc2/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +10 -0
  129. vllm_ascend-0.11.0rc2/tests/e2e/models/configs/Qwen3-8B.yaml +11 -0
  130. vllm_ascend-0.11.0rc2/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +12 -0
  131. vllm_ascend-0.11.0rc2/tests/e2e/models/configs/accuracy.txt +8 -0
  132. vllm_ascend-0.11.0rc2/tests/e2e/models/report_template.md +34 -0
  133. vllm_ascend-0.11.0rc2/tests/e2e/models/test_lm_eval_correctness.py +157 -0
  134. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_data_parallel.py +73 -0
  135. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_expert_parallel.py +42 -0
  136. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_external_launcher.py +243 -0
  137. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_full_graph_mode.py +72 -0
  138. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_ilama_lora_tp2.py +23 -0
  139. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_offline_inference_distributed.py +228 -0
  140. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_pipeline_parallel.py +47 -0
  141. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_prefix_caching.py +148 -0
  142. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_single_request_aclgraph.py +84 -0
  143. vllm_ascend-0.11.0rc2/tests/e2e/multicard/test_weight_loader.py +109 -0
  144. vllm_ascend-0.11.0rc2/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py +106 -0
  145. vllm_ascend-0.11.0rc2/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +110 -0
  146. vllm_ascend-0.11.0rc2/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +104 -0
  147. vllm_ascend-0.11.0rc2/tests/e2e/nightly/models/test_qwen3_32b.py +99 -0
  148. vllm_ascend-0.11.0rc2/tests/e2e/nightly/models/test_qwen3_32b_int8.py +118 -0
  149. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +126 -0
  150. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +76 -0
  151. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/config/multi_node_config.py +207 -0
  152. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/config/utils.py +95 -0
  153. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/scripts/lws.yaml +132 -0
  154. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/scripts/run.sh +145 -0
  155. vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/test_multi_node.py +30 -0
  156. vllm_ascend-0.11.0rc2/tests/e2e/pd_disaggreate/run_edge_case_test.sh +139 -0
  157. vllm_ascend-0.11.0rc2/tests/e2e/pd_disaggreate/setup_pd.sh +134 -0
  158. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/ops/test_fused_moe.py +341 -0
  159. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/ops/test_mla_preprocess.py +108 -0
  160. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +110 -0
  161. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +106 -0
  162. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +147 -0
  163. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_aclgraph.py +203 -0
  164. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_aclgraph_mem.py +100 -0
  165. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_ascend_scheduler.py +113 -0
  166. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_bge_model.py +49 -0
  167. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_camem.py +99 -0
  168. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_chunked.py +82 -0
  169. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_embedding.py +49 -0
  170. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_embedding_aclgraph.py +55 -0
  171. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_guided_decoding.py +153 -0
  172. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_ilama_lora.py +62 -0
  173. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +103 -0
  174. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_quantization.py +35 -0
  175. vllm_ascend-0.11.0rc2/tests/e2e/singlecard/test_vlm.py +124 -0
  176. vllm_ascend-0.11.0rc2/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
  177. vllm_ascend-0.11.0rc2/tests/ut/attention/test_attention_mask.py +95 -0
  178. vllm_ascend-0.11.0rc2/tests/ut/attention/test_attention_v1.py +702 -0
  179. vllm_ascend-0.11.0rc2/tests/ut/attention/test_mla_v1.py +675 -0
  180. vllm_ascend-0.11.0rc2/tests/ut/core/test_scheduler.py +807 -0
  181. vllm_ascend-0.11.0rc2/tests/ut/distributed/test_parallel_state.py +58 -0
  182. vllm_ascend-0.11.0rc2/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +116 -0
  183. vllm_ascend-0.11.0rc2/tests/ut/eplb/core/test_eplb_utils.py +225 -0
  184. vllm_ascend-0.11.0rc2/tests/ut/kv_connector/test_mooncake_connector.py +1139 -0
  185. vllm_ascend-0.11.0rc2/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +924 -0
  186. vllm_ascend-0.11.0rc2/tests/ut/kv_connector/utils.py +208 -0
  187. vllm_ascend-0.11.0rc2/tests/ut/models/conftest.py +100 -0
  188. vllm_ascend-0.11.0rc2/tests/ut/models/test_qwen2_5_vl.py +492 -0
  189. vllm_ascend-0.11.0rc2/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +271 -0
  190. vllm_ascend-0.11.0rc2/tests/ut/ops/test_fused_ops.py +597 -0
  191. vllm_ascend-0.11.0rc2/tests/ut/ops/test_layernorm.py +156 -0
  192. vllm_ascend-0.11.0rc2/tests/ut/ops/test_linear.py +147 -0
  193. vllm_ascend-0.11.0rc2/tests/ut/ops/test_moe_comm_method.py +230 -0
  194. vllm_ascend-0.11.0rc2/tests/ut/ops/test_rotary_embedding.py +469 -0
  195. vllm_ascend-0.11.0rc2/tests/ut/ops/test_token_dispatcher.py +515 -0
  196. vllm_ascend-0.11.0rc2/tests/ut/ops/test_vocab_parallel_embedding.py +240 -0
  197. vllm_ascend-0.11.0rc2/tests/ut/patch/worker/patch_common/test_patch_distributed.py +119 -0
  198. vllm_ascend-0.11.0rc2/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +77 -0
  199. vllm_ascend-0.11.0rc2/tests/ut/quantization/test_quant_config.py +233 -0
  200. vllm_ascend-0.11.0rc2/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +246 -0
  201. vllm_ascend-0.11.0rc2/tests/ut/quantization/test_w4a8_dynamic.py +303 -0
  202. vllm_ascend-0.11.0rc2/tests/ut/quantization/test_w8a8.py +977 -0
  203. vllm_ascend-0.11.0rc2/tests/ut/test_platform.py +765 -0
  204. vllm_ascend-0.11.0rc2/tests/ut/test_utils.py +381 -0
  205. vllm_ascend-0.11.0rc2/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +197 -0
  206. vllm_ascend-0.11.0rc2/tests/ut/torchair/models/test_torchair_deepseek_v2.py +357 -0
  207. vllm_ascend-0.11.0rc2/tests/ut/torchair/ops/test_torchair_fused_moe.py +422 -0
  208. vllm_ascend-0.11.0rc2/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +296 -0
  209. vllm_ascend-0.11.0rc2/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +129 -0
  210. vllm_ascend-0.11.0rc2/tests/ut/torchair/test_torchair_mla.py +850 -0
  211. vllm_ascend-0.11.0rc2/tests/ut/torchair/test_utils.py +164 -0
  212. vllm_ascend-0.11.0rc2/tests/ut/worker/test_model_runner_v1.py +111 -0
  213. vllm_ascend-0.11.0rc2/tests/ut/worker/test_worker_v1.py +1240 -0
  214. vllm_ascend-0.11.0rc2/tools/aisbench.py +227 -0
  215. vllm_ascend-0.11.0rc2/tools/send_mm_request.py +49 -0
  216. vllm_ascend-0.11.0rc2/vllm_ascend/__init__.py +33 -0
  217. vllm_ascend-0.11.0rc2/vllm_ascend/_version.py +34 -0
  218. vllm_ascend-0.11.0rc2/vllm_ascend/ascend_config.py +310 -0
  219. vllm_ascend-0.11.0rc2/vllm_ascend/ascend_forward_context.py +211 -0
  220. vllm_ascend-0.11.0rc2/vllm_ascend/attention/attention_mask.py +96 -0
  221. vllm_ascend-0.11.0rc2/vllm_ascend/attention/attention_v1.py +727 -0
  222. vllm_ascend-0.11.0rc2/vllm_ascend/attention/mla_v1.py +1325 -0
  223. vllm_ascend-0.11.0rc2/vllm_ascend/attention/sfa_v1.py +988 -0
  224. vllm_ascend-0.11.0rc2/vllm_ascend/attention/utils.py +180 -0
  225. vllm_ascend-0.11.0rc2/vllm_ascend/compilation/acl_graph.py +343 -0
  226. vllm_ascend-0.11.0rc2/vllm_ascend/core/recompute_schedule_config.py +39 -0
  227. vllm_ascend-0.11.0rc2/vllm_ascend/core/recompute_scheduler.py +1392 -0
  228. vllm_ascend-0.11.0rc2/vllm_ascend/core/schedule_config.py +108 -0
  229. vllm_ascend-0.11.0rc2/vllm_ascend/cpu_binding.py +330 -0
  230. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/__init__.py +40 -0
  231. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/cpu_offload_connector.py +471 -0
  232. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +994 -0
  233. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake/config_data.py +449 -0
  234. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake/kv_transfer.py +282 -0
  235. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake/mooncake_engine.py +621 -0
  236. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake/mooncake_store.py +126 -0
  237. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +492 -0
  238. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake/transfer_engine.py +38 -0
  239. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake_connector.py +1263 -0
  240. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake_layerwise_connector.py +1153 -0
  241. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/parallel_state.py +196 -0
  242. vllm_ascend-0.11.0rc2/vllm_ascend/distributed/utils.py +61 -0
  243. vllm_ascend-0.11.0rc2/vllm_ascend/envs.py +183 -0
  244. vllm_ascend-0.11.0rc2/vllm_ascend/eplb/adaptor/vllm_adaptor.py +289 -0
  245. vllm_ascend-0.11.0rc2/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +138 -0
  246. vllm_ascend-0.11.0rc2/vllm_ascend/eplb/core/eplb_utils.py +190 -0
  247. vllm_ascend-0.11.0rc2/vllm_ascend/eplb/core/eplb_worker.py +440 -0
  248. vllm_ascend-0.11.0rc2/vllm_ascend/eplb/eplb_updator.py +209 -0
  249. vllm_ascend-0.11.0rc2/vllm_ascend/eplb/utils.py +77 -0
  250. vllm_ascend-0.11.0rc2/vllm_ascend/models/__init__.py +48 -0
  251. vllm_ascend-0.11.0rc2/vllm_ascend/models/deepseek_v3_2.py +633 -0
  252. vllm_ascend-0.11.0rc2/vllm_ascend/models/layers/mla.py +193 -0
  253. vllm_ascend-0.11.0rc2/vllm_ascend/models/layers/sfa.py +233 -0
  254. vllm_ascend-0.11.0rc2/vllm_ascend/models/qwen2_5_omni_thinker.py +54 -0
  255. vllm_ascend-0.11.0rc2/vllm_ascend/models/qwen2_5_vl.py +562 -0
  256. vllm_ascend-0.11.0rc2/vllm_ascend/models/qwen2_5_vl_without_padding.py +605 -0
  257. vllm_ascend-0.11.0rc2/vllm_ascend/models/qwen2_vl.py +369 -0
  258. vllm_ascend-0.11.0rc2/vllm_ascend/ops/__init__.py +57 -0
  259. vllm_ascend-0.11.0rc2/vllm_ascend/ops/common_fused_moe.py +477 -0
  260. vllm_ascend-0.11.0rc2/vllm_ascend/ops/expert_load_balancer.py +117 -0
  261. vllm_ascend-0.11.0rc2/vllm_ascend/ops/fla.py +299 -0
  262. vllm_ascend-0.11.0rc2/vllm_ascend/ops/layernorm.py +213 -0
  263. vllm_ascend-0.11.0rc2/vllm_ascend/ops/linear.py +467 -0
  264. vllm_ascend-0.11.0rc2/vllm_ascend/ops/linear_op.py +531 -0
  265. vllm_ascend-0.11.0rc2/vllm_ascend/ops/moe/experts_selector.py +277 -0
  266. vllm_ascend-0.11.0rc2/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +520 -0
  267. vllm_ascend-0.11.0rc2/vllm_ascend/ops/moe/moe_comm_method.py +273 -0
  268. vllm_ascend-0.11.0rc2/vllm_ascend/ops/moe/moe_mlp.py +258 -0
  269. vllm_ascend-0.11.0rc2/vllm_ascend/ops/moe/token_dispatcher.py +730 -0
  270. vllm_ascend-0.11.0rc2/vllm_ascend/ops/register_custom_ops.py +315 -0
  271. vllm_ascend-0.11.0rc2/vllm_ascend/ops/rotary_embedding.py +435 -0
  272. vllm_ascend-0.11.0rc2/vllm_ascend/ops/vocab_parallel_embedding.py +255 -0
  273. vllm_ascend-0.11.0rc2/vllm_ascend/ops/weight_prefetch.py +112 -0
  274. vllm_ascend-0.11.0rc2/vllm_ascend/patch/__init__.py +174 -0
  275. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/__init__.py +30 -0
  276. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/patch_config.py +234 -0
  277. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/patch_core.py +68 -0
  278. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/patch_mamba_config.py +96 -0
  279. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/patch_message_queue.py +164 -0
  280. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/patch_multiproc_executor.py +151 -0
  281. vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform/patch_sched_yield.py +13 -0
  282. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/__init__.py +32 -0
  283. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/patch_attention_layer.py +92 -0
  284. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/patch_deepseek_mtp.py +94 -0
  285. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/patch_distributed.py +115 -0
  286. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/patch_roberta.py +88 -0
  287. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/patch_triton.py +16 -0
  288. vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker/patch_weight_loader.py +41 -0
  289. vllm_ascend-0.11.0rc2/vllm_ascend/platform.py +431 -0
  290. vllm_ascend-0.11.0rc2/vllm_ascend/quantization/quant_config.py +479 -0
  291. vllm_ascend-0.11.0rc2/vllm_ascend/quantization/utils.py +87 -0
  292. vllm_ascend-0.11.0rc2/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +193 -0
  293. vllm_ascend-0.11.0rc2/vllm_ascend/quantization/w4a8_dynamic.py +490 -0
  294. vllm_ascend-0.11.0rc2/vllm_ascend/quantization/w8a8.py +674 -0
  295. vllm_ascend-0.11.0rc2/vllm_ascend/quantization/w8a8_dynamic.py +284 -0
  296. vllm_ascend-0.11.0rc2/vllm_ascend/sample/sampler.py +74 -0
  297. vllm_ascend-0.11.0rc2/vllm_ascend/spec_decode/eagle_proposer.py +661 -0
  298. vllm_ascend-0.11.0rc2/vllm_ascend/spec_decode/interface.py +53 -0
  299. vllm_ascend-0.11.0rc2/vllm_ascend/spec_decode/mtp_proposer.py +672 -0
  300. vllm_ascend-0.11.0rc2/vllm_ascend/spec_decode/ngram_proposer.py +71 -0
  301. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/models/qwen3_moe.py +537 -0
  302. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
  303. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1301 -0
  304. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/ops/torchair_fused_moe.py +1429 -0
  305. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/ops/torchair_layernorm.py +78 -0
  306. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py +38 -0
  307. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +501 -0
  308. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1080 -0
  309. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/torchair_attention.py +463 -0
  310. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/torchair_mla.py +1310 -0
  311. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/torchair_model_runner.py +557 -0
  312. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/torchair_sfa.py +1333 -0
  313. vllm_ascend-0.11.0rc2/vllm_ascend/torchair/utils.py +275 -0
  314. vllm_ascend-0.11.0rc2/vllm_ascend/utils.py +821 -0
  315. vllm_ascend-0.11.0rc2/vllm_ascend/worker/__init__.py +0 -0
  316. vllm_ascend-0.11.0rc2/vllm_ascend/worker/model_runner_v1.py +3673 -0
  317. vllm_ascend-0.11.0rc2/vllm_ascend/worker/npu_input_batch.py +842 -0
  318. vllm_ascend-0.11.0rc2/vllm_ascend/worker/worker_v1.py +442 -0
  319. vllm_ascend-0.11.0rc2/vllm_ascend.egg-info/PKG-INFO +142 -0
  320. vllm_ascend-0.11.0rc2/vllm_ascend.egg-info/SOURCES.txt +625 -0
  321. vllm_ascend-0.11.0rc2/vllm_ascend.egg-info/entry_points.txt +6 -0
  322. vllm_ascend-0.11.0rc2/vllm_ascend.egg-info/requires.txt +19 -0
  323. vllm_ascend-0.11.0rc0/.github/Dockerfile.buildwheel +0 -45
  324. vllm_ascend-0.11.0rc0/.github/actionlint.yaml +0 -20
  325. vllm_ascend-0.11.0rc0/.github/workflows/_e2e_test.yaml +0 -195
  326. vllm_ascend-0.11.0rc0/.github/workflows/accuracy_test.yaml +0 -322
  327. vllm_ascend-0.11.0rc0/.github/workflows/format_pr_body.yaml +0 -57
  328. vllm_ascend-0.11.0rc0/.github/workflows/nightly_benchmarks.yaml +0 -206
  329. vllm_ascend-0.11.0rc0/.github/workflows/release_whl.yml +0 -119
  330. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_dist.yaml +0 -100
  331. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test.yaml +0 -158
  332. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_310p.yaml +0 -117
  333. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full.yaml +0 -79
  334. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml +0 -51
  335. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +0 -45
  336. vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_pd.yaml +0 -112
  337. vllm_ascend-0.11.0rc0/.pre-commit-config.yaml +0 -147
  338. vllm_ascend-0.11.0rc0/CMakeLists.txt +0 -98
  339. vllm_ascend-0.11.0rc0/Dockerfile +0 -60
  340. vllm_ascend-0.11.0rc0/Dockerfile.310p +0 -61
  341. vllm_ascend-0.11.0rc0/Dockerfile.310p.openEuler +0 -59
  342. vllm_ascend-0.11.0rc0/Dockerfile.a3 +0 -60
  343. vllm_ascend-0.11.0rc0/Dockerfile.a3.openEuler +0 -58
  344. vllm_ascend-0.11.0rc0/Dockerfile.openEuler +0 -58
  345. vllm_ascend-0.11.0rc0/PKG-INFO +0 -112
  346. vllm_ascend-0.11.0rc0/README.md +0 -91
  347. vllm_ascend-0.11.0rc0/README.zh.md +0 -90
  348. vllm_ascend-0.11.0rc0/csrc/camem_allocator.cpp +0 -338
  349. vllm_ascend-0.11.0rc0/csrc/ops.h +0 -127
  350. vllm_ascend-0.11.0rc0/csrc/torch_binding.cpp +0 -425
  351. vllm_ascend-0.11.0rc0/csrc/torch_binding_meta.cpp +0 -102
  352. vllm_ascend-0.11.0rc0/docs/source/community/contributors.md +0 -138
  353. vllm_ascend-0.11.0rc0/docs/source/community/governance.md +0 -48
  354. vllm_ascend-0.11.0rc0/docs/source/community/user_stories/index.md +0 -19
  355. vllm_ascend-0.11.0rc0/docs/source/community/user_stories/llamafactory.md +0 -19
  356. vllm_ascend-0.11.0rc0/docs/source/community/versioning_policy.md +0 -135
  357. vllm_ascend-0.11.0rc0/docs/source/conf.py +0 -142
  358. vllm_ascend-0.11.0rc0/docs/source/developer_guide/contribution/index.md +0 -111
  359. vllm_ascend-0.11.0rc0/docs/source/developer_guide/contribution/testing.md +0 -285
  360. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/using_evalscope.md +0 -175
  361. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -300
  362. vllm_ascend-0.11.0rc0/docs/source/developer_guide/evaluation/using_opencompass.md +0 -123
  363. vllm_ascend-0.11.0rc0/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -237
  364. vllm_ascend-0.11.0rc0/docs/source/developer_guide/feature_guide/index.md +0 -10
  365. vllm_ascend-0.11.0rc0/docs/source/developer_guide/feature_guide/patch.md +0 -85
  366. vllm_ascend-0.11.0rc0/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -258
  367. vllm_ascend-0.11.0rc0/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -3
  368. vllm_ascend-0.11.0rc0/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -183
  369. vllm_ascend-0.11.0rc0/docs/source/developer_guide/performance/performance_benchmark.md +0 -194
  370. vllm_ascend-0.11.0rc0/docs/source/developer_guide/performance/profile_execute_duration.md +0 -40
  371. vllm_ascend-0.11.0rc0/docs/source/faqs.md +0 -216
  372. vllm_ascend-0.11.0rc0/docs/source/installation.md +0 -283
  373. vllm_ascend-0.11.0rc0/docs/source/quick_start.md +0 -186
  374. vllm_ascend-0.11.0rc0/docs/source/tutorials/index.md +0 -22
  375. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node.md +0 -207
  376. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_kimi.md +0 -153
  377. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_pd_disaggregation.md +0 -244
  378. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_qwen3vl.md +0 -156
  379. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_ray.md +0 -182
  380. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_npu.md +0 -107
  381. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_npu_moge.md +0 -242
  382. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_npu_quantization.md +0 -137
  383. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -109
  384. vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_npu_qwen3_next.md +0 -156
  385. vllm_ascend-0.11.0rc0/docs/source/tutorials/single_node_300i.md +0 -406
  386. vllm_ascend-0.11.0rc0/docs/source/tutorials/single_npu.md +0 -202
  387. vllm_ascend-0.11.0rc0/docs/source/tutorials/single_npu_audio.md +0 -122
  388. vllm_ascend-0.11.0rc0/docs/source/tutorials/single_npu_multimodal.md +0 -192
  389. vllm_ascend-0.11.0rc0/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -99
  390. vllm_ascend-0.11.0rc0/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -133
  391. vllm_ascend-0.11.0rc0/docs/source/user_guide/configuration/additional_config.md +0 -96
  392. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -94
  393. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/graph_mode.md +0 -78
  394. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/lora.md +0 -23
  395. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/quantization.md +0 -126
  396. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/sleep_mode.md +0 -114
  397. vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/structured_output.md +0 -163
  398. vllm_ascend-0.11.0rc0/docs/source/user_guide/release_notes.md +0 -689
  399. vllm_ascend-0.11.0rc0/docs/source/user_guide/support_matrix/index.md +0 -10
  400. vllm_ascend-0.11.0rc0/docs/source/user_guide/support_matrix/supported_features.md +0 -45
  401. vllm_ascend-0.11.0rc0/docs/source/user_guide/support_matrix/supported_models.md +0 -79
  402. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/README.md +0 -242
  403. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/gen_ranktable.py +0 -136
  404. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -547
  405. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -165
  406. vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +0 -272
  407. vllm_ascend-0.11.0rc0/examples/offline_data_parallel.py +0 -257
  408. vllm_ascend-0.11.0rc0/examples/offline_external_launcher.py +0 -287
  409. vllm_ascend-0.11.0rc0/pyproject.toml +0 -34
  410. vllm_ascend-0.11.0rc0/requirements-dev.txt +0 -20
  411. vllm_ascend-0.11.0rc0/requirements.txt +0 -27
  412. vllm_ascend-0.11.0rc0/setup.py +0 -397
  413. vllm_ascend-0.11.0rc0/tests/e2e/conftest.py +0 -438
  414. vllm_ascend-0.11.0rc0/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -62
  415. vllm_ascend-0.11.0rc0/tests/e2e/model_utils.py +0 -79
  416. vllm_ascend-0.11.0rc0/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +0 -17
  417. vllm_ascend-0.11.0rc0/tests/e2e/models/configs/accuracy.txt +0 -4
  418. vllm_ascend-0.11.0rc0/tests/e2e/models/report_template.md +0 -33
  419. vllm_ascend-0.11.0rc0/tests/e2e/models/test_lm_eval_correctness.py +0 -157
  420. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_data_parallel.py +0 -73
  421. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_expert_parallel.py +0 -42
  422. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_external_launcher.py +0 -187
  423. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -23
  424. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_offline_inference_distributed.py +0 -206
  425. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_pipeline_parallel.py +0 -46
  426. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_prefix_caching.py +0 -148
  427. vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_weight_loader.py +0 -188
  428. vllm_ascend-0.11.0rc0/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -141
  429. vllm_ascend-0.11.0rc0/tests/e2e/pd_disaggreate/setup_pd.sh +0 -136
  430. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/ops/test_fused_moe.py +0 -352
  431. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -88
  432. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -81
  433. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -148
  434. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_aclgraph.py +0 -75
  435. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_ascend_scheduler.py +0 -111
  436. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_camem.py +0 -96
  437. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_chunked.py +0 -81
  438. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_embedding.py +0 -49
  439. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_guided_decoding.py +0 -181
  440. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_ilama_lora.py +0 -62
  441. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -103
  442. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_quantization.py +0 -35
  443. vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_vlm.py +0 -90
  444. vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/vllm_test.cfg +0 -2
  445. vllm_ascend-0.11.0rc0/tests/ut/attention/test_attention_mask.py +0 -133
  446. vllm_ascend-0.11.0rc0/tests/ut/attention/test_attention_v1.py +0 -592
  447. vllm_ascend-0.11.0rc0/tests/ut/attention/test_mla_v1.py +0 -667
  448. vllm_ascend-0.11.0rc0/tests/ut/core/test_scheduler.py +0 -806
  449. vllm_ascend-0.11.0rc0/tests/ut/distributed/test_parallel_state.py +0 -48
  450. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -122
  451. vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_utils.py +0 -79
  452. vllm_ascend-0.11.0rc0/tests/ut/kv_connector/test_mooncake_connector.py +0 -1063
  453. vllm_ascend-0.11.0rc0/tests/ut/kv_connector/utils.py +0 -208
  454. vllm_ascend-0.11.0rc0/tests/ut/models/conftest.py +0 -114
  455. vllm_ascend-0.11.0rc0/tests/ut/models/test_deepseek_mtp.py +0 -196
  456. vllm_ascend-0.11.0rc0/tests/ut/models/test_deepseek_v2.py +0 -107
  457. vllm_ascend-0.11.0rc0/tests/ut/models/test_qwen2_5_vl.py +0 -480
  458. vllm_ascend-0.11.0rc0/tests/ut/models/test_qwen3_moe.py +0 -68
  459. vllm_ascend-0.11.0rc0/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +0 -289
  460. vllm_ascend-0.11.0rc0/tests/ut/ops/test_fused_ops.py +0 -788
  461. vllm_ascend-0.11.0rc0/tests/ut/ops/test_layernorm.py +0 -161
  462. vllm_ascend-0.11.0rc0/tests/ut/ops/test_linear.py +0 -96
  463. vllm_ascend-0.11.0rc0/tests/ut/ops/test_moe_comm_method.py +0 -232
  464. vllm_ascend-0.11.0rc0/tests/ut/ops/test_rotary_embedding.py +0 -378
  465. vllm_ascend-0.11.0rc0/tests/ut/ops/test_token_dispatcher.py +0 -522
  466. vllm_ascend-0.11.0rc0/tests/ut/ops/test_vocab_parallel_embedding.py +0 -244
  467. vllm_ascend-0.11.0rc0/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -112
  468. vllm_ascend-0.11.0rc0/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -77
  469. vllm_ascend-0.11.0rc0/tests/ut/quantization/test_quant_config.py +0 -233
  470. vllm_ascend-0.11.0rc0/tests/ut/quantization/test_w4a8_dynamic.py +0 -226
  471. vllm_ascend-0.11.0rc0/tests/ut/quantization/test_w8a8.py +0 -930
  472. vllm_ascend-0.11.0rc0/tests/ut/test_platform.py +0 -685
  473. vllm_ascend-0.11.0rc0/tests/ut/test_utils.py +0 -366
  474. vllm_ascend-0.11.0rc0/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -193
  475. vllm_ascend-0.11.0rc0/tests/ut/torchair/models/test_torchair_deepseek_v2.py +0 -331
  476. vllm_ascend-0.11.0rc0/tests/ut/torchair/ops/test_torchair_fused_moe.py +0 -416
  477. vllm_ascend-0.11.0rc0/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +0 -209
  478. vllm_ascend-0.11.0rc0/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -75
  479. vllm_ascend-0.11.0rc0/tests/ut/torchair/test_torchair_mla.py +0 -845
  480. vllm_ascend-0.11.0rc0/tests/ut/torchair/test_utils.py +0 -136
  481. vllm_ascend-0.11.0rc0/tests/ut/worker/test_model_runner_v1.py +0 -107
  482. vllm_ascend-0.11.0rc0/tests/ut/worker/test_worker_v1.py +0 -1225
  483. vllm_ascend-0.11.0rc0/vllm_ascend/__init__.py +0 -29
  484. vllm_ascend-0.11.0rc0/vllm_ascend/_version.py +0 -34
  485. vllm_ascend-0.11.0rc0/vllm_ascend/ascend_config.py +0 -238
  486. vllm_ascend-0.11.0rc0/vllm_ascend/ascend_forward_context.py +0 -187
  487. vllm_ascend-0.11.0rc0/vllm_ascend/attention/attention_mask.py +0 -108
  488. vllm_ascend-0.11.0rc0/vllm_ascend/attention/attention_v1.py +0 -666
  489. vllm_ascend-0.11.0rc0/vllm_ascend/attention/mla_v1.py +0 -1033
  490. vllm_ascend-0.11.0rc0/vllm_ascend/attention/sfa_v1.py +0 -986
  491. vllm_ascend-0.11.0rc0/vllm_ascend/attention/utils.py +0 -137
  492. vllm_ascend-0.11.0rc0/vllm_ascend/compilation/acl_graph.py +0 -260
  493. vllm_ascend-0.11.0rc0/vllm_ascend/core/schedule_config.py +0 -108
  494. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/__init__.py +0 -33
  495. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_connector.py +0 -457
  496. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +0 -987
  497. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/config_data.py +0 -447
  498. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/kv_transfer.py +0 -251
  499. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_engine.py +0 -489
  500. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store.py +0 -88
  501. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +0 -484
  502. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake_connector.py +0 -1112
  503. vllm_ascend-0.11.0rc0/vllm_ascend/distributed/parallel_state.py +0 -144
  504. vllm_ascend-0.11.0rc0/vllm_ascend/envs.py +0 -185
  505. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -289
  506. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -137
  507. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_utils.py +0 -135
  508. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_worker.py +0 -436
  509. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/eplb_updator.py +0 -205
  510. vllm_ascend-0.11.0rc0/vllm_ascend/eplb/utils.py +0 -77
  511. vllm_ascend-0.11.0rc0/vllm_ascend/models/__init__.py +0 -60
  512. vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_mtp.py +0 -203
  513. vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v2.py +0 -666
  514. vllm_ascend-0.11.0rc0/vllm_ascend/models/layers/mla.py +0 -180
  515. vllm_ascend-0.11.0rc0/vllm_ascend/models/layers/sfa.py +0 -233
  516. vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen2_5_vl.py +0 -547
  517. vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -631
  518. vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen2_vl.py +0 -362
  519. vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen3_moe.py +0 -270
  520. vllm_ascend-0.11.0rc0/vllm_ascend/ops/__init__.py +0 -58
  521. vllm_ascend-0.11.0rc0/vllm_ascend/ops/common_fused_moe.py +0 -368
  522. vllm_ascend-0.11.0rc0/vllm_ascend/ops/expert_load_balancer.py +0 -99
  523. vllm_ascend-0.11.0rc0/vllm_ascend/ops/fla.py +0 -218
  524. vllm_ascend-0.11.0rc0/vllm_ascend/ops/fused_moe.py +0 -467
  525. vllm_ascend-0.11.0rc0/vllm_ascend/ops/layernorm.py +0 -159
  526. vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear.py +0 -367
  527. vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear_op.py +0 -459
  528. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/experts_selector.py +0 -283
  529. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +0 -459
  530. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/moe_comm_method.py +0 -273
  531. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/moe_mlp.py +0 -252
  532. vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/token_dispatcher.py +0 -720
  533. vllm_ascend-0.11.0rc0/vllm_ascend/ops/register_custom_ops.py +0 -201
  534. vllm_ascend-0.11.0rc0/vllm_ascend/ops/rotary_embedding.py +0 -355
  535. vllm_ascend-0.11.0rc0/vllm_ascend/ops/vocab_parallel_embedding.py +0 -268
  536. vllm_ascend-0.11.0rc0/vllm_ascend/patch/__init__.py +0 -121
  537. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/__init__.py +0 -18
  538. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/__init__.py +0 -24
  539. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_config.py +0 -313
  540. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +0 -100
  541. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_transformers_utils.py +0 -200
  542. vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_main/__init__.py +0 -16
  543. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/__init__.py +0 -19
  544. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/__init__.py +0 -32
  545. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_layer.py +0 -202
  546. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_selector.py +0 -181
  547. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attentionspec.py +0 -110
  548. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -49
  549. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_triton.py +0 -16
  550. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_weight_loader.py +0 -44
  551. vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_main/__init__.py +0 -16
  552. vllm_ascend-0.11.0rc0/vllm_ascend/platform.py +0 -409
  553. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/quant_config.py +0 -433
  554. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/utils.py +0 -83
  555. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/w4a8_dynamic.py +0 -401
  556. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/w8a8.py +0 -647
  557. vllm_ascend-0.11.0rc0/vllm_ascend/quantization/w8a8_dynamic.py +0 -279
  558. vllm_ascend-0.11.0rc0/vllm_ascend/sample/sampler.py +0 -86
  559. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/eagle_proposer.py +0 -674
  560. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/interface.py +0 -51
  561. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/mtp_proposer.py +0 -657
  562. vllm_ascend-0.11.0rc0/vllm_ascend/spec_decode/ngram_proposer.py +0 -65
  563. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models/qwen3_moe.py +0 -544
  564. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +0 -214
  565. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models/torchair_deepseek_v2.py +0 -1290
  566. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/torchair_fused_moe.py +0 -1380
  567. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/torchair_layernorm.py +0 -51
  568. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +0 -448
  569. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +0 -1046
  570. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_attention.py +0 -463
  571. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_mla.py +0 -1305
  572. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_model_runner.py +0 -503
  573. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_sfa.py +0 -1330
  574. vllm_ascend-0.11.0rc0/vllm_ascend/torchair/utils.py +0 -224
  575. vllm_ascend-0.11.0rc0/vllm_ascend/utils.py +0 -652
  576. vllm_ascend-0.11.0rc0/vllm_ascend/worker/model_runner_v1.py +0 -3709
  577. vllm_ascend-0.11.0rc0/vllm_ascend/worker/npu_input_batch.py +0 -849
  578. vllm_ascend-0.11.0rc0/vllm_ascend/worker/worker_v1.py +0 -418
  579. vllm_ascend-0.11.0rc0/vllm_ascend.egg-info/PKG-INFO +0 -112
  580. vllm_ascend-0.11.0rc0/vllm_ascend.egg-info/SOURCES.txt +0 -563
  581. vllm_ascend-0.11.0rc0/vllm_ascend.egg-info/entry_points.txt +0 -5
  582. vllm_ascend-0.11.0rc0/vllm_ascend.egg-info/requires.txt +0 -18
  583. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.gemini/config.yaml +0 -0
  584. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  585. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  586. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  587. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  588. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  589. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  590. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  591. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  592. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  593. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  594. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  595. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  596. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  597. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/dependabot.yml +0 -0
  598. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/format_pr_body.sh +0 -0
  599. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/labeler.yml +0 -0
  600. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/image_310p_openeuler.yml +0 -0
  601. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/image_310p_ubuntu.yml +0 -0
  602. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/image_a3_openeuler.yml +0 -0
  603. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/image_a3_ubuntu.yml +0 -0
  604. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/image_openeuler.yml +0 -0
  605. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/image_ubuntu.yml +0 -0
  606. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/label_merge_conflict.yml +0 -0
  607. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/labeler.yml +0 -0
  608. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/matchers/actionlint.json +0 -0
  609. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/matchers/mypy.json +0 -0
  610. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/matchers/ruff.json +0 -0
  611. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/pre-commit.yml +0 -0
  612. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/release_code.yml +0 -0
  613. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/reminder_comment.yml +0 -0
  614. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
  615. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.gitignore +0 -0
  616. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/.readthedocs.yaml +0 -0
  617. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/CODE_OF_CONDUCT.md +0 -0
  618. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/CONTRIBUTING.md +0 -0
  619. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/DCO +0 -0
  620. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/LICENSE +0 -0
  621. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/README.md +0 -0
  622. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  623. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/requirements-bench.txt +0 -0
  624. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  625. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/scripts/perf_result_template.md +0 -0
  626. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  627. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/tests/latency-tests.json +0 -0
  628. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/tests/serving-tests.json +0 -0
  629. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/benchmarks/tests/throughput-tests.json +0 -0
  630. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/cmake/utils.cmake +0 -0
  631. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/codecov.yml +0 -0
  632. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/collect_env.py +0 -0
  633. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/bgmv_expand.cpp +0 -0
  634. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/bgmv_shrink.cpp +0 -0
  635. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  636. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  637. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/sgmv_expand.cpp +0 -0
  638. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/sgmv_shrink.cpp +0 -0
  639. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/types.h +0 -0
  640. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/kernels/utils.h +0 -0
  641. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/csrc/utils.h +0 -0
  642. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/Makefile +0 -0
  643. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/README.md +0 -0
  644. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/requirements-docs.txt +0 -0
  645. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/requirements-test.txt +0 -0
  646. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/_templates/sections/header.html +0 -0
  647. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  648. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  649. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -0
  650. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -0
  651. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -0
  652. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -0
  653. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  654. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/index.md +0 -0
  655. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/modeling/index.md +0 -0
  656. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/performance/index.md +0 -0
  657. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/index.md +0 -0
  658. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  659. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  660. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  661. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  662. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  663. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  664. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  665. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  666. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  667. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  668. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  669. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  670. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  671. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  672. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  673. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  674. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  675. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  676. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  677. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  678. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  679. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  680. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  681. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  682. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  683. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  684. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  685. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  686. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  687. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  688. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  689. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  690. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  691. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  692. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  693. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  694. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  695. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  696. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  697. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  698. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  699. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  700. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  701. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  702. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  703. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  704. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  705. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  706. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  707. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  708. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/configuration/env_vars.md +0 -0
  709. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/configuration/index.md +0 -0
  710. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  711. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  712. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/index.md +0 -0
  713. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/gen_ranktable.sh +0 -0
  714. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  715. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/eplb/eplb_deepseek.py +0 -0
  716. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/eplb/eplb_strategy.py +0 -0
  717. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/external_online_dp/README.md +0 -0
  718. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/external_online_dp/launch_online_dp.py +0 -0
  719. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/external_online_dp/run_dp_template.sh +0 -0
  720. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_disaggregated_prefill_npu.py +0 -0
  721. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_dualbatch_overlap_npu.py +0 -0
  722. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_embed.py +0 -0
  723. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_inference_audio_language.py +0 -0
  724. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_inference_npu.py +0 -0
  725. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_inference_npu_tp2.py +0 -0
  726. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_inference_sleep_mode_npu.py +0 -0
  727. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/offline_weight_load.py +0 -0
  728. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/prompt_embedding_inference.py +0 -0
  729. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/examples/run_dp_server.sh +0 -0
  730. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/format.sh +0 -0
  731. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/mypy.ini +0 -0
  732. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/packages.txt +0 -0
  733. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/requirements-lint.txt +0 -0
  734. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/setup.cfg +0 -0
  735. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/__init__.py +0 -0
  736. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  737. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  738. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/__init__.py +0 -0
  739. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/common.sh +0 -0
  740. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  741. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
  742. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  743. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +0 -0
  744. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/models/conftest.py +0 -0
  745. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
  746. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_qwen3_moe.py +0 -0
  747. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_torchair_graph_mode.py +0 -0
  748. {vllm_ascend-0.11.0rc0/tests/e2e/singlecard → vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node}/__init__.py +0 -0
  749. {vllm_ascend-0.11.0rc0/tests/e2e/singlecard/ops → vllm_ascend-0.11.0rc2/tests/e2e/nightly/multi_node/config}/__init__.py +0 -0
  750. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  751. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  752. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/prompts/example.txt +0 -0
  753. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/run_disagg_pd.sh +0 -0
  754. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/run_doctests.sh +0 -0
  755. {vllm_ascend-0.11.0rc0/tests/ut → vllm_ascend-0.11.0rc2/tests/e2e/singlecard}/__init__.py +0 -0
  756. {vllm_ascend-0.11.0rc0/tests/ut/models → vllm_ascend-0.11.0rc2/tests/e2e/singlecard/ops}/__init__.py +0 -0
  757. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_bgmv_expand.py +0 -0
  758. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +0 -0
  759. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  760. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
  761. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +0 -0
  762. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  763. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_sampler.py +0 -0
  764. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/utils.py +0 -0
  765. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
  766. {vllm_ascend-0.11.0rc0/tests/ut/torchair → vllm_ascend-0.11.0rc2/tests/ut}/__init__.py +0 -0
  767. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/base.py +0 -0
  768. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/compilation/test_acl_graph.py +0 -0
  769. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/conftest.py +0 -0
  770. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/core/test_schedule_config.py +0 -0
  771. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/device_allocator/test_camem.py +0 -0
  772. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  773. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  774. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/test_communicator.py +0 -0
  775. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  776. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
  777. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
  778. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
  779. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
  780. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
  781. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/fake_weight/config.json +0 -0
  782. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  783. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  784. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  785. {vllm_ascend-0.11.0rc0/vllm_ascend/attention → vllm_ascend-0.11.0rc2/tests/ut/models}/__init__.py +0 -0
  786. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
  787. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/models/test_qwen2_vl.py +0 -0
  788. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_base.py +0 -0
  789. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_decorator.py +0 -0
  790. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_layers.py +0 -0
  791. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_metadata.py +0 -0
  792. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_ms_split.py +0 -0
  793. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/ops/expert_map.json +0 -0
  794. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_activation.py +0 -0
  795. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_comm_utils.py +0 -0
  796. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_common_fused_moe.py +0 -0
  797. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  798. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_utils.py +0 -0
  799. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
  800. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
  801. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/sample/test_rejection_sampler.py +0 -0
  802. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/sample/test_sampler.py +0 -0
  803. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/test_ascend_config.py +0 -0
  804. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/test_envs.py +0 -0
  805. {vllm_ascend-0.11.0rc0/vllm_ascend/compilation → vllm_ascend-0.11.0rc2/tests/ut/torchair}/__init__.py +0 -0
  806. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +0 -0
  807. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/test_torchair_attention.py +0 -0
  808. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tests/ut/worker/test_input_batch.py +0 -0
  809. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/actionlint.sh +0 -0
  810. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/check_python_src_init.py +0 -0
  811. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/check_repo.sh +0 -0
  812. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/enforce_regex_import.py +0 -0
  813. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/mypy.sh +0 -0
  814. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/png-lint.sh +0 -0
  815. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/shellcheck.sh +0 -0
  816. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/tools/sphinx-lint.sh +0 -0
  817. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/typos.toml +0 -0
  818. {vllm_ascend-0.11.0rc0/vllm_ascend/core → vllm_ascend-0.11.0rc2/vllm_ascend/attention}/__init__.py +0 -0
  819. {vllm_ascend-0.11.0rc0/vllm_ascend/device_allocator → vllm_ascend-0.11.0rc2/vllm_ascend/compilation}/__init__.py +0 -0
  820. {vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager → vllm_ascend-0.11.0rc2/vllm_ascend/core}/__init__.py +0 -0
  821. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/core/scheduler.py +0 -0
  822. {vllm_ascend-0.11.0rc0/vllm_ascend/distributed/device_communicators → vllm_ascend-0.11.0rc2/vllm_ascend/device_allocator}/__init__.py +0 -0
  823. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/device_allocator/camem.py +0 -0
  824. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/communicator.py +0 -0
  825. {vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake → vllm_ascend-0.11.0rc2/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
  826. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
  827. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
  828. {vllm_ascend-0.11.0rc0/vllm_ascend/eplb → vllm_ascend-0.11.0rc2/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  829. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  830. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  831. {vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor → vllm_ascend-0.11.0rc2/vllm_ascend/distributed/mooncake}/__init__.py +0 -0
  832. {vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core → vllm_ascend-0.11.0rc2/vllm_ascend/eplb}/__init__.py +0 -0
  833. {vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy → vllm_ascend-0.11.0rc2/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
  834. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
  835. {vllm_ascend-0.11.0rc0/vllm_ascend/lora → vllm_ascend-0.11.0rc2/vllm_ascend/eplb/core}/__init__.py +0 -0
  836. {vllm_ascend-0.11.0rc0/vllm_ascend/models/layers → vllm_ascend-0.11.0rc2/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
  837. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
  838. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
  839. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -0
  840. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
  841. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -0
  842. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
  843. {vllm_ascend-0.11.0rc0/vllm_ascend/multistream → vllm_ascend-0.11.0rc2/vllm_ascend/lora}/__init__.py +0 -0
  844. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/lora_ops.py +0 -0
  845. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/punica_npu.py +0 -0
  846. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/utils.py +0 -0
  847. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/meta_registration.py +0 -0
  848. {vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe → vllm_ascend-0.11.0rc2/vllm_ascend/models/layers}/__init__.py +0 -0
  849. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/qwen3_next.py +0 -0
  850. {vllm_ascend-0.11.0rc0/vllm_ascend/quantization → vllm_ascend-0.11.0rc2/vllm_ascend/multistream}/__init__.py +0 -0
  851. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/base.py +0 -0
  852. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/context.py +0 -0
  853. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/decorator.py +0 -0
  854. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/layers.py +0 -0
  855. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/metadata.py +0 -0
  856. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/ms_split.py +0 -0
  857. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/activation.py +0 -0
  858. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/attention.py +0 -0
  859. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/casual_conv1d.py +0 -0
  860. {vllm_ascend-0.11.0rc0/vllm_ascend/sample → vllm_ascend-0.11.0rc2/vllm_ascend/ops/moe}/__init__.py +0 -0
  861. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/comm_utils.py +0 -0
  862. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/sigmoid_gating.py +0 -0
  863. {vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common → vllm_ascend-0.11.0rc2/vllm_ascend/patch/platform}/patch_distributed.py +0 -0
  864. {vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common → vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker}/patch_logits.py +0 -0
  865. {vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common → vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker}/patch_minicpm.py +0 -0
  866. {vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common → vllm_ascend-0.11.0rc2/vllm_ascend/patch/worker}/patch_multimodal_merge.py +0 -0
  867. {vllm_ascend-0.11.0rc0/vllm_ascend/torchair → vllm_ascend-0.11.0rc2/vllm_ascend/quantization}/__init__.py +0 -0
  868. {vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models → vllm_ascend-0.11.0rc2/vllm_ascend/sample}/__init__.py +0 -0
  869. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
  870. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
  871. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/rejection_sampler.py +0 -0
  872. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/spec_decode/__init__.py +0 -0
  873. {vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops → vllm_ascend-0.11.0rc2/vllm_ascend/torchair}/__init__.py +0 -0
  874. {vllm_ascend-0.11.0rc0/vllm_ascend/torchair/quantization → vllm_ascend-0.11.0rc2/vllm_ascend/torchair/models}/__init__.py +0 -0
  875. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/qwen2.py +0 -0
  876. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
  877. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/torchair_pangu_moe.py +0 -0
  878. {vllm_ascend-0.11.0rc0/vllm_ascend/worker → vllm_ascend-0.11.0rc2/vllm_ascend/torchair/ops}/__init__.py +0 -0
  879. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/sequence_parallel.py +0 -0
  880. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/shared_weight_layer.py +0 -0
  881. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_activation.py +0 -0
  882. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +0 -0
  883. /vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v3.py → /vllm_ascend-0.11.0rc2/vllm_ascend/torchair/quantization/__init__.py +0 -0
  884. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/torchair_worker.py +0 -0
  885. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend/worker/block_table.py +0 -0
  886. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  887. {vllm_ascend-0.11.0rc0 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -0,0 +1,45 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+ ARG PY_VERSION=3.11
18
+ FROM quay.io/ascend/manylinux:8.2.rc1-910b-manylinux_2_28-py${PY_VERSION}
19
+
20
+ ARG COMPILE_CUSTOM_KERNELS=1
21
+
22
+ # Define environments
23
+ ENV DEBIAN_FRONTEND=noninteractive
24
+ ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
25
+ RUN yum update -y && \
26
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
+ rm -rf /var/cache/yum
28
+
29
+ WORKDIR /workspace
30
+
31
+ COPY . /workspace/vllm-ascend/
32
+
33
+ # Install Python requirements
34
+ RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
35
+ python3 -m pip install twine
36
+
37
+ # Install vllm-ascend
38
+ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
39
+ source /usr/local/Ascend/nnal/atb/set_env.sh && \
40
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
41
+ cd vllm-ascend && \
42
+ python3 setup.py bdist_wheel && \
43
+ ls -l dist
44
+
45
+ CMD ["/bin/bash"]
@@ -0,0 +1,21 @@
1
+ self-hosted-runner:
2
+ # Labels of self-hosted runner in array of strings.
3
+ labels:
4
+ - linux-aarch64-a2-0
5
+ - linux-aarch64-a2-1
6
+ - linux-aarch64-a2-2
7
+ - linux-aarch64-a2-4
8
+ - linux-aarch64-a2-8
9
+ - linux-arm64-npu-static-8
10
+ - linux-aarch64-310p-1
11
+ - linux-aarch64-310p-2
12
+ - linux-aarch64-310p-4
13
+ - ubuntu-24.04-arm
14
+ - linux-aarch64-a3-1
15
+ - linux-aarch64-a3-2
16
+ - linux-aarch64-a3-4
17
+ - linux-aarch64-a3-8
18
+ - linux-amd64-cpu-0
19
+ - linux-amd64-cpu-8
20
+ - linux-amd64-cpu-16
21
+ - linux-aarch64-a3-0
@@ -0,0 +1,175 @@
1
+ name: 'accuracy test'
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
9
+ vllm-ascend:
10
+ required: false
11
+ type: string
12
+ default: main
13
+ runner:
14
+ required: true
15
+ type: string
16
+ image:
17
+ required: true
18
+ type: string
19
+ model_name:
20
+ required: true
21
+ type: string
22
+ upload:
23
+ required: false
24
+ type: boolean
25
+ default: false
26
+
27
+ jobs:
28
+ accuracy_tests:
29
+
30
+ runs-on: ${{ inputs.runner }}
31
+ name: ${{ inputs.model_name }} accuracy
32
+ container:
33
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
34
+ env:
35
+ VLLM_USE_MODELSCOPE: True
36
+ # 1. If a version is specified (workflow_dispatch), run the accuracy test on the specified branch
37
+ # 2. If no version is given (labeled PR), run the accuracy test on the default ref:
38
+ # The branch, tag or SHA to checkout. When checking out the repository that
39
+ # triggered a workflow, this defaults to the reference or SHA for that event.
40
+ # Otherwise, uses the default branch.
41
+ GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
42
+
43
+ steps:
44
+ - name: Checkout repository
45
+ uses: actions/checkout@v4
46
+
47
+ - name: Set model name as output
48
+ id: set_output
49
+ run: |
50
+ echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
51
+
52
+ - name: Config mirrors
53
+ run: |
54
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
55
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
56
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
57
+ apt-get update -y
58
+ apt install git -y
59
+
60
+ - name: Install system dependencies
61
+ run: |
62
+ apt-get -y install `cat packages.txt`
63
+ apt-get -y install gcc g++ cmake libnuma-dev
64
+
65
+ - name: Checkout vllm-project/vllm repo
66
+ uses: actions/checkout@v4
67
+ with:
68
+ repository: vllm-project/vllm
69
+ ref: ${{ inputs.vllm }}
70
+ path: ./vllm-empty
71
+
72
+ - name: Install vllm-project/vllm from source
73
+ working-directory: ./vllm-empty
74
+ run: |
75
+ VLLM_TARGET_DEVICE=empty pip install -e .
76
+
77
+ - name: Resolve vllm-ascend version
78
+ run: |
79
+ VERSION_INPUT="${{ inputs.vllm-ascend }}"
80
+
81
+ if [[ "$VERSION_INPUT" == "latest" ]]; then
82
+ TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
83
+ LATEST_TAG=$(echo "$TAGS" | head -n1)
84
+ if [[ -z "$LATEST_TAG" ]]; then
85
+ RESOLVED_VERSION="main"
86
+ else
87
+ RESOLVED_VERSION="$LATEST_TAG"
88
+ fi
89
+ else
90
+ RESOLVED_VERSION="$VERSION_INPUT"
91
+ fi
92
+ echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
93
+
94
+ - name: Checkout vllm-project/vllm-ascend repo
95
+ uses: actions/checkout@v4
96
+ with:
97
+ repository: vllm-project/vllm-ascend
98
+ path: ./vllm-ascend
99
+ ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
100
+
101
+ - name: Install vllm-project/vllm-ascend
102
+ working-directory: ./vllm-ascend
103
+ env:
104
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
105
+ run: |
106
+ pip install -r requirements-dev.txt
107
+ pip install -v -e .
108
+
109
+ - name: Get vLLM commit hash and URL
110
+ working-directory: ./vllm-empty
111
+ run: |
112
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
113
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
114
+
115
+ - name: Get vLLM-Ascend commit hash and URL
116
+ working-directory: ./vllm-ascend
117
+ run: |
118
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
119
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
120
+
121
+ - name: Collect version info
122
+ run: |
123
+ for dir in /usr/local/Ascend/ascend-toolkit/*; do
124
+ dname=$(basename "$dir")
125
+ if [ "$dname" != "latest" ]; then
126
+ TOOLKIT_DIR="$dname"
127
+ break
128
+ fi
129
+ done
130
+ INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
131
+ GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
132
+ | head -n1 \
133
+ | cut -d'=' -f2 \
134
+ | tr -d '"')
135
+ {
136
+ echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
137
+ pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
138
+ pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
139
+ pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
140
+ } >> "$GITHUB_ENV"
141
+
142
+ - name: Run accuracy test
143
+ id: report
144
+ env:
145
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
146
+ VLLM_USE_MODELSCOPE: True
147
+ VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
148
+ VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
149
+ VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
150
+ VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
151
+ CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
152
+ TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
153
+ TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
154
+ run: |
155
+ model_base_name=$(basename ${{ inputs.model_name }})
156
+ markdown_name="${model_base_name}"
157
+ echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
158
+ mkdir -p ./benchmarks/accuracy
159
+ pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
160
+ --config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
161
+
162
+ - name: Generate step summary
163
+ if: ${{ always() }}
164
+ run: |
165
+ cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
166
+
167
+ - name: Upload Report
168
+ if: ${{ inputs.upload == true }}
169
+ uses: actions/upload-artifact@v4
170
+ with:
171
+ name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
172
+ path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
173
+ if-no-files-found: warn
174
+ retention-days: 90
175
+ overwrite: true
@@ -0,0 +1,199 @@
1
+ name: 'e2e test'
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ vllm:
7
+ required: true
8
+ type: string
9
+ runner:
10
+ required: true
11
+ type: string
12
+ image:
13
+ required: true
14
+ type: string
15
+ type:
16
+ required: true
17
+ type: string
18
+
19
+ jobs:
20
+ e2e:
21
+ name: singlecard
22
+ runs-on: ${{ inputs.runner }}-1
23
+ container:
24
+ image: ${{ inputs.image }}
25
+ env:
26
+ VLLM_LOGGING_LEVEL: ERROR
27
+ VLLM_USE_MODELSCOPE: True
28
+ steps:
29
+ - name: Check npu and CANN info
30
+ run: |
31
+ npu-smi info
32
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
33
+
34
+ - name: Config mirrors
35
+ run: |
36
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
37
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
38
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
39
+ apt-get update -y
40
+ apt install git -y
41
+
42
+ - name: Checkout vllm-project/vllm-ascend repo
43
+ uses: actions/checkout@v4
44
+
45
+ - name: Install system dependencies
46
+ run: |
47
+ apt-get -y install `cat packages.txt`
48
+ apt-get -y install gcc g++ cmake libnuma-dev
49
+
50
+ - name: Checkout vllm-project/vllm repo
51
+ uses: actions/checkout@v4
52
+ with:
53
+ repository: vllm-project/vllm
54
+ ref: ${{ inputs.vllm }}
55
+ path: ./vllm-empty
56
+ fetch-depth: 1
57
+
58
+ - name: Install vllm-project/vllm from source
59
+ working-directory: ./vllm-empty
60
+ run: |
61
+ VLLM_TARGET_DEVICE=empty pip install -e .
62
+
63
+ - name: Install vllm-project/vllm-ascend
64
+ env:
65
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
66
+ run: |
67
+ pip install -r requirements-dev.txt
68
+ pip install -v -e .
69
+
70
+ - name: Run vllm-project/vllm-ascend test
71
+ env:
72
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
73
+ VLLM_USE_MODELSCOPE: True
74
+ PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
75
+ if: ${{ inputs.type == 'light' }}
76
+ run: |
77
+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
78
+ pytest -sv tests/e2e/singlecard/test_quantization.py
79
+ pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
80
+
81
+ - name: Run e2e test
82
+ env:
83
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
84
+ VLLM_USE_MODELSCOPE: True
85
+ PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
86
+ if: ${{ inputs.type == 'full' }}
87
+ run: |
88
+ # Running the aclgraph tests in a single batch causes an AclmdlRICaptureBegin error, so we run
89
+ # each test file separately.
90
+
91
+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
92
+ pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
93
+ pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
94
+ pytest -sv tests/e2e/singlecard/test_bge_model.py
95
+ pytest -sv tests/e2e/singlecard/test_camem.py
96
+ pytest -sv tests/e2e/singlecard/test_chunked.py
97
+ pytest -sv tests/e2e/singlecard/test_embedding.py
98
+ pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
99
+ pytest -sv tests/e2e/singlecard/test_guided_decoding.py
100
+ pytest -sv tests/e2e/singlecard/test_ilama_lora.py
101
+ pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
102
+ pytest -sv tests/e2e/singlecard/test_quantization.py
103
+ pytest -sv tests/e2e/singlecard/test_sampler.py
104
+ pytest -sv tests/e2e/singlecard/test_vlm.py
105
+
106
+ # ------------------------------------ v1 spec decode test ------------------------------------ #
107
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
108
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
109
+ # FIXME: test_eagle_correctness fails with an OOM error
110
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
111
+
112
+ pytest -sv tests/e2e/singlecard/ops/
113
+
114
+ e2e-2-cards:
115
+ name: multicard
116
+ runs-on: ${{ inputs.runner }}-2
117
+ container:
118
+ image: ${{ inputs.image }}
119
+ env:
120
+ VLLM_LOGGING_LEVEL: ERROR
121
+ VLLM_USE_MODELSCOPE: True
122
+ steps:
123
+ - name: Check npu and CANN info
124
+ run: |
125
+ npu-smi info
126
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
127
+
128
+ - name: Config mirrors
129
+ run: |
130
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
131
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
132
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
133
+ apt-get update -y
134
+ apt install git -y
135
+
136
+ - name: Checkout vllm-project/vllm-ascend repo
137
+ uses: actions/checkout@v4
138
+
139
+ - name: Install system dependencies
140
+ run: |
141
+ apt-get -y install `cat packages.txt`
142
+ apt-get -y install gcc g++ cmake libnuma-dev
143
+
144
+ - name: Checkout vllm-project/vllm repo
145
+ uses: actions/checkout@v4
146
+ with:
147
+ repository: vllm-project/vllm
148
+ ref: ${{ inputs.vllm }}
149
+ path: ./vllm-empty
150
+ fetch-depth: 1
151
+
152
+ - name: Install vllm-project/vllm from source
153
+ working-directory: ./vllm-empty
154
+ run: |
155
+ VLLM_TARGET_DEVICE=empty pip install -e .
156
+
157
+ - name: Install vllm-project/vllm-ascend
158
+ env:
159
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
160
+ run: |
161
+ pip install -r requirements-dev.txt
162
+ pip install -v -e .
163
+
164
+ - name: Run vllm-project/vllm-ascend test (light)
165
+ env:
166
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
167
+ VLLM_USE_MODELSCOPE: True
168
+ if: ${{ inputs.type == 'light' }}
169
+ run: |
170
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
171
+
172
+ - name: Run vllm-project/vllm-ascend test (full)
173
+ env:
174
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
175
+ VLLM_USE_MODELSCOPE: True
176
+ if: ${{ inputs.type == 'full' }}
177
+ run: |
178
+ pytest -sv tests/e2e/multicard/test_data_parallel.py
179
+ pytest -sv tests/e2e/multicard/test_expert_parallel.py
180
+ pytest -sv tests/e2e/multicard/test_external_launcher.py
181
+ pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
182
+ pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
183
+ pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
184
+
185
+ # To avoid OOM, run each of these tests in its own pytest process.
186
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
187
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
188
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
189
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_new_version
190
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC_old_version
191
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
192
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
193
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
194
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
195
+
196
+ pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
197
+ pytest -sv tests/e2e/multicard/test_prefix_caching.py
198
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py
199
+ pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
@@ -0,0 +1,72 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ # This test will be triggered:
19
+ # - PR labeled with: 'accuracy-test' & 'ready-for-test'
20
+ name: ascend test / accuracy
21
+
22
+ on:
23
+ pull_request:
24
+ branches:
25
+ - 'main'
26
+ - '*-dev'
27
+ types: [ labeled, synchronize ]
28
+
29
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
30
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
31
+ # It's used to activate ascend-toolkit environment variables.
32
+ defaults:
33
+ run:
34
+ shell: bash -el {0}
35
+
36
+ # only cancel in-progress runs of the same workflow
37
+ concurrency:
38
+ group: ${{ github.workflow }}-${{ github.ref }}
39
+ cancel-in-progress: true
40
+
41
+ jobs:
42
+ run:
43
+ name: ""
44
+ strategy:
45
+ matrix:
46
+ # Only top series models should be listed in here
47
+ include:
48
+ - runner: a2-1
49
+ model_name: Qwen3-8B
50
+ - runner: a2-1
51
+ model_name: Qwen2.5-VL-7B-Instruct
52
+ - runner: a2-1
53
+ model_name: Qwen2-Audio-7B-Instruct
54
+ - runner: a2-2
55
+ model_name: Qwen3-30B-A3B
56
+ - runner: a2-2
57
+ model_name: Qwen3-VL-30B-A3B-Instruct
58
+ - runner: a2-2
59
+ model_name: DeepSeek-V2-Lite
60
+ fail-fast: false
61
+ # The test is triggered only when the PR carries both the 'accuracy-test' and 'ready-for-test' labels.
62
+ if: >-
63
+ ${{
64
+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
65
+ contains(github.event.pull_request.labels.*.name, 'ready-for-test')
66
+ }}
67
+ uses: ./.github/workflows/_accuracy_test.yaml
68
+ with:
69
+ vllm: v0.11.0
70
+ runner: linux-aarch64-${{ matrix.runner }}
71
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
72
+ model_name: ${{ matrix.model_name }}
@@ -0,0 +1,57 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: format / pr body
19
+
20
+ on:
21
+ # Update the PR description when a PR is opened or new commits are pushed
22
+ pull_request_target:
23
+ types: [opened, synchronize]
24
+ branches:
25
+ - 'main'
26
+
27
+ permissions:
28
+ pull-requests: write
29
+
30
+ jobs:
31
+ update-description:
32
+ name: update vLLM version
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+
37
+ - name: Get vLLM version
38
+ run: |
39
+ VLLM_COMMIT=v0.11.0
40
+ echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
41
+
42
+ - name: Checkout repository
43
+ uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
44
+
45
+ - name: Set up Python
46
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
47
+
48
+ - name: Get vLLM release version
49
+ run: |
50
+ VLLM_VERSION=$(python3 docs/source/conf.py | jq -r .ci_vllm_version)  # -r prints the raw string, no quote stripping needed
51
+ echo "VLLM_VERSION=$VLLM_VERSION" >> "$GITHUB_ENV"
52
+
53
+ - name: Update PR description
54
+ env:
55
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
56
+ run: |
57
+ bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"
@@ -0,0 +1,118 @@
1
+ name: 'e2e test / multi-dp'
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 */4 * * *"
6
+ workflow_dispatch:
7
+
8
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
9
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
10
+ # It's used to activate ascend-toolkit environment variables.
11
+ defaults:
12
+ run:
13
+ shell: bash -el {0}
14
+
15
+ # only cancel in-progress runs of the same workflow
16
+ # and ignore the lint / 8 cards test type
17
+ concurrency:
18
+ group: ${{ github.workflow }}-${{ github.ref }}
19
+ cancel-in-progress: true
20
+
21
+ jobs:
22
+ e2e:
23
+ # This is a runner with no NPU for k8s controller
24
+ runs-on: linux-aarch64-a3-0
25
+ container:
26
+ image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
27
+ env:
28
+ KUBECONFIG: /tmp/kubeconfig
29
+ KUBECTL: /root/.cache/.kube/kubectl
30
+ NAMESPACE: vllm-project
31
+ LEADER_POD: vllm-0
32
+ steps:
33
+ - name: Install system dependencies
34
+ run: |
35
+ # Point apt and pip at the Tsinghua mirrors
36
+ sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
37
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
38
+
39
+ apt-get update -y && apt-get install -y git curl
40
+ # base64 wraps its output at 76 columns by default; -w0 keeps the Basic-auth header value on a single line
41
+ TOKEN=$(printf '%s' "x-access-token:${{ secrets.ADMIN_PTA }}" | base64 -w0)
42
+ git config --global http.https://gh-proxy.test.osinfra.cn/.extraheader "AUTHORIZATION: basic $TOKEN"
43
+
44
+ - name: Install kubectl
45
+ run: |
46
+ install -o root -g root -m 0755 "$KUBECTL" /usr/local/bin/kubectl
47
+
48
+ # Decode the kubeconfig secret; umask 077 (scoped to a subshell) keeps the credentials file owner-only
49
+ (umask 077; printf '%s' "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > "$KUBECONFIG")
50
+
51
+ - name: Checkout code
52
+ uses: actions/checkout@v4
53
+
54
+ - name: Prepare scripts
55
+ run: |
56
+ # prepare for lws entrypoint scripts
57
+ install -D tests/e2e/multi_node/scripts/run.sh /root/.cache/tests/run.sh
58
+
59
+ - name: Launch cluster
60
+ run: |
61
+ kubectl apply -f tests/e2e/multi_node/scripts/lws.yaml
62
+
63
+ - name: Waiting for pod ready
64
+ run: |
65
+ echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
66
+
67
+ while true; do
68
+ # Read the pod-level Ready condition, which is "True" only once every container is ready.
69
+ # "|| true": a failed lookup (e.g. the pod is not created yet) must keep polling instead of aborting under "bash -e".
70
+ READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' || true)
71
+ if [[ "$READY_STATUS" == "True" ]]; then
72
+ echo "✅ Pod [$LEADER_POD] is Ready!"
73
+ break
74
+ else
75
+ echo "Pod [$LEADER_POD] not ready, waiting..."
76
+ sleep 3
77
+ fi
78
+ done
79
+
80
+ - name: Stream logs and monitor pod health
81
+ run: |
82
+ set -euo pipefail
83
+
84
+ echo "🚀 Start streaming logs for Pod [$LEADER_POD] ..."
85
+ kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" &
86
+ LOG_PID=$!
87
+
88
+ echo "Start monitoring Pod [$LEADER_POD] status ..."
89
+ # Poll in the foreground: "exit 1" inside a backgrounded loop only ends that subshell and never fails the step.
90
+ while true; do
91
+ STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}')
92
+ if [[ "$STATUS" != "Running" && "$STATUS" != "Succeeded" ]]; then
93
+ echo "❌ Pod [$LEADER_POD] exited abnormally with status: $STATUS"
94
+ kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
95
+ kubectl logs "$LEADER_POD" -n "$NAMESPACE" --previous --all-containers || true
96
+ kill "$LOG_PID" 2>/dev/null || true
97
+ exit 1
98
+ fi
99
+ # The phase was checked just above, so it is safe to stop once the log stream has ended.
100
+ kill -0 "$LOG_PID" 2>/dev/null || break
101
+ sleep 5
102
+ done
103
+ wait "$LOG_PID" || true
104
+
105
+ - name: Generate summary
106
+ if: always()
107
+ run: |
108
+ if [[ ! -f /root/.cache/test_summary.md ]]; then
109
+ echo "No summary file found." >> "$GITHUB_STEP_SUMMARY"
110
+ else
111
+ cat /root/.cache/test_summary.md >> "$GITHUB_STEP_SUMMARY"
112
+ fi
113
+
114
+ - name: Post process
115
+ if: always()
116
+ run: |
117
+ kubectl get pods -n "$NAMESPACE" || true  # diagnostic only; a failure here must not skip the cleanup below
118
+ kubectl delete -f tests/e2e/multi_node/scripts/lws.yaml