vllm-ascend 0.12.0rc1__tar.gz → 0.13.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1197)
  1. vllm_ascend-0.13.0rc1/.github/workflows/_e2e_nightly_multi_node.yaml +299 -0
  2. vllm_ascend-0.13.0rc1/.github/workflows/_e2e_nightly_single_node.yaml +150 -0
  3. vllm_ascend-0.13.0rc1/.github/workflows/_e2e_nightly_single_node_models.yaml +232 -0
  4. vllm_ascend-0.13.0rc1/.github/workflows/_e2e_test.yaml +303 -0
  5. vllm_ascend-0.13.0rc1/.github/workflows/_pr_image_build.yaml +177 -0
  6. vllm_ascend-0.13.0rc1/.github/workflows/_pre_commit.yml +42 -0
  7. vllm_ascend-0.13.0rc1/.github/workflows/bot_merge_conflict.yaml +20 -0
  8. vllm_ascend-0.13.0rc1/.github/workflows/bot_pr_create.yaml +121 -0
  9. vllm_ascend-0.13.0rc1/.github/workflows/labled_doctest.yaml +87 -0
  10. vllm_ascend-0.13.0rc1/.github/workflows/labled_test_310.yaml +113 -0
  11. vllm_ascend-0.13.0rc1/.github/workflows/nightly_test_a2.yaml +138 -0
  12. vllm_ascend-0.13.0rc1/.github/workflows/nightly_test_a3.yaml +158 -0
  13. vllm_ascend-0.13.0rc1/.github/workflows/pr_tag_image_build_and_push.yaml +85 -0
  14. vllm_ascend-0.13.0rc1/.github/workflows/pr_tag_release_code_and_wheel.yml +167 -0
  15. vllm_ascend-0.13.0rc1/.github/workflows/pr_test_full.yaml +85 -0
  16. vllm_ascend-0.13.0rc1/.github/workflows/pr_test_light.yaml +173 -0
  17. vllm_ascend-0.13.0rc1/.github/workflows/schedule_nightly_image_build.yaml +59 -0
  18. vllm_ascend-0.13.0rc1/.github/workflows/schedule_test_benchmarks.yaml +203 -0
  19. vllm_ascend-0.13.0rc1/.github/workflows/schedule_test_vllm_main.yaml +39 -0
  20. vllm_ascend-0.13.0rc1/.pre-commit-config.yaml +137 -0
  21. vllm_ascend-0.13.0rc1/CMakeLists.txt +140 -0
  22. vllm_ascend-0.13.0rc1/Dockerfile +73 -0
  23. vllm_ascend-0.13.0rc1/Dockerfile.310p +66 -0
  24. vllm_ascend-0.13.0rc1/Dockerfile.310p.openEuler +62 -0
  25. vllm_ascend-0.13.0rc1/Dockerfile.a3 +72 -0
  26. vllm_ascend-0.13.0rc1/Dockerfile.a3.openEuler +75 -0
  27. vllm_ascend-0.13.0rc1/Dockerfile.openEuler +75 -0
  28. vllm_ascend-0.13.0rc1/PKG-INFO +148 -0
  29. vllm_ascend-0.13.0rc1/README.md +93 -0
  30. vllm_ascend-0.13.0rc1/README.zh.md +92 -0
  31. vllm_ascend-0.13.0rc1/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +133 -0
  32. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +103 -0
  33. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +52 -0
  34. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +88 -0
  35. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +362 -0
  36. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +2030 -0
  37. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +846 -0
  38. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +1124 -0
  39. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +34 -0
  40. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +440 -0
  41. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +21 -0
  42. vllm_ascend-0.13.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +75 -0
  43. vllm_ascend-0.13.0rc1/csrc/kernels/bgmv_expand.cpp +369 -0
  44. vllm_ascend-0.13.0rc1/csrc/kernels/bgmv_shrink.cpp +252 -0
  45. vllm_ascend-0.13.0rc1/csrc/kernels/sgmv_expand.cpp +389 -0
  46. vllm_ascend-0.13.0rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
  47. vllm_ascend-0.13.0rc1/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +89 -0
  48. vllm_ascend-0.13.0rc1/csrc/torch_binding.cpp +1260 -0
  49. vllm_ascend-0.13.0rc1/csrc/torch_binding_meta.cpp +320 -0
  50. vllm_ascend-0.13.0rc1/docs/source/_templates/sections/header.html +58 -0
  51. vllm_ascend-0.13.0rc1/docs/source/assets/cp/blocktable.png +0 -0
  52. vllm_ascend-0.13.0rc1/docs/source/assets/cp/chunkedprefill.png +0 -0
  53. vllm_ascend-0.13.0rc1/docs/source/assets/cp/dcp-decode.png +0 -0
  54. vllm_ascend-0.13.0rc1/docs/source/assets/cp/dcp-prefill.png +0 -0
  55. vllm_ascend-0.13.0rc1/docs/source/assets/cp/head-tail-style.png +0 -0
  56. vllm_ascend-0.13.0rc1/docs/source/assets/cp/overview.png +0 -0
  57. vllm_ascend-0.13.0rc1/docs/source/assets/cp/pcp-decode.png +0 -0
  58. vllm_ascend-0.13.0rc1/docs/source/assets/cp/pcp-prefill.png +0 -0
  59. vllm_ascend-0.13.0rc1/docs/source/community/contributors.md +266 -0
  60. vllm_ascend-0.13.0rc1/docs/source/community/versioning_policy.md +156 -0
  61. vllm_ascend-0.13.0rc1/docs/source/conf.py +145 -0
  62. vllm_ascend-0.13.0rc1/docs/source/developer_guide/contribution/testing.md +288 -0
  63. vllm_ascend-0.13.0rc1/docs/source/developer_guide/evaluation/index.md +10 -0
  64. vllm_ascend-0.13.0rc1/docs/source/developer_guide/feature_guide/context_parallel.md +119 -0
  65. vllm_ascend-0.13.0rc1/docs/source/developer_guide/feature_guide/index.md +17 -0
  66. vllm_ascend-0.13.0rc1/docs/source/developer_guide/feature_guide/patch.md +75 -0
  67. vllm_ascend-0.13.0rc1/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +516 -0
  68. vllm_ascend-0.13.0rc1/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +268 -0
  69. vllm_ascend-0.13.0rc1/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +239 -0
  70. vllm_ascend-0.13.0rc1/docs/source/faqs.md +257 -0
  71. vllm_ascend-0.13.0rc1/docs/source/index.md +71 -0
  72. vllm_ascend-0.13.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
  73. vllm_ascend-0.13.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
  74. vllm_ascend-0.13.0rc1/docs/source/tutorials/DeepSeek-R1.md +309 -0
  75. vllm_ascend-0.13.0rc1/docs/source/tutorials/DeepSeek-V3.1.md +701 -0
  76. vllm_ascend-0.13.0rc1/docs/source/tutorials/DeepSeek-V3.2.md +676 -0
  77. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen2.5-7B.md +178 -0
  78. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen2.5-Omni.md +209 -0
  79. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3-235B-A22B.md +630 -0
  80. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3-32B-W4A4.md +143 -0
  81. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3-8B-W4A8.md +138 -0
  82. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3-Dense.md +375 -0
  83. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3-Next.md +213 -0
  84. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3-VL-235B-A22B-Instruct.md +273 -0
  85. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3_embedding.md +118 -0
  86. vllm_ascend-0.13.0rc1/docs/source/tutorials/Qwen3_reranker.md +188 -0
  87. vllm_ascend-0.13.0rc1/docs/source/tutorials/index.md +29 -0
  88. vllm_ascend-0.13.0rc1/docs/source/tutorials/long_sequence_context_parallel_multi_node.md +372 -0
  89. vllm_ascend-0.13.0rc1/docs/source/tutorials/long_sequence_context_parallel_single_node.md +174 -0
  90. vllm_ascend-0.13.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +926 -0
  91. vllm_ascend-0.13.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +264 -0
  92. vllm_ascend-0.13.0rc1/docs/source/user_guide/configuration/additional_config.md +111 -0
  93. vllm_ascend-0.13.0rc1/docs/source/user_guide/deployment_guide/index.md +7 -0
  94. vllm_ascend-0.13.0rc1/docs/source/user_guide/deployment_guide/using_volcano_kthena.md +433 -0
  95. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/Fine_grained_TP.md +103 -0
  96. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/context_parallel.md +88 -0
  97. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/graph_mode.md +82 -0
  98. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/index.md +24 -0
  99. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/kv_pool.md +346 -0
  100. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/large_scale_ep.md +504 -0
  101. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/speculative_decoding.md +146 -0
  102. vllm_ascend-0.13.0rc1/docs/source/user_guide/feature_guide/ucm_deployment.md +141 -0
  103. vllm_ascend-0.13.0rc1/docs/source/user_guide/release_notes.md +890 -0
  104. vllm_ascend-0.13.0rc1/docs/source/user_guide/support_matrix/supported_features.md +48 -0
  105. vllm_ascend-0.13.0rc1/docs/source/user_guide/support_matrix/supported_models.md +82 -0
  106. vllm_ascend-0.13.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +586 -0
  107. vllm_ascend-0.13.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +897 -0
  108. vllm_ascend-0.13.0rc1/examples/eplb/eplb_strategy.py +183 -0
  109. vllm_ascend-0.13.0rc1/examples/external_online_dp/dp_load_balance_proxy_server.py +405 -0
  110. vllm_ascend-0.13.0rc1/examples/offline_disaggregated_prefill_npu.py +168 -0
  111. vllm_ascend-0.13.0rc1/examples/offline_embed.py +58 -0
  112. vllm_ascend-0.13.0rc1/mypy.ini +34 -0
  113. vllm_ascend-0.13.0rc1/pyproject.toml +43 -0
  114. vllm_ascend-0.13.0rc1/requirements-dev.txt +25 -0
  115. vllm_ascend-0.13.0rc1/requirements.txt +34 -0
  116. vllm_ascend-0.13.0rc1/setup.py +532 -0
  117. vllm_ascend-0.13.0rc1/tests/e2e/conftest.py +794 -0
  118. vllm_ascend-0.13.0rc1/tests/e2e/models/configs/Llama-3.2-3B-Instruct.yaml +10 -0
  119. vllm_ascend-0.13.0rc1/tests/e2e/models/configs/Qwen3-Omni-30B-A3B-Instruct.yaml +11 -0
  120. vllm_ascend-0.13.0rc1/tests/e2e/models/configs/accuracy.txt +15 -0
  121. vllm_ascend-0.13.0rc1/tests/e2e/models/configs/gemma-3-4b-it.yaml +14 -0
  122. vllm_ascend-0.13.0rc1/tests/e2e/models/configs/llava-onevision-qwen2-0.5b-ov-hf.yaml +10 -0
  123. vllm_ascend-0.13.0rc1/tests/e2e/multicard/long_sequence/test_accuracy.py +101 -0
  124. vllm_ascend-0.13.0rc1/tests/e2e/multicard/long_sequence/test_basic.py +273 -0
  125. vllm_ascend-0.13.0rc1/tests/e2e/multicard/long_sequence/test_mtp.py +165 -0
  126. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_aclgraph_capture_replay.py +237 -0
  127. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_data_parallel.py +81 -0
  128. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_data_parallel_tp2.py +52 -0
  129. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_expert_parallel.py +34 -0
  130. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_external_launcher.py +239 -0
  131. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_full_graph_mode.py +119 -0
  132. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_ilama_lora_tp2.py +25 -0
  133. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_offline_inference_distributed.py +239 -0
  134. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_offline_weight_load.py +74 -0
  135. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_pipeline_parallel.py +48 -0
  136. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_prefix_caching.py +85 -0
  137. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_quantization.py +48 -0
  138. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_qwen3_moe.py +131 -0
  139. vllm_ascend-0.13.0rc1/tests/e2e/multicard/test_qwen3_next.py +132 -0
  140. vllm_ascend-0.13.0rc1/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py +140 -0
  141. vllm_ascend-0.13.0rc1/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py +107 -0
  142. vllm_ascend-0.13.0rc1/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py +99 -0
  143. vllm_ascend-0.13.0rc1/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py +98 -0
  144. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py +117 -0
  145. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8_eplb.py +115 -0
  146. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_deepseek_v3_2_exp_w8a8.py +105 -0
  147. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_glm4_5.py +111 -0
  148. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py +110 -0
  149. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +102 -0
  150. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +105 -0
  151. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwen3_235b_w8a8.py +101 -0
  152. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwen3_30b_w8a8.py +92 -0
  153. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwen3_32b_int8.py +129 -0
  154. vllm_ascend-0.13.0rc1/tests/e2e/nightly/models/test_qwq_32b.py +116 -0
  155. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml +57 -0
  156. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml +195 -0
  157. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml +194 -0
  158. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +111 -0
  159. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A22B-A2.yaml +72 -0
  160. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8-EPLB.yaml +91 -0
  161. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml +87 -0
  162. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/config/utils.py +129 -0
  163. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/scripts/run.sh +158 -0
  164. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multi_node/test_multi_node.py +130 -0
  165. vllm_ascend-0.13.0rc1/tests/e2e/nightly/multicard_ops/test_dispatch_gmm_combine_decode.py +439 -0
  166. vllm_ascend-0.13.0rc1/tests/e2e/nightly/ops/test_fused_moe.py +338 -0
  167. vllm_ascend-0.13.0rc1/tests/e2e/nightly/ops/triton/test_causal_conv1d.py +361 -0
  168. vllm_ascend-0.13.0rc1/tests/e2e/nightly/ops/triton/test_l2norm.py +34 -0
  169. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +362 -0
  170. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/pooling/test_classification.py +34 -0
  171. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/pooling/test_embedding.py +99 -0
  172. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/pooling/test_scoring.py +187 -0
  173. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +175 -0
  174. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +449 -0
  175. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_aclgraph_accuracy.py +311 -0
  176. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_async_scheduling.py +240 -0
  177. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_camem.py +60 -0
  178. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +76 -0
  179. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_cpu_offloading.py +178 -0
  180. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_fused_sigmoid_gating_delta_rule.py +65 -0
  181. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_guided_decoding.py +155 -0
  182. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_ilama_lora.py +64 -0
  183. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_models.py +46 -0
  184. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +108 -0
  185. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_quantization.py +79 -0
  186. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_sampler.py +70 -0
  187. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_vlm.py +98 -0
  188. vllm_ascend-0.13.0rc1/tests/e2e/singlecard/test_xlite.py +126 -0
  189. vllm_ascend-0.13.0rc1/tests/e2e/vllm_interface/singlecard/test_sampler.py +37 -0
  190. vllm_ascend-0.13.0rc1/tests/ut/attention/test_attention_cp.py +824 -0
  191. vllm_ascend-0.13.0rc1/tests/ut/attention/test_attention_v1.py +393 -0
  192. vllm_ascend-0.13.0rc1/tests/ut/attention/test_mla_cp.py +1061 -0
  193. vllm_ascend-0.13.0rc1/tests/ut/attention/test_mla_v1.py +1198 -0
  194. vllm_ascend-0.13.0rc1/tests/ut/attention/test_sfa_v1.py +192 -0
  195. vllm_ascend-0.13.0rc1/tests/ut/compilation/test_acl_graph.py +850 -0
  196. vllm_ascend-0.13.0rc1/tests/ut/conftest.py +39 -0
  197. vllm_ascend-0.13.0rc1/tests/ut/distributed/test_parallel_state.py +82 -0
  198. vllm_ascend-0.13.0rc1/tests/ut/kv_connector/test_mooncake_connector.py +1341 -0
  199. vllm_ascend-0.13.0rc1/tests/ut/kv_connector/utils.py +210 -0
  200. vllm_ascend-0.13.0rc1/tests/ut/ops/test_activation.py +76 -0
  201. vllm_ascend-0.13.0rc1/tests/ut/ops/test_fused_moe.py +593 -0
  202. vllm_ascend-0.13.0rc1/tests/ut/ops/test_layernorm.py +57 -0
  203. vllm_ascend-0.13.0rc1/tests/ut/ops/test_linear.py +160 -0
  204. vllm_ascend-0.13.0rc1/tests/ut/ops/test_moe_comm_method.py +225 -0
  205. vllm_ascend-0.13.0rc1/tests/ut/ops/test_moe_mlp.py +51 -0
  206. vllm_ascend-0.13.0rc1/tests/ut/ops/test_prepare_finalize.py +223 -0
  207. vllm_ascend-0.13.0rc1/tests/ut/ops/test_rotary_embedding.py +453 -0
  208. vllm_ascend-0.13.0rc1/tests/ut/ops/test_token_dispatcher.py +535 -0
  209. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_quant_config.py +158 -0
  210. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_utils.py +50 -0
  211. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +216 -0
  212. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +306 -0
  213. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_w8a16.py +91 -0
  214. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_w8a8.py +218 -0
  215. vllm_ascend-0.13.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +106 -0
  216. vllm_ascend-0.13.0rc1/tests/ut/sample/test_rejection_sampler.py +217 -0
  217. vllm_ascend-0.13.0rc1/tests/ut/sample/test_sampler.py +11 -0
  218. vllm_ascend-0.13.0rc1/tests/ut/spec_decode/test_eagle_proposer.py +325 -0
  219. vllm_ascend-0.13.0rc1/tests/ut/spec_decode/test_mtp_proposer.py +452 -0
  220. vllm_ascend-0.13.0rc1/tests/ut/test_ascend_config.py +93 -0
  221. vllm_ascend-0.13.0rc1/tests/ut/test_platform.py +583 -0
  222. vllm_ascend-0.13.0rc1/tests/ut/test_utils.py +301 -0
  223. vllm_ascend-0.13.0rc1/tests/ut/worker/test_model_runner_v1.py +473 -0
  224. vllm_ascend-0.13.0rc1/tests/ut/worker/test_worker_v1.py +1186 -0
  225. vllm_ascend-0.13.0rc1/tools/aisbench.py +330 -0
  226. vllm_ascend-0.13.0rc1/tools/collect_user_first_contribution.sh +83 -0
  227. vllm_ascend-0.13.0rc1/tools/format_contributors.py +98 -0
  228. vllm_ascend-0.13.0rc1/tools/install_flash_infer_attention_score_ops_a2.sh +37 -0
  229. vllm_ascend-0.13.0rc1/tools/install_flash_infer_attention_score_ops_a3.sh +36 -0
  230. vllm_ascend-0.13.0rc1/tools/send_request.py +37 -0
  231. vllm_ascend-0.13.0rc1/tools/vllm_bench.py +125 -0
  232. vllm_ascend-0.13.0rc1/vllm_ascend/_version.py +34 -0
  233. vllm_ascend-0.13.0rc1/vllm_ascend/ascend_config.py +277 -0
  234. vllm_ascend-0.13.0rc1/vllm_ascend/ascend_forward_context.py +265 -0
  235. vllm_ascend-0.13.0rc1/vllm_ascend/attention/attention_cp.py +900 -0
  236. vllm_ascend-0.13.0rc1/vllm_ascend/attention/attention_v1.py +715 -0
  237. vllm_ascend-0.13.0rc1/vllm_ascend/attention/common_cp.py +40 -0
  238. vllm_ascend-0.13.0rc1/vllm_ascend/attention/mla_cp.py +1088 -0
  239. vllm_ascend-0.13.0rc1/vllm_ascend/attention/mla_v1.py +1490 -0
  240. vllm_ascend-0.13.0rc1/vllm_ascend/attention/sfa_v1.py +979 -0
  241. vllm_ascend-0.13.0rc1/vllm_ascend/attention/utils.py +323 -0
  242. vllm_ascend-0.13.0rc1/vllm_ascend/compilation/acl_graph.py +556 -0
  243. vllm_ascend-0.13.0rc1/vllm_ascend/compilation/compiler_interface.py +137 -0
  244. vllm_ascend-0.13.0rc1/vllm_ascend/compilation/graph_fusion_pass_manager.py +56 -0
  245. vllm_ascend-0.13.0rc1/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +315 -0
  246. vllm_ascend-0.13.0rc1/vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py +290 -0
  247. vllm_ascend-0.13.0rc1/vllm_ascend/core/scheduler_dynamic_batch.py +597 -0
  248. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/__init__.py +44 -0
  249. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/kvpool/ascend_store_connector.py +192 -0
  250. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +71 -0
  251. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/kvpool/config_data.py +381 -0
  252. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/kvpool/kv_transfer.py +343 -0
  253. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/kvpool/pool_scheduler.py +327 -0
  254. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/kvpool/pool_worker.py +547 -0
  255. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/mooncake_connector.py +1541 -0
  256. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/parallel_state.py +352 -0
  257. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/ucm_connector.py +237 -0
  258. vllm_ascend-0.13.0rc1/vllm_ascend/distributed/utils.py +92 -0
  259. vllm_ascend-0.13.0rc1/vllm_ascend/envs.py +153 -0
  260. vllm_ascend-0.13.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +768 -0
  261. vllm_ascend-0.13.0rc1/vllm_ascend/eplb/core/policy/policy_flashlb.py +648 -0
  262. vllm_ascend-0.13.0rc1/vllm_ascend/eplb/utils.py +89 -0
  263. vllm_ascend-0.13.0rc1/vllm_ascend/flash_common3_context.py +42 -0
  264. vllm_ascend-0.13.0rc1/vllm_ascend/kv_offload/npu.py +64 -0
  265. vllm_ascend-0.13.0rc1/vllm_ascend/lora/punica_npu.py +352 -0
  266. vllm_ascend-0.13.0rc1/vllm_ascend/ops/__init__.py +62 -0
  267. vllm_ascend-0.13.0rc1/vllm_ascend/ops/fused_moe/experts_selector.py +306 -0
  268. vllm_ascend-0.13.0rc1/vllm_ascend/ops/fused_moe/fused_moe.py +504 -0
  269. vllm_ascend-0.13.0rc1/vllm_ascend/ops/fused_moe/moe_comm_method.py +328 -0
  270. vllm_ascend-0.13.0rc1/vllm_ascend/ops/fused_moe/moe_mlp.py +354 -0
  271. vllm_ascend-0.13.0rc1/vllm_ascend/ops/fused_moe/prepare_finalize.py +472 -0
  272. vllm_ascend-0.13.0rc1/vllm_ascend/ops/fused_moe/token_dispatcher.py +687 -0
  273. vllm_ascend-0.13.0rc1/vllm_ascend/ops/layernorm.py +97 -0
  274. vllm_ascend-0.13.0rc1/vllm_ascend/ops/linear.py +470 -0
  275. vllm_ascend-0.13.0rc1/vllm_ascend/ops/linear_op.py +715 -0
  276. vllm_ascend-0.13.0rc1/vllm_ascend/ops/mm_encoder_attention.py +146 -0
  277. vllm_ascend-0.13.0rc1/vllm_ascend/ops/register_custom_ops.py +371 -0
  278. vllm_ascend-0.13.0rc1/vllm_ascend/ops/rotary_embedding.py +584 -0
  279. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/activation/swiglu_quant.py +120 -0
  280. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/fla/chunk.py +226 -0
  281. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py +118 -0
  282. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/fla/l2norm.py +70 -0
  283. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/fla/sigmoid_gating.py +395 -0
  284. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/fused_gdn_gating.py +118 -0
  285. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py +305 -0
  286. vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/mamba/causal_conv1d.py +722 -0
  287. vllm_ascend-0.13.0rc1/vllm_ascend/patch/__init__.py +291 -0
  288. vllm_ascend-0.13.0rc1/vllm_ascend/patch/platform/__init__.py +32 -0
  289. vllm_ascend-0.13.0rc1/vllm_ascend/patch/platform/patch_balance_schedule.py +681 -0
  290. vllm_ascend-0.13.0rc1/vllm_ascend/patch/platform/patch_compile_backend.py +235 -0
  291. vllm_ascend-0.13.0rc1/vllm_ascend/patch/platform/patch_ec_connector.py +31 -0
  292. vllm_ascend-0.13.0rc1/vllm_ascend/patch/worker/__init__.py +35 -0
  293. vllm_ascend-0.13.0rc1/vllm_ascend/patch/worker/patch_module.py +36 -0
  294. vllm_ascend-0.13.0rc1/vllm_ascend/patch/worker/patch_qwen3_next.py +343 -0
  295. vllm_ascend-0.13.0rc1/vllm_ascend/patch/worker/patch_rejection_sampler.py +11 -0
  296. vllm_ascend-0.13.0rc1/vllm_ascend/platform.py +411 -0
  297. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/quant_config.py +554 -0
  298. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/utils.py +115 -0
  299. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +191 -0
  300. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/w4a8_dynamic.py +488 -0
  301. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/w8a16.py +89 -0
  302. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/w8a8.py +193 -0
  303. vllm_ascend-0.13.0rc1/vllm_ascend/quantization/w8a8_dynamic.py +332 -0
  304. vllm_ascend-0.13.0rc1/vllm_ascend/sample/rejection_sampler.py +859 -0
  305. vllm_ascend-0.13.0rc1/vllm_ascend/sample/sampler.py +126 -0
  306. vllm_ascend-0.13.0rc1/vllm_ascend/spec_decode/eagle_proposer.py +809 -0
  307. vllm_ascend-0.13.0rc1/vllm_ascend/spec_decode/interface.py +53 -0
  308. vllm_ascend-0.13.0rc1/vllm_ascend/spec_decode/mtp_proposer.py +1300 -0
  309. vllm_ascend-0.13.0rc1/vllm_ascend/spec_decode/ngram_proposer.py +72 -0
  310. vllm_ascend-0.13.0rc1/vllm_ascend/spec_decode/suffix_proposer.py +45 -0
  311. vllm_ascend-0.13.0rc1/vllm_ascend/utils.py +1117 -0
  312. vllm_ascend-0.13.0rc1/vllm_ascend/worker/__init__.py +0 -0
  313. vllm_ascend-0.13.0rc1/vllm_ascend/worker/block_table.py +329 -0
  314. vllm_ascend-0.13.0rc1/vllm_ascend/worker/model_runner_v1.py +3488 -0
  315. vllm_ascend-0.13.0rc1/vllm_ascend/worker/npu_input_batch.py +249 -0
  316. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/__init__.py +0 -0
  317. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/aclgraph_utils.py +71 -0
  318. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/attn_utils.py +159 -0
  319. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/input_batch.py +37 -0
  320. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/model_runner.py +346 -0
  321. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/states.py +88 -0
  322. vllm_ascend-0.13.0rc1/vllm_ascend/worker/v2/utils.py +33 -0
  323. vllm_ascend-0.13.0rc1/vllm_ascend/worker/worker.py +501 -0
  324. vllm_ascend-0.13.0rc1/vllm_ascend/xlite/__init__.py +0 -0
  325. vllm_ascend-0.13.0rc1/vllm_ascend/xlite/xlite.py +291 -0
  326. vllm_ascend-0.13.0rc1/vllm_ascend/xlite/xlite_worker.py +26 -0
  327. vllm_ascend-0.13.0rc1/vllm_ascend.egg-info/PKG-INFO +148 -0
  328. vllm_ascend-0.13.0rc1/vllm_ascend.egg-info/SOURCES.txt +916 -0
  329. vllm_ascend-0.13.0rc1/vllm_ascend.egg-info/requires.txt +24 -0
  330. vllm_ascend-0.12.0rc1/.github/workflows/_e2e_nightly_multi_node.yaml +0 -267
  331. vllm_ascend-0.12.0rc1/.github/workflows/_e2e_nightly_single_node.yaml +0 -130
  332. vllm_ascend-0.12.0rc1/.github/workflows/_e2e_nightly_single_node_models.yaml +0 -232
  333. vllm_ascend-0.12.0rc1/.github/workflows/_e2e_test.yaml +0 -286
  334. vllm_ascend-0.12.0rc1/.github/workflows/_nightly_image_build.yaml +0 -59
  335. vllm_ascend-0.12.0rc1/.github/workflows/_pre_commit.yml +0 -42
  336. vllm_ascend-0.12.0rc1/.github/workflows/image_build_and_push.yaml +0 -528
  337. vllm_ascend-0.12.0rc1/.github/workflows/label_merge_conflict.yml +0 -21
  338. vllm_ascend-0.12.0rc1/.github/workflows/nightly_benchmarks.yaml +0 -203
  339. vllm_ascend-0.12.0rc1/.github/workflows/pr_create.yaml +0 -121
  340. vllm_ascend-0.12.0rc1/.github/workflows/release_code_and_wheel.yml +0 -167
  341. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_doctest.yaml +0 -87
  342. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_310p.yaml +0 -116
  343. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +0 -39
  344. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_nightly_a2.yaml +0 -136
  345. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_nightly_a3.yaml +0 -158
  346. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_pr_full.yaml +0 -85
  347. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_pr_light.yaml +0 -167
  348. vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_report.yaml +0 -172
  349. vllm_ascend-0.12.0rc1/.pre-commit-config.yaml +0 -137
  350. vllm_ascend-0.12.0rc1/CMakeLists.txt +0 -136
  351. vllm_ascend-0.12.0rc1/Dockerfile +0 -69
  352. vllm_ascend-0.12.0rc1/Dockerfile.310p +0 -61
  353. vllm_ascend-0.12.0rc1/Dockerfile.310p.openEuler +0 -58
  354. vllm_ascend-0.12.0rc1/Dockerfile.a3 +0 -68
  355. vllm_ascend-0.12.0rc1/Dockerfile.a3.openEuler +0 -71
  356. vllm_ascend-0.12.0rc1/Dockerfile.openEuler +0 -71
  357. vllm_ascend-0.12.0rc1/PKG-INFO +0 -146
  358. vllm_ascend-0.12.0rc1/README.md +0 -92
  359. vllm_ascend-0.12.0rc1/README.zh.md +0 -91
  360. vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +0 -134
  361. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +0 -101
  362. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +0 -51
  363. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +0 -83
  364. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +0 -339
  365. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +0 -1990
  366. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +0 -814
  367. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +0 -1072
  368. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +0 -33
  369. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +0 -436
  370. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +0 -18
  371. vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +0 -74
  372. vllm_ascend-0.12.0rc1/csrc/kernels/bgmv_expand.cpp +0 -369
  373. vllm_ascend-0.12.0rc1/csrc/kernels/bgmv_shrink.cpp +0 -252
  374. vllm_ascend-0.12.0rc1/csrc/kernels/sgmv_expand.cpp +0 -389
  375. vllm_ascend-0.12.0rc1/csrc/kernels/sgmv_shrink.cpp +0 -275
  376. vllm_ascend-0.12.0rc1/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +0 -89
  377. vllm_ascend-0.12.0rc1/csrc/torch_binding.cpp +0 -1257
  378. vllm_ascend-0.12.0rc1/csrc/torch_binding_meta.cpp +0 -319
  379. vllm_ascend-0.12.0rc1/docs/source/_templates/sections/header.html +0 -58
  380. vllm_ascend-0.12.0rc1/docs/source/community/contributors.md +0 -171
  381. vllm_ascend-0.12.0rc1/docs/source/community/versioning_policy.md +0 -149
  382. vllm_ascend-0.12.0rc1/docs/source/conf.py +0 -145
  383. vllm_ascend-0.12.0rc1/docs/source/developer_guide/contribution/testing.md +0 -288
  384. vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -20
  385. vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -19
  386. vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -21
  387. vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -21
  388. vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -10
  389. vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/index.md +0 -11
  390. vllm_ascend-0.12.0rc1/docs/source/developer_guide/feature_guide/index.md +0 -16
  391. vllm_ascend-0.12.0rc1/docs/source/developer_guide/feature_guide/patch.md +0 -75
  392. vllm_ascend-0.12.0rc1/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +0 -516
  393. vllm_ascend-0.12.0rc1/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +0 -184
  394. vllm_ascend-0.12.0rc1/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +0 -195
  395. vllm_ascend-0.12.0rc1/docs/source/faqs.md +0 -242
  396. vllm_ascend-0.12.0rc1/docs/source/index.md +0 -70
  397. vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -237
  398. vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -248
  399. vllm_ascend-0.12.0rc1/docs/source/tutorials/DeepSeek-R1.md +0 -290
  400. vllm_ascend-0.12.0rc1/docs/source/tutorials/DeepSeek-V3.1.md +0 -807
  401. vllm_ascend-0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md +0 -652
  402. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen2.5-7B.md +0 -177
  403. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen2.5-Omni.md +0 -206
  404. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-235B-A22B.md +0 -313
  405. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-32B-W4A4.md +0 -141
  406. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-8B-W4A8.md +0 -134
  407. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-Dense.md +0 -372
  408. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-Next.md +0 -154
  409. vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3_embedding.md +0 -100
  410. vllm_ascend-0.12.0rc1/docs/source/tutorials/index.md +0 -25
  411. vllm_ascend-0.12.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +0 -922
  412. vllm_ascend-0.12.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +0 -252
  413. vllm_ascend-0.12.0rc1/docs/source/user_guide/configuration/additional_config.md +0 -98
  414. vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/graph_mode.md +0 -82
  415. vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/index.md +0 -20
  416. vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/kv_pool.md +0 -293
  417. vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/large_scale_ep.md +0 -504
  418. vllm_ascend-0.12.0rc1/docs/source/user_guide/release_notes.md +0 -810
  419. vllm_ascend-0.12.0rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -47
  420. vllm_ascend-0.12.0rc1/docs/source/user_guide/support_matrix/supported_models.md +0 -81
  421. vllm_ascend-0.12.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +0 -586
  422. vllm_ascend-0.12.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -666
  423. vllm_ascend-0.12.0rc1/examples/eplb/eplb_strategy.py +0 -186
  424. vllm_ascend-0.12.0rc1/examples/external_online_dp/dp_load_balance_proxy_server.py +0 -405
  425. vllm_ascend-0.12.0rc1/examples/offline_disaggregated_prefill_npu.py +0 -168
  426. vllm_ascend-0.12.0rc1/examples/offline_embed.py +0 -58
  427. vllm_ascend-0.12.0rc1/mypy.ini +0 -32
  428. vllm_ascend-0.12.0rc1/pyproject.toml +0 -42
  429. vllm_ascend-0.12.0rc1/requirements-dev.txt +0 -24
  430. vllm_ascend-0.12.0rc1/requirements.txt +0 -33
  431. vllm_ascend-0.12.0rc1/setup.py +0 -533
  432. vllm_ascend-0.12.0rc1/tests/e2e/conftest.py +0 -773
  433. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL2-8B.yaml +0 -11
  434. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL2_5-8B.yaml +0 -11
  435. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL3-8B.yaml +0 -11
  436. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/Meta-Llama-3.1-8B-Instruct.yaml +0 -11
  437. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/accuracy.txt +0 -17
  438. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/gemma-3-4b-it.yaml +0 -13
  439. vllm_ascend-0.12.0rc1/tests/e2e/models/configs/llava-1.5-7b-hf.yaml +0 -11
  440. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_aclgraph_capture_replay.py +0 -237
  441. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_data_parallel.py +0 -83
  442. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_data_parallel_tp2.py +0 -52
  443. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_expert_parallel.py +0 -33
  444. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_external_launcher.py +0 -239
  445. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_full_graph_mode.py +0 -121
  446. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -74
  447. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -23
  448. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_offline_inference_distributed.py +0 -223
  449. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_offline_weight_load.py +0 -74
  450. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_pipeline_parallel.py +0 -47
  451. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_prefix_caching.py +0 -83
  452. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_quantization.py +0 -47
  453. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_qwen3_moe.py +0 -89
  454. vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_qwen3_next.py +0 -125
  455. vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py +0 -140
  456. vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py +0 -110
  457. vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py +0 -99
  458. vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py +0 -99
  459. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py +0 -120
  460. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8_eplb.py +0 -115
  461. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_deepseek_v3_2_exp_w8a8.py +0 -105
  462. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_glm4_5.py +0 -111
  463. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py +0 -110
  464. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +0 -102
  465. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -105
  466. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_235b_w8a8.py +0 -101
  467. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_30b_w8a8.py +0 -92
  468. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_32b_int8.py +0 -130
  469. vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwq_32b.py +0 -116
  470. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml +0 -57
  471. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml +0 -195
  472. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml +0 -194
  473. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +0 -111
  474. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8-EPLB.yaml +0 -91
  475. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml +0 -87
  476. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/utils.py +0 -129
  477. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/scripts/run.sh +0 -164
  478. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/test_multi_node.py +0 -130
  479. vllm_ascend-0.12.0rc1/tests/e2e/nightly/multicard_ops/test_dispatch_gmm_combine_decode.py +0 -411
  480. vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_fused_moe.py +0 -352
  481. vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/triton/test_causal_conv1d.py +0 -230
  482. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +0 -113
  483. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling/test_classification.py +0 -34
  484. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling/test_embedding.py +0 -100
  485. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling/test_scoring.py +0 -187
  486. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -176
  487. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -241
  488. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_aclgraph_accuracy.py +0 -213
  489. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_async_scheduling.py +0 -239
  490. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_camem.py +0 -99
  491. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +0 -75
  492. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_guided_decoding.py +0 -153
  493. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_ilama_lora.py +0 -62
  494. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -103
  495. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_quantization.py +0 -35
  496. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_sampler.py +0 -49
  497. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_vlm.py +0 -89
  498. vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_xlite.py +0 -132
  499. vllm_ascend-0.12.0rc1/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -36
  500. vllm_ascend-0.12.0rc1/tests/ut/attention/test_attention_v1.py +0 -392
  501. vllm_ascend-0.12.0rc1/tests/ut/attention/test_mla_v1.py +0 -1221
  502. vllm_ascend-0.12.0rc1/tests/ut/attention/test_sfa_v1.py +0 -187
  503. vllm_ascend-0.12.0rc1/tests/ut/compilation/test_acl_graph.py +0 -847
  504. vllm_ascend-0.12.0rc1/tests/ut/conftest.py +0 -26
  505. vllm_ascend-0.12.0rc1/tests/ut/distributed/test_parallel_state.py +0 -81
  506. vllm_ascend-0.12.0rc1/tests/ut/kv_connector/test_mooncake_connector.py +0 -1262
  507. vllm_ascend-0.12.0rc1/tests/ut/kv_connector/utils.py +0 -210
  508. vllm_ascend-0.12.0rc1/tests/ut/ops/test_activation.py +0 -76
  509. vllm_ascend-0.12.0rc1/tests/ut/ops/test_fused_moe.py +0 -596
  510. vllm_ascend-0.12.0rc1/tests/ut/ops/test_layernorm.py +0 -57
  511. vllm_ascend-0.12.0rc1/tests/ut/ops/test_linear.py +0 -157
  512. vllm_ascend-0.12.0rc1/tests/ut/ops/test_moe_comm_method.py +0 -245
  513. vllm_ascend-0.12.0rc1/tests/ut/ops/test_prepare_finalize.py +0 -224
  514. vllm_ascend-0.12.0rc1/tests/ut/ops/test_rotary_embedding.py +0 -453
  515. vllm_ascend-0.12.0rc1/tests/ut/ops/test_token_dispatcher.py +0 -535
  516. vllm_ascend-0.12.0rc1/tests/ut/quantization/test_quant_config.py +0 -241
  517. vllm_ascend-0.12.0rc1/tests/ut/quantization/test_utils.py +0 -62
  518. vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -246
  519. vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +0 -301
  520. vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w8a8.py +0 -985
  521. vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +0 -61
  522. vllm_ascend-0.12.0rc1/tests/ut/sample/test_rejection_sampler.py +0 -204
  523. vllm_ascend-0.12.0rc1/tests/ut/sample/test_sampler.py +0 -35
  524. vllm_ascend-0.12.0rc1/tests/ut/spec_decode/test_eagle_proposer.py +0 -312
  525. vllm_ascend-0.12.0rc1/tests/ut/spec_decode/test_mtp_proposer.py +0 -445
  526. vllm_ascend-0.12.0rc1/tests/ut/test_ascend_config.py +0 -93
  527. vllm_ascend-0.12.0rc1/tests/ut/test_platform.py +0 -580
  528. vllm_ascend-0.12.0rc1/tests/ut/test_utils.py +0 -309
  529. vllm_ascend-0.12.0rc1/tests/ut/worker/test_input_batch.py +0 -375
  530. vllm_ascend-0.12.0rc1/tests/ut/worker/test_worker_v1.py +0 -1184
  531. vllm_ascend-0.12.0rc1/tools/aisbench.py +0 -326
  532. vllm_ascend-0.12.0rc1/tools/send_request.py +0 -23
  533. vllm_ascend-0.12.0rc1/vllm_ascend/_version.py +0 -34
  534. vllm_ascend-0.12.0rc1/vllm_ascend/ascend_config.py +0 -321
  535. vllm_ascend-0.12.0rc1/vllm_ascend/ascend_forward_context.py +0 -233
  536. vllm_ascend-0.12.0rc1/vllm_ascend/attention/attention_cp.py +0 -915
  537. vllm_ascend-0.12.0rc1/vllm_ascend/attention/attention_v1.py +0 -761
  538. vllm_ascend-0.12.0rc1/vllm_ascend/attention/mla_v1.py +0 -2119
  539. vllm_ascend-0.12.0rc1/vllm_ascend/attention/sfa_v1.py +0 -1002
  540. vllm_ascend-0.12.0rc1/vllm_ascend/attention/utils.py +0 -235
  541. vllm_ascend-0.12.0rc1/vllm_ascend/compilation/acl_graph.py +0 -485
  542. vllm_ascend-0.12.0rc1/vllm_ascend/compilation/compiler_interface.py +0 -137
  543. vllm_ascend-0.12.0rc1/vllm_ascend/compilation/graph_fusion_pass_manager.py +0 -53
  544. vllm_ascend-0.12.0rc1/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +0 -113
  545. vllm_ascend-0.12.0rc1/vllm_ascend/core/scheduler_dynamic_batch.py +0 -595
  546. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/__init__.py +0 -40
  547. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/ascend_store_connector.py +0 -192
  548. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +0 -74
  549. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/config_data.py +0 -377
  550. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/kv_transfer.py +0 -261
  551. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/pool_scheduler.py +0 -328
  552. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/pool_worker.py +0 -604
  553. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/mooncake_connector.py +0 -1473
  554. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/parallel_state.py +0 -345
  555. vllm_ascend-0.12.0rc1/vllm_ascend/distributed/utils.py +0 -61
  556. vllm_ascend-0.12.0rc1/vllm_ascend/envs.py +0 -148
  557. vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -772
  558. vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -651
  559. vllm_ascend-0.12.0rc1/vllm_ascend/eplb/utils.py +0 -89
  560. vllm_ascend-0.12.0rc1/vllm_ascend/kv_offload/npu.py +0 -71
  561. vllm_ascend-0.12.0rc1/vllm_ascend/lora/punica_npu.py +0 -351
  562. vllm_ascend-0.12.0rc1/vllm_ascend/ops/__init__.py +0 -57
  563. vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/experts_selector.py +0 -305
  564. vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/fused_moe.py +0 -472
  565. vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/moe_comm_method.py +0 -317
  566. vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/moe_mlp.py +0 -344
  567. vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/prepare_finalize.py +0 -453
  568. vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/token_dispatcher.py +0 -749
  569. vllm_ascend-0.12.0rc1/vllm_ascend/ops/layernorm.py +0 -122
  570. vllm_ascend-0.12.0rc1/vllm_ascend/ops/linear.py +0 -468
  571. vllm_ascend-0.12.0rc1/vllm_ascend/ops/linear_op.py +0 -683
  572. vllm_ascend-0.12.0rc1/vllm_ascend/ops/register_custom_ops.py +0 -343
  573. vllm_ascend-0.12.0rc1/vllm_ascend/ops/rotary_embedding.py +0 -437
  574. vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/fla/chunk.py +0 -226
  575. vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/fla/sigmoid_gating.py +0 -171
  576. vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/mamba/causal_conv1d.py +0 -1348
  577. vllm_ascend-0.12.0rc1/vllm_ascend/patch/__init__.py +0 -255
  578. vllm_ascend-0.12.0rc1/vllm_ascend/patch/platform/__init__.py +0 -26
  579. vllm_ascend-0.12.0rc1/vllm_ascend/patch/platform/patch_ec_connector.py +0 -32
  580. vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/__init__.py +0 -35
  581. vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_module.py +0 -34
  582. vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_qwen2_5_omni.py +0 -72
  583. vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_qwen2_5_vl.py +0 -175
  584. vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_qwen3_vl.py +0 -85
  585. vllm_ascend-0.12.0rc1/vllm_ascend/platform.py +0 -415
  586. vllm_ascend-0.12.0rc1/vllm_ascend/quantization/quant_config.py +0 -511
  587. vllm_ascend-0.12.0rc1/vllm_ascend/quantization/utils.py +0 -120
  588. vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -193
  589. vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w4a8_dynamic.py +0 -496
  590. vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w8a8.py +0 -711
  591. vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w8a8_dynamic.py +0 -314
  592. vllm_ascend-0.12.0rc1/vllm_ascend/sample/rejection_sampler.py +0 -849
  593. vllm_ascend-0.12.0rc1/vllm_ascend/sample/sampler.py +0 -102
  594. vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/eagle_proposer.py +0 -684
  595. vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/interface.py +0 -54
  596. vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/mtp_proposer.py +0 -1210
  597. vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/ngram_proposer.py +0 -72
  598. vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/suffix_proposer.py +0 -45
  599. vllm_ascend-0.12.0rc1/vllm_ascend/utils.py +0 -1064
  600. vllm_ascend-0.12.0rc1/vllm_ascend/worker/block_table.py +0 -329
  601. vllm_ascend-0.12.0rc1/vllm_ascend/worker/model_runner_v1.py +0 -3522
  602. vllm_ascend-0.12.0rc1/vllm_ascend/worker/npu_input_batch.py +0 -982
  603. vllm_ascend-0.12.0rc1/vllm_ascend/worker/worker_v1.py +0 -478
  604. vllm_ascend-0.12.0rc1/vllm_ascend/xlite/xlite.py +0 -275
  605. vllm_ascend-0.12.0rc1/vllm_ascend/xlite/xlite_worker.py +0 -26
  606. vllm_ascend-0.12.0rc1/vllm_ascend.egg-info/PKG-INFO +0 -146
  607. vllm_ascend-0.12.0rc1/vllm_ascend.egg-info/SOURCES.txt +0 -866
  608. vllm_ascend-0.12.0rc1/vllm_ascend.egg-info/requires.txt +0 -23
  609. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.gemini/config.yaml +0 -0
  610. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/Dockerfile.buildwheel +0 -0
  611. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/Dockerfile.nightly.a2 +0 -0
  612. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/Dockerfile.nightly.a3 +0 -0
  613. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  614. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  615. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  616. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  617. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  618. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  619. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  620. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  621. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  622. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  623. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  624. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  625. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  626. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/actionlint.yaml +0 -0
  627. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/dependabot.yml +0 -0
  628. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/labeler.yml +0 -0
  629. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/workflows/matchers/actionlint.json +0 -0
  630. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.github/workflows/matchers/mypy.json +0 -0
  631. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.gitignore +0 -0
  632. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.gitmodules +0 -0
  633. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/.readthedocs.yaml +0 -0
  634. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/CODE_OF_CONDUCT.md +0 -0
  635. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/CONTRIBUTING.md +0 -0
  636. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/DCO +0 -0
  637. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/LICENSE +0 -0
  638. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/README.md +0 -0
  639. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  640. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/requirements-bench.txt +0 -0
  641. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  642. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/scripts/perf_result_template.md +0 -0
  643. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  644. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/tests/latency-tests.json +0 -0
  645. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/tests/serving-tests.json +0 -0
  646. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/benchmarks/tests/throughput-tests.json +0 -0
  647. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/cmake/utils.cmake +0 -0
  648. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/codecov.yml +0 -0
  649. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/collect_env.py +0 -0
  650. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/CMakeLists.txt +0 -0
  651. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/aclnn_torch_adapter/NPUBridge.cpp +0 -0
  652. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/aclnn_torch_adapter/NPUBridge.h +0 -0
  653. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/aclnn_torch_adapter/NPUStorageImpl.cpp +0 -0
  654. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/aclnn_torch_adapter/NPUStorageImpl.h +0 -0
  655. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/aclnn_torch_adapter/op_api_common.h +0 -0
  656. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h +0 -0
  657. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/batch_matmul_transpose/op_host/common.h +0 -0
  658. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/batch_matmul_transpose/op_host/common_tiling.h +0 -0
  659. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp +0 -0
  660. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h +0 -0
  661. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp +0 -0
  662. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/build.sh +0 -0
  663. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/build_aclnn.sh +0 -0
  664. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/camem_allocator.cpp +0 -0
  665. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/cmake/config.cmake +0 -0
  666. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/cmake/func.cmake +0 -0
  667. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/cmake/intf.cmake +0 -0
  668. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/cmake/intf_pub.cmake +0 -0
  669. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/cmake/modules/Findalog.cmake +0 -0
  670. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/cmake/scripts/prepare.sh +0 -0
  671. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/CMakeLists.txt +0 -0
  672. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.cpp +0 -0
  673. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.h +0 -0
  674. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp +0 -0
  675. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_proto.cpp +0 -0
  676. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp +0 -0
  677. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/error_log.h +0 -0
  678. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/hcom_topo_info.h +0 -0
  679. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_host/tiling_args.h +0 -0
  680. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.cpp +0 -0
  681. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h +0 -0
  682. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp +0 -0
  683. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h +0 -0
  684. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h +0 -0
  685. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h +0 -0
  686. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h +0 -0
  687. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +0 -0
  688. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h +0 -0
  689. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h +0 -0
  690. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h +0 -0
  691. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h +0 -0
  692. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h +0 -0
  693. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h +0 -0
  694. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h +0 -0
  695. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h +0 -0
  696. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h +0 -0
  697. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h +0 -0
  698. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h +0 -0
  699. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h +0 -0
  700. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h +0 -0
  701. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h +0 -0
  702. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h +0 -0
  703. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/tiling_base.h +0 -0
  704. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute.h +0 -0
  705. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute_tiling.h +0 -0
  706. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp +0 -0
  707. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +0 -0
  708. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +0 -0
  709. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/const_args.hpp +0 -0
  710. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +0 -0
  711. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_l0c_to_gm_custom.hpp +0 -0
  712. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/dispatch_policy_custom.hpp +0 -0
  713. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp +0 -0
  714. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/layout3d.hpp +0 -0
  715. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/select_helper.hpp +0 -0
  716. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/CMakeLists.txt +0 -0
  717. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp +0 -0
  718. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue.h +0 -0
  719. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp +0 -0
  720. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h +0 -0
  721. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/dispatch_policy.h +0 -0
  722. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_binary.h +0 -0
  723. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_muls.h +0 -0
  724. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad.h +0 -0
  725. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad_preload_async_with_callback_resident_a.h +0 -0
  726. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/dispatch_policy.h +0 -0
  727. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +0 -0
  728. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_host/CMakeLists.txt +0 -0
  729. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.cpp +0 -0
  730. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +0 -0
  731. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_host/dispatch_layout.cpp +0 -0
  732. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_host/dispatch_layout_tiling.cpp +0 -0
  733. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_kernel/dispatch_layout.cpp +0 -0
  734. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_kernel/dispatch_layout.h +0 -0
  735. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/dispatch_layout/op_kernel/dispatch_layout_tiling.h +0 -0
  736. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt +0 -0
  737. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  738. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  739. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  740. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  741. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp +0 -0
  742. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp +0 -0
  743. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp +0 -0
  744. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h +0 -0
  745. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  746. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  747. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h +0 -0
  748. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h +0 -0
  749. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  750. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/kernels/math_utils.h +0 -0
  751. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  752. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/kernels/types.h +0 -0
  753. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/kernels/utils.h +0 -0
  754. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_host/CMakeLists.txt +0 -0
  755. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_def.cpp +0 -0
  756. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp +0 -0
  757. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +0 -0
  758. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.h +0 -0
  759. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer.cpp +0 -0
  760. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_common.h +0 -0
  761. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h +0 -0
  762. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h +0 -0
  763. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h +0 -0
  764. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h +0 -0
  765. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h +0 -0
  766. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/CMakeLists.txt +0 -0
  767. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.h +0 -0
  768. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_def.cpp +0 -0
  769. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_proto.cpp +0 -0
  770. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_tiling.cpp +0 -0
  771. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_workspace.h +0 -0
  772. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp +0 -0
  773. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aic_kernel.h +0 -0
  774. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aiv_kernel.h +0 -0
  775. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_tiling.h +0 -0
  776. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_utils.h +0 -0
  777. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
  778. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
  779. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
  780. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
  781. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
  782. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
  783. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
  784. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
  785. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
  786. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
  787. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
  788. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
  789. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
  790. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
  791. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
  792. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
  793. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
  794. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
  795. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
  796. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
  797. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
  798. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
  799. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
  800. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
  801. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
  802. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_nq.hpp +0 -0
  803. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_qdown.hpp +0 -0
  804. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
  805. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_host/CMakeLists.txt +0 -0
  806. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.cpp +0 -0
  807. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +0 -0
  808. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_host/moe_combine_normal.cpp +0 -0
  809. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +0 -0
  810. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.cpp +0 -0
  811. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.h +0 -0
  812. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_combine_normal/op_kernel/moe_combine_normal_tiling.h +0 -0
  813. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_host/CMakeLists.txt +0 -0
  814. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.cpp +0 -0
  815. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.h +0 -0
  816. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal.cpp +0 -0
  817. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal_tiling.cpp +0 -0
  818. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.cpp +0 -0
  819. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.h +0 -0
  820. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal_tiling.h +0 -0
  821. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_host/CMakeLists.txt +0 -0
  822. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.cpp +0 -0
  823. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +0 -0
  824. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_host/notify_dispatch.cpp +0 -0
  825. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_host/notify_dispatch_tiling.cpp +0 -0
  826. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_kernel/notify_dispatch.cpp +0 -0
  827. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_kernel/notify_dispatch.h +0 -0
  828. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/notify_dispatch/op_kernel/notify_dispatch_tiling.h +0 -0
  829. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/ops.h +0 -0
  830. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_host/CMakeLists.txt +0 -0
  831. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp +0 -0
  832. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp +0 -0
  833. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +0 -0
  834. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h +0 -0
  835. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp +0 -0
  836. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h +0 -0
  837. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h +0 -0
  838. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h +0 -0
  839. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h +0 -0
  840. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h +0 -0
  841. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/CMakeLists.txt +0 -0
  842. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/aclnn_util.h +0 -0
  843. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/error/ops_error.h +0 -0
  844. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/fallback.h +0 -0
  845. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/fallback_comm.h +0 -0
  846. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/kernel/comm_args.h +0 -0
  847. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/kernel/data_copy.h +0 -0
  848. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/kernel/dropmask.h +0 -0
  849. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/kernel/pse.h +0 -0
  850. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/kernel/sync_collectives.h +0 -0
  851. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/kernel/util.h +0 -0
  852. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/log/inner/dfx_base.h +0 -0
  853. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/log/ops_log.h +0 -0
  854. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/tiling/data_copy_transpose_tiling.h +0 -0
  855. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h +0 -0
  856. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/tiling/tiling_base.h +0 -0
  857. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/tiling/tiling_templates_registry.h +0 -0
  858. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/inc/tiling/tiling_type.h +0 -0
  859. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils/src/fallback_comm.cpp +0 -0
  860. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/csrc/utils.h +0 -0
  861. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/Makefile +0 -0
  862. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/README.md +0 -0
  863. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/requirements-docs.txt +0 -0
  864. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/requirements-test.txt +0 -0
  865. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/deployment.png +0 -0
  866. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/disaggregated_prefill_pull.png +0 -0
  867. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/disaggregated_prefill_push.png +0 -0
  868. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/eplb.png +0 -0
  869. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  870. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  871. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/assets/workflow.png +0 -0
  872. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/community/governance.md +0 -0
  873. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/community/user_stories/index.md +0 -0
  874. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
  875. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/contribution/index.md +0 -0
  876. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/contribution/multi_node_test.md +0 -0
  877. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/evaluation/using_ais_bench.md +0 -0
  878. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  879. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  880. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  881. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -0
  882. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md +0 -0
  883. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  884. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/Multi_Token_Prediction.md +0 -0
  885. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md +0 -0
  886. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +0 -0
  887. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md +0 -0
  888. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/performance_and_debug/index.md +0 -0
  889. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md +0 -0
  890. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md +0 -0
  891. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/installation.md +0 -0
  892. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  893. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  894. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  895. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  896. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  897. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  898. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  899. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  900. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  901. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  902. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  903. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  904. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  905. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  906. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  907. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +0 -0
  908. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +0 -0
  909. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +0 -0
  910. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +0 -0
  911. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +0 -0
  912. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  913. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  914. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  915. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  916. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  917. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  918. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  919. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  920. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  921. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  922. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  923. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  924. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  925. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  926. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  927. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  928. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  929. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  930. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  931. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  932. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  933. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  934. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  935. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  936. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  937. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  938. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  939. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  940. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  941. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  942. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/quick_start.md +0 -0
  943. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/tutorials/310p.md +0 -0
  944. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/tutorials/Kimi-K2-Thinking.md +0 -0
  945. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/tutorials/Qwen-VL-Dense.md +0 -0
  946. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/tutorials/Qwen3-30B-A3B.md +0 -0
  947. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/tutorials/Qwen3-Coder-30B-A3B.md +0 -0
  948. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/tutorials/ray.md +0 -0
  949. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
  950. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/configuration/index.md +0 -0
  951. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/dynamic_batch.md +0 -0
  952. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -0
  953. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/external_dp.md +0 -0
  954. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  955. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/images/netloader_flowchart.png +0 -0
  956. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png +0 -0
  957. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  958. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/lora.md +0 -0
  959. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/netloader.md +0 -0
  960. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/quantization-llm-compressor.md +0 -0
  961. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/quantization.md +0 -0
  962. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  963. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  964. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
  965. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/chat_templates/template_qwen2_audio.jinja +0 -0
  966. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  967. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/eplb/eplb_deepseek.py +0 -0
  968. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/external_online_dp/README.md +0 -0
  969. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/external_online_dp/launch_online_dp.py +0 -0
  970. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/external_online_dp/run_dp_template.sh +0 -0
  971. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_data_parallel.py +0 -0
  972. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_external_launcher.py +0 -0
  973. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_inference_audio_language.py +0 -0
  974. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_inference_npu.py +0 -0
  975. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_inference_npu_long_seq.py +0 -0
  976. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_inference_npu_tp2.py +0 -0
  977. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_inference_sleep_mode_npu.py +0 -0
  978. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/offline_weight_load.py +0 -0
  979. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/prompt_embed_inference.py +0 -0
  980. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/prompt_embedding_inference.py +0 -0
  981. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/quantization/llm-compressor/w8a8_int8.py +0 -0
  982. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +0 -0
  983. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/examples/run_dp_server.sh +0 -0
  984. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/format.sh +0 -0
  985. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/packages.txt +0 -0
  986. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/requirements-lint.txt +0 -0
  987. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/setup.cfg +0 -0
  988. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/__init__.py +0 -0
  989. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  990. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/__init__.py +0 -0
  991. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/common.sh +0 -0
  992. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  993. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  994. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/model_utils.py +0 -0
  995. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml +0 -0
  996. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml +0 -0
  997. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Molmo-7B-D-0924.yaml +0 -0
  998. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
  999. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml +0 -0
  1000. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml +0 -0
  1001. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  1002. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-8B-W8A8.yaml +0 -0
  1003. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
  1004. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml +0 -0
  1005. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
  1006. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +0 -0
  1007. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/configs/internlm3-8b-instruct.yaml +0 -0
  1008. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/conftest.py +0 -0
  1009. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/report_template.md +0 -0
  1010. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
  1011. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/multicard/test_chunk_gated_delta_rule.py +0 -0
  1012. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/multicard/test_shared_expert_dp.py +0 -0
  1013. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/multicard/test_single_request_aclgraph.py +0 -0
  1014. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/models/test_qwen3_32b.py +0 -0
  1015. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/multi_node/__init__.py +0 -0
  1016. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/multi_node/config/__init__.py +0 -0
  1017. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3_2-Exp-bf16.yaml +0 -0
  1018. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A22B.yaml +0 -0
  1019. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/multi_node/config/multi_node_config.py +0 -0
  1020. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +0 -0
  1021. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/__init__.py +0 -0
  1022. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_batch_matmul_transpose.py +0 -0
  1023. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_bgmv_expand.py +0 -0
  1024. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_bgmv_shrink.py +0 -0
  1025. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_dispatch_ffn_combine.py +0 -0
  1026. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_gating_top_k_softmax.py +0 -0
  1027. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py +0 -0
  1028. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_grouped_matmul_swiglu_quant.py +0 -0
  1029. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_matmul_allreduce_add_rmsnorm.py +0 -0
  1030. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_mla_preprocess.py +0 -0
  1031. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_mla_preprocess_nq.py +0 -0
  1032. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_mla_preprocess_qdown.py +0 -0
  1033. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_rotary_embedding.py +0 -0
  1034. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/test_vocabparallelembedding.py +0 -0
  1035. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/triton/__init__.py +0 -0
  1036. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/nightly/ops/triton/test_rope.py +0 -0
  1037. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/prompts/example.txt +0 -0
  1038. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/run_doctests.sh +0 -0
  1039. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/singlecard/__init__.py +0 -0
  1040. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/singlecard/compile/backend.py +0 -0
  1041. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/singlecard/pooling/__init__.py +0 -0
  1042. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
  1043. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  1044. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/utils.py +0 -0
  1045. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/e2e/vllm_interface/vllm_test.cfg +0 -0
  1046. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/__init__.py +0 -0
  1047. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/attention/test_attention_mask.py +0 -0
  1048. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/base.py +0 -0
  1049. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/core/test_scheduler_dynamic_batch.py +0 -0
  1050. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/device_allocator/test_camem.py +0 -0
  1051. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  1052. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  1053. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/distributed/mooncake/test_config_data.py +0 -0
  1054. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/distributed/test_communicator.py +0 -0
  1055. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  1056. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
  1057. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
  1058. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
  1059. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
  1060. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
  1061. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
  1062. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/eplb/core/test_eplb_utils.py +0 -0
  1063. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/fake_weight/config.json +0 -0
  1064. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -0
  1065. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  1066. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  1067. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/model_loader/netloader/test_netloader.py +0 -0
  1068. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/model_loader/netloader/test_netloader_elastic.py +0 -0
  1069. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/model_loader/netloader/test_netloader_load.py +0 -0
  1070. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/model_loader/netloader/test_netloader_utils.py +0 -0
  1071. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/ops/expert_map.json +0 -0
  1072. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/ops/test_comm_utils.py +0 -0
  1073. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  1074. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/ops/test_mla.py +0 -0
  1075. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
  1076. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  1077. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  1078. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/quantization/test_w4a16.py +0 -0
  1079. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
  1080. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tests/ut/test_envs.py +0 -0
  1081. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/actionlint.sh +0 -0
  1082. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/check_python_src_init.py +0 -0
  1083. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/check_repo.sh +0 -0
  1084. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/enforce_regex_import.py +0 -0
  1085. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/mooncake_installer.sh +0 -0
  1086. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/mypy.sh +0 -0
  1087. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/png-lint.sh +0 -0
  1088. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/send_mm_request.py +0 -0
  1089. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/shellcheck.sh +0 -0
  1090. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/tools/sphinx-lint.sh +0 -0
  1091. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/typos.toml +0 -0
  1092. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/__init__.py +0 -0
  1093. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/_cann_ops_custom/.gitkeep +0 -0
  1094. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/attention/__init__.py +0 -0
  1095. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/attention/attention_mask.py +0 -0
  1096. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/compilation/__init__.py +0 -0
  1097. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/compilation/npugraph_ex_passes/__init__.py +0 -0
  1098. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/compilation/npugraph_ex_passes/add_rms_norm_quant.py +0 -0
  1099. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/compilation/passes/__init__.py +0 -0
  1100. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/core/__init__.py +0 -0
  1101. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/core/recompute_scheduler.py +0 -0
  1102. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/cpu_binding.py +0 -0
  1103. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/device_allocator/__init__.py +0 -0
  1104. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/device_allocator/camem.py +0 -0
  1105. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/communicator.py +0 -0
  1106. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/cpu_offload_connector.py +0 -0
  1107. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/cpu_offload_manager/__init__.py +0 -0
  1108. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
  1109. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
  1110. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
  1111. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  1112. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  1113. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/kvpool/__init__.py +0 -0
  1114. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/kvpool/backend/__init__.py +0 -0
  1115. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/kvpool/backend/backend.py +0 -0
  1116. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py +0 -0
  1117. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/mooncake_layerwise_connector.py +0 -0
  1118. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/distributed/mooncake_transfer_engine.py +0 -0
  1119. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/__init__.py +0 -0
  1120. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/adaptor/__init__.py +0 -0
  1121. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
  1122. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -0
  1123. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/__init__.py +0 -0
  1124. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -0
  1125. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/eplb_utils.py +0 -0
  1126. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/eplb_worker.py +0 -0
  1127. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/policy/__init__.py +0 -0
  1128. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
  1129. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
  1130. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
  1131. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
  1132. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/eplb/eplb_updator.py +0 -0
  1133. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/kv_offload/__init__.py +0 -0
  1134. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/kv_offload/cpu_npu.py +0 -0
  1135. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/lora/__init__.py +0 -0
  1136. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/lora/lora_ops.py +0 -0
  1137. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/lora/utils.py +0 -0
  1138. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/meta_registration.py +0 -0
  1139. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/__init__.py +0 -0
  1140. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/__init__.py +0 -0
  1141. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/executor/__init__.py +0 -0
  1142. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/executor/elastic_load.py +0 -0
  1143. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/interaction/__init__.py +0 -0
  1144. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/interaction/elastic.py +0 -0
  1145. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/load.py +0 -0
  1146. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/netloader.py +0 -0
  1147. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/model_loader/netloader/utils.py +0 -0
  1148. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/activation.py +0 -0
  1149. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  1150. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/fused_moe/__init__.py +0 -0
  1151. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/fused_moe/comm_utils.py +0 -0
  1152. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/mla.py +0 -0
  1153. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/shared_weight_layer.py +0 -0
  1154. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/__init__.py +0 -0
  1155. {vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/fla → vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/activation}/__init__.py +0 -0
  1156. {vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/mamba → vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/fla}/__init__.py +0 -0
  1157. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/chunk_delta_h.py +0 -0
  1158. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/chunk_o.py +0 -0
  1159. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py +0 -0
  1160. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/cumsum.py +0 -0
  1161. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/layernorm_guard.py +0 -0
  1162. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/solve_tril.py +0 -0
  1163. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/utils.py +0 -0
  1164. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/fla/wy_fast.py +0 -0
  1165. {vllm_ascend-0.12.0rc1/vllm_ascend/quantization → vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/linearnorm}/__init__.py +0 -0
  1166. {vllm_ascend-0.12.0rc1/vllm_ascend/quantization/compressed_tensors → vllm_ascend-0.13.0rc1/vllm_ascend/ops/triton/mamba}/__init__.py +0 -0
  1167. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/rope.py +0 -0
  1168. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/triton/triton_utils.py +0 -0
  1169. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
  1170. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/ops/weight_prefetch.py +0 -0
  1171. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
  1172. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/platform/patch_mamba_config.py +0 -0
  1173. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -0
  1174. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
  1175. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_bert.py +0 -0
  1176. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_deepseek.py +0 -0
  1177. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
  1178. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
  1179. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
  1180. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_qwen3_next_mtp.py +0 -0
  1181. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_rope.py +0 -0
  1182. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_triton.py +0 -0
  1183. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/patch/worker/patch_weight_loader.py +0 -0
  1184. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/profiling_config.py +0 -0
  1185. {vllm_ascend-0.12.0rc1/vllm_ascend/sample → vllm_ascend-0.13.0rc1/vllm_ascend/quantization}/__init__.py +0 -0
  1186. {vllm_ascend-0.12.0rc1/vllm_ascend/worker → vllm_ascend-0.13.0rc1/vllm_ascend/quantization/compressed_tensors}/__init__.py +0 -0
  1187. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/quantization/compressed_tensors/compressed_tensors.py +0 -0
  1188. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/quantization/w4a16.py +0 -0
  1189. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/quantization/w8a8_pdmix.py +0 -0
  1190. {vllm_ascend-0.12.0rc1/vllm_ascend/xlite → vllm_ascend-0.13.0rc1/vllm_ascend/sample}/__init__.py +0 -0
  1191. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
  1192. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
  1193. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/spec_decode/__init__.py +0 -0
  1194. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend/xlite/xlite_model_runner.py +0 -0
  1195. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  1196. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  1197. {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -0,0 +1,299 @@
1
+ name: 'e2e nightly test multi_node'
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ soc_version:
7
+ required: true
8
+ type: string
9
+ description: use a2 or a3
10
+ runner:
11
+ required: false
12
+ type: string
13
+ default: linux-aarch64-a3-0
14
+ image:
15
+ required: false
16
+ type: string
17
+ description: base image for pods
18
+ default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
19
+ config_file_path:
20
+ required: true
21
+ type: string
22
+ description: the model config for multi_node test
23
+ replicas:
24
+ required: false
25
+ default: "1"
26
+ type: string
27
+ description: replicas of the k8s cluster
28
+ size:
29
+ required: false
30
+ default: "2"
31
+ type: string
32
+ description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
33
+ vllm_version:
34
+ required: false
35
+ default: "v0.13.0"
36
+ type: string
37
+ description: vllm version to use
38
+ vllm_ascend_remote_url:
39
+ required: false
40
+ default: https://github.com/vllm-project/vllm-ascend.git
41
+ type: string
42
+ description: used for pr level tests
43
+ vllm_ascend_ref:
44
+ required: false
45
+ default: main
46
+ type: string
47
+ description: used for pr level tests
48
+ secrets:
49
+ KUBECONFIG_B64:
50
+ required: true
51
+
52
+
53
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
54
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
55
+ # It's used to activate ascend-toolkit environment variables.
56
+ defaults:
57
+ run:
58
+ shell: bash -el {0}
59
+
60
+ # only cancel in-progress runs of the same workflow
61
+ # and ignore the lint / 8 cards test type
62
+ concurrency:
63
+ group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.soc_version }}
64
+ cancel-in-progress: true
65
+
66
+ jobs:
67
+ e2e:
68
+ name: ${{ inputs.config_file_path }}
69
+ # This is the runner with no NPU for k8s controller
70
+ runs-on: ${{ inputs.runner }}
71
+ container:
72
+ image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
73
+ env:
74
+ KUBECONFIG: /tmp/kubeconfig
75
+ KUBECTL: /root/.cache/.kube/kubectl
76
+ NAMESPACE: vllm-project
77
+ LEADER_POD: vllm-0
78
+ RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
79
+ steps:
80
+ - name: Install system dependencies
81
+ run: |
82
+ # Point apt and pip at the Tsinghua (TUNA) mirrors, then install jinja2-cli (provides the jinja2 command)
83
+ sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
84
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
85
+ pip install jinja2-cli
86
+
87
+ - name: Install kubectl
88
+ run: |
89
+ # Install the kubectl binary found at $KUBECTL; on ARM hosts the "_arm" suffixed file is used instead
90
+ arch=$(uname -m)
91
+
92
+ if echo "$arch" | grep -qiE "arm|aarch64"; then
93
+ echo "Detected ARM architecture: $arch"
94
+ KUBECTL="${KUBECTL}_arm"
95
+ fi
96
+ install -o root -g root -m 0755 "$KUBECTL" /usr/local/bin/kubectl
97
+
98
+ # Verify kubectl installation
99
+ kubectl version --client=true
100
+
101
+ - name: Decode kubeconfig from secrets
102
+ run: |
103
+ # Base64-decode the KUBECONFIG_B64 secret into the file named by $KUBECONFIG
104
+ echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > "$KUBECONFIG"
105
+
106
+ - name: Checkout code
107
+ uses: actions/checkout@v6
108
+
109
+ - name: Prepare scripts
110
+ run: |
111
+ # copy the lws entrypoint script into /root/.cache/tests (install -D creates missing directories)
112
+ install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh
113
+ # remove any existing result path; ":?" aborts instead of running rm on an empty value
114
+ rm -fr -- "${RESULT_FILE:?}"
115
+
116
+ - name: Clear resources
117
+ run: |
118
+ set -euo pipefail
119
+ # CRD_NAME and TIMEOUT may be overridden from the environment; defaults are "vllm" and 120 (seconds).
120
+ CRD_NAME="${CRD_NAME:-vllm}"
121
+ TIMEOUT=${TIMEOUT:-120}
122
+ SLEEP_INTERVAL=2
123
+ # Delete the LeaderWorkerSet if it exists (--ignore-not-found makes this a no-op otherwise).
124
+ echo "Deleting leaderworkerset [$CRD_NAME] in namespace [$NAMESPACE]..."
125
+ kubectl delete leaderworkerset "$CRD_NAME" -n "$NAMESPACE" --ignore-not-found
126
+ # Poll until no pod whose name starts with "vllm" remains, or fail after TIMEOUT seconds.
127
+ echo "Waiting for all pods starting with 'vllm' to be deleted..."
128
+ START_TIME=$(date +%s)
129
+
130
+ while true; do
131
+ NOW=$(date +%s)
132
+ ELAPSED=$((NOW - START_TIME))
133
+ # Give up once TIMEOUT is reached; list the remaining vllm pods for debugging.
134
+ if [[ $ELAPSED -ge $TIMEOUT ]]; then
135
+ echo "Timeout reached ($TIMEOUT seconds), some pods still exist:"
136
+ kubectl get pods -n "$NAMESPACE" | grep '^vllm' || true
137
+ exit 1
138
+ fi
139
+ # Names of remaining vllm pods; "|| true" keeps errexit/pipefail from aborting when the pipeline fails or grep matches nothing.
140
+ PODS_EXIST=$(kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep '^vllm' || true)
141
+
142
+ if [[ -z "$PODS_EXIST" ]]; then
143
+ echo "All vllm pods deleted."
144
+ break
145
+ else
146
+ echo "Waiting for pods to be deleted: $PODS_EXIST"
147
+ sleep $SLEEP_INTERVAL
148
+ fi
149
+ done
150
+
151
+ - name: Launch cluster
152
+ id: launcher
153
+ run: |
154
+ set -e
155
+ # Render lws.yaml from the Jinja2 template using the workflow inputs, then apply it to the cluster.
156
+ size="${{ inputs.size }}"
157
+ replicas="${{ inputs.replicas }}"
158
+ image="${{ inputs.image }}"
159
+ config_file_path="${{ inputs.config_file_path }}"
160
+ vllm_version="${{ inputs.vllm_version }}"
161
+ vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
162
+ vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
163
+ result_file_path="$RESULT_FILE"
164
+ fail_tag="FAIL_TAG_${config_file_path}"
165
+ echo "FAIL_TAG=${fail_tag}" >> "$GITHUB_ENV"
166
+ # Writing to GITHUB_ENV exposes FAIL_TAG to later steps; the checks below reject empty required values.
167
+ required_params=("size" "replicas" "image" "config_file_path")
168
+ for param in "${required_params[@]}"; do
169
+ if [[ -z "${!param}" ]]; then
170
+ echo "Error: Parameter '$param' is required but empty" >&2
171
+ exit 1
172
+ fi
173
+ done
174
+ # soc_version "a3" uses 16 NPUs per node; any other value uses 8.
175
+ if [ "${{ inputs.soc_version }}" = "a3" ]; then
176
+ npu_per_node=16
177
+ else
178
+ npu_per_node=8
179
+ fi
180
+
181
+ jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
182
+ -D size="$size" \
183
+ -D replicas="$replicas" \
184
+ -D image="$image" \
185
+ -D config_file_path="$config_file_path" \
186
+ -D vllm_version="$vllm_version" \
187
+ -D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
188
+ -D vllm_ascend_ref="$vllm_ascend_ref" \
189
+ -D result_file_path="$result_file_path" \
190
+ -D npu_per_node="$npu_per_node" \
191
+ -D fail_tag="$fail_tag" \
192
+ --outfile lws.yaml
193
+
194
+ kubectl apply -f ./lws.yaml
195
+
196
+ - name: Waiting for pod ready
197
+ run: |
198
+ POD_PREFIX="${POD_PREFIX:-vllm-0}"
199
+ SIZE="${{ inputs.size }}"
200
+ TIMEOUT=1200 # default timeout 20 minutes
201
+ # Poll every 2 seconds until the leader and all SIZE-1 follower pods are Running with ready=true.
202
+ echo "Waiting for Pods in namespace [$NAMESPACE] to become Running and Ready (timeout ${TIMEOUT}s)..."
203
+
204
+ START_TIME=$(date +%s)
205
+
206
+ while true; do
207
+ NOW=$(date +%s)
208
+ ELAPSED=$((NOW - START_TIME))
209
+ if [[ $ELAPSED -ge $TIMEOUT ]]; then
210
+ echo "Timeout reached after ${ELAPSED}s"
211
+ echo "Dumping pod status for debugging:"
212
+ kubectl get pods -n "$NAMESPACE"
213
+ kubectl describe pod "$LEADER_POD" -n "$NAMESPACE"
214
+ exit 1
215
+ fi
216
+
217
+ # 1) check follower pods
218
+ ALL_FOLLOWERS_READY=true
219
+ for ((i=1; i<SIZE; i++)); do
220
+ POD="${POD_PREFIX}-${i}"
221
+ PHASE=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
222
+ READY=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null || true)
223
+ # "|| true" above: the step shell runs with -e, so a not-yet-created pod must not abort the wait loop.
224
+ echo "Follower [$POD] phase=$PHASE ready=$READY"
225
+
226
+ if [[ "$PHASE" != "Running" || "$READY" != "true" ]]; then
227
+ echo "Follower [$POD] not Ready yet..."
228
+ ALL_FOLLOWERS_READY=false
229
+ break
230
+ fi
231
+ done
232
+
233
+ # 2) check leader pod
234
+ LEADER_PHASE=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
235
+ LEADER_READY=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null || true)
236
+
237
+ echo "Leader [$LEADER_POD] phase=$LEADER_PHASE ready=$LEADER_READY"
238
+
239
+ if [[ "$LEADER_PHASE" != "Running" || "$LEADER_READY" != "true" ]]; then
240
+ echo "Leader not Ready yet..."
241
+ ALL_FOLLOWERS_READY=false
242
+ fi
243
+ # The flag covers the leader as well: it was cleared above if the leader is not ready.
244
+ if [[ "$ALL_FOLLOWERS_READY" == "true" ]]; then
245
+ echo "All follower pods and leader pod are Running and Ready — continuing."
246
+ break
247
+ fi
248
+
249
+ sleep 2
250
+ done
251
+
252
+ - name: Stream logs
253
+ run: |
254
+ set -euo pipefail
255
+ # Worker logs are captured to /tmp in the background; leader logs are echoed and scanned for FAIL_TAG.
256
+ size="${{ inputs.size }}"
257
+ pids=()
258
+ # On any exit, stop the background "kubectl logs -f" processes started below.
259
+ cleanup() {
260
+ echo "Cleaning up background log streams..."
261
+ for pid in "${pids[@]}"; do
262
+ kill "$pid" 2>/dev/null || true
263
+ done
264
+ }
265
+ trap cleanup EXIT
266
+
267
+ for ((i = 1; i < size; i++)); do
268
+ POD="vllm-0-${i}"
269
+
270
+ echo "==== Collecting logs from worker pod: $POD ===="
271
+ kubectl logs -f "$POD" -n "$NAMESPACE" \
272
+ > "/tmp/${POD}_logs.txt" 2>&1 &
273
+
274
+ pids+=("$!")
275
+ done
276
+
277
+ echo "==== Streaming logs from leader pod: $LEADER_POD ===="
278
+ echo "Looking for logs containing: $FAIL_TAG"
279
+ # Literal substring match on each line; "exit 1" ends the loop subshell and pipefail then fails the step.
280
+ kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while IFS= read -r line; do
281
+ printf '%s\n' "$line"
282
+ if [[ "$line" == *"$FAIL_TAG"* ]]; then
283
+ exit 1
284
+ fi
285
+ done
286
+
287
+ - name: Upload logs
288
+ if: always()
289
+ uses: actions/upload-artifact@v4
290
+ with:
291
+ name: ${{ inputs.config_file_path }}-pod-logs
292
+ path: /tmp/vllm*_logs.txt
293
+ retention-days: 7
294
+
295
+ - name: Post process
296
+ if: always()
297
+ run: |
298
+ kubectl get pods -n "$NAMESPACE" --ignore-not-found=true || true  # diagnostic only; must not block the cleanup below
299
+ kubectl delete -f ./lws.yaml --ignore-not-found=true || true  # best-effort teardown of the resources in lws.yaml
@@ -0,0 +1,150 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: 'e2e nightly test'
19
+
20
+ on:
21
+ workflow_call:
22
+ inputs:
23
+ vllm:
24
+ required: true
25
+ type: string
26
+ runner:
27
+ required: true
28
+ type: string
29
+ image:
30
+ required: false
31
+ type: string
32
+ default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
33
+ tests:
34
+ required: true
35
+ type: string
36
+ name:
37
+ required: false
38
+ type: string
39
+
40
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
41
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
42
+ # It's used to activate ascend-toolkit environment variables.
43
+ defaults:
44
+ run:
45
+ shell: bash -el {0}
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ # and ignore the lint / 1 card / 4 cards test type
49
+ concurrency:
50
+ group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
51
+ cancel-in-progress: true
52
+
53
+ jobs:
54
+ e2e-nightly:
55
+ name: ${{ inputs.tests }}
56
+ runs-on: ${{ inputs.runner }}
57
+ timeout-minutes: 600
58
+ container:
59
+ image: ${{ inputs.image }}
60
+ env:
61
+ TRANSFORMERS_OFFLINE: 1
62
+ VLLM_USE_MODELSCOPE: True
63
+ steps:
64
+ - name: Check npu and CANN info
65
+ run: |
66
+ npu-smi info
67
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
68
+
69
+ - name: Show vLLM and vLLM-Ascend version
70
+ working-directory: /vllm-workspace
71
+ run: |
72
+ echo "Installed vLLM-related Python packages:"
73
+ pip list | grep vllm || echo "No vllm packages found."
74
+
75
+ echo ""
76
+ echo "============================"
77
+ echo "vLLM Git information"
78
+ echo "============================"
79
+ cd vllm
80
+ if [ -d .git ]; then
81
+ echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
82
+ echo "Commit hash: $(git rev-parse HEAD)"
83
+ echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
84
+ echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
85
+ echo "Message: $(git log -1 --pretty=format:'%s')"
86
+ echo "Tags: $(git tag --points-at HEAD || echo 'None')"
87
+ echo "Remote: $(git remote -v | head -n1)"
88
+ echo ""
89
+ else
90
+ echo "No .git directory found in vllm"
91
+ fi
92
+ cd ..
93
+
94
+ echo ""
95
+ echo "============================"
96
+ echo "vLLM-Ascend Git information"
97
+ echo "============================"
98
+ cd vllm-ascend
99
+ if [ -d .git ]; then
100
+ echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
101
+ echo "Commit hash: $(git rev-parse HEAD)"
102
+ echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
103
+ echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
104
+ echo "Message: $(git log -1 --pretty=format:'%s')"
105
+ echo "Tags: $(git tag --points-at HEAD || echo 'None')"
106
+ echo "Remote: $(git remote -v | head -n1)"
107
+ echo ""
108
+ else
109
+ echo "No .git directory found in vllm-ascend"
110
+ fi
111
+ cd ..
112
+
113
+ - name: Install custom-ops (for DeepSeek-V3.2-Exp)
114
+ if: ${{ inputs.name == 'deepseek3_2-exp-w8a8' }}
115
+ shell: bash -l {0}
116
+ run: |
117
+ wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run
118
+ chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run
119
+ ./CANN-custom_ops-sfa-linux.aarch64.run --quiet
120
+ export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH}
121
+ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH}
122
+ wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl
123
+ pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl
124
+ . /usr/local/Ascend/ascend-toolkit/set_env.sh
125
+
126
+ - name: Install triton-ascend
127
+ if: ${{ inputs.name == 'test_custom_op' }}
128
+ shell: bash -l {0}
129
+ run: |
130
+ . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
131
+ wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl
132
+ pip install triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl
133
+
134
+ - name: Run vllm-project/vllm-ascend test
135
+ env:
136
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
137
+ VLLM_USE_MODELSCOPE: True
138
+ VLLM_CI_RUNNER: ${{ inputs.runner }}
139
+ BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark
140
+ working-directory: /vllm-workspace/vllm-ascend
141
+ run: |
142
+ # ignore test_dispatch_ffn_combine until the test is fixed; the other ops tests listed below are excluded as well
143
+ pytest -sv ${{ inputs.tests }} \
144
+ --ignore=tests/e2e/nightly/ops/test_dispatch_ffn_combine.py \
145
+ --ignore=tests/e2e/nightly/ops/test_fused_moe.py \
146
+ --ignore=tests/e2e/nightly/ops/test_rotary_embedding.py \
147
+ --ignore=tests/e2e/nightly/ops/test_matmul_allreduce_add_rmsnorm.py
148
+
149
+
150
+
@@ -0,0 +1,232 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: 'e2e nightly models test'
19
+
20
+ on:
21
+ workflow_call:
22
+ inputs:
23
+ vllm:
24
+ required: true
25
+ type: string
26
+ vllm-ascend:
27
+ required: false
28
+ type: string
29
+ default: main
30
+ runner:
31
+ required: true
32
+ type: string
33
+ image:
34
+ required: true
35
+ type: string
36
+ model_list:
37
+ required: true
38
+ type: string
39
+ upload:
40
+ required: false
41
+ type: boolean
42
+ default: false
43
+
44
+ # By default, run steps use a non-login bash that does not read ~/.profile or ~/.bashrc,
45
+ # so steps that rely on them must explicitly declare "shell: bash -el {0}" (login shell).
46
+ # The login shell is what activates the ascend-toolkit environment variables.
47
+ defaults:
48
+ run:
49
+ shell: bash -el {0}
50
+
51
+ # only cancel in-progress runs of the same workflow and ref
52
+ # that target the same runner and model_list
53
+ concurrency:
54
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.runner }}-${{inputs.model_list}}
55
+ cancel-in-progress: true
56
+
57
+ jobs:
58
+ e2e-nightly:
59
+ name: ${{inputs.model_list}} accuracy test
60
+ runs-on: ${{ inputs.runner }}
61
+ container:
62
+ image: "${{ inputs.image }}"
63
+ env:
64
+ VLLM_USE_MODELSCOPE: True
65
+ GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
66
+ steps:
67
+ - name: Check npu and CANN info
68
+ run: |
69
+ npu-smi info
70
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
71
+
72
+ - name: Config mirrors
73
+ run: |
74
+ sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
75
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
76
+ apt-get update -y
77
+ apt install git -y
78
+ git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
79
+
80
+ - name: Checkout vllm-project/vllm-ascend repo
81
+ uses: actions/checkout@v6
82
+
83
+ - name: Install system dependencies
84
+ run: |
85
+ apt-get -y install $(cat packages.txt)  # unquoted on purpose: each whitespace-separated word in packages.txt is one package
86
+ apt-get -y install gcc g++ cmake libnuma-dev
87
+
88
+ - name: Checkout vllm-project/vllm repo
89
+ uses: actions/checkout@v6
90
+ with:
91
+ repository: vllm-project/vllm
92
+ ref: ${{ inputs.vllm }}
93
+ path: ./vllm-empty
94
+
95
+ - name: Install vllm-project/vllm from source
96
+ working-directory: ./vllm-empty
97
+ run: |
98
+ VLLM_TARGET_DEVICE=empty pip install -e .
99
+
100
+ - name: Install vllm-project/vllm-ascend
101
+ env:
102
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
103
+ run: |
104
+ pip install -r requirements-dev.txt
105
+ pip install -v -e .
106
+
107
+ - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
108
+ if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
109
+ shell: bash -l {0}
110
+ run: |
111
+ . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
112
+ python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
113
+
114
+ - name: Install tensorflow (for Molmo-7B-D-0924)
115
+ if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
116
+ shell: bash -l {0}
117
+ run: |
118
+ pip install tensorflow --no-cache-dir
119
+
120
+ - name: Resolve vllm-ascend version
121
+ run: |
122
+ VERSION_INPUT="${{ inputs.vllm-ascend }}"
123
+
124
+ if [[ "$VERSION_INPUT" == "latest" ]]; then
125
+ TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
126
+ LATEST_TAG=$(echo "$TAGS" | head -n1)
127
+ if [[ -z "$LATEST_TAG" ]]; then
128
+ RESOLVED_VERSION="main"
129
+ else
130
+ RESOLVED_VERSION="$LATEST_TAG"
131
+ fi
132
+ else
133
+ RESOLVED_VERSION="$VERSION_INPUT"
134
+ fi
135
+ echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> "$GITHUB_ENV"
136
+
137
+ - name: Checkout vllm-project/vllm-ascend repo
138
+ uses: actions/checkout@v6
139
+ with:
140
+ repository: vllm-project/vllm-ascend
141
+ path: ./vllm-ascend
142
+ ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
143
+
144
+ - name: Get vLLM commit hash and URL
145
+ working-directory: ./vllm-empty
146
+ run: |
147
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
148
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> "$GITHUB_ENV"
149
+
150
+ - name: Get vLLM-Ascend commit hash and URL
151
+ working-directory: ./vllm-ascend
152
+ run: |
153
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
154
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> "$GITHUB_ENV"
155
+
156
+ - name: Collect version info
157
+ run: |
158
+ for dir in /usr/local/Ascend/ascend-toolkit/*; do
158
+ dname=$(basename "$dir")
159
+ if [ -d "$dir" ] && [ "$dname" != "latest" ]; then  # take the first real directory; skip non-directory entries and "latest"
160
+ TOOLKIT_DIR="$dname"
161
+ break
162
+ fi
163
+ done
165
+ INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
166
+ GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
167
+ | head -n1 \
168
+ | cut -d'=' -f2 \
169
+ | tr -d '"')
170
+ {
171
+ echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
172
+ pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
173
+ pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
174
+ pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
175
+ } >> "$GITHUB_ENV"
176
+
177
+ - name: Run vllm-project/vllm-ascend accuracy test
178
+ id: report
179
+ env:
180
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
181
+ HF_DATASETS_OFFLINE: True
182
+ VLLM_USE_MODELSCOPE: True
183
+ VLLM_CI_RUNNER: ${{ inputs.runner }}
184
+ VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
185
+ VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
186
+ VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
187
+ VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
188
+ CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
189
+ TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
190
+ TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
191
+ run: |
192
+ mkdir -p ./benchmarks/accuracy
193
+ echo "Received model_list: ${{ inputs.model_list }}"
194
+ models=$(echo '${{ inputs.model_list }}' | jq -r '.[]')
195
+ any_failure=0
196
+ for model in $models; do
197
+ echo "Running test for model: $model"
198
+ pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
199
+ --config "./tests/e2e/models/configs/${model}.yaml" || {
200
+ echo "Test failed for model: $model"
201
+ any_failure=1
202
+ }
203
+ done
204
+
205
+ if [ $any_failure -ne 0 ]; then
206
+ exit 1
207
+ fi
208
+
209
+ - name: Generate step summary
210
+ if: ${{ always() }}
211
+ run: |
212
+ models=$(echo '${{ inputs.model_list }}' | jq -r '.[]')
213
+ for model in $models; do
214
+ echo "Processing model: $model"
215
+ model_base_name=$(basename "$model")
216
+ cat "./benchmarks/accuracy/${model_base_name}.md" >> "$GITHUB_STEP_SUMMARY"
217
+ done
218
+
219
+ - name: Set artifact timestamp
220
+ id: ts
221
+ run: |
222
+ echo "artifact_ts=$(date -u +%Y%m%dT%H%M%SZ)" >> "$GITHUB_OUTPUT"
223
+
224
+ - name: Upload Report
225
+ if: ${{ inputs.upload == true }}
226
+ uses: actions/upload-artifact@v6
227
+ with:
228
+ name: report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.ts.outputs.artifact_ts }}
229
+ path: ./benchmarks/accuracy/
230
+ if-no-files-found: warn
231
+ retention-days: 90
232
+ overwrite: true