vllm-ascend 0.13.0rc2__tar.gz → 0.14.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1670)
  1. vllm_ascend-0.14.0rc1/.github/CODEOWNERS +65 -0
  2. vllm_ascend-0.14.0rc1/.github/Dockerfile.buildwheel +45 -0
  3. vllm_ascend-0.14.0rc1/.github/ISSUE_TEMPLATE/110-user-story.yml +37 -0
  4. vllm_ascend-0.14.0rc1/.github/ISSUE_TEMPLATE/600-new-model.yml +33 -0
  5. vllm_ascend-0.14.0rc1/.github/actionlint.yaml +27 -0
  6. vllm_ascend-0.14.0rc1/.github/workflows/_e2e_nightly_multi_node.yaml +268 -0
  7. vllm_ascend-0.14.0rc1/.github/workflows/_e2e_nightly_single_node.yaml +130 -0
  8. vllm_ascend-0.14.0rc1/.github/workflows/_e2e_nightly_single_node_models.yaml +228 -0
  9. vllm_ascend-0.14.0rc1/.github/workflows/_e2e_test.yaml +430 -0
  10. vllm_ascend-0.14.0rc1/.github/workflows/_pre_commit.yml +76 -0
  11. vllm_ascend-0.14.0rc1/.github/workflows/_schedule_image_build.yaml +181 -0
  12. vllm_ascend-0.14.0rc1/.github/workflows/_unit_test.yaml +85 -0
  13. vllm_ascend-0.14.0rc1/.github/workflows/bot_pr_create.yaml +113 -0
  14. vllm_ascend-0.14.0rc1/.github/workflows/dockerfiles/Dockerfile.lint +46 -0
  15. vllm_ascend-0.14.0rc1/.github/workflows/labled_doctest.yaml +88 -0
  16. vllm_ascend-0.14.0rc1/.github/workflows/labled_download_model.yaml +84 -0
  17. vllm_ascend-0.14.0rc1/.github/workflows/matchers/markdownlint.json +17 -0
  18. vllm_ascend-0.14.0rc1/.github/workflows/misc/model_list.json +237 -0
  19. vllm_ascend-0.14.0rc1/.github/workflows/nightly_test_a2.yaml +144 -0
  20. vllm_ascend-0.14.0rc1/.github/workflows/nightly_test_a3.yaml +187 -0
  21. vllm_ascend-0.14.0rc1/.github/workflows/pr_close_cancel_job.yaml +46 -0
  22. vllm_ascend-0.14.0rc1/.github/workflows/pr_test_full.yaml +87 -0
  23. vllm_ascend-0.14.0rc1/.github/workflows/pr_test_light.yaml +111 -0
  24. vllm_ascend-0.14.0rc1/.github/workflows/schedule_codecov_refresh.yaml +42 -0
  25. vllm_ascend-0.14.0rc1/.github/workflows/schedule_image_build_and_push.yaml +60 -0
  26. vllm_ascend-0.14.0rc1/.github/workflows/schedule_lint_image_build.yaml +67 -0
  27. vllm_ascend-0.14.0rc1/.github/workflows/schedule_release_code_and_wheel.yml +151 -0
  28. vllm_ascend-0.14.0rc1/.github/workflows/schedule_test_benchmarks.yaml +203 -0
  29. vllm_ascend-0.14.0rc1/.github/workflows/schedule_test_vllm_main.yaml +40 -0
  30. vllm_ascend-0.14.0rc1/.markdownlint.yaml +15 -0
  31. vllm_ascend-0.14.0rc1/.pre-commit-config.yaml +91 -0
  32. vllm_ascend-0.14.0rc1/CMakeLists.txt +143 -0
  33. vllm_ascend-0.14.0rc1/Dockerfile +81 -0
  34. vllm_ascend-0.14.0rc1/Dockerfile.310p +65 -0
  35. vllm_ascend-0.14.0rc1/Dockerfile.310p.openEuler +61 -0
  36. vllm_ascend-0.14.0rc1/Dockerfile.a3 +80 -0
  37. vllm_ascend-0.14.0rc1/Dockerfile.a3.openEuler +80 -0
  38. vllm_ascend-0.14.0rc1/Dockerfile.openEuler +80 -0
  39. vllm_ascend-0.14.0rc1/PKG-INFO +164 -0
  40. vllm_ascend-0.14.0rc1/README.md +106 -0
  41. vllm_ascend-0.14.0rc1/README.zh.md +100 -0
  42. vllm_ascend-0.14.0rc1/benchmarks/README.md +186 -0
  43. vllm_ascend-0.14.0rc1/benchmarks/ops/ben_vocabparallelembedding.py +148 -0
  44. vllm_ascend-0.14.0rc1/benchmarks/scripts/convert_json_to_markdown.py +166 -0
  45. vllm_ascend-0.14.0rc1/benchmarks/scripts/perf_result_template.md +32 -0
  46. vllm_ascend-0.14.0rc1/cmake/utils.cmake +26 -0
  47. vllm_ascend-0.14.0rc1/collect_env.py +474 -0
  48. vllm_ascend-0.14.0rc1/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp +443 -0
  49. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/CMakeLists.txt +41 -0
  50. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/aclnn_apply_top_k_top_p_custom.cpp +213 -0
  51. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/aclnn_apply_top_k_top_p_custom.h +54 -0
  52. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom.cpp +46 -0
  53. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom.h +24 -0
  54. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom_def.cpp +100 -0
  55. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom_tiling.cpp +314 -0
  56. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom_tiling.h +51 -0
  57. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/error_log.h +71 -0
  58. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_host/sort.h +26 -0
  59. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_kernel/apply_top_k_top_p_custom.cpp +42 -0
  60. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_kernel/apply_top_k_top_p_custom.h +719 -0
  61. vllm_ascend-0.14.0rc1/csrc/apply_top_k_top_p_custom/op_kernel/apply_top_p_custom.h +468 -0
  62. vllm_ascend-0.14.0rc1/csrc/build_aclnn.sh +101 -0
  63. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +310 -0
  64. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +40 -0
  65. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/CMakeLists.txt +66 -0
  66. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/aclnn_dispatch_ffn_combine_bf16.cpp +84 -0
  67. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/aclnn_dispatch_ffn_combine_bf16.h +39 -0
  68. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/dispatch_ffn_combine_bf16_def.cpp +88 -0
  69. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/dispatch_ffn_combine_bf16_proto.cpp +40 -0
  70. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/dispatch_ffn_combine_bf16_tiling.cpp +278 -0
  71. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/error_log.h +47 -0
  72. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/hcom_topo_info.h +72 -0
  73. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_host/tiling_args.h +9 -0
  74. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16.cpp +51 -0
  75. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16.h +289 -0
  76. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16_kernel.hpp +1056 -0
  77. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16_tiling.h +56 -0
  78. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_init_routing_v2.cpp +125 -0
  79. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_init_routing_v2_tiling.h +557 -0
  80. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_common.h +201 -0
  81. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_expert_token_out.h +380 -0
  82. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_gather_out.h +198 -0
  83. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_init_routing_fullload.h +388 -0
  84. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_mrgsort.h +211 -0
  85. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_mrgsort_out.h +245 -0
  86. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_sort_base.h +74 -0
  87. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_sort_multi_core.h +507 -0
  88. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_sort_one_core.h +226 -0
  89. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_src_to_dst_op.h +173 -0
  90. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_src_to_dst_op_simt.h +96 -0
  91. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_src_to_dst_with_capacity.h +279 -0
  92. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/tiling_base.h +66 -0
  93. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/unpermute/moe_token_unpermute.h +376 -0
  94. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/unpermute/moe_token_unpermute_tiling.h +38 -0
  95. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/block_epilogue_pertoken_row.hpp +208 -0
  96. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +402 -0
  97. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/block_epilogue_pertoken_v2.hpp +330 -0
  98. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +502 -0
  99. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/const_args.hpp +9 -0
  100. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/copy_gm_to_l1_custom.hpp +40 -0
  101. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/copy_l0c_to_gm_custom.hpp +47 -0
  102. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/dispatch_policy_custom.hpp +53 -0
  103. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/get_tensor_addr.hpp +16 -0
  104. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/hccl_shmem.hpp +195 -0
  105. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/layout3d.hpp +20 -0
  106. vllm_ascend-0.14.0rc1/csrc/dispatch_ffn_combine_bf16/op_kernel/utils/select_helper.hpp +25 -0
  107. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +123 -0
  108. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp +429 -0
  109. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h +330 -0
  110. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +383 -0
  111. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +2060 -0
  112. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +36 -0
  113. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +439 -0
  114. vllm_ascend-0.14.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +21 -0
  115. vllm_ascend-0.14.0rc1/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +50 -0
  116. vllm_ascend-0.14.0rc1/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +498 -0
  117. vllm_ascend-0.14.0rc1/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +694 -0
  118. vllm_ascend-0.14.0rc1/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +62 -0
  119. vllm_ascend-0.14.0rc1/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +546 -0
  120. vllm_ascend-0.14.0rc1/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h +66 -0
  121. vllm_ascend-0.14.0rc1/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp +573 -0
  122. vllm_ascend-0.14.0rc1/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp +521 -0
  123. vllm_ascend-0.14.0rc1/csrc/moe_gating_top_k/tiling_base/tiling_key.h +63 -0
  124. vllm_ascend-0.14.0rc1/csrc/moe_gating_top_k/tiling_base/tiling_type.h +139 -0
  125. vllm_ascend-0.14.0rc1/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp +1267 -0
  126. vllm_ascend-0.14.0rc1/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h +300 -0
  127. vllm_ascend-0.14.0rc1/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h +229 -0
  128. vllm_ascend-0.14.0rc1/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h +224 -0
  129. vllm_ascend-0.14.0rc1/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +61 -0
  130. vllm_ascend-0.14.0rc1/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +1845 -0
  131. vllm_ascend-0.14.0rc1/csrc/torch_binding.cpp +1521 -0
  132. vllm_ascend-0.14.0rc1/csrc/utils/inc/tiling/tiling_type.h +136 -0
  133. vllm_ascend-0.14.0rc1/docs/README.md +25 -0
  134. vllm_ascend-0.14.0rc1/docs/source/_templates/sections/header.html +58 -0
  135. vllm_ascend-0.14.0rc1/docs/source/assets/cp/blocktable.png +0 -0
  136. vllm_ascend-0.14.0rc1/docs/source/assets/cp/device_world.png +0 -0
  137. vllm_ascend-0.14.0rc1/docs/source/assets/cp/pcp-prefill.png +0 -0
  138. vllm_ascend-0.14.0rc1/docs/source/community/contributors.md +291 -0
  139. vllm_ascend-0.14.0rc1/docs/source/community/governance.md +51 -0
  140. vllm_ascend-0.14.0rc1/docs/source/community/versioning_policy.md +169 -0
  141. vllm_ascend-0.14.0rc1/docs/source/conf.py +144 -0
  142. vllm_ascend-0.14.0rc1/docs/source/developer_guide/contribution/index.md +113 -0
  143. vllm_ascend-0.14.0rc1/docs/source/developer_guide/contribution/multi_node_test.md +349 -0
  144. vllm_ascend-0.14.0rc1/docs/source/developer_guide/contribution/testing.md +298 -0
  145. vllm_ascend-0.14.0rc1/docs/source/developer_guide/evaluation/using_ais_bench.md +327 -0
  146. vllm_ascend-0.14.0rc1/docs/source/developer_guide/evaluation/using_evalscope.md +176 -0
  147. vllm_ascend-0.14.0rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +307 -0
  148. vllm_ascend-0.14.0rc1/docs/source/developer_guide/evaluation/using_opencompass.md +126 -0
  149. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/ACL_Graph.md +103 -0
  150. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md +91 -0
  151. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +286 -0
  152. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/context_parallel.md +129 -0
  153. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +105 -0
  154. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md +249 -0
  155. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/patch.md +76 -0
  156. vllm_ascend-0.14.0rc1/docs/source/developer_guide/feature_guide/quantization.md +114 -0
  157. vllm_ascend-0.14.0rc1/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +516 -0
  158. vllm_ascend-0.14.0rc1/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +275 -0
  159. vllm_ascend-0.14.0rc1/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +242 -0
  160. vllm_ascend-0.14.0rc1/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md +41 -0
  161. vllm_ascend-0.14.0rc1/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md +257 -0
  162. vllm_ascend-0.14.0rc1/docs/source/faqs.md +272 -0
  163. vllm_ascend-0.14.0rc1/docs/source/installation.md +498 -0
  164. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +1647 -0
  165. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +204 -0
  166. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +103 -0
  167. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +87 -0
  168. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +624 -0
  169. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +187 -0
  170. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
  171. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +112 -0
  172. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +65 -0
  173. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +83 -0
  174. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
  175. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +333 -0
  176. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +26 -0
  177. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +646 -0
  178. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +88 -0
  179. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +81 -0
  180. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +575 -0
  181. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +479 -0
  182. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +79 -0
  183. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +293 -0
  184. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +149 -0
  185. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +286 -0
  186. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +121 -0
  187. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +58 -0
  188. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +183 -0
  189. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +156 -0
  190. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +220 -0
  191. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +1660 -0
  192. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +30 -0
  193. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +264 -0
  194. vllm_ascend-0.14.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +190 -0
  195. vllm_ascend-0.14.0rc1/docs/source/quick_start.md +196 -0
  196. vllm_ascend-0.14.0rc1/docs/source/tutorials/310p.md +409 -0
  197. vllm_ascend-0.14.0rc1/docs/source/tutorials/DeepSeek-R1.md +309 -0
  198. vllm_ascend-0.14.0rc1/docs/source/tutorials/DeepSeek-V3.1.md +722 -0
  199. vllm_ascend-0.14.0rc1/docs/source/tutorials/DeepSeek-V3.2.md +915 -0
  200. vllm_ascend-0.14.0rc1/docs/source/tutorials/GLM4.x.md +177 -0
  201. vllm_ascend-0.14.0rc1/docs/source/tutorials/Kimi-K2-Thinking.md +108 -0
  202. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen-VL-Dense.md +580 -0
  203. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen2.5-7B.md +180 -0
  204. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen2.5-Omni.md +210 -0
  205. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-235B-A22B.md +625 -0
  206. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-30B-A3B.md +113 -0
  207. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-32B-W4A4.md +143 -0
  208. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-8B-W4A8.md +141 -0
  209. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-Coder-30B-A3B.md +105 -0
  210. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-Dense.md +397 -0
  211. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-Next.md +182 -0
  212. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-Omni-30B-A3B-Thinking.md +319 -0
  213. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-VL-235B-A22B-Instruct.md +276 -0
  214. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-VL-30B-A3B-Instruct.md +207 -0
  215. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-VL-Embedding.md +127 -0
  216. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3-VL-Reranker.md +243 -0
  217. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3_embedding.md +122 -0
  218. vllm_ascend-0.14.0rc1/docs/source/tutorials/Qwen3_reranker.md +192 -0
  219. vllm_ascend-0.14.0rc1/docs/source/tutorials/index.md +46 -0
  220. vllm_ascend-0.14.0rc1/docs/source/tutorials/long_sequence_context_parallel_multi_node.md +371 -0
  221. vllm_ascend-0.14.0rc1/docs/source/tutorials/long_sequence_context_parallel_single_node.md +179 -0
  222. vllm_ascend-0.14.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +938 -0
  223. vllm_ascend-0.14.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +277 -0
  224. vllm_ascend-0.14.0rc1/docs/source/tutorials/ray.md +192 -0
  225. vllm_ascend-0.14.0rc1/docs/source/user_guide/configuration/additional_config.md +128 -0
  226. vllm_ascend-0.14.0rc1/docs/source/user_guide/deployment_guide/using_volcano_kthena.md +433 -0
  227. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/Fine_grained_TP.md +109 -0
  228. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/Multi_Token_Prediction.md +115 -0
  229. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/context_parallel.md +96 -0
  230. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/dynamic_batch.md +53 -0
  231. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +100 -0
  232. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/external_dp.md +91 -0
  233. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/graph_mode.md +84 -0
  234. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/images/layer_sharding.png +0 -0
  235. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/kv_pool.md +365 -0
  236. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/large_scale_ep.md +500 -0
  237. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/layer_sharding.md +74 -0
  238. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/lora.md +28 -0
  239. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/quantization.md +164 -0
  240. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/sleep_mode.md +116 -0
  241. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/speculative_decoding.md +152 -0
  242. vllm_ascend-0.14.0rc1/docs/source/user_guide/feature_guide/ucm_deployment.md +144 -0
  243. vllm_ascend-0.14.0rc1/docs/source/user_guide/release_notes.md +1073 -0
  244. vllm_ascend-0.14.0rc1/docs/source/user_guide/support_matrix/supported_features.md +48 -0
  245. vllm_ascend-0.14.0rc1/docs/source/user_guide/support_matrix/supported_models.md +108 -0
  246. vllm_ascend-0.14.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +527 -0
  247. vllm_ascend-0.14.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +823 -0
  248. vllm_ascend-0.14.0rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +161 -0
  249. vllm_ascend-0.14.0rc1/examples/eplb/eplb_deepseek.py +180 -0
  250. vllm_ascend-0.14.0rc1/examples/eplb/eplb_strategy.py +163 -0
  251. vllm_ascend-0.14.0rc1/examples/external_online_dp/README.md +59 -0
  252. vllm_ascend-0.14.0rc1/examples/external_online_dp/dp_load_balance_proxy_server.py +366 -0
  253. vllm_ascend-0.14.0rc1/examples/external_online_dp/launch_online_dp.py +64 -0
  254. vllm_ascend-0.14.0rc1/examples/offline_data_parallel.py +237 -0
  255. vllm_ascend-0.14.0rc1/examples/offline_disaggregated_prefill_npu.py +160 -0
  256. vllm_ascend-0.14.0rc1/examples/offline_embed.py +60 -0
  257. vllm_ascend-0.14.0rc1/examples/offline_external_launcher.py +312 -0
  258. vllm_ascend-0.14.0rc1/examples/offline_inference_audio_language.py +108 -0
  259. vllm_ascend-0.14.0rc1/examples/offline_inference_npu_long_seq.py +59 -0
  260. vllm_ascend-0.14.0rc1/examples/offline_inference_npu_tp2.py +57 -0
  261. vllm_ascend-0.14.0rc1/examples/offline_inference_sleep_mode_npu.py +56 -0
  262. vllm_ascend-0.14.0rc1/examples/offline_weight_load.py +315 -0
  263. vllm_ascend-0.14.0rc1/examples/prompt_embed_inference.py +88 -0
  264. vllm_ascend-0.14.0rc1/examples/prompt_embedding_inference.py +69 -0
  265. vllm_ascend-0.14.0rc1/examples/quantization/llm-compressor/w8a8_int8.py +150 -0
  266. vllm_ascend-0.14.0rc1/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +82 -0
  267. vllm_ascend-0.14.0rc1/examples/quantization/llm-compressor/w8a8_int8_dynamic_moe.py +26 -0
  268. vllm_ascend-0.14.0rc1/mypy.ini +36 -0
  269. vllm_ascend-0.14.0rc1/pyproject.toml +122 -0
  270. vllm_ascend-0.14.0rc1/requirements-dev.txt +25 -0
  271. vllm_ascend-0.14.0rc1/requirements.txt +37 -0
  272. vllm_ascend-0.14.0rc1/setup.py +513 -0
  273. vllm_ascend-0.14.0rc1/tests/e2e/310p/test_offline_inference_310p.py +78 -0
  274. vllm_ascend-0.14.0rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +36 -0
  275. vllm_ascend-0.14.0rc1/tests/e2e/conftest.py +924 -0
  276. vllm_ascend-0.14.0rc1/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +12 -0
  277. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py +157 -0
  278. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py +242 -0
  279. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_external_launcher.py +240 -0
  280. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_offline_inference_distributed.py +285 -0
  281. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_offline_weight_load.py +75 -0
  282. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_quantization.py +69 -0
  283. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_qwen3_moe.py +130 -0
  284. vllm_ascend-0.14.0rc1/tests/e2e/multicard/2-cards/test_qwen3_performance.py +101 -0
  285. vllm_ascend-0.14.0rc1/tests/e2e/multicard/4-cards/long_sequence/test_basic.py +249 -0
  286. vllm_ascend-0.14.0rc1/tests/e2e/multicard/4-cards/long_sequence/test_mtp.py +167 -0
  287. vllm_ascend-0.14.0rc1/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py +155 -0
  288. vllm_ascend-0.14.0rc1/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py +49 -0
  289. vllm_ascend-0.14.0rc1/tests/e2e/multicard/4-cards/test_kimi_k2.py +48 -0
  290. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-A2.yaml +61 -0
  291. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-EPLB.yaml +191 -0
  292. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +108 -0
  293. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8.yaml +205 -0
  294. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-V3.1-BF16.yaml +82 -0
  295. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml +110 -0
  296. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml +89 -0
  297. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Kimi-K2-Instruct-W8A8.yaml +79 -0
  298. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B-A2.yaml +72 -0
  299. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B.yaml +73 -0
  300. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-EPLB.yaml +91 -0
  301. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-longseq.yaml +98 -0
  302. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8.yaml +87 -0
  303. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-235B-disagg-pd.yaml +119 -0
  304. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/config/Qwen3-VL-235B-disagg-pd.yaml +106 -0
  305. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/scripts/lws-a2.yaml.jinja2 +126 -0
  306. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +126 -0
  307. vllm_ascend-0.14.0rc1/tests/e2e/nightly/multi_node/scripts/run.sh +171 -0
  308. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8.py +114 -0
  309. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8_eplb.py +115 -0
  310. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py +123 -0
  311. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py +109 -0
  312. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_kimi_k2_thinking.py +110 -0
  313. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_qwen3_235b_a22b_w8a8_eplb.py +105 -0
  314. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/models/test_qwen3_next.py +113 -0
  315. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py +234 -0
  316. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py +467 -0
  317. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py +149 -0
  318. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py +193 -0
  319. vllm_ascend-0.14.0rc1/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py +220 -0
  320. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +358 -0
  321. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/model_runner_v2/test_basic.py +87 -0
  322. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/pooling/test_scoring.py +186 -0
  323. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py +210 -0
  324. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py +436 -0
  325. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/test_aclgraph_accuracy.py +135 -0
  326. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/test_aclgraph_batch_invariant.py +682 -0
  327. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/test_auto_fit_max_mode_len.py +100 -0
  328. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/test_llama32_lora.py +119 -0
  329. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/test_qwen3_multi_loras.py +159 -0
  330. vllm_ascend-0.14.0rc1/tests/e2e/singlecard/test_xlite.py +107 -0
  331. vllm_ascend-0.14.0rc1/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
  332. vllm_ascend-0.14.0rc1/tests/ut/attention/test_attention_cp.py +679 -0
  333. vllm_ascend-0.14.0rc1/tests/ut/attention/test_attention_v1.py +354 -0
  334. vllm_ascend-0.14.0rc1/tests/ut/attention/test_mla_cp.py +1008 -0
  335. vllm_ascend-0.14.0rc1/tests/ut/attention/test_mla_v1.py +1130 -0
  336. vllm_ascend-0.14.0rc1/tests/ut/attention/test_sfa_v1.py +277 -0
  337. vllm_ascend-0.14.0rc1/tests/ut/compilation/test_acl_graph.py +863 -0
  338. vllm_ascend-0.14.0rc1/tests/ut/compilation/test_npugraph_ex_utils_check.py +54 -0
  339. vllm_ascend-0.14.0rc1/tests/ut/device_allocator/test_cpu_binding.py +167 -0
  340. vllm_ascend-0.14.0rc1/tests/ut/distributed/mooncake/test_config_data.py +78 -0
  341. vllm_ascend-0.14.0rc1/tests/ut/distributed/test_communicator.py +90 -0
  342. vllm_ascend-0.14.0rc1/tests/ut/eplb/core/policy/test_policy_default_eplb.py +98 -0
  343. vllm_ascend-0.14.0rc1/tests/ut/eplb/core/policy/test_policy_factor.py +23 -0
  344. vllm_ascend-0.14.0rc1/tests/ut/eplb/core/policy/test_policy_swift_balancer.py +99 -0
  345. vllm_ascend-0.14.0rc1/tests/ut/eplb/core/test_eplb_utils.py +57 -0
  346. vllm_ascend-0.14.0rc1/tests/ut/kv_connector/test_mooncake_connector.py +1372 -0
  347. vllm_ascend-0.14.0rc1/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +951 -0
  348. vllm_ascend-0.14.0rc1/tests/ut/kv_connector/utils.py +209 -0
  349. vllm_ascend-0.14.0rc1/tests/ut/ops/test_activation.py +96 -0
  350. vllm_ascend-0.14.0rc1/tests/ut/ops/test_fused_moe.py +572 -0
  351. vllm_ascend-0.14.0rc1/tests/ut/ops/test_layernorm.py +82 -0
  352. vllm_ascend-0.14.0rc1/tests/ut/ops/test_rotary_embedding.py +470 -0
  353. vllm_ascend-0.14.0rc1/tests/ut/ops/test_token_dispatcher.py +474 -0
  354. vllm_ascend-0.14.0rc1/tests/ut/ops/test_vocab_parallel_embedding.py +269 -0
  355. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_modelslim_config.py +162 -0
  356. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_w4a16.py +270 -0
  357. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +216 -0
  358. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +306 -0
  359. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_w8a16.py +91 -0
  360. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_w8a8.py +194 -0
  361. vllm_ascend-0.14.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +106 -0
  362. vllm_ascend-0.14.0rc1/tests/ut/spec_decode/test_eagle_proposer.py +441 -0
  363. vllm_ascend-0.14.0rc1/tests/ut/spec_decode/test_mtp_proposer.py +343 -0
  364. vllm_ascend-0.14.0rc1/tests/ut/test_ascend_config.py +116 -0
  365. vllm_ascend-0.14.0rc1/tests/ut/test_platform.py +437 -0
  366. vllm_ascend-0.14.0rc1/tests/ut/worker/test_worker_v1.py +1194 -0
  367. vllm_ascend-0.14.0rc1/tools/aisbench.py +297 -0
  368. vllm_ascend-0.14.0rc1/tools/check_python_src_init.py +74 -0
  369. vllm_ascend-0.14.0rc1/tools/enforce_regex_import.py +96 -0
  370. vllm_ascend-0.14.0rc1/tools/format_contributors.py +86 -0
  371. vllm_ascend-0.14.0rc1/tools/mypy.sh +44 -0
  372. vllm_ascend-0.14.0rc1/tools/send_mm_request.py +40 -0
  373. vllm_ascend-0.14.0rc1/tools/send_request.py +39 -0
  374. vllm_ascend-0.14.0rc1/tools/vllm_bench.py +157 -0
  375. vllm_ascend-0.14.0rc1/typos.toml +177 -0
  376. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/attention/attention_mask.py +98 -0
  377. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/attention/attention_v1.py +179 -0
  378. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/attention/metadata_builder.py +40 -0
  379. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/model_runner_310p.py +186 -0
  380. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/ops/activation.py +30 -0
  381. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/ops/layernorm.py +44 -0
  382. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/ops/mm_encoder_attention.py +67 -0
  383. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/ops/rotary_embedding.py +23 -0
  384. vllm_ascend-0.14.0rc1/vllm_ascend/_310p/worker_310p.py +37 -0
  385. vllm_ascend-0.14.0rc1/vllm_ascend/__init__.py +40 -0
  386. vllm_ascend-0.14.0rc1/vllm_ascend/_version.py +34 -0
  387. vllm_ascend-0.14.0rc1/vllm_ascend/ascend_config.py +378 -0
  388. vllm_ascend-0.14.0rc1/vllm_ascend/ascend_forward_context.py +272 -0
  389. vllm_ascend-0.14.0rc1/vllm_ascend/attention/attention_mask.py +95 -0
  390. vllm_ascend-0.14.0rc1/vllm_ascend/attention/attention_v1.py +942 -0
  391. vllm_ascend-0.14.0rc1/vllm_ascend/attention/context_parallel/attention_cp.py +904 -0
  392. vllm_ascend-0.14.0rc1/vllm_ascend/attention/context_parallel/common_cp.py +138 -0
  393. vllm_ascend-0.14.0rc1/vllm_ascend/attention/context_parallel/mla_cp.py +821 -0
  394. vllm_ascend-0.14.0rc1/vllm_ascend/attention/mla_v1.py +1535 -0
  395. vllm_ascend-0.14.0rc1/vllm_ascend/attention/sfa_v1.py +1060 -0
  396. vllm_ascend-0.14.0rc1/vllm_ascend/attention/utils.py +310 -0
  397. vllm_ascend-0.14.0rc1/vllm_ascend/batch_invariant.py +87 -0
  398. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/acl_graph.py +288 -0
  399. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/compiler_interface.py +139 -0
  400. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/graph_fusion_pass_manager.py +65 -0
  401. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/npu_graph_ex_pass_manager.py +51 -0
  402. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/npugraph_ex_passes/graphex_norm_quant_fusion_pass.py +316 -0
  403. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/npugraph_ex_passes/graphex_qknorm_rope_fusion_pass.py +227 -0
  404. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/npugraph_ex_passes/utils/npugraph_ex_utils_check.py +53 -0
  405. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/passes/allreduce_rmsnorm_fusion_pass.py +153 -0
  406. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +303 -0
  407. vllm_ascend-0.14.0rc1/vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py +229 -0
  408. vllm_ascend-0.14.0rc1/vllm_ascend/core/recompute_scheduler.py +798 -0
  409. vllm_ascend-0.14.0rc1/vllm_ascend/core/scheduler_dynamic_batch.py +576 -0
  410. vllm_ascend-0.14.0rc1/vllm_ascend/cpu_binding.py +286 -0
  411. vllm_ascend-0.14.0rc1/vllm_ascend/device/device_op.py +47 -0
  412. vllm_ascend-0.14.0rc1/vllm_ascend/device_allocator/camem.py +269 -0
  413. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/device_communicators/npu_communicator.py +64 -0
  414. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/device_communicators/pyhccl.py +174 -0
  415. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +263 -0
  416. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/__init__.py +46 -0
  417. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +1804 -0
  418. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py +1340 -0
  419. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/ascend_store_connector.py +168 -0
  420. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/backend.py +29 -0
  421. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/memcache_backend.py +85 -0
  422. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/mooncake_backend.py +188 -0
  423. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/config_data.py +397 -0
  424. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/kv_transfer.py +369 -0
  425. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py +375 -0
  426. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_worker.py +645 -0
  427. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_kv_cache_manager.py +179 -0
  428. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py +447 -0
  429. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/metadata.py +257 -0
  430. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py +245 -0
  431. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/utils/mooncake_transfer_engine.py +39 -0
  432. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/utils/utils.py +52 -0
  433. vllm_ascend-0.14.0rc1/vllm_ascend/distributed/utils.py +55 -0
  434. vllm_ascend-0.14.0rc1/vllm_ascend/envs.py +132 -0
  435. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/adaptor/abstract_adaptor.py +36 -0
  436. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/adaptor/vllm_adaptor.py +167 -0
  437. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +114 -0
  438. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/eplb_utils.py +114 -0
  439. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/eplb_worker.py +303 -0
  440. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy/policy_abstract.py +41 -0
  441. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy/policy_default_eplb.py +353 -0
  442. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy/policy_factory.py +30 -0
  443. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy/policy_flashlb.py +597 -0
  444. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy/policy_random.py +29 -0
  445. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy/policy_swift_balancer.py +789 -0
  446. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/eplb_updator.py +211 -0
  447. vllm_ascend-0.14.0rc1/vllm_ascend/eplb/utils.py +74 -0
  448. vllm_ascend-0.14.0rc1/vllm_ascend/flash_common3_context.py +41 -0
  449. vllm_ascend-0.14.0rc1/vllm_ascend/kv_offload/cpu_npu.py +172 -0
  450. vllm_ascend-0.14.0rc1/vllm_ascend/kv_offload/npu.py +58 -0
  451. vllm_ascend-0.14.0rc1/vllm_ascend/lora/lora_ops.py +122 -0
  452. vllm_ascend-0.14.0rc1/vllm_ascend/lora/punica_npu.py +357 -0
  453. vllm_ascend-0.14.0rc1/vllm_ascend/lora/utils.py +104 -0
  454. vllm_ascend-0.14.0rc1/vllm_ascend/meta_registration.py +111 -0
  455. vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/executor/elastic_load.py +152 -0
  456. vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/executor/netloader_pg.py +180 -0
  457. vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/interaction/elastic.py +386 -0
  458. vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/load.py +75 -0
  459. vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/netloader.py +316 -0
  460. vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/utils.py +63 -0
  461. vllm_ascend-0.14.0rc1/vllm_ascend/ops/activation.py +39 -0
  462. vllm_ascend-0.14.0rc1/vllm_ascend/ops/fused_moe/experts_selector.py +333 -0
  463. vllm_ascend-0.14.0rc1/vllm_ascend/ops/fused_moe/fused_moe.py +578 -0
  464. vllm_ascend-0.14.0rc1/vllm_ascend/ops/fused_moe/moe_comm_method.py +352 -0
  465. vllm_ascend-0.14.0rc1/vllm_ascend/ops/layernorm.py +172 -0
  466. vllm_ascend-0.14.0rc1/vllm_ascend/ops/linear_op.py +812 -0
  467. vllm_ascend-0.14.0rc1/vllm_ascend/ops/mla.py +185 -0
  468. vllm_ascend-0.14.0rc1/vllm_ascend/ops/mm_encoder_attention.py +149 -0
  469. vllm_ascend-0.14.0rc1/vllm_ascend/ops/register_custom_ops.py +385 -0
  470. vllm_ascend-0.14.0rc1/vllm_ascend/ops/rotary_embedding.py +654 -0
  471. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/activation/swiglu_quant.py +100 -0
  472. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/batch_invariant/matmul.py +396 -0
  473. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/batch_invariant/mean.py +173 -0
  474. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/batch_invariant/rmsnorm.py +152 -0
  475. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/chunk.py +224 -0
  476. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/chunk_delta_h.py +235 -0
  477. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/chunk_o.py +163 -0
  478. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py +146 -0
  479. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/cumsum.py +143 -0
  480. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py +115 -0
  481. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/l2norm.py +66 -0
  482. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/layernorm_guard.py +197 -0
  483. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/sigmoid_gating.py +393 -0
  484. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/solve_tril.py +392 -0
  485. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/utils.py +65 -0
  486. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla/wy_fast.py +141 -0
  487. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fused_gdn_gating.py +108 -0
  488. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/layernorm_gated.py +168 -0
  489. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/linearnorm/__init__.py +0 -0
  490. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py +285 -0
  491. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/mamba/__init__.py +0 -0
  492. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/mamba/causal_conv1d.py +665 -0
  493. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/reject_sample.py +432 -0
  494. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/rope.py +189 -0
  495. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/spec_decode/__init__.py +0 -0
  496. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/spec_decode/utils.py +63 -0
  497. vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/triton_utils.py +30 -0
  498. vllm_ascend-0.14.0rc1/vllm_ascend/patch/__init__.py +290 -0
  499. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/__init__.py +30 -0
  500. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/patch_balance_schedule.py +659 -0
  501. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/patch_distributed.py +89 -0
  502. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/patch_ec_connector.py +30 -0
  503. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/patch_mamba_config.py +95 -0
  504. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/patch_multiproc_executor.py +180 -0
  505. vllm_ascend-0.14.0rc1/vllm_ascend/patch/platform/patch_sched_yield.py +12 -0
  506. vllm_ascend-0.14.0rc1/vllm_ascend/patch/worker/__init__.py +36 -0
  507. vllm_ascend-0.14.0rc1/vllm_ascend/patch/worker/patch_distributed.py +121 -0
  508. vllm_ascend-0.14.0rc1/vllm_ascend/patch/worker/patch_qwen3_next.py +343 -0
  509. vllm_ascend-0.14.0rc1/vllm_ascend/patch/worker/patch_triton.py +18 -0
  510. vllm_ascend-0.14.0rc1/vllm_ascend/patch/worker/patch_unquantized_gemm.py +57 -0
  511. vllm_ascend-0.14.0rc1/vllm_ascend/patch/worker/patch_v2_egale.py +166 -0
  512. vllm_ascend-0.14.0rc1/vllm_ascend/platform.py +791 -0
  513. vllm_ascend-0.14.0rc1/vllm_ascend/profiling_config.py +193 -0
  514. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/__init__.py +38 -0
  515. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/compressed_tensors_config.py +439 -0
  516. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/method_adapters.py +288 -0
  517. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/__init__.py +82 -0
  518. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/base.py +279 -0
  519. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/registry.py +62 -0
  520. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w4a16.py +278 -0
  521. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w4a4_flatquant.py +190 -0
  522. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w4a4_laos_dynamic.py +126 -0
  523. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w4a8.py +475 -0
  524. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w8a16.py +83 -0
  525. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w8a8_dynamic.py +331 -0
  526. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w8a8_mxfp8.py +94 -0
  527. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w8a8_pdmix.py +117 -0
  528. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/methods/w8a8_static.py +181 -0
  529. vllm_ascend-0.14.0rc1/vllm_ascend/quantization/modelslim_config.py +471 -0
  530. vllm_ascend-0.14.0rc1/vllm_ascend/sample/__init__.py +0 -0
  531. vllm_ascend-0.14.0rc1/vllm_ascend/sample/sampler.py +138 -0
  532. vllm_ascend-0.14.0rc1/vllm_ascend/spec_decode/__init__.py +39 -0
  533. vllm_ascend-0.14.0rc1/vllm_ascend/spec_decode/eagle_proposer.py +1232 -0
  534. vllm_ascend-0.14.0rc1/vllm_ascend/spec_decode/interface.py +54 -0
  535. vllm_ascend-0.14.0rc1/vllm_ascend/spec_decode/medusa_proposer.py +98 -0
  536. vllm_ascend-0.14.0rc1/vllm_ascend/spec_decode/mtp_proposer.py +532 -0
  537. vllm_ascend-0.14.0rc1/vllm_ascend/utils.py +1211 -0
  538. vllm_ascend-0.14.0rc1/vllm_ascend/worker/__init__.py +0 -0
  539. vllm_ascend-0.14.0rc1/vllm_ascend/worker/model_runner_v1.py +2911 -0
  540. vllm_ascend-0.14.0rc1/vllm_ascend/worker/pcp_utils.py +841 -0
  541. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/__init__.py +0 -0
  542. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/attn_utils.py +146 -0
  543. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/model_runner.py +352 -0
  544. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/sample/__init__.py +0 -0
  545. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/sample/penalties.py +145 -0
  546. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/spec_decode/__init__.py +38 -0
  547. vllm_ascend-0.14.0rc1/vllm_ascend/worker/v2/spec_decode/eagle.py +146 -0
  548. vllm_ascend-0.14.0rc1/vllm_ascend/worker/worker.py +539 -0
  549. vllm_ascend-0.14.0rc1/vllm_ascend/xlite/__init__.py +0 -0
  550. vllm_ascend-0.14.0rc1/vllm_ascend/xlite/xlite.py +369 -0
  551. vllm_ascend-0.14.0rc1/vllm_ascend.egg-info/PKG-INFO +164 -0
  552. vllm_ascend-0.14.0rc1/vllm_ascend.egg-info/SOURCES.txt +1160 -0
  553. vllm_ascend-0.14.0rc1/vllm_ascend.egg-info/requires.txt +27 -0
  554. vllm_ascend-0.13.0rc2/.github/Dockerfile.buildwheel +0 -45
  555. vllm_ascend-0.13.0rc2/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -37
  556. vllm_ascend-0.13.0rc2/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -33
  557. vllm_ascend-0.13.0rc2/.github/actionlint.yaml +0 -23
  558. vllm_ascend-0.13.0rc2/.github/workflows/_e2e_nightly_multi_node.yaml +0 -299
  559. vllm_ascend-0.13.0rc2/.github/workflows/_e2e_nightly_single_node.yaml +0 -138
  560. vllm_ascend-0.13.0rc2/.github/workflows/_e2e_nightly_single_node_models.yaml +0 -232
  561. vllm_ascend-0.13.0rc2/.github/workflows/_e2e_test.yaml +0 -319
  562. vllm_ascend-0.13.0rc2/.github/workflows/_pre_commit.yml +0 -57
  563. vllm_ascend-0.13.0rc2/.github/workflows/_schedule_image_build.yaml +0 -180
  564. vllm_ascend-0.13.0rc2/.github/workflows/_unit_test.yaml +0 -92
  565. vllm_ascend-0.13.0rc2/.github/workflows/bot_pr_create.yaml +0 -113
  566. vllm_ascend-0.13.0rc2/.github/workflows/labled_doctest.yaml +0 -87
  567. vllm_ascend-0.13.0rc2/.github/workflows/labled_download_model.yaml +0 -80
  568. vllm_ascend-0.13.0rc2/.github/workflows/labled_test_310.yaml +0 -110
  569. vllm_ascend-0.13.0rc2/.github/workflows/misc/model_list.json +0 -240
  570. vllm_ascend-0.13.0rc2/.github/workflows/nightly_test_a2.yaml +0 -147
  571. vllm_ascend-0.13.0rc2/.github/workflows/nightly_test_a3.yaml +0 -176
  572. vllm_ascend-0.13.0rc2/.github/workflows/pr_close_cancel_job.yaml +0 -46
  573. vllm_ascend-0.13.0rc2/.github/workflows/pr_test_full.yaml +0 -85
  574. vllm_ascend-0.13.0rc2/.github/workflows/pr_test_light.yaml +0 -104
  575. vllm_ascend-0.13.0rc2/.github/workflows/schedule_codecov_refresh.yaml +0 -42
  576. vllm_ascend-0.13.0rc2/.github/workflows/schedule_image_build_and_push.yaml +0 -58
  577. vllm_ascend-0.13.0rc2/.github/workflows/schedule_release_code_and_wheel.yml +0 -150
  578. vllm_ascend-0.13.0rc2/.github/workflows/schedule_test_benchmarks.yaml +0 -203
  579. vllm_ascend-0.13.0rc2/.github/workflows/schedule_test_vllm_main.yaml +0 -39
  580. vllm_ascend-0.13.0rc2/.pre-commit-config.yaml +0 -137
  581. vllm_ascend-0.13.0rc2/CMakeLists.txt +0 -140
  582. vllm_ascend-0.13.0rc2/Dockerfile +0 -81
  583. vllm_ascend-0.13.0rc2/Dockerfile.310p +0 -66
  584. vllm_ascend-0.13.0rc2/Dockerfile.310p.openEuler +0 -62
  585. vllm_ascend-0.13.0rc2/Dockerfile.a3 +0 -80
  586. vllm_ascend-0.13.0rc2/Dockerfile.a3.openEuler +0 -80
  587. vllm_ascend-0.13.0rc2/Dockerfile.openEuler +0 -80
  588. vllm_ascend-0.13.0rc2/PKG-INFO +0 -150
  589. vllm_ascend-0.13.0rc2/README.md +0 -93
  590. vllm_ascend-0.13.0rc2/README.zh.md +0 -92
  591. vllm_ascend-0.13.0rc2/benchmarks/README.md +0 -175
  592. vllm_ascend-0.13.0rc2/benchmarks/ops/ben_vocabparallelembedding.py +0 -158
  593. vllm_ascend-0.13.0rc2/benchmarks/scripts/convert_json_to_markdown.py +0 -188
  594. vllm_ascend-0.13.0rc2/benchmarks/scripts/perf_result_template.md +0 -31
  595. vllm_ascend-0.13.0rc2/cmake/utils.cmake +0 -133
  596. vllm_ascend-0.13.0rc2/collect_env.py +0 -489
  597. vllm_ascend-0.13.0rc2/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp +0 -443
  598. vllm_ascend-0.13.0rc2/csrc/build_aclnn.sh +0 -92
  599. vllm_ascend-0.13.0rc2/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +0 -310
  600. vllm_ascend-0.13.0rc2/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +0 -40
  601. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +0 -88
  602. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp +0 -760
  603. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h +0 -311
  604. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +0 -383
  605. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +0 -2060
  606. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +0 -35
  607. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +0 -440
  608. vllm_ascend-0.13.0rc2/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +0 -21
  609. vllm_ascend-0.13.0rc2/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +0 -50
  610. vllm_ascend-0.13.0rc2/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -498
  611. vllm_ascend-0.13.0rc2/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +0 -694
  612. vllm_ascend-0.13.0rc2/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +0 -62
  613. vllm_ascend-0.13.0rc2/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +0 -546
  614. vllm_ascend-0.13.0rc2/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h +0 -66
  615. vllm_ascend-0.13.0rc2/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp +0 -573
  616. vllm_ascend-0.13.0rc2/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp +0 -521
  617. vllm_ascend-0.13.0rc2/csrc/moe_gating_top_k/tiling_base/tiling_key.h +0 -63
  618. vllm_ascend-0.13.0rc2/csrc/moe_gating_top_k/tiling_base/tiling_type.h +0 -139
  619. vllm_ascend-0.13.0rc2/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp +0 -1267
  620. vllm_ascend-0.13.0rc2/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h +0 -300
  621. vllm_ascend-0.13.0rc2/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h +0 -229
  622. vllm_ascend-0.13.0rc2/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h +0 -224
  623. vllm_ascend-0.13.0rc2/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +0 -61
  624. vllm_ascend-0.13.0rc2/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +0 -1845
  625. vllm_ascend-0.13.0rc2/csrc/torch_binding.cpp +0 -1483
  626. vllm_ascend-0.13.0rc2/csrc/utils/inc/tiling/tiling_type.h +0 -136
  627. vllm_ascend-0.13.0rc2/docs/README.md +0 -24
  628. vllm_ascend-0.13.0rc2/docs/requirements-test.txt +0 -2
  629. vllm_ascend-0.13.0rc2/docs/source/_templates/sections/header.html +0 -58
  630. vllm_ascend-0.13.0rc2/docs/source/assets/cp/blocktable.png +0 -0
  631. vllm_ascend-0.13.0rc2/docs/source/assets/cp/pcp-prefill.png +0 -0
  632. vllm_ascend-0.13.0rc2/docs/source/community/contributors.md +0 -291
  633. vllm_ascend-0.13.0rc2/docs/source/community/governance.md +0 -48
  634. vllm_ascend-0.13.0rc2/docs/source/community/versioning_policy.md +0 -160
  635. vllm_ascend-0.13.0rc2/docs/source/conf.py +0 -145
  636. vllm_ascend-0.13.0rc2/docs/source/developer_guide/contribution/index.md +0 -112
  637. vllm_ascend-0.13.0rc2/docs/source/developer_guide/contribution/multi_node_test.md +0 -349
  638. vllm_ascend-0.13.0rc2/docs/source/developer_guide/contribution/testing.md +0 -288
  639. vllm_ascend-0.13.0rc2/docs/source/developer_guide/evaluation/using_ais_bench.md +0 -324
  640. vllm_ascend-0.13.0rc2/docs/source/developer_guide/evaluation/using_evalscope.md +0 -176
  641. vllm_ascend-0.13.0rc2/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -302
  642. vllm_ascend-0.13.0rc2/docs/source/developer_guide/evaluation/using_opencompass.md +0 -124
  643. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -102
  644. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md +0 -83
  645. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -259
  646. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/context_parallel.md +0 -119
  647. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +0 -103
  648. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md +0 -222
  649. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/patch.md +0 -75
  650. vllm_ascend-0.13.0rc2/docs/source/developer_guide/feature_guide/quantization.md +0 -111
  651. vllm_ascend-0.13.0rc2/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +0 -516
  652. vllm_ascend-0.13.0rc2/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +0 -268
  653. vllm_ascend-0.13.0rc2/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +0 -239
  654. vllm_ascend-0.13.0rc2/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md +0 -40
  655. vllm_ascend-0.13.0rc2/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md +0 -250
  656. vllm_ascend-0.13.0rc2/docs/source/faqs.md +0 -250
  657. vllm_ascend-0.13.0rc2/docs/source/installation.md +0 -494
  658. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -5508
  659. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -218
  660. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -98
  661. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -97
  662. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -926
  663. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -182
  664. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po +0 -219
  665. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -225
  666. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po +0 -239
  667. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -107
  668. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -118
  669. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -80
  670. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ACL_Graph.po +0 -266
  671. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/KV_Cache_Pool_Guide.po +0 -300
  672. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ModelRunner_prepare_inputs.po +0 -625
  673. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/add_custom_aclnn_op.po +0 -85
  674. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/context_parallel.po +0 -369
  675. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/disaggregated_prefill.po +0 -347
  676. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/eplb_swift_balancer.po +0 -457
  677. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -224
  678. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/quantization.po +0 -360
  679. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -331
  680. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +0 -26
  681. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +0 -588
  682. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po +0 -349
  683. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +0 -312
  684. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +0 -78
  685. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +0 -612
  686. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -654
  687. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -75
  688. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -453
  689. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -160
  690. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/310p.po +0 -125
  691. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-R1.po +0 -370
  692. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.1.po +0 -612
  693. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.2.po +0 -395
  694. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/GLM4.x.po +0 -325
  695. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Kimi-K2-Thinking.po +0 -65
  696. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/PaddleOCR-VL.po +0 -218
  697. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen-VL-Dense.po +0 -363
  698. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-7B.po +0 -279
  699. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-Omni.po +0 -302
  700. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-235B-A22B.po +0 -739
  701. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-30B-A3B.po +0 -67
  702. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-32B-W4A4.po +0 -91
  703. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-8B-W4A8.po +0 -73
  704. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Coder-30B-A3B.po +0 -216
  705. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Dense.po +0 -908
  706. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Next.po +0 -305
  707. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Omni-30B-A3B-Thinking.po +0 -248
  708. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-VL-235B-A22B-Instruct.po +0 -475
  709. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_embedding.po +0 -164
  710. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_reranker.po +0 -171
  711. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_multi_node.po +0 -447
  712. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_single_node.po +0 -387
  713. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_colocated_mooncake_multi_instance.po +0 -518
  714. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_multi_node.po +0 -406
  715. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_single_node.po +0 -214
  716. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/ray.po +0 -235
  717. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -464
  718. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po +0 -25
  719. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po +0 -290
  720. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po +0 -327
  721. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po +0 -260
  722. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po +0 -304
  723. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po +0 -101
  724. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po +0 -268
  725. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po +0 -175
  726. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -143
  727. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po +0 -290
  728. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +0 -496
  729. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po +0 -181
  730. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -106
  731. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po +0 -359
  732. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -193
  733. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -148
  734. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po +0 -176
  735. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -93
  736. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po +0 -240
  737. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -5278
  738. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -31
  739. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -244
  740. vllm_ascend-0.13.0rc2/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -537
  741. vllm_ascend-0.13.0rc2/docs/source/quick_start.md +0 -195
  742. vllm_ascend-0.13.0rc2/docs/source/tutorials/310p.md +0 -409
  743. vllm_ascend-0.13.0rc2/docs/source/tutorials/DeepSeek-R1.md +0 -309
  744. vllm_ascend-0.13.0rc2/docs/source/tutorials/DeepSeek-V3.1.md +0 -724
  745. vllm_ascend-0.13.0rc2/docs/source/tutorials/DeepSeek-V3.2.md +0 -656
  746. vllm_ascend-0.13.0rc2/docs/source/tutorials/GLM4.x.md +0 -173
  747. vllm_ascend-0.13.0rc2/docs/source/tutorials/Kimi-K2-Thinking.md +0 -107
  748. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen-VL-Dense.md +0 -577
  749. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen2.5-7B.md +0 -178
  750. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen2.5-Omni.md +0 -209
  751. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-235B-A22B.md +0 -622
  752. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-30B-A3B.md +0 -113
  753. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-32B-W4A4.md +0 -143
  754. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-8B-W4A8.md +0 -138
  755. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-Coder-30B-A3B.md +0 -105
  756. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-Dense.md +0 -378
  757. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-Next.md +0 -181
  758. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-Omni-30B-A3B-Thinking.md +0 -311
  759. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3-VL-235B-A22B-Instruct.md +0 -273
  760. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3_embedding.md +0 -117
  761. vllm_ascend-0.13.0rc2/docs/source/tutorials/Qwen3_reranker.md +0 -188
  762. vllm_ascend-0.13.0rc2/docs/source/tutorials/index.md +0 -33
  763. vllm_ascend-0.13.0rc2/docs/source/tutorials/long_sequence_context_parallel_multi_node.md +0 -372
  764. vllm_ascend-0.13.0rc2/docs/source/tutorials/long_sequence_context_parallel_single_node.md +0 -174
  765. vllm_ascend-0.13.0rc2/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +0 -947
  766. vllm_ascend-0.13.0rc2/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +0 -278
  767. vllm_ascend-0.13.0rc2/docs/source/tutorials/ray.md +0 -187
  768. vllm_ascend-0.13.0rc2/docs/source/user_guide/configuration/additional_config.md +0 -119
  769. vllm_ascend-0.13.0rc2/docs/source/user_guide/deployment_guide/using_volcano_kthena.md +0 -433
  770. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/Fine_grained_TP.md +0 -103
  771. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/Multi_Token_Prediction.md +0 -109
  772. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/context_parallel.md +0 -88
  773. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/dynamic_batch.md +0 -51
  774. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -98
  775. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/external_dp.md +0 -86
  776. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/graph_mode.md +0 -82
  777. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/images/layer_sharding.png +0 -0
  778. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/kv_pool.md +0 -362
  779. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/large_scale_ep.md +0 -504
  780. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/layer_sharding.md +0 -71
  781. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/lora.md +0 -26
  782. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/quantization.md +0 -148
  783. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/sleep_mode.md +0 -116
  784. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/speculative_decoding.md +0 -146
  785. vllm_ascend-0.13.0rc2/docs/source/user_guide/feature_guide/ucm_deployment.md +0 -141
  786. vllm_ascend-0.13.0rc2/docs/source/user_guide/release_notes.md +0 -946
  787. vllm_ascend-0.13.0rc2/docs/source/user_guide/support_matrix/supported_features.md +0 -48
  788. vllm_ascend-0.13.0rc2/docs/source/user_guide/support_matrix/supported_models.md +0 -86
  789. vllm_ascend-0.13.0rc2/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +0 -585
  790. vllm_ascend-0.13.0rc2/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -897
  791. vllm_ascend-0.13.0rc2/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -165
  792. vllm_ascend-0.13.0rc2/examples/eplb/eplb_deepseek.py +0 -205
  793. vllm_ascend-0.13.0rc2/examples/eplb/eplb_strategy.py +0 -183
  794. vllm_ascend-0.13.0rc2/examples/external_online_dp/README.md +0 -54
  795. vllm_ascend-0.13.0rc2/examples/external_online_dp/dp_load_balance_proxy_server.py +0 -404
  796. vllm_ascend-0.13.0rc2/examples/external_online_dp/launch_online_dp.py +0 -98
  797. vllm_ascend-0.13.0rc2/examples/offline_data_parallel.py +0 -266
  798. vllm_ascend-0.13.0rc2/examples/offline_disaggregated_prefill_npu.py +0 -168
  799. vllm_ascend-0.13.0rc2/examples/offline_embed.py +0 -58
  800. vllm_ascend-0.13.0rc2/examples/offline_external_launcher.py +0 -331
  801. vllm_ascend-0.13.0rc2/examples/offline_inference_audio_language.py +0 -106
  802. vllm_ascend-0.13.0rc2/examples/offline_inference_npu_long_seq.py +0 -59
  803. vllm_ascend-0.13.0rc2/examples/offline_inference_npu_tp2.py +0 -55
  804. vllm_ascend-0.13.0rc2/examples/offline_inference_sleep_mode_npu.py +0 -57
  805. vllm_ascend-0.13.0rc2/examples/offline_weight_load.py +0 -334
  806. vllm_ascend-0.13.0rc2/examples/prompt_embed_inference.py +0 -97
  807. vllm_ascend-0.13.0rc2/examples/prompt_embedding_inference.py +0 -88
  808. vllm_ascend-0.13.0rc2/examples/quantization/llm-compressor/w8a8_int8.py +0 -162
  809. vllm_ascend-0.13.0rc2/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +0 -82
  810. vllm_ascend-0.13.0rc2/mypy.ini +0 -34
  811. vllm_ascend-0.13.0rc2/pyproject.toml +0 -45
  812. vllm_ascend-0.13.0rc2/requirements-dev.txt +0 -25
  813. vllm_ascend-0.13.0rc2/requirements.txt +0 -38
  814. vllm_ascend-0.13.0rc2/setup.py +0 -532
  815. vllm_ascend-0.13.0rc2/tests/e2e/310p/test_offline_inference_310p.py +0 -72
  816. vllm_ascend-0.13.0rc2/tests/e2e/conftest.py +0 -794
  817. vllm_ascend-0.13.0rc2/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -12
  818. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py +0 -154
  819. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py +0 -240
  820. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/test_external_launcher.py +0 -240
  821. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/test_offline_inference_distributed.py +0 -237
  822. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/test_offline_weight_load.py +0 -75
  823. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/test_quantization.py +0 -44
  824. vllm_ascend-0.13.0rc2/tests/e2e/multicard/2-cards/test_qwen3_moe.py +0 -128
  825. vllm_ascend-0.13.0rc2/tests/e2e/multicard/4-cards/long_sequence/test_basic.py +0 -248
  826. vllm_ascend-0.13.0rc2/tests/e2e/multicard/4-cards/long_sequence/test_mtp.py +0 -153
  827. vllm_ascend-0.13.0rc2/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py +0 -155
  828. vllm_ascend-0.13.0rc2/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py +0 -49
  829. vllm_ascend-0.13.0rc2/tests/e2e/multicard/4-cards/test_kimi_k2.py +0 -44
  830. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-A2.yaml +0 -57
  831. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-EPLB.yaml +0 -195
  832. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +0 -109
  833. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8.yaml +0 -194
  834. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml +0 -111
  835. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-Exp-bf16.yaml +0 -51
  836. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml +0 -86
  837. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B-A2.yaml +0 -72
  838. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B.yaml +0 -70
  839. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-EPLB.yaml +0 -91
  840. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-longseq.yaml +0 -98
  841. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8.yaml +0 -87
  842. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-235B-disagg-pd.yaml +0 -121
  843. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/Qwen3-VL-235B-disagg-pd.yaml +0 -108
  844. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +0 -140
  845. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/scripts/run.sh +0 -170
  846. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8.py +0 -117
  847. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8_eplb.py +0 -115
  848. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_exp_w8a8.py +0 -105
  849. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -105
  850. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_qwen3_8b.py +0 -99
  851. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_qwen3_next.py +0 -113
  852. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py +0 -469
  853. vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py +0 -148
  854. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +0 -362
  855. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/model_runner_v2/test_basic.py +0 -51
  856. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/pooling/test_scoring.py +0 -187
  857. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py +0 -210
  858. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py +0 -379
  859. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/test_aclgraph_accuracy.py +0 -136
  860. vllm_ascend-0.13.0rc2/tests/e2e/singlecard/test_xlite.py +0 -107
  861. vllm_ascend-0.13.0rc2/tests/e2e/vllm_interface/vllm_test.cfg +0 -2
  862. vllm_ascend-0.13.0rc2/tests/ut/attention/test_attention_cp.py +0 -672
  863. vllm_ascend-0.13.0rc2/tests/ut/attention/test_attention_v1.py +0 -322
  864. vllm_ascend-0.13.0rc2/tests/ut/attention/test_mla_cp.py +0 -1003
  865. vllm_ascend-0.13.0rc2/tests/ut/attention/test_mla_v1.py +0 -1051
  866. vllm_ascend-0.13.0rc2/tests/ut/attention/test_sfa_v1.py +0 -207
  867. vllm_ascend-0.13.0rc2/tests/ut/compilation/test_acl_graph.py +0 -857
  868. vllm_ascend-0.13.0rc2/tests/ut/compilation/test_add_rms_norm_quant.py +0 -148
  869. vllm_ascend-0.13.0rc2/tests/ut/distributed/mooncake/test_config_data.py +0 -78
  870. vllm_ascend-0.13.0rc2/tests/ut/distributed/test_communicator.py +0 -89
  871. vllm_ascend-0.13.0rc2/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -98
  872. vllm_ascend-0.13.0rc2/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -99
  873. vllm_ascend-0.13.0rc2/tests/ut/eplb/core/policy/test_policy_factor.py +0 -23
  874. vllm_ascend-0.13.0rc2/tests/ut/eplb/core/test_eplb_utils.py +0 -211
  875. vllm_ascend-0.13.0rc2/tests/ut/kv_connector/test_mooncake_connector.py +0 -1345
  876. vllm_ascend-0.13.0rc2/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -930
  877. vllm_ascend-0.13.0rc2/tests/ut/kv_connector/utils.py +0 -210
  878. vllm_ascend-0.13.0rc2/tests/ut/ops/test_activation.py +0 -76
  879. vllm_ascend-0.13.0rc2/tests/ut/ops/test_fused_moe.py +0 -593
  880. vllm_ascend-0.13.0rc2/tests/ut/ops/test_layernorm.py +0 -75
  881. vllm_ascend-0.13.0rc2/tests/ut/ops/test_rotary_embedding.py +0 -453
  882. vllm_ascend-0.13.0rc2/tests/ut/ops/test_token_dispatcher.py +0 -456
  883. vllm_ascend-0.13.0rc2/tests/ut/ops/test_vocab_parallel_embedding.py +0 -260
  884. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_quant_config.py +0 -158
  885. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_utils.py +0 -50
  886. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_w4a16.py +0 -269
  887. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -216
  888. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_w4a8_dynamic.py +0 -306
  889. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_w8a16.py +0 -91
  890. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_w8a8.py +0 -218
  891. vllm_ascend-0.13.0rc2/tests/ut/quantization/test_w8a8_dynamic.py +0 -106
  892. vllm_ascend-0.13.0rc2/tests/ut/spec_decode/test_eagle_proposer.py +0 -440
  893. vllm_ascend-0.13.0rc2/tests/ut/spec_decode/test_mtp_proposer.py +0 -344
  894. vllm_ascend-0.13.0rc2/tests/ut/test_ascend_config.py +0 -109
  895. vllm_ascend-0.13.0rc2/tests/ut/test_platform.py +0 -492
  896. vllm_ascend-0.13.0rc2/tests/ut/worker/test_worker_v1.py +0 -1166
  897. vllm_ascend-0.13.0rc2/tools/aisbench.py +0 -330
  898. vllm_ascend-0.13.0rc2/tools/check_python_src_init.py +0 -76
  899. vllm_ascend-0.13.0rc2/tools/enforce_regex_import.py +0 -104
  900. vllm_ascend-0.13.0rc2/tools/format_contributors.py +0 -98
  901. vllm_ascend-0.13.0rc2/tools/mypy.sh +0 -40
  902. vllm_ascend-0.13.0rc2/tools/send_mm_request.py +0 -49
  903. vllm_ascend-0.13.0rc2/tools/send_request.py +0 -37
  904. vllm_ascend-0.13.0rc2/tools/vllm_bench.py +0 -154
  905. vllm_ascend-0.13.0rc2/typos.toml +0 -177
  906. vllm_ascend-0.13.0rc2/vllm_ascend/__init__.py +0 -37
  907. vllm_ascend-0.13.0rc2/vllm_ascend/_version.py +0 -34
  908. vllm_ascend-0.13.0rc2/vllm_ascend/ascend_config.py +0 -320
  909. vllm_ascend-0.13.0rc2/vllm_ascend/ascend_forward_context.py +0 -275
  910. vllm_ascend-0.13.0rc2/vllm_ascend/attention/attention_mask.py +0 -102
  911. vllm_ascend-0.13.0rc2/vllm_ascend/attention/attention_v1.py +0 -776
  912. vllm_ascend-0.13.0rc2/vllm_ascend/attention/context_parallel/attention_cp.py +0 -899
  913. vllm_ascend-0.13.0rc2/vllm_ascend/attention/context_parallel/common_cp.py +0 -132
  914. vllm_ascend-0.13.0rc2/vllm_ascend/attention/context_parallel/mla_cp.py +0 -779
  915. vllm_ascend-0.13.0rc2/vllm_ascend/attention/mla_v1.py +0 -1544
  916. vllm_ascend-0.13.0rc2/vllm_ascend/attention/sfa_v1.py +0 -1110
  917. vllm_ascend-0.13.0rc2/vllm_ascend/attention/utils.py +0 -271
  918. vllm_ascend-0.13.0rc2/vllm_ascend/batch_invariant.py +0 -82
  919. vllm_ascend-0.13.0rc2/vllm_ascend/compilation/acl_graph.py +0 -609
  920. vllm_ascend-0.13.0rc2/vllm_ascend/compilation/compiler_interface.py +0 -139
  921. vllm_ascend-0.13.0rc2/vllm_ascend/compilation/graph_fusion_pass_manager.py +0 -60
  922. vllm_ascend-0.13.0rc2/vllm_ascend/compilation/npugraph_ex_passes/add_rms_norm_quant.py +0 -301
  923. vllm_ascend-0.13.0rc2/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +0 -316
  924. vllm_ascend-0.13.0rc2/vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py +0 -291
  925. vllm_ascend-0.13.0rc2/vllm_ascend/core/multi_block_pool.py +0 -184
  926. vllm_ascend-0.13.0rc2/vllm_ascend/core/recompute_scheduler.py +0 -883
  927. vllm_ascend-0.13.0rc2/vllm_ascend/core/scheduler_dynamic_batch.py +0 -597
  928. vllm_ascend-0.13.0rc2/vllm_ascend/cpu_binding.py +0 -330
  929. vllm_ascend-0.13.0rc2/vllm_ascend/device_allocator/camem.py +0 -275
  930. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/__init__.py +0 -44
  931. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/communicator.py +0 -75
  932. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/cpu_offload_connector.py +0 -528
  933. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -203
  934. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -272
  935. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -165
  936. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -253
  937. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/ascend_store_connector.py +0 -183
  938. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/backend/__init__.py +0 -1
  939. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/backend/backend.py +0 -29
  940. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +0 -95
  941. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py +0 -190
  942. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/config_data.py +0 -405
  943. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/kv_transfer.py +0 -366
  944. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/pool_scheduler.py +0 -391
  945. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool/pool_worker.py +0 -626
  946. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/mooncake_connector.py +0 -1849
  947. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/mooncake_layerwise_connector.py +0 -1353
  948. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/mooncake_transfer_engine.py +0 -53
  949. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/ucm_connector.py +0 -254
  950. vllm_ascend-0.13.0rc2/vllm_ascend/distributed/utils.py +0 -111
  951. vllm_ascend-0.13.0rc2/vllm_ascend/envs.py +0 -148
  952. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -40
  953. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -184
  954. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -133
  955. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/eplb_utils.py +0 -191
  956. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/eplb_worker.py +0 -440
  957. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -42
  958. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -389
  959. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -768
  960. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy/policy_factory.py +0 -33
  961. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -648
  962. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy/policy_random.py +0 -30
  963. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/eplb_updator.py +0 -233
  964. vllm_ascend-0.13.0rc2/vllm_ascend/eplb/utils.py +0 -77
  965. vllm_ascend-0.13.0rc2/vllm_ascend/flash_common3_context.py +0 -42
  966. vllm_ascend-0.13.0rc2/vllm_ascend/kv_offload/cpu_npu.py +0 -168
  967. vllm_ascend-0.13.0rc2/vllm_ascend/kv_offload/npu.py +0 -64
  968. vllm_ascend-0.13.0rc2/vllm_ascend/lora/lora_ops.py +0 -113
  969. vllm_ascend-0.13.0rc2/vllm_ascend/lora/punica_npu.py +0 -362
  970. vllm_ascend-0.13.0rc2/vllm_ascend/lora/utils.py +0 -110
  971. vllm_ascend-0.13.0rc2/vllm_ascend/meta_registration.py +0 -105
  972. vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/executor/elastic_load.py +0 -170
  973. vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/executor/netloader_pg.py +0 -188
  974. vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/interaction/elastic.py +0 -408
  975. vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/load.py +0 -84
  976. vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/netloader.py +0 -326
  977. vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/utils.py +0 -66
  978. vllm_ascend-0.13.0rc2/vllm_ascend/ops/activation.py +0 -44
  979. vllm_ascend-0.13.0rc2/vllm_ascend/ops/fused_moe/experts_selector.py +0 -354
  980. vllm_ascend-0.13.0rc2/vllm_ascend/ops/fused_moe/fused_moe.py +0 -603
  981. vllm_ascend-0.13.0rc2/vllm_ascend/ops/fused_moe/moe_comm_method.py +0 -345
  982. vllm_ascend-0.13.0rc2/vllm_ascend/ops/layernorm.py +0 -107
  983. vllm_ascend-0.13.0rc2/vllm_ascend/ops/linear_op.py +0 -814
  984. vllm_ascend-0.13.0rc2/vllm_ascend/ops/mla.py +0 -185
  985. vllm_ascend-0.13.0rc2/vllm_ascend/ops/mm_encoder_attention.py +0 -146
  986. vllm_ascend-0.13.0rc2/vllm_ascend/ops/register_custom_ops.py +0 -379
  987. vllm_ascend-0.13.0rc2/vllm_ascend/ops/rotary_embedding.py +0 -655
  988. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/activation/swiglu_quant.py +0 -117
  989. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/batch_invariant/matmul.py +0 -403
  990. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/batch_invariant/mean.py +0 -177
  991. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/batch_invariant/rmsnorm.py +0 -153
  992. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/chunk.py +0 -226
  993. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/chunk_delta_h.py +0 -259
  994. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/chunk_o.py +0 -168
  995. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py +0 -147
  996. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/cumsum.py +0 -145
  997. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py +0 -115
  998. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/l2norm.py +0 -70
  999. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/layernorm_guard.py +0 -201
  1000. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/sigmoid_gating.py +0 -395
  1001. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/solve_tril.py +0 -419
  1002. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/utils.py +0 -79
  1003. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla/wy_fast.py +0 -131
  1004. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fused_gdn_gating.py +0 -118
  1005. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py +0 -305
  1006. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/mamba/causal_conv1d.py +0 -721
  1007. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/reject_sample.py +0 -461
  1008. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/rope.py +0 -207
  1009. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/spec_decode/utils.py +0 -68
  1010. vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/triton_utils.py +0 -30
  1011. vllm_ascend-0.13.0rc2/vllm_ascend/patch/__init__.py +0 -333
  1012. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/__init__.py +0 -48
  1013. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_balance_schedule.py +0 -622
  1014. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_core.py +0 -78
  1015. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_distributed.py +0 -93
  1016. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_ec_connector.py +0 -31
  1017. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_kv_cache_coordinator.py +0 -142
  1018. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_kv_cache_utils.py +0 -193
  1019. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_lora_model_manager.py +0 -83
  1020. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_mamba_config.py +0 -97
  1021. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_message_queue.py +0 -78
  1022. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -182
  1023. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_sched_yield.py +0 -13
  1024. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_set_cudagraph_sizes.py +0 -144
  1025. vllm_ascend-0.13.0rc2/vllm_ascend/patch/platform/patch_vllm_config.py +0 -436
  1026. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/__init__.py +0 -41
  1027. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/patch_deepseekv3.py +0 -484
  1028. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/patch_distributed.py +0 -115
  1029. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/patch_model_runner.py +0 -298
  1030. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/patch_qwen3_next.py +0 -343
  1031. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/patch_qwen3vl.py +0 -41
  1032. vllm_ascend-0.13.0rc2/vllm_ascend/patch/worker/patch_triton.py +0 -14
  1033. vllm_ascend-0.13.0rc2/vllm_ascend/platform.py +0 -598
  1034. vllm_ascend-0.13.0rc2/vllm_ascend/profiling_config.py +0 -202
  1035. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/compressed_tensors/compressed_tensors.py +0 -279
  1036. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/quant_config.py +0 -600
  1037. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/utils.py +0 -125
  1038. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w4a16.py +0 -278
  1039. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -191
  1040. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w4a8_dynamic.py +0 -482
  1041. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w8a16.py +0 -89
  1042. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w8a8.py +0 -193
  1043. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w8a8_dynamic.py +0 -353
  1044. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w8a8_pdmix.py +0 -70
  1045. vllm_ascend-0.13.0rc2/vllm_ascend/quantization/w8a8mxfp8.py +0 -98
  1046. vllm_ascend-0.13.0rc2/vllm_ascend/sample/sampler.py +0 -126
  1047. vllm_ascend-0.13.0rc2/vllm_ascend/spec_decode/__init__.py +0 -36
  1048. vllm_ascend-0.13.0rc2/vllm_ascend/spec_decode/eagle_proposer.py +0 -1124
  1049. vllm_ascend-0.13.0rc2/vllm_ascend/spec_decode/interface.py +0 -53
  1050. vllm_ascend-0.13.0rc2/vllm_ascend/spec_decode/mtp_proposer.py +0 -567
  1051. vllm_ascend-0.13.0rc2/vllm_ascend/utils.py +0 -1223
  1052. vllm_ascend-0.13.0rc2/vllm_ascend/worker/model_runner_v1.py +0 -3146
  1053. vllm_ascend-0.13.0rc2/vllm_ascend/worker/pcp_utils.py +0 -840
  1054. vllm_ascend-0.13.0rc2/vllm_ascend/worker/v2/attn_utils.py +0 -171
  1055. vllm_ascend-0.13.0rc2/vllm_ascend/worker/v2/model_runner.py +0 -369
  1056. vllm_ascend-0.13.0rc2/vllm_ascend/worker/v2/sample/penalties.py +0 -137
  1057. vllm_ascend-0.13.0rc2/vllm_ascend/worker/worker.py +0 -537
  1058. vllm_ascend-0.13.0rc2/vllm_ascend/xlite/xlite.py +0 -298
  1059. vllm_ascend-0.13.0rc2/vllm_ascend.egg-info/PKG-INFO +0 -150
  1060. vllm_ascend-0.13.0rc2/vllm_ascend.egg-info/SOURCES.txt +0 -1115
  1061. vllm_ascend-0.13.0rc2/vllm_ascend.egg-info/requires.txt +0 -26
  1062. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.gemini/config.yaml +0 -0
  1063. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/Dockerfile.nightly.a2 +0 -0
  1064. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/Dockerfile.nightly.a3 +0 -0
  1065. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  1066. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  1067. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  1068. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  1069. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  1070. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  1071. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  1072. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  1073. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  1074. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  1075. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  1076. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/dependabot.yml +0 -0
  1077. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/labeler.yml +0 -0
  1078. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/workflows/bot_merge_conflict.yaml +0 -0
  1079. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/workflows/matchers/actionlint.json +0 -0
  1080. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/workflows/matchers/mypy.json +0 -0
  1081. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.github/workflows/schedule_nightly_image_build.yaml +0 -0
  1082. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.gitignore +0 -0
  1083. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.gitmodules +0 -0
  1084. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/.readthedocs.yaml +0 -0
  1085. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/CODE_OF_CONDUCT.md +0 -0
  1086. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/CONTRIBUTING.md +0 -0
  1087. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/DCO +0 -0
  1088. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/LICENSE +0 -0
  1089. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/benchmarks/requirements-bench.txt +0 -0
  1090. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  1091. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/benchmarks/tests/latency-tests.json +0 -0
  1092. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/benchmarks/tests/serving-tests.json +0 -0
  1093. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/benchmarks/tests/throughput-tests.json +0 -0
  1094. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/codecov.yml +0 -0
  1095. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/CMakeLists.txt +0 -0
  1096. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/aclnn_torch_adapter/NPUBridge.cpp +0 -0
  1097. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/aclnn_torch_adapter/NPUBridge.h +0 -0
  1098. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/aclnn_torch_adapter/NPUStorageImpl.cpp +0 -0
  1099. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/aclnn_torch_adapter/NPUStorageImpl.h +0 -0
  1100. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/aclnn_torch_adapter/op_api_common.h +0 -0
  1101. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_host/CMakeLists.txt +0 -0
  1102. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_def.cpp +0 -0
  1103. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_infershape.cpp +0 -0
  1104. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.h +0 -0
  1105. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_host/error_log.h +0 -0
  1106. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.cpp +0 -0
  1107. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.h +0 -0
  1108. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_merge_n.h +0 -0
  1109. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_multi_n.h +0 -0
  1110. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_single_n.h +0 -0
  1111. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_split_d.h +0 -0
  1112. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/reduce_common.h +0 -0
  1113. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/add_rms_norm_bias/op_kernel/rms_norm_base.h +0 -0
  1114. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h +0 -0
  1115. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/batch_matmul_transpose/op_host/common.h +0 -0
  1116. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/batch_matmul_transpose/op_host/common_tiling.h +0 -0
  1117. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp +0 -0
  1118. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h +0 -0
  1119. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp +0 -0
  1120. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/build.sh +0 -0
  1121. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/camem_allocator.cpp +0 -0
  1122. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/cmake/config.cmake +0 -0
  1123. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/cmake/func.cmake +0 -0
  1124. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/cmake/intf.cmake +0 -0
  1125. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/cmake/intf_pub.cmake +0 -0
  1126. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/cmake/modules/Findalog.cmake +0 -0
  1127. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/cmake/scripts/prepare.sh +0 -0
  1128. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/CMakeLists.txt +0 -0
  1129. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.cpp +0 -0
  1130. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.h +0 -0
  1131. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp +0 -0
  1132. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_proto.cpp +0 -0
  1133. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp +0 -0
  1134. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/error_log.h +0 -0
  1135. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/hcom_topo_info.h +0 -0
  1136. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_host/tiling_args.h +0 -0
  1137. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.cpp +0 -0
  1138. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h +0 -0
  1139. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp +0 -0
  1140. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h +0 -0
  1141. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +0 -0
  1142. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h +0 -0
  1143. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h +0 -0
  1144. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h +0 -0
  1145. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h +0 -0
  1146. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h +0 -0
  1147. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h +0 -0
  1148. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h +0 -0
  1149. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h +0 -0
  1150. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h +0 -0
  1151. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h +0 -0
  1152. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h +0 -0
  1153. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h +0 -0
  1154. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h +0 -0
  1155. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h +0 -0
  1156. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h +0 -0
  1157. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h +0 -0
  1158. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h +0 -0
  1159. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h +0 -0
  1160. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/tiling_base.h +0 -0
  1161. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute.h +0 -0
  1162. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute_tiling.h +0 -0
  1163. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp +0 -0
  1164. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +0 -0
  1165. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +0 -0
  1166. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/const_args.hpp +0 -0
  1167. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_l0c_to_gm_custom.hpp +0 -0
  1168. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/dispatch_policy_custom.hpp +0 -0
  1169. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/get_tensor_addr.hpp +0 -0
  1170. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp +0 -0
  1171. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/layout3d.hpp +0 -0
  1172. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_ffn_combine/op_kernel/utils/select_helper.hpp +0 -0
  1173. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/CMakeLists.txt +0 -0
  1174. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +0 -0
  1175. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +0 -0
  1176. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp +0 -0
  1177. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +0 -0
  1178. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue.h +0 -0
  1179. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/dispatch_policy.h +0 -0
  1180. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_binary.h +0 -0
  1181. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_muls.h +0 -0
  1182. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad.h +0 -0
  1183. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad_preload_async_with_callback_resident_a.h +0 -0
  1184. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/dispatch_policy.h +0 -0
  1185. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +0 -0
  1186. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +0 -0
  1187. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +0 -0
  1188. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_host/CMakeLists.txt +0 -0
  1189. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.cpp +0 -0
  1190. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_host/dispatch_layout.cpp +0 -0
  1191. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_host/dispatch_layout_tiling.cpp +0 -0
  1192. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_kernel/dispatch_layout.cpp +0 -0
  1193. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_kernel/dispatch_layout.h +0 -0
  1194. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/dispatch_layout/op_kernel/dispatch_layout_tiling.h +0 -0
  1195. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt +0 -0
  1196. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  1197. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  1198. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  1199. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  1200. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp +0 -0
  1201. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp +0 -0
  1202. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp +0 -0
  1203. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h +0 -0
  1204. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  1205. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h +0 -0
  1206. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h +0 -0
  1207. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/bgmv_expand.cpp +0 -0
  1208. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/bgmv_shrink.cpp +0 -0
  1209. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  1210. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/math_utils.h +0 -0
  1211. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  1212. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/sgmv_expand.cpp +0 -0
  1213. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/sgmv_shrink.cpp +0 -0
  1214. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/types.h +0 -0
  1215. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/kernels/utils.h +0 -0
  1216. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_host/CMakeLists.txt +0 -0
  1217. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_def.cpp +0 -0
  1218. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp +0 -0
  1219. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.h +0 -0
  1220. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer.cpp +0 -0
  1221. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_common.h +0 -0
  1222. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h +0 -0
  1223. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h +0 -0
  1224. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h +0 -0
  1225. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h +0 -0
  1226. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h +0 -0
  1227. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/CMakeLists.txt +0 -0
  1228. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +0 -0
  1229. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.h +0 -0
  1230. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_def.cpp +0 -0
  1231. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_proto.cpp +0 -0
  1232. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_tiling.cpp +0 -0
  1233. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_workspace.h +0 -0
  1234. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp +0 -0
  1235. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aic_kernel.h +0 -0
  1236. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aiv_kernel.h +0 -0
  1237. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_tiling.h +0 -0
  1238. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_utils.h +0 -0
  1239. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
  1240. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
  1241. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
  1242. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
  1243. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
  1244. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
  1245. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
  1246. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
  1247. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
  1248. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
  1249. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
  1250. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
  1251. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
  1252. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
  1253. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
  1254. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
  1255. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
  1256. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
  1257. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
  1258. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
  1259. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
  1260. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
  1261. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
  1262. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
  1263. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
  1264. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_nq.hpp +0 -0
  1265. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_qdown.hpp +0 -0
  1266. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
  1267. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_combine_normal/op_host/CMakeLists.txt +0 -0
  1268. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.cpp +0 -0
  1269. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_combine_normal/op_host/moe_combine_normal.cpp +0 -0
  1270. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.cpp +0 -0
  1271. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.h +0 -0
  1272. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_combine_normal/op_kernel/moe_combine_normal_tiling.h +0 -0
  1273. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_host/CMakeLists.txt +0 -0
  1274. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.cpp +0 -0
  1275. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.h +0 -0
  1276. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal.cpp +0 -0
  1277. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal_tiling.cpp +0 -0
  1278. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.cpp +0 -0
  1279. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.h +0 -0
  1280. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal_tiling.h +0 -0
  1281. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/CMakeLists.txt +0 -0
  1282. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/error_log.h +0 -0
  1283. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/math_util.h +0 -0
  1284. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_def.cpp +0 -0
  1285. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_infershape.cpp +0 -0
  1286. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.cpp +0 -0
  1287. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.h +0 -0
  1288. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_base.cpp +0 -0
  1289. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/common.h +0 -0
  1290. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/error_log.h +0 -0
  1291. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k.cpp +0 -0
  1292. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_apt.cpp +0 -0
  1293. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_e_k_fullload.h +0 -0
  1294. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_generalized.h +0 -0
  1295. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_without_group.h +0 -0
  1296. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling.h +0 -0
  1297. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling_def.h +0 -0
  1298. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/tiling_base/error_log.h +0 -0
  1299. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/tiling_base/tiling_base.h +0 -0
  1300. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/tiling_base/tiling_templates_registry.h +0 -0
  1301. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_gating_top_k/tiling_base/tiling_util.h +0 -0
  1302. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/CMakeLists.txt +0 -0
  1303. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.cpp +0 -0
  1304. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.h +0 -0
  1305. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.cpp +0 -0
  1306. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.h +0 -0
  1307. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_def.cpp +0 -0
  1308. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_infershape.cpp +0 -0
  1309. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.h +0 -0
  1310. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling_base.cpp +0 -0
  1311. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_common.h +0 -0
  1312. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_expert_tokens_count.h +0 -0
  1313. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load.h +0 -0
  1314. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_base.h +0 -0
  1315. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_droppad_static_quant.h +0 -0
  1316. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_dynamic_quant.h +0 -0
  1317. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out.h +0 -0
  1318. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out_droppad.h +0 -0
  1319. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_sort_multi_core.h +0 -0
  1320. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_static_quant.h +0 -0
  1321. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort.h +0 -0
  1322. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out.h +0 -0
  1323. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out_performance.h +0 -0
  1324. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_performance.h +0 -0
  1325. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather.h +0 -0
  1326. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad.h +0 -0
  1327. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad_dynamic.h +0 -0
  1328. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_actual_expert.h +0 -0
  1329. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_base.h +0 -0
  1330. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core.h +0 -0
  1331. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core_performance.h +0 -0
  1332. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_one_core.h +0 -0
  1333. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/moe_init_routing_custom/op_kernel/moe_init_routing_custom.cpp +0 -0
  1334. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_host/CMakeLists.txt +0 -0
  1335. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.cpp +0 -0
  1336. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_host/notify_dispatch.cpp +0 -0
  1337. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_host/notify_dispatch_tiling.cpp +0 -0
  1338. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_kernel/notify_dispatch.cpp +0 -0
  1339. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_kernel/notify_dispatch.h +0 -0
  1340. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/notify_dispatch/op_kernel/notify_dispatch_tiling.h +0 -0
  1341. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/ops.h +0 -0
  1342. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_host/CMakeLists.txt +0 -0
  1343. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp +0 -0
  1344. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp +0 -0
  1345. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h +0 -0
  1346. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp +0 -0
  1347. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h +0 -0
  1348. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h +0 -0
  1349. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h +0 -0
  1350. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h +0 -0
  1351. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h +0 -0
  1352. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/torch_binding_meta.cpp +0 -0
  1353. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/CMakeLists.txt +0 -0
  1354. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/aclnn_util.h +0 -0
  1355. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/error/ops_error.h +0 -0
  1356. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/fallback.h +0 -0
  1357. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/fallback_comm.h +0 -0
  1358. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/kernel/comm_args.h +0 -0
  1359. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/kernel/data_copy.h +0 -0
  1360. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/kernel/dropmask.h +0 -0
  1361. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/kernel/pse.h +0 -0
  1362. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/kernel/sync_collectives.h +0 -0
  1363. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/kernel/util.h +0 -0
  1364. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/log/inner/dfx_base.h +0 -0
  1365. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/log/ops_log.h +0 -0
  1366. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/tiling/data_copy_transpose_tiling.h +0 -0
  1367. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h +0 -0
  1368. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/tiling/tiling_base.h +0 -0
  1369. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/inc/tiling/tiling_templates_registry.h +0 -0
  1370. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils/src/fallback_comm.cpp +0 -0
  1371. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/csrc/utils.h +0 -0
  1372. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/Makefile +0 -0
  1373. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/requirements-docs.txt +0 -0
  1374. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/cp/chunkedprefill.png +0 -0
  1375. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/cp/dcp-decode.png +0 -0
  1376. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/cp/dcp-prefill.png +0 -0
  1377. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/cp/head-tail-style.png +0 -0
  1378. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/cp/overview.png +0 -0
  1379. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/cp/pcp-decode.png +0 -0
  1380. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/deployment.png +0 -0
  1381. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/disaggregated_prefill_pull.png +0 -0
  1382. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/disaggregated_prefill_push.png +0 -0
  1383. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/eplb.png +0 -0
  1384. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  1385. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  1386. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/quantization/get_quant_method.png +0 -0
  1387. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/quantization/quant_algorithm_overview.png +0 -0
  1388. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/quantization/quant_method_base_class.png +0 -0
  1389. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/quantization/quant_method_call_flow.png +0 -0
  1390. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/quantization/quant_methods_overview.png +0 -0
  1391. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/assets/workflow.png +0 -0
  1392. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/community/user_stories/index.md +0 -0
  1393. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
  1394. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
  1395. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md +0 -0
  1396. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/developer_guide/feature_guide/index.md +0 -0
  1397. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/developer_guide/performance_and_debug/index.md +0 -0
  1398. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/index.md +0 -0
  1399. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  1400. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  1401. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  1402. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  1403. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  1404. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  1405. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  1406. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  1407. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  1408. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  1409. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  1410. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  1411. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  1412. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  1413. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  1414. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  1415. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  1416. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  1417. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  1418. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  1419. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  1420. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/tutorials/PaddleOCR-VL.md +0 -0
  1421. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/tutorials/pd_colocated_mooncake_multi_instance.md +0 -0
  1422. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
  1423. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/configuration/index.md +0 -0
  1424. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/deployment_guide/index.md +0 -0
  1425. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  1426. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/images/netloader_flowchart.png +0 -0
  1427. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png +0 -0
  1428. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  1429. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
  1430. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/netloader.md +0 -0
  1431. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  1432. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
  1433. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/examples/chat_templates/template_qwen2_audio.jinja +0 -0
  1434. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/examples/external_online_dp/run_dp_template.sh +0 -0
  1435. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/examples/offline_inference_npu.py +0 -0
  1436. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/examples/run_dp_server.sh +0 -0
  1437. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/format.sh +0 -0
  1438. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/packages.txt +0 -0
  1439. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/requirements-lint.txt +0 -0
  1440. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/setup.cfg +0 -0
  1441. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/__init__.py +0 -0
  1442. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/__init__.py +0 -0
  1443. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/common.sh +0 -0
  1444. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  1445. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  1446. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/model_utils.py +0 -0
  1447. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml +0 -0
  1448. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml +0 -0
  1449. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Llama-3.2-3B-Instruct.yaml +0 -0
  1450. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Molmo-7B-D-0924.yaml +0 -0
  1451. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
  1452. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml +0 -0
  1453. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml +0 -0
  1454. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  1455. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-8B-W8A8.yaml +0 -0
  1456. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
  1457. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml +0 -0
  1458. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-Omni-30B-A3B-Instruct.yaml +0 -0
  1459. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +0 -0
  1460. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/accuracy.txt +0 -0
  1461. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/gemma-3-4b-it.yaml +0 -0
  1462. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/internlm3-8b-instruct.yaml +0 -0
  1463. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/configs/llava-onevision-qwen2-0.5b-ov-hf.yaml +0 -0
  1464. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/conftest.py +0 -0
  1465. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/report_template.md +0 -0
  1466. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
  1467. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_data_parallel.py +0 -0
  1468. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_expert_parallel.py +0 -0
  1469. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_full_graph_mode.py +0 -0
  1470. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py +0 -0
  1471. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_pipeline_parallel.py +0 -0
  1472. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_prefix_caching.py +0 -0
  1473. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_shared_expert_dp.py +0 -0
  1474. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/2-cards/test_single_request_aclgraph.py +0 -0
  1475. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py +0 -0
  1476. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py +0 -0
  1477. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/multicard/4-cards/test_qwen3_next.py +0 -0
  1478. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/multi_node/__init__.py +0 -0
  1479. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/multi_node/scripts/__init__.py +0 -0
  1480. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/multi_node/scripts/multi_node_config.py +0 -0
  1481. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/multi_node/scripts/test_multi_node.py +0 -0
  1482. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/multi_node/scripts/utils.py +0 -0
  1483. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/__init__.py +0 -0
  1484. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_glm4_5.py +0 -0
  1485. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_mtpx_deepseek_r1_0528_w8a8.py +0 -0
  1486. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_prefix_cache_deepseek_r1_0528_w8a8.py +0 -0
  1487. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_prefix_cache_qwen3_32b_int8.py +0 -0
  1488. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_32b.py +0 -0
  1489. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b.py +0 -0
  1490. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen3_235b_w8a8.py +0 -0
  1491. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen3_30b_w8a8.py +0 -0
  1492. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen3_32b.py +0 -0
  1493. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py +0 -0
  1494. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8_a3_feature_stack3.py +0 -0
  1495. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py +0 -0
  1496. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/models/test_qwq_32b.py +0 -0
  1497. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/__init__.py +0 -0
  1498. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py +0 -0
  1499. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/__init__.py +0 -0
  1500. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py +0 -0
  1501. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/__init__.py +0 -0
  1502. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py +0 -0
  1503. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py +0 -0
  1504. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py +0 -0
  1505. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py +0 -0
  1506. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py +0 -0
  1507. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py +0 -0
  1508. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py +0 -0
  1509. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py +0 -0
  1510. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py +0 -0
  1511. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py +0 -0
  1512. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py +0 -0
  1513. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py +0 -0
  1514. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_rotary_embedding.py +0 -0
  1515. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py +0 -0
  1516. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/__init__.py +0 -0
  1517. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py +0 -0
  1518. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py +0 -0
  1519. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py +0 -0
  1520. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py +0 -0
  1521. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py +0 -0
  1522. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py +0 -0
  1523. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py +0 -0
  1524. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py +0 -0
  1525. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py +0 -0
  1526. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/prompts/example.txt +0 -0
  1527. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/run_doctests.sh +0 -0
  1528. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/__init__.py +0 -0
  1529. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/compile/__init__.py +0 -0
  1530. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/compile/backend.py +0 -0
  1531. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/model_runner_v2/__init__.py +0 -0
  1532. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/pooling/__init__.py +0 -0
  1533. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/pooling/test_classification.py +0 -0
  1534. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/pooling/test_embedding.py +0 -0
  1535. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/spec_decode/__init__.py +0 -0
  1536. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
  1537. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_async_scheduling.py +0 -0
  1538. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_batch_invariant.py +0 -0
  1539. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_camem.py +0 -0
  1540. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +0 -0
  1541. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_cpu_offloading.py +0 -0
  1542. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
  1543. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  1544. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_models.py +0 -0
  1545. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -0
  1546. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  1547. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_quantization.py +0 -0
  1548. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_sampler.py +0 -0
  1549. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/test_vlm.py +0 -0
  1550. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/singlecard/utils.py +0 -0
  1551. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/utils.py +0 -0
  1552. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
  1553. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/__init__.py +0 -0
  1554. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/attention/test_attention_mask.py +0 -0
  1555. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/attention/utils.py +0 -0
  1556. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/base.py +0 -0
  1557. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/conftest.py +0 -0
  1558. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/core/test_scheduler_dynamic_batch.py +0 -0
  1559. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/device_allocator/test_camem.py +0 -0
  1560. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  1561. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  1562. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  1563. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/distributed/test_parallel_state.py +0 -0
  1564. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
  1565. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/eplb/core/expert_map.json +0 -0
  1566. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
  1567. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
  1568. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/fake_weight/config.json +0 -0
  1569. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  1570. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  1571. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/model_loader/netloader/test_netloader.py +0 -0
  1572. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/model_loader/netloader/test_netloader_elastic.py +0 -0
  1573. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/model_loader/netloader/test_netloader_load.py +0 -0
  1574. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/model_loader/netloader/test_netloader_utils.py +0 -0
  1575. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/ops/test_comm_utils.py +0 -0
  1576. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/ops/test_linear.py +0 -0
  1577. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/ops/test_mla.py +0 -0
  1578. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/ops/test_moe_comm_method.py +0 -0
  1579. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/ops/test_moe_mlp.py +0 -0
  1580. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/ops/test_prepare_finalize.py +0 -0
  1581. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  1582. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  1583. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/sample/test_rejection_sampler.py +0 -0
  1584. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/sample/test_sampler.py +0 -0
  1585. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/test_envs.py +0 -0
  1586. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/test_utils.py +0 -0
  1587. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/worker/test_block_table.py +0 -0
  1588. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tests/ut/worker/test_pcp_manager.py +0 -0
  1589. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/actionlint.sh +0 -0
  1590. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/check_repo.sh +0 -0
  1591. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/collect_user_first_contribution.sh +0 -0
  1592. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/install_flash_infer_attention_score_ops_a2.sh +0 -0
  1593. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/install_flash_infer_attention_score_ops_a3.sh +0 -0
  1594. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/mooncake_installer.sh +0 -0
  1595. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/png-lint.sh +0 -0
  1596. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/shellcheck.sh +0 -0
  1597. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/tools/sphinx-lint.sh +0 -0
  1598. {vllm_ascend-0.13.0rc2/vllm_ascend/attention → vllm_ascend-0.14.0rc1/vllm_ascend/_310p}/__init__.py +0 -0
  1599. {vllm_ascend-0.13.0rc2/vllm_ascend/attention/context_parallel → vllm_ascend-0.14.0rc1/vllm_ascend/_310p/attention}/__init__.py +0 -0
  1600. {vllm_ascend-0.13.0rc2/vllm_ascend/compilation → vllm_ascend-0.14.0rc1/vllm_ascend/_310p/ops}/__init__.py +0 -0
  1601. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/_cann_ops_custom/.gitkeep +0 -0
  1602. {vllm_ascend-0.13.0rc2/vllm_ascend/compilation/npugraph_ex_passes → vllm_ascend-0.14.0rc1/vllm_ascend/attention}/__init__.py +0 -0
  1603. {vllm_ascend-0.13.0rc2/vllm_ascend/compilation/passes → vllm_ascend-0.14.0rc1/vllm_ascend/attention/context_parallel}/__init__.py +0 -0
  1604. {vllm_ascend-0.13.0rc2/vllm_ascend/core → vllm_ascend-0.14.0rc1/vllm_ascend/compilation}/__init__.py +0 -0
  1605. {vllm_ascend-0.13.0rc2/vllm_ascend/device_allocator → vllm_ascend-0.14.0rc1/vllm_ascend/compilation/npugraph_ex_passes}/__init__.py +0 -0
  1606. {vllm_ascend-0.13.0rc2/vllm_ascend/distributed/cpu_offload_manager → vllm_ascend-0.14.0rc1/vllm_ascend/compilation/npugraph_ex_passes/utils}/__init__.py +0 -0
  1607. {vllm_ascend-0.13.0rc2/vllm_ascend/distributed/device_communicators → vllm_ascend-0.14.0rc1/vllm_ascend/compilation/passes}/__init__.py +0 -0
  1608. {vllm_ascend-0.13.0rc2/vllm_ascend/eplb → vllm_ascend-0.14.0rc1/vllm_ascend/core}/__init__.py +0 -0
  1609. {vllm_ascend-0.13.0rc2/vllm_ascend/eplb/adaptor → vllm_ascend-0.14.0rc1/vllm_ascend/device}/__init__.py +0 -0
  1610. {vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core → vllm_ascend-0.14.0rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
  1611. {vllm_ascend-0.13.0rc2/vllm_ascend/eplb/core/policy → vllm_ascend-0.14.0rc1/vllm_ascend/distributed}/__init__.py +0 -0
  1612. {vllm_ascend-0.13.0rc2/vllm_ascend/kv_offload → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  1613. {vllm_ascend-0.13.0rc2/vllm_ascend/lora → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_p2p}/__init__.py +0 -0
  1614. {vllm_ascend-0.13.0rc2/vllm_ascend/model_loader → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool}/__init__.py +0 -0
  1615. {vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/executor → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store}/__init__.py +0 -0
  1616. {vllm_ascend-0.13.0rc2/vllm_ascend/distributed/kvpool → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend}/__init__.py +0 -0
  1617. {vllm_ascend-0.13.0rc2/vllm_ascend/model_loader/netloader/interaction → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload}/__init__.py +0 -0
  1618. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/fused_moe → vllm_ascend-0.14.0rc1/vllm_ascend/distributed/kv_transfer/utils}/__init__.py +0 -0
  1619. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/distributed/parallel_state.py +0 -0
  1620. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton → vllm_ascend-0.14.0rc1/vllm_ascend/eplb}/__init__.py +0 -0
  1621. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/activation → vllm_ascend-0.14.0rc1/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
  1622. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/batch_invariant → vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core}/__init__.py +0 -0
  1623. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/fla → vllm_ascend-0.14.0rc1/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
  1624. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/linearnorm → vllm_ascend-0.14.0rc1/vllm_ascend/kv_offload}/__init__.py +0 -0
  1625. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/mamba → vllm_ascend-0.14.0rc1/vllm_ascend/lora}/__init__.py +0 -0
  1626. {vllm_ascend-0.13.0rc2/vllm_ascend/ops/triton/spec_decode → vllm_ascend-0.14.0rc1/vllm_ascend/model_loader}/__init__.py +0 -0
  1627. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/model_loader/netloader/__init__.py +0 -0
  1628. {vllm_ascend-0.13.0rc2/vllm_ascend/quantization → vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/executor}/__init__.py +0 -0
  1629. {vllm_ascend-0.13.0rc2/vllm_ascend/quantization/compressed_tensors → vllm_ascend-0.14.0rc1/vllm_ascend/model_loader/netloader/interaction}/__init__.py +0 -0
  1630. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/__init__.py +0 -0
  1631. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/flashcomm2_oshard_manager.py +0 -0
  1632. {vllm_ascend-0.13.0rc2/vllm_ascend/sample → vllm_ascend-0.14.0rc1/vllm_ascend/ops/fused_moe}/__init__.py +0 -0
  1633. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/fused_moe/comm_utils.py +0 -0
  1634. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/fused_moe/moe_mlp.py +0 -0
  1635. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/fused_moe/prepare_finalize.py +0 -0
  1636. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/fused_moe/token_dispatcher.py +0 -0
  1637. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/layer_shard_linear.py +0 -0
  1638. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/linear.py +0 -0
  1639. {vllm_ascend-0.13.0rc2/vllm_ascend/worker → vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton}/__init__.py +0 -0
  1640. {vllm_ascend-0.13.0rc2/vllm_ascend/worker/v2 → vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/activation}/__init__.py +0 -0
  1641. {vllm_ascend-0.13.0rc2/vllm_ascend/worker/v2/sample → vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/batch_invariant}/__init__.py +0 -0
  1642. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/triton/batch_invariant/softmax.py +0 -0
  1643. {vllm_ascend-0.13.0rc2/vllm_ascend/xlite → vllm_ascend-0.14.0rc1/vllm_ascend/ops/triton/fla}/__init__.py +0 -0
  1644. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
  1645. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/ops/weight_prefetch.py +0 -0
  1646. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_bert.py +0 -0
  1647. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_deepseek.py +0 -0
  1648. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
  1649. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_module.py +0 -0
  1650. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
  1651. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_qwen3_next_mtp.py +0 -0
  1652. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_rejection_sampler.py +0 -0
  1653. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/patch/worker/patch_rope.py +0 -0
  1654. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/sample/rejection_sampler.py +0 -0
  1655. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/spec_decode/ngram_proposer.py +0 -0
  1656. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/spec_decode/suffix_proposer.py +0 -0
  1657. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/block_table.py +0 -0
  1658. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/npu_input_batch.py +0 -0
  1659. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/README.md +0 -0
  1660. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/aclgraph_utils.py +0 -0
  1661. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/input_batch.py +0 -0
  1662. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/sample/gumbel.py +0 -0
  1663. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/sample/sampler.py +0 -0
  1664. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/states.py +0 -0
  1665. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/worker/v2/utils.py +0 -0
  1666. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/xlite/xlite_model_runner.py +0 -0
  1667. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend/xlite/xlite_worker.py +0 -0
  1668. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  1669. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  1670. {vllm_ascend-0.13.0rc2 → vllm_ascend-0.14.0rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -0,0 +1,65 @@
1
+ # See https://help.github.com/articles/about-codeowners/
2
+ # for more info about CODEOWNERS file
3
+
4
+ # Infra, CI
5
+ /.gemini @wangxiyuan @Yikun
6
+ /.github @wangxiyuan @Yikun
7
+ /tools @wangxiyuan @Yikun
8
+ /.gitignore @wangxiyuan
9
+ /.gitmodules @wangxiyuan @zzzzwwjj
10
+ /.pre-commit-config.yaml @wangxiyuan
11
+ /codecov.yml @wangxiyuan
12
+ /Dockerfile* @wangxiyuan
13
+ /format.sh @wangxiyuan
14
+ /mypy.ini @wangxiyuan
15
+ /requirements* @wangxiyuan
16
+ /setup.py @wangxiyuan
17
+ /typos.toml @wangxiyuan
18
+
19
+ # benchmark
20
+ /benchmarks @wangxiyuan
21
+
22
+ # c++ source code
23
+ /cmake @zzzzwwjj
24
+ /csrc @zzzzwwjj
25
+ /CMakeLists.txt @zzzzwwjj
26
+
27
+ # docs
28
+ /docs @wangxiyuan @Yikun @LCAIZJ
29
+ /.readthedocs.yaml @wangxiyuan @Yikun
30
+ /README* @wangxiyuan @Yikun
31
+
32
+ # example
33
+ /examples @wangxiyuan
34
+
35
+ # tests
36
+ /tests @wangxiyuan
37
+
38
+ # python source code
39
+ /vllm_ascend/attention @weijinqian0
40
+ /vllm_ascend/compilation @yiz-liu
41
+ /vllm_ascend/core @wangxiyuan @MengqingCao
42
+ /vllm_ascend/device @weijinqian0 @zzzzwwjj
43
+ /vllm_ascend/device_allocator @wangxiyuan @weijinqian0
44
+ /vllm_ascend/distributed @MengqingCao @LCAIZJ
45
+ /vllm_ascend/eplb @wangxiyuan
46
+ /vllm_ascend/kv_offload @nalinaly
47
+ /vllm_ascend/lora @paulyu12
48
+ /vllm_ascend/model_loader @wangxiyuan
49
+ /vllm_ascend/ops @zzzzwwjj @realliujiaxu
50
+ /vllm_ascend/patch @wangxiyuan
51
+ /vllm_ascend/quantization @wangxiyuan
52
+ /vllm_ascend/sample @realliujiaxu
53
+ /vllm_ascend/spec_decode @wangxiyuan
54
+ /vllm_ascend/worker @MengqingCao
55
+ /vllm_ascend/xlite @wangxiyuan
56
+ /vllm_ascend/ascend_config.py @wangxiyuan
57
+ /vllm_ascend/ascend_forward_context.py @wangxiyuan
58
+ /vllm_ascend/batch_invariant.py @wangxiyuan
59
+ /vllm_ascend/cpu_binding.py @wangxiyuan
60
+ /vllm_ascend/envs.py @wangxiyuan
61
+ /vllm_ascend/flash_common3_context.py @wangxiyuan
62
+ /vllm_ascend/meta_registration.py @wangxiyuan
63
+ /vllm_ascend/platform.py @wangxiyuan
64
+ /vllm_ascend/profiling_config.py @wangxiyuan
65
+ /vllm_ascend/utils.py @wangxiyuan
@@ -0,0 +1,45 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+ ARG PY_VERSION=3.11
18
+ FROM quay.io/ascend/manylinux:8.5.0-910b-manylinux_2_28-py${PY_VERSION}
19
+
20
+ ARG SOC_VERSION="ascend910b1"
21
+
22
+ # Define environments
23
+ ENV DEBIAN_FRONTEND=noninteractive
24
+ ENV SOC_VERSION=$SOC_VERSION
25
+ RUN yum update -y && \
26
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
+ rm -rf /var/cache/yum
28
+
29
+ WORKDIR /workspace
30
+
31
+ COPY . /workspace/vllm-ascend/
32
+
33
+ # Install req
34
+ RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
35
+ python3 -m pip install twine attrs psutil
36
+
37
+ # Install vllm-ascend
38
+ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
39
+ source /usr/local/Ascend/nnal/atb/set_env.sh && \
40
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
41
+ cd vllm-ascend && \
42
+ python3 setup.py bdist_wheel && \
43
+ ls -l dist
44
+
45
+ CMD ["/bin/bash"]
@@ -0,0 +1,37 @@
1
+ name: 📚 User Story
2
+ description: Apply for a user story to be displayed on https://docs.vllm.ai/projects/ascend/en/latest/community/user_stories/index.html
3
+ title: "[User Story]: "
4
+ labels: ["user-story"]
5
+
6
+ body:
7
+ - type: textarea
8
+ attributes:
9
+ label: 📚 Title
10
+ description: >
11
+ A clear title about what your user story is about.
12
+ validations:
13
+ required: true
14
+ - type: textarea
15
+ attributes:
16
+ label: About / Introduction
17
+ description: >
18
+ A brief introduction about the background of your use case, like your scenario, hardware size etc.
19
+ - type: textarea
20
+ attributes:
21
+ label: Business Challenges
22
+ description: >
23
+ Tell us what kind of challenges you faced in this user story.
24
+ - type: textarea
25
+ attributes:
26
+ label: Solving challenges with vLLM Ascend and benefits
27
+ description: >
28
+ Tell us how vLLM Ascend helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc., and what kind of benefits you got from using vLLM Ascend.
29
+ - type: textarea
30
+ attributes:
31
+ label: Extra Info
32
+ description: >
33
+ Any extra information you want to include in this story
34
+ - type: markdown
35
+ attributes:
36
+ value: >
37
+ Thanks for contributing 🎉!
@@ -0,0 +1,33 @@
1
+ name: 🤗 Support request for new model supported from huggingface/modelscope/modelers on Ascend
2
+ description: Submit a proposal/request for a new model from huggingface/modelscope/modelers on Ascend
3
+ title: "[New Model]: "
4
+ labels: ["new model"]
5
+
6
+ body:
7
+ - type: markdown
8
+ attributes:
9
+ value: >
10
+ #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm-ascend/issues?q=is%3Aissue+sort%3Acreated-desc+).
11
+
12
+ #### We also highly recommend you read https://docs.vllm.ai/projects/ascend/en/latest/user_guide/supported_models.html first to see which models are already supported.
13
+ - type: textarea
14
+ attributes:
15
+ label: The model to consider.
16
+ description: >
17
+ A huggingface/modelscope/modelers url, pointing to the model, e.g. https://huggingface.co/openai-community/gpt2 .
18
+ validations:
19
+ required: true
20
+ - type: textarea
21
+ attributes:
22
+ label: The closest model vllm already supports.
23
+ description: >
24
+ Here is the list of models already supported by vllm: https://docs.vllm.ai/projects/ascend/en/latest/user_guide/supported_models.html . Which model is the most similar to the model you want to add support for?
25
+ - type: textarea
26
+ attributes:
27
+ label: What's your difficulty of supporting the model you want?
28
+ description: >
29
+ For example, any new operators or new architecture?
30
+ - type: markdown
31
+ attributes:
32
+ value: >
33
+ Thanks for contributing 🎉!
@@ -0,0 +1,27 @@
1
+ self-hosted-runner:
2
+ # Labels of self-hosted runner in array of strings.
3
+ labels:
4
+ - linux-aarch64-a2-0
5
+ - linux-aarch64-a2-1
6
+ - linux-aarch64-a2-2
7
+ - linux-aarch64-a2-4
8
+ - linux-aarch64-a2-8
9
+ - linux-arm64-npu-static-8
10
+ - linux-aarch64-310p-1
11
+ - linux-aarch64-310p-2
12
+ - linux-aarch64-310p-4
13
+ - ubuntu-24.04-arm
14
+ - linux-aarch64-a3-1
15
+ - linux-aarch64-a3-2
16
+ - linux-aarch64-a3-4
17
+ - linux-aarch64-a3-8
18
+ - linux-amd64-cpu-0
19
+ - linux-amd64-cpu-8
20
+ - linux-amd64-cpu-16
21
+ - linux-aarch64-a3-0
22
+ - linux-amd64-cpu-8-hk
23
+ - linux-amd64-cpu-16-hk
24
+ - linux-aarch64-a2b3-0
25
+ - linux-aarch64-a2b3-1
26
+ - linux-aarch64-a2b3-2
27
+ - linux-aarch64-a2b3-4
@@ -0,0 +1,268 @@
1
+ name: 'e2e nightly test multi_node'
2
+
3
+ on:
4
+ workflow_call:
5
+ inputs:
6
+ soc_version:
7
+ required: true
8
+ type: string
9
+ description: use a2 or a3
10
+ runner:
11
+ required: false
12
+ type: string
13
+ default: linux-aarch64-a3-0
14
+ image:
15
+ required: false
16
+ type: string
17
+ description: base image for pods
18
+ default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11"
19
+ config_file_path:
20
+ required: true
21
+ type: string
22
+ description: the model config for multi_node test
23
+ replicas:
24
+ required: false
25
+ default: "1"
26
+ type: string
27
+ description: replicas of the k8s cluster
28
+ size:
29
+ required: false
30
+ default: "2"
31
+ type: string
32
+ description: how many pods will be brought up via lws.yaml, i.e. the number of nodes we need
33
+ vllm_version:
34
+ required: false
35
+ default: "v0.14.1"
36
+ type: string
37
+ description: vllm version to use
38
+ vllm_ascend_remote_url:
39
+ required: false
40
+ default: https://github.com/vllm-project/vllm-ascend.git
41
+ type: string
42
+ description: used for pr level tests
43
+ vllm_ascend_ref:
44
+ required: false
45
+ default: main
46
+ type: string
47
+ description: used for pr level tests
48
+ secrets:
49
+ KUBECONFIG_B64:
50
+ required: true
51
+
52
+
53
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
54
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
55
+ # It's used to activate ascend-toolkit environment variables.
56
+ defaults:
57
+ run:
58
+ shell: bash -el {0}
59
+
60
+ # only cancel in-progress runs of the same workflow
61
+ # and ignore the lint / 8 cards test type
62
+ concurrency:
63
+ group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.soc_version }}
64
+ cancel-in-progress: true
65
+
66
+ jobs:
67
+ e2e:
68
+ name: ${{ inputs.config_file_path }}
69
+ # This is the runner with no NPU for k8s controller
70
+ runs-on: ${{ inputs.runner }}
71
+ container:
72
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
73
+ env:
74
+ KUBECONFIG: /tmp/kubeconfig
75
+ NAMESPACE: vllm-project
76
+ LEADER_POD: vllm-0
77
+ steps:
78
+ - name: Decode kubeconfig from secrets
79
+ run: |
80
+ # Decode and save kubeconfig
81
+ echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
82
+
83
+ - name: Checkout code
84
+ uses: actions/checkout@v6
85
+
86
+ - name: Prepare scripts
87
+ run: |
88
+ # prepare for lws entrypoint scripts
89
+ install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh
90
+
91
+ - name: Clear resources
92
+ run: |
93
+ set -euo pipefail
94
+
95
+ CRD_NAME="${CRD_NAME:-vllm}"
96
+ TIMEOUT=${TIMEOUT:-120}
97
+ SLEEP_INTERVAL=2
98
+
99
+ echo "Deleting leaderworkerset [$CRD_NAME] in namespace [$NAMESPACE]..."
100
+ kubectl delete leaderworkerset "$CRD_NAME" -n "$NAMESPACE" --ignore-not-found
101
+
102
+ echo "Waiting for all pods starting with 'vllm' to be deleted..."
103
+ START_TIME=$(date +%s)
104
+
105
+ while true; do
106
+ NOW=$(date +%s)
107
+ ELAPSED=$((NOW - START_TIME))
108
+
109
+ if [[ $ELAPSED -ge $TIMEOUT ]]; then
110
+ echo "Timeout reached ($TIMEOUT seconds), some pods still exist:"
111
+ kubectl get pods -n "$NAMESPACE" | grep '^vllm' || true
112
+ exit 1
113
+ fi
114
+
115
+ PODS_EXIST=$(kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep '^vllm' || true)
116
+
117
+ if [[ -z "$PODS_EXIST" ]]; then
118
+ echo "All vllm pods deleted."
119
+ break
120
+ else
121
+ echo "Waiting for pods to be deleted: $PODS_EXIST"
122
+ sleep $SLEEP_INTERVAL
123
+ fi
124
+ done
125
+
126
+ - name: Launch cluster
127
+ id: launcher
128
+ run: |
129
+ set -e
130
+
131
+ size="${{ inputs.size }}"
132
+ replicas="${{ inputs.replicas }}"
133
+ image="${{ inputs.image }}"
134
+ config_file_path="${{ inputs.config_file_path }}"
135
+ fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}"
136
+ echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV
137
+
138
+ required_params=("size" "replicas" "image" "config_file_path")
139
+ for param in "${required_params[@]}"; do
140
+ if [ -z "${!param}" ]; then
141
+ echo "Error: Parameter '$param' is required but empty"
142
+ exit 1
143
+ fi
144
+ done
145
+
146
+ if [ "${{ inputs.soc_version }}" = "a3" ]; then
147
+ npu_per_node=16
148
+ TEMPLATE_FILE="tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2"
149
+ else
150
+ npu_per_node=8
151
+ TEMPLATE_FILE="tests/e2e/nightly/multi_node/scripts/lws-a2.yaml.jinja2"
152
+ fi
153
+
154
+ jinja2 $TEMPLATE_FILE \
155
+ -D size="$size" \
156
+ -D replicas="$replicas" \
157
+ -D image="$image" \
158
+ -D config_file_path="$config_file_path" \
159
+ -D npu_per_node="$npu_per_node" \
160
+ -D fail_tag="$fail_tag" \
161
+ --outfile lws.yaml
162
+
163
+ kubectl apply -f ./lws.yaml
164
+
165
+ - name: Waiting for pod ready
166
+ run: |
167
+ POD_PREFIX="${POD_PREFIX:-vllm-0}"
168
+ SIZE="${{ inputs.size }}"
169
+ TIMEOUT=1200 # default timeout 20 minutes
170
+
171
+ echo "Waiting for Pods in namespace [$NAMESPACE] to become Running and Ready (timeout ${TIMEOUT}s)..."
172
+
173
+ START_TIME=$(date +%s)
174
+
175
+ while true; do
176
+ NOW=$(date +%s)
177
+ ELAPSED=$((NOW - START_TIME))
178
+ if [[ $ELAPSED -ge $TIMEOUT ]]; then
179
+ echo "Timeout reached after ${ELAPSED}s"
180
+ echo "Dumping pod status for debugging:"
181
+ kubectl get pods -n "$NAMESPACE"
182
+ kubectl describe pod "$LEADER_POD" -n "$NAMESPACE"
183
+ exit 1
184
+ fi
185
+
186
+ # 1) check follower pods
187
+ ALL_FOLLOWERS_READY=true
188
+ for ((i=1; i<SIZE; i++)); do
189
+ POD="${POD_PREFIX}-${i}"
190
+ PHASE=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
191
+ READY=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
192
+
193
+ echo "Follower [$POD] phase=$PHASE ready=$READY"
194
+
195
+ if [[ "$PHASE" != "Running" || "$READY" != "true" ]]; then
196
+ echo "Follower [$POD] not Ready yet..."
197
+ ALL_FOLLOWERS_READY=false
198
+ break
199
+ fi
200
+ done
201
+
202
+ # 2) check leader pod
203
+ LEADER_PHASE=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
204
+ LEADER_READY=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
205
+
206
+ echo "Leader [$LEADER_POD] phase=$LEADER_PHASE ready=$LEADER_READY"
207
+
208
+ if [[ "$LEADER_PHASE" != "Running" || "$LEADER_READY" != "true" ]]; then
209
+ echo "Leader not Ready yet..."
210
+ ALL_FOLLOWERS_READY=false
211
+ fi
212
+
213
+ if [[ "$ALL_FOLLOWERS_READY" == "true" ]]; then
214
+ echo "All follower pods and leader pod are Running and Ready — continuing."
215
+ break
216
+ fi
217
+
218
+ sleep 2
219
+ done
220
+
221
+ - name: Stream logs
222
+ run: |
223
+ set -euo pipefail
224
+
225
+ size="${{ inputs.size }}"
226
+ pids=()
227
+
228
+ cleanup() {
229
+ echo "Cleaning up background log streams..."
230
+ for pid in "${pids[@]}"; do
231
+ kill "$pid" 2>/dev/null || true
232
+ done
233
+ }
234
+ trap cleanup EXIT
235
+
236
+ for i in $(seq 1 $((size - 1))); do
237
+ POD="vllm-0-${i}"
238
+
239
+ echo "==== Collecting logs from worker pod: $POD ===="
240
+ kubectl logs -f "$POD" -n "$NAMESPACE" \
241
+ > "/tmp/${POD}_logs.txt" 2>&1 &
242
+
243
+ pids+=($!)
244
+ done
245
+
246
+ echo "==== Streaming logs from leader pod: $LEADER_POD ===="
247
+ echo "Looking for logs containing: $FAIL_TAG"
248
+
249
+ kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while IFS= read -r line; do
250
+ echo "$line"
251
+ if echo "$line" | grep -q "$FAIL_TAG"; then
252
+ exit 1
253
+ fi
254
+ done
255
+
256
+ - name: Upload logs
257
+ if: always()
258
+ uses: actions/upload-artifact@v6
259
+ with:
260
+ name: ${{ inputs.config_file_path }}-pod-logs
261
+ path: /tmp/vllm*_logs.txt
262
+ retention-days: 7
263
+
264
+ - name: Post process
265
+ if: always()
266
+ run: |
267
+ kubectl get pods -n $NAMESPACE --ignore-not-found=true
268
+ kubectl delete -f ./lws.yaml --ignore-not-found=true || true
@@ -0,0 +1,130 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+
18
+ name: 'e2e nightly test'
19
+
20
+ on:
21
+ workflow_call:
22
+ inputs:
23
+ vllm:
24
+ required: true
25
+ type: string
26
+ runner:
27
+ required: true
28
+ type: string
29
+ image:
30
+ required: false
31
+ type: string
32
+ default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11"
33
+ tests:
34
+ required: true
35
+ type: string
36
+ name:
37
+ required: false
38
+ type: string
39
+
40
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
41
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
42
+ # It's used to activate ascend-toolkit environment variables.
43
+ defaults:
44
+ run:
45
+ shell: bash -el {0}
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ # and ignore the lint / 1 card / 4 cards test type
49
+ concurrency:
50
+ group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
51
+ cancel-in-progress: true
52
+
53
+ jobs:
54
+ e2e-nightly:
55
+ name: ${{ inputs.tests }}
56
+ runs-on: ${{ inputs.runner }}
57
+ timeout-minutes: 600
58
+ container:
59
+ image: ${{ inputs.image }}
60
+ env:
61
+ TRANSFORMERS_OFFLINE: 1
62
+ VLLM_USE_MODELSCOPE: True
63
+ steps:
64
+ - name: Check npu and CANN info
65
+ run: |
66
+ npu-smi info
67
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
68
+
69
+ - name: Show vLLM and vLLM-Ascend version
70
+ working-directory: /vllm-workspace
71
+ run: |
72
+ echo "Installed vLLM-related Python packages:"
73
+ pip list | grep vllm || echo "No vllm packages found."
74
+
75
+ echo ""
76
+ echo "============================"
77
+ echo "vLLM Git information"
78
+ echo "============================"
79
+ cd vllm
80
+ if [ -d .git ]; then
81
+ echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
82
+ echo "Commit hash: $(git rev-parse HEAD)"
83
+ echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
84
+ echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
85
+ echo "Message: $(git log -1 --pretty=format:'%s')"
86
+ echo "Tags: $(git tag --points-at HEAD || echo 'None')"
87
+ echo "Remote: $(git remote -v | head -n1)"
88
+ echo ""
89
+ else
90
+ echo "No .git directory found in vllm"
91
+ fi
92
+ cd ..
93
+
94
+ echo ""
95
+ echo "============================"
96
+ echo "vLLM-Ascend Git information"
97
+ echo "============================"
98
+ cd vllm-ascend
99
+ if [ -d .git ]; then
100
+ echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
101
+ echo "Commit hash: $(git rev-parse HEAD)"
102
+ echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
103
+ echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
104
+ echo "Message: $(git log -1 --pretty=format:'%s')"
105
+ echo "Tags: $(git tag --points-at HEAD || echo 'None')"
106
+ echo "Remote: $(git remote -v | head -n1)"
107
+ echo ""
108
+ else
109
+ echo "No .git directory found in vllm-ascend"
110
+ fi
111
+ cd ..
112
+
113
+ - name: Install clang
114
+ shell: bash -l {0}
115
+ run: |
116
+ apt-get update && apt-get -y install clang-15
117
+ update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
118
+ update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
119
+
120
+ - name: Run vllm-project/vllm-ascend test
121
+ env:
122
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
123
+ VLLM_USE_MODELSCOPE: True
124
+ VLLM_CI_RUNNER: ${{ inputs.runner }}
125
+ BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark
126
+ working-directory: /vllm-workspace/vllm-ascend
127
+ run: |
128
+ # ignore test_fused_moe.py until the test is fixed (NOTE(review): this comment previously named test_dispatch_ffn_combine — confirm which test is meant)
129
+ pytest -sv ${{ inputs.tests }} \
130
+ --ignore=tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py