vllm-ascend 0.13.0__tar.gz → 0.13.0rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1121)
  1. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/Dockerfile.buildwheel +1 -1
  2. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/Dockerfile.nightly.a2 +1 -1
  3. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/Dockerfile.nightly.a3 +1 -1
  4. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/actionlint.yaml +0 -4
  5. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_single_node.yaml +15 -0
  6. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_single_node_models.yaml +2 -2
  7. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_test.yaml +6 -4
  8. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_schedule_image_build.yaml +1 -5
  9. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_unit_test.yaml +0 -1
  10. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/labled_doctest.yaml +1 -1
  11. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/labled_download_model.yaml +1 -1
  12. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/nightly_test_a2.yaml +15 -12
  13. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/nightly_test_a3.yaml +4 -15
  14. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/pr_test_full.yaml +2 -2
  15. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/pr_test_light.yaml +3 -3
  16. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_image_build_and_push.yaml +4 -33
  17. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_release_code_and_wheel.yml +1 -7
  18. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_test_vllm_main.yaml +1 -1
  19. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile +1 -1
  20. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.310p +1 -1
  21. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.310p.openEuler +3 -2
  22. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.a3 +1 -1
  23. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.a3.openEuler +3 -2
  24. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.openEuler +3 -2
  25. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/PKG-INFO +3 -3
  26. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp +54 -71
  27. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +1 -3
  28. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/_templates/sections/header.html +1 -1
  29. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/versioning_policy.md +0 -2
  30. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/conf.py +3 -3
  31. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3_reranker.md +1 -1
  32. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/release_notes.md +45 -155
  33. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/pyproject.toml +2 -2
  34. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/requirements.txt +2 -2
  35. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/setup.py +1 -5
  36. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml +1 -0
  37. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py +1 -0
  38. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-A2.yaml +14 -19
  39. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-EPLB.yaml +2 -2
  40. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +11 -13
  41. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8.yaml +5 -20
  42. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml +1 -2
  43. vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-Exp-bf16.yaml +51 -0
  44. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B-A2.yaml +2 -2
  45. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B.yaml +1 -4
  46. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-EPLB.yaml +1 -3
  47. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-longseq.yaml +1 -3
  48. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8.yaml +1 -3
  49. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +1 -1
  50. vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_kimi_k2_thinking.py → vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_exp_w8a8.py +105 -110
  51. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py +2 -2
  52. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_next.py +2 -2
  53. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +14 -9
  54. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py +1 -0
  55. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py +1 -4
  56. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_aclgraph_accuracy.py +24 -3
  57. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_layernorm.py +0 -2
  58. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_token_dispatcher.py +0 -23
  59. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_platform.py +4 -1
  60. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_utils.py +0 -3
  61. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/aisbench.py +0 -5
  62. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/_version.py +3 -3
  63. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ascend_config.py +1 -2
  64. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ascend_forward_context.py +0 -4
  65. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/attention_cp.py +8 -17
  66. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/mla_cp.py +1 -1
  67. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/acl_graph.py +2 -41
  68. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +10 -11
  69. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/pool_scheduler.py +0 -1
  70. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/mooncake_layerwise_connector.py +2 -19
  71. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/envs.py +0 -7
  72. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/eplb_worker.py +0 -3
  73. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/fused_moe.py +1 -2
  74. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/rotary_embedding.py +1 -1
  75. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_kv_cache_utils.py +4 -3
  76. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_set_cudagraph_sizes.py +0 -2
  77. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_model_runner.py +1 -1
  78. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/platform.py +32 -15
  79. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/eagle_proposer.py +4 -20
  80. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/utils.py +55 -16
  81. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/block_table.py +22 -44
  82. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/model_runner_v1.py +6 -17
  83. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/npu_input_batch.py +0 -3
  84. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/worker.py +5 -12
  85. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/PKG-INFO +3 -3
  86. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/SOURCES.txt +2 -5
  87. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/requires.txt +2 -2
  88. vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-V3.1-BF16.yaml +0 -82
  89. vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Kimi-K2-Instruct-W8A8.yaml +0 -79
  90. vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py +0 -123
  91. vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py +0 -108
  92. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.gemini/config.yaml +0 -0
  93. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  94. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  95. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  96. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  97. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  98. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  99. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  100. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  101. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  102. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  103. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  104. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  105. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  106. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/dependabot.yml +0 -0
  107. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/labeler.yml +0 -0
  108. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_multi_node.yaml +0 -0
  109. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_pre_commit.yml +0 -0
  110. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/bot_merge_conflict.yaml +0 -0
  111. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/bot_pr_create.yaml +0 -0
  112. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/labled_test_310.yaml +0 -0
  113. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/matchers/actionlint.json +0 -0
  114. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/matchers/mypy.json +0 -0
  115. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/misc/model_list.json +0 -0
  116. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/pr_close_cancel_job.yaml +0 -0
  117. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_codecov_refresh.yaml +0 -0
  118. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_nightly_image_build.yaml +0 -0
  119. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_test_benchmarks.yaml +0 -0
  120. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.gitignore +0 -0
  121. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.gitmodules +0 -0
  122. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.pre-commit-config.yaml +0 -0
  123. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.readthedocs.yaml +0 -0
  124. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/CMakeLists.txt +0 -0
  125. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/CODE_OF_CONDUCT.md +0 -0
  126. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/CONTRIBUTING.md +0 -0
  127. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/DCO +0 -0
  128. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/LICENSE +0 -0
  129. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/README.md +0 -0
  130. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/README.zh.md +0 -0
  131. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/README.md +0 -0
  132. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  133. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/requirements-bench.txt +0 -0
  134. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  135. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/scripts/perf_result_template.md +0 -0
  136. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  137. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/tests/latency-tests.json +0 -0
  138. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/tests/serving-tests.json +0 -0
  139. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/tests/throughput-tests.json +0 -0
  140. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/cmake/utils.cmake +0 -0
  141. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/codecov.yml +0 -0
  142. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/collect_env.py +0 -0
  143. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/CMakeLists.txt +0 -0
  144. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUBridge.cpp +0 -0
  145. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUBridge.h +0 -0
  146. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUStorageImpl.cpp +0 -0
  147. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUStorageImpl.h +0 -0
  148. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/op_api_common.h +0 -0
  149. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/CMakeLists.txt +0 -0
  150. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_def.cpp +0 -0
  151. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_infershape.cpp +0 -0
  152. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp +0 -0
  153. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.h +0 -0
  154. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/error_log.h +0 -0
  155. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.cpp +0 -0
  156. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.h +0 -0
  157. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_merge_n.h +0 -0
  158. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_multi_n.h +0 -0
  159. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_single_n.h +0 -0
  160. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_split_d.h +0 -0
  161. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/reduce_common.h +0 -0
  162. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/rms_norm_base.h +0 -0
  163. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h +0 -0
  164. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/common.h +0 -0
  165. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/common_tiling.h +0 -0
  166. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp +0 -0
  167. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h +0 -0
  168. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp +0 -0
  169. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/build.sh +0 -0
  170. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/build_aclnn.sh +0 -0
  171. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/camem_allocator.cpp +0 -0
  172. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/config.cmake +0 -0
  173. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/func.cmake +0 -0
  174. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/intf.cmake +0 -0
  175. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/intf_pub.cmake +0 -0
  176. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/modules/Findalog.cmake +0 -0
  177. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/scripts/prepare.sh +0 -0
  178. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/CMakeLists.txt +0 -0
  179. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.cpp +0 -0
  180. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.h +0 -0
  181. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp +0 -0
  182. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_proto.cpp +0 -0
  183. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp +0 -0
  184. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/error_log.h +0 -0
  185. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/hcom_topo_info.h +0 -0
  186. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/tiling_args.h +0 -0
  187. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.cpp +0 -0
  188. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h +0 -0
  189. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h +0 -0
  190. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +0 -0
  191. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h +0 -0
  192. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h +0 -0
  193. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h +0 -0
  194. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +0 -0
  195. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h +0 -0
  196. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h +0 -0
  197. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h +0 -0
  198. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h +0 -0
  199. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h +0 -0
  200. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h +0 -0
  201. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h +0 -0
  202. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h +0 -0
  203. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h +0 -0
  204. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h +0 -0
  205. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h +0 -0
  206. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h +0 -0
  207. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h +0 -0
  208. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h +0 -0
  209. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h +0 -0
  210. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/tiling_base.h +0 -0
  211. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute.h +0 -0
  212. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute_tiling.h +0 -0
  213. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp +0 -0
  214. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +0 -0
  215. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/const_args.hpp +0 -0
  216. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +0 -0
  217. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_l0c_to_gm_custom.hpp +0 -0
  218. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/dispatch_policy_custom.hpp +0 -0
  219. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/get_tensor_addr.hpp +0 -0
  220. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp +0 -0
  221. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/layout3d.hpp +0 -0
  222. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/select_helper.hpp +0 -0
  223. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/CMakeLists.txt +0 -0
  224. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +0 -0
  225. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +0 -0
  226. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +0 -0
  227. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp +0 -0
  228. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +0 -0
  229. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue.h +0 -0
  230. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp +0 -0
  231. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h +0 -0
  232. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/dispatch_policy.h +0 -0
  233. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_binary.h +0 -0
  234. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_muls.h +0 -0
  235. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad.h +0 -0
  236. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad_preload_async_with_callback_resident_a.h +0 -0
  237. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/dispatch_policy.h +0 -0
  238. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +0 -0
  239. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +0 -0
  240. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +0 -0
  241. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +0 -0
  242. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +0 -0
  243. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +0 -0
  244. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +0 -0
  245. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +0 -0
  246. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/CMakeLists.txt +0 -0
  247. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.cpp +0 -0
  248. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +0 -0
  249. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/dispatch_layout.cpp +0 -0
  250. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/dispatch_layout_tiling.cpp +0 -0
  251. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_kernel/dispatch_layout.cpp +0 -0
  252. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_kernel/dispatch_layout.h +0 -0
  253. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_kernel/dispatch_layout_tiling.h +0 -0
  254. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt +0 -0
  255. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  256. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  257. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  258. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  259. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp +0 -0
  260. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp +0 -0
  261. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp +0 -0
  262. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h +0 -0
  263. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
  264. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
  265. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h +0 -0
  266. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h +0 -0
  267. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/bgmv_expand.cpp +0 -0
  268. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/bgmv_shrink.cpp +0 -0
  269. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  270. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/math_utils.h +0 -0
  271. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  272. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/sgmv_expand.cpp +0 -0
  273. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/sgmv_shrink.cpp +0 -0
  274. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/types.h +0 -0
  275. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/utils.h +0 -0
  276. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/CMakeLists.txt +0 -0
  277. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_def.cpp +0 -0
  278. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp +0 -0
  279. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +0 -0
  280. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.h +0 -0
  281. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer.cpp +0 -0
  282. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_common.h +0 -0
  283. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h +0 -0
  284. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h +0 -0
  285. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h +0 -0
  286. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h +0 -0
  287. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h +0 -0
  288. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/CMakeLists.txt +0 -0
  289. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +0 -0
  290. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.h +0 -0
  291. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_def.cpp +0 -0
  292. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_proto.cpp +0 -0
  293. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_tiling.cpp +0 -0
  294. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_workspace.h +0 -0
  295. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp +0 -0
  296. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aic_kernel.h +0 -0
  297. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aiv_kernel.h +0 -0
  298. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_tiling.h +0 -0
  299. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_utils.h +0 -0
  300. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
  301. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
  302. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
  303. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
  304. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
  305. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
  306. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
  307. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
  308. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
  309. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
  310. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
  311. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
  312. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
  313. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
  314. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
  315. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
  316. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
  317. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
  318. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
  319. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
  320. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
  321. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
  322. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
  323. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
  324. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
  325. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_nq.hpp +0 -0
  326. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_qdown.hpp +0 -0
  327. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
  328. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/CMakeLists.txt +0 -0
  329. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.cpp +0 -0
  330. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +0 -0
  331. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/moe_combine_normal.cpp +0 -0
  332. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +0 -0
  333. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.cpp +0 -0
  334. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.h +0 -0
  335. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_kernel/moe_combine_normal_tiling.h +0 -0
  336. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/CMakeLists.txt +0 -0
  337. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.cpp +0 -0
  338. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.h +0 -0
  339. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal.cpp +0 -0
  340. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal_tiling.cpp +0 -0
  341. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.cpp +0 -0
  342. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.h +0 -0
  343. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal_tiling.h +0 -0
  344. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/CMakeLists.txt +0 -0
  345. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/error_log.h +0 -0
  346. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/math_util.h +0 -0
  347. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_def.cpp +0 -0
  348. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_infershape.cpp +0 -0
  349. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.cpp +0 -0
  350. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h +0 -0
  351. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp +0 -0
  352. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.h +0 -0
  353. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp +0 -0
  354. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_base.cpp +0 -0
  355. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/common.h +0 -0
  356. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/error_log.h +0 -0
  357. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k.cpp +0 -0
  358. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_apt.cpp +0 -0
  359. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_e_k_fullload.h +0 -0
  360. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_generalized.h +0 -0
  361. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_without_group.h +0 -0
  362. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling.h +0 -0
  363. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling_def.h +0 -0
  364. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/error_log.h +0 -0
  365. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_base.h +0 -0
  366. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_key.h +0 -0
  367. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_templates_registry.h +0 -0
  368. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_type.h +0 -0
  369. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_util.h +0 -0
  370. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/CMakeLists.txt +0 -0
  371. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.cpp +0 -0
  372. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.h +0 -0
  373. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.cpp +0 -0
  374. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.h +0 -0
  375. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_def.cpp +0 -0
  376. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_infershape.cpp +0 -0
  377. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp +0 -0
  378. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.h +0 -0
  379. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling_base.cpp +0 -0
  380. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_common.h +0 -0
  381. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_expert_tokens_count.h +0 -0
  382. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load.h +0 -0
  383. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_base.h +0 -0
  384. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h +0 -0
  385. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h +0 -0
  386. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h +0 -0
  387. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_droppad_static_quant.h +0 -0
  388. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_dynamic_quant.h +0 -0
  389. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out.h +0 -0
  390. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out_droppad.h +0 -0
  391. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_sort_multi_core.h +0 -0
  392. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_static_quant.h +0 -0
  393. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort.h +0 -0
  394. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out.h +0 -0
  395. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out_performance.h +0 -0
  396. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_performance.h +0 -0
  397. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather.h +0 -0
  398. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad.h +0 -0
  399. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad_dynamic.h +0 -0
  400. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_actual_expert.h +0 -0
  401. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_base.h +0 -0
  402. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core.h +0 -0
  403. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core_performance.h +0 -0
  404. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_one_core.h +0 -0
  405. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_init_routing_custom.cpp +0 -0
  406. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/CMakeLists.txt +0 -0
  407. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.cpp +0 -0
  408. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +0 -0
  409. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/notify_dispatch.cpp +0 -0
  410. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/notify_dispatch_tiling.cpp +0 -0
  411. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_kernel/notify_dispatch.cpp +0 -0
  412. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_kernel/notify_dispatch.h +0 -0
  413. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_kernel/notify_dispatch_tiling.h +0 -0
  414. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/ops.h +0 -0
  415. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/CMakeLists.txt +0 -0
  416. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp +0 -0
  417. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp +0 -0
  418. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +0 -0
  419. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h +0 -0
  420. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp +0 -0
  421. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h +0 -0
  422. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h +0 -0
  423. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h +0 -0
  424. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h +0 -0
  425. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h +0 -0
  426. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/torch_binding.cpp +0 -0
  427. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/torch_binding_meta.cpp +0 -0
  428. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/CMakeLists.txt +0 -0
  429. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/aclnn_util.h +0 -0
  430. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/error/ops_error.h +0 -0
  431. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/fallback.h +0 -0
  432. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/fallback_comm.h +0 -0
  433. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/comm_args.h +0 -0
  434. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/data_copy.h +0 -0
  435. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/dropmask.h +0 -0
  436. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/pse.h +0 -0
  437. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/sync_collectives.h +0 -0
  438. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/util.h +0 -0
  439. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/log/inner/dfx_base.h +0 -0
  440. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/log/ops_log.h +0 -0
  441. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/data_copy_transpose_tiling.h +0 -0
  442. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h +0 -0
  443. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/tiling_base.h +0 -0
  444. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/tiling_templates_registry.h +0 -0
  445. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/tiling_type.h +0 -0
  446. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/src/fallback_comm.cpp +0 -0
  447. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils.h +0 -0
  448. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/Makefile +0 -0
  449. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/README.md +0 -0
  450. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/requirements-docs.txt +0 -0
  451. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/requirements-test.txt +0 -0
  452. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/blocktable.png +0 -0
  453. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/chunkedprefill.png +0 -0
  454. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/dcp-decode.png +0 -0
  455. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/dcp-prefill.png +0 -0
  456. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/head-tail-style.png +0 -0
  457. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/overview.png +0 -0
  458. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/pcp-decode.png +0 -0
  459. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/pcp-prefill.png +0 -0
  460. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/deployment.png +0 -0
  461. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/disaggregated_prefill_pull.png +0 -0
  462. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/disaggregated_prefill_push.png +0 -0
  463. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/eplb.png +0 -0
  464. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  465. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  466. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/get_quant_method.png +0 -0
  467. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_algorithm_overview.png +0 -0
  468. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_method_base_class.png +0 -0
  469. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_method_call_flow.png +0 -0
  470. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_methods_overview.png +0 -0
  471. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/workflow.png +0 -0
  472. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/contributors.md +0 -0
  473. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/governance.md +0 -0
  474. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/user_stories/index.md +0 -0
  475. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/user_stories/llamafactory.md +0 -0
  476. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/contribution/index.md +0 -0
  477. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/contribution/multi_node_test.md +0 -0
  478. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/contribution/testing.md +0 -0
  479. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/index.md +0 -0
  480. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_ais_bench.md +0 -0
  481. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  482. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  483. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  484. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -0
  485. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md +0 -0
  486. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  487. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md +0 -0
  488. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/context_parallel.md +0 -0
  489. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +0 -0
  490. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md +0 -0
  491. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/index.md +0 -0
  492. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  493. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/quantization.md +0 -0
  494. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/index.md +0 -0
  495. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +0 -0
  496. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +0 -0
  497. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +0 -0
  498. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md +0 -0
  499. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md +0 -0
  500. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/faqs.md +0 -0
  501. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/index.md +0 -0
  502. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/installation.md +0 -0
  503. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  504. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  505. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  506. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  507. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  508. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  509. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po +0 -0
  510. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  511. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  512. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  513. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po +0 -0
  514. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  515. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  516. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  517. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ACL_Graph.po +0 -0
  518. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/KV_Cache_Pool_Guide.po +0 -0
  519. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ModelRunner_prepare_inputs.po +0 -0
  520. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/add_custom_aclnn_op.po +0 -0
  521. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/context_parallel.po +0 -0
  522. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/disaggregated_prefill.po +0 -0
  523. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/eplb_swift_balancer.po +0 -0
  524. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  525. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  526. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/quantization.po +0 -0
  527. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  528. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  529. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  530. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +0 -0
  531. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +0 -0
  532. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po +0 -0
  533. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +0 -0
  534. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +0 -0
  535. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +0 -0
  536. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  537. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  538. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  539. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  540. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/310p.po +0 -0
  541. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-R1.po +0 -0
  542. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.1.po +0 -0
  543. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.2.po +0 -0
  544. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/GLM4.x.po +0 -0
  545. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Kimi-K2-Thinking.po +0 -0
  546. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/PaddleOCR-VL.po +0 -0
  547. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen-VL-Dense.po +0 -0
  548. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-7B.po +0 -0
  549. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-Omni.po +0 -0
  550. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-235B-A22B.po +0 -0
  551. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-30B-A3B.po +0 -0
  552. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-32B-W4A4.po +0 -0
  553. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-8B-W4A8.po +0 -0
  554. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Coder-30B-A3B.po +0 -0
  555. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Dense.po +0 -0
  556. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Next.po +0 -0
  557. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Omni-30B-A3B-Thinking.po +0 -0
  558. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-VL-235B-A22B-Instruct.po +0 -0
  559. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_embedding.po +0 -0
  560. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_reranker.po +0 -0
  561. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  562. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_multi_node.po +0 -0
  563. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_single_node.po +0 -0
  564. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  565. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  566. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  567. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  568. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  569. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_colocated_mooncake_multi_instance.po +0 -0
  570. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_multi_node.po +0 -0
  571. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_single_node.po +0 -0
  572. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/ray.po +0 -0
  573. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  574. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  575. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  576. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  577. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  578. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  579. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  580. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  581. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po +0 -0
  582. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po +0 -0
  583. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po +0 -0
  584. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po +0 -0
  585. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po +0 -0
  586. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po +0 -0
  587. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po +0 -0
  588. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po +0 -0
  589. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  590. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  591. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po +0 -0
  592. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +0 -0
  593. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po +0 -0
  594. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  595. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po +0 -0
  596. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  597. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  598. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po +0 -0
  599. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  600. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po +0 -0
  601. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  602. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  603. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  604. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  605. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  606. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  607. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/quick_start.md +0 -0
  608. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/310p.md +0 -0
  609. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/DeepSeek-R1.md +0 -0
  610. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/DeepSeek-V3.1.md +0 -0
  611. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/DeepSeek-V3.2.md +0 -0
  612. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/GLM4.x.md +0 -0
  613. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Kimi-K2-Thinking.md +0 -0
  614. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/PaddleOCR-VL.md +0 -0
  615. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen-VL-Dense.md +0 -0
  616. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen2.5-7B.md +0 -0
  617. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen2.5-Omni.md +0 -0
  618. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-235B-A22B.md +0 -0
  619. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-30B-A3B.md +0 -0
  620. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-32B-W4A4.md +0 -0
  621. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-8B-W4A8.md +0 -0
  622. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Coder-30B-A3B.md +0 -0
  623. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Dense.md +0 -0
  624. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Next.md +0 -0
  625. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Omni-30B-A3B-Thinking.md +0 -0
  626. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-VL-235B-A22B-Instruct.md +0 -0
  627. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3_embedding.md +0 -0
  628. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/index.md +0 -0
  629. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/long_sequence_context_parallel_multi_node.md +0 -0
  630. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/long_sequence_context_parallel_single_node.md +0 -0
  631. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/pd_colocated_mooncake_multi_instance.md +0 -0
  632. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +0 -0
  633. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +0 -0
  634. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/ray.md +0 -0
  635. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/configuration/additional_config.md +0 -0
  636. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/configuration/env_vars.md +0 -0
  637. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/configuration/index.md +0 -0
  638. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/deployment_guide/index.md +0 -0
  639. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/deployment_guide/using_volcano_kthena.md +0 -0
  640. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/Fine_grained_TP.md +0 -0
  641. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/Multi_Token_Prediction.md +0 -0
  642. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/context_parallel.md +0 -0
  643. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/dynamic_batch.md +0 -0
  644. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -0
  645. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/external_dp.md +0 -0
  646. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  647. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  648. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/layer_sharding.png +0 -0
  649. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/netloader_flowchart.png +0 -0
  650. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png +0 -0
  651. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  652. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/index.md +0 -0
  653. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/kv_pool.md +0 -0
  654. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/large_scale_ep.md +0 -0
  655. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/layer_sharding.md +0 -0
  656. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/lora.md +0 -0
  657. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/netloader.md +0 -0
  658. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/quantization.md +0 -0
  659. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  660. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/speculative_decoding.md +0 -0
  661. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  662. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/ucm_deployment.md +0 -0
  663. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/support_matrix/index.md +0 -0
  664. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  665. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  666. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/chat_templates/template_qwen2_audio.jinja +0 -0
  667. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +0 -0
  668. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -0
  669. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  670. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/eplb/eplb_deepseek.py +0 -0
  671. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/eplb/eplb_strategy.py +0 -0
  672. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/README.md +0 -0
  673. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/dp_load_balance_proxy_server.py +0 -0
  674. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/launch_online_dp.py +0 -0
  675. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/run_dp_template.sh +0 -0
  676. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_data_parallel.py +0 -0
  677. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_disaggregated_prefill_npu.py +0 -0
  678. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_embed.py +0 -0
  679. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_external_launcher.py +0 -0
  680. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_audio_language.py +0 -0
  681. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_npu.py +0 -0
  682. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_npu_long_seq.py +0 -0
  683. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_npu_tp2.py +0 -0
  684. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_sleep_mode_npu.py +0 -0
  685. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_weight_load.py +0 -0
  686. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/prompt_embed_inference.py +0 -0
  687. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/prompt_embedding_inference.py +0 -0
  688. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/quantization/llm-compressor/w8a8_int8.py +0 -0
  689. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +0 -0
  690. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/run_dp_server.sh +0 -0
  691. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/format.sh +0 -0
  692. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/mypy.ini +0 -0
  693. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/packages.txt +0 -0
  694. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/requirements-dev.txt +0 -0
  695. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/requirements-lint.txt +0 -0
  696. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/setup.cfg +0 -0
  697. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/__init__.py +0 -0
  698. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  699. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/__init__.py +0 -0
  700. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/common.sh +0 -0
  701. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/conftest.py +0 -0
  702. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  703. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  704. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/model_utils.py +0 -0
  705. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml +0 -0
  706. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Llama-3.2-3B-Instruct.yaml +0 -0
  707. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Molmo-7B-D-0924.yaml +0 -0
  708. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
  709. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml +0 -0
  710. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml +0 -0
  711. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  712. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-8B-W8A8.yaml +0 -0
  713. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
  714. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml +0 -0
  715. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-Omni-30B-A3B-Instruct.yaml +0 -0
  716. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
  717. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +0 -0
  718. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/accuracy.txt +0 -0
  719. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/gemma-3-4b-it.yaml +0 -0
  720. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/internlm3-8b-instruct.yaml +0 -0
  721. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/llava-onevision-qwen2-0.5b-ov-hf.yaml +0 -0
  722. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/conftest.py +0 -0
  723. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/report_template.md +0 -0
  724. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
  725. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py +0 -0
  726. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_data_parallel.py +0 -0
  727. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_expert_parallel.py +0 -0
  728. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_external_launcher.py +0 -0
  729. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_full_graph_mode.py +0 -0
  730. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py +0 -0
  731. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_offline_inference_distributed.py +0 -0
  732. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_offline_weight_load.py +0 -0
  733. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_pipeline_parallel.py +0 -0
  734. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_prefix_caching.py +0 -0
  735. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_quantization.py +0 -0
  736. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_qwen3_moe.py +0 -0
  737. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_shared_expert_dp.py +0 -0
  738. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_single_request_aclgraph.py +0 -0
  739. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py +0 -0
  740. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_basic.py +0 -0
  741. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py +0 -0
  742. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_mtp.py +0 -0
  743. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py +0 -0
  744. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py +0 -0
  745. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/test_kimi_k2.py +0 -0
  746. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/test_qwen3_next.py +0 -0
  747. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/__init__.py +0 -0
  748. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml +0 -0
  749. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-disagg-pd.yaml +0 -0
  750. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-VL-235B-disagg-pd.yaml +0 -0
  751. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/__init__.py +0 -0
  752. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/multi_node_config.py +0 -0
  753. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/run.sh +0 -0
  754. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/test_multi_node.py +0 -0
  755. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/utils.py +0 -0
  756. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/__init__.py +0 -0
  757. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8.py +0 -0
  758. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8_eplb.py +0 -0
  759. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_glm4_5.py +0 -0
  760. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_mtpx_deepseek_r1_0528_w8a8.py +0 -0
  761. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_prefix_cache_deepseek_r1_0528_w8a8.py +0 -0
  762. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_prefix_cache_qwen3_32b_int8.py +0 -0
  763. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_32b.py +0 -0
  764. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b.py +0 -0
  765. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -0
  766. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_235b_w8a8.py +0 -0
  767. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_30b_w8a8.py +0 -0
  768. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_32b.py +0 -0
  769. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8_a3_feature_stack3.py +0 -0
  770. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_8b.py +0 -0
  771. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py +0 -0
  772. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwq_32b.py +0 -0
  773. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/__init__.py +0 -0
  774. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py +0 -0
  775. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/__init__.py +0 -0
  776. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py +0 -0
  777. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py +0 -0
  778. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/__init__.py +0 -0
  779. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py +0 -0
  780. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py +0 -0
  781. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py +0 -0
  782. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py +0 -0
  783. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py +0 -0
  784. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py +0 -0
  785. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py +0 -0
  786. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py +0 -0
  787. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py +0 -0
  788. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py +0 -0
  789. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py +0 -0
  790. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py +0 -0
  791. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py +0 -0
  792. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_rotary_embedding.py +0 -0
  793. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py +0 -0
  794. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/__init__.py +0 -0
  795. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py +0 -0
  796. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py +0 -0
  797. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py +0 -0
  798. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py +0 -0
  799. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py +0 -0
  800. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py +0 -0
  801. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py +0 -0
  802. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py +0 -0
  803. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py +0 -0
  804. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/prompts/example.txt +0 -0
  805. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/run_doctests.sh +0 -0
  806. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/__init__.py +0 -0
  807. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/compile/__init__.py +0 -0
  808. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/compile/backend.py +0 -0
  809. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/model_runner_v2/__init__.py +0 -0
  810. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/model_runner_v2/test_basic.py +0 -0
  811. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/__init__.py +0 -0
  812. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/test_classification.py +0 -0
  813. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/test_embedding.py +0 -0
  814. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/test_scoring.py +0 -0
  815. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/spec_decode/__init__.py +0 -0
  816. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
  817. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_async_scheduling.py +0 -0
  818. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_batch_invariant.py +0 -0
  819. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_camem.py +0 -0
  820. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +0 -0
  821. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_cpu_offloading.py +0 -0
  822. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
  823. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  824. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_models.py +0 -0
  825. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -0
  826. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  827. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_quantization.py +0 -0
  828. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_sampler.py +0 -0
  829. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_vlm.py +0 -0
  830. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_xlite.py +0 -0
  831. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/utils.py +0 -0
  832. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/utils.py +0 -0
  833. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
  834. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/vllm_interface/vllm_test.cfg +0 -0
  835. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/__init__.py +0 -0
  836. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_attention_cp.py +0 -0
  837. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_attention_mask.py +0 -0
  838. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_attention_v1.py +0 -0
  839. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_mla_cp.py +0 -0
  840. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_mla_v1.py +0 -0
  841. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_sfa_v1.py +0 -0
  842. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/utils.py +0 -0
  843. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/base.py +0 -0
  844. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/compilation/test_acl_graph.py +0 -0
  845. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/compilation/test_add_rms_norm_quant.py +0 -0
  846. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/conftest.py +0 -0
  847. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/core/test_scheduler_dynamic_batch.py +0 -0
  848. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/device_allocator/test_camem.py +0 -0
  849. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  850. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  851. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/mooncake/test_config_data.py +0 -0
  852. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/test_communicator.py +0 -0
  853. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  854. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/test_parallel_state.py +0 -0
  855. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
  856. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/expert_map.json +0 -0
  857. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
  858. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
  859. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
  860. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
  861. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
  862. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/test_eplb_utils.py +0 -0
  863. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/fake_weight/config.json +0 -0
  864. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_mooncake_connector.py +0 -0
  865. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -0
  866. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  867. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  868. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/utils.py +0 -0
  869. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader.py +0 -0
  870. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader_elastic.py +0 -0
  871. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader_load.py +0 -0
  872. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader_utils.py +0 -0
  873. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_activation.py +0 -0
  874. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_comm_utils.py +0 -0
  875. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_fused_moe.py +0 -0
  876. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_linear.py +0 -0
  877. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_mla.py +0 -0
  878. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_moe_comm_method.py +0 -0
  879. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_moe_mlp.py +0 -0
  880. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_prepare_finalize.py +0 -0
  881. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_rotary_embedding.py +0 -0
  882. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
  883. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  884. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  885. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_quant_config.py +0 -0
  886. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_utils.py +0 -0
  887. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w4a16.py +0 -0
  888. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -0
  889. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w4a8_dynamic.py +0 -0
  890. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w8a16.py +0 -0
  891. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w8a8.py +0 -0
  892. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
  893. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/sample/test_rejection_sampler.py +0 -0
  894. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/sample/test_sampler.py +0 -0
  895. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/spec_decode/test_eagle_proposer.py +0 -0
  896. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/spec_decode/test_mtp_proposer.py +0 -0
  897. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_ascend_config.py +0 -0
  898. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_envs.py +0 -0
  899. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/worker/test_block_table.py +0 -0
  900. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/worker/test_pcp_manager.py +0 -0
  901. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/worker/test_worker_v1.py +0 -0
  902. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/actionlint.sh +0 -0
  903. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/check_python_src_init.py +0 -0
  904. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/check_repo.sh +0 -0
  905. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/collect_user_first_contribution.sh +0 -0
  906. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/enforce_regex_import.py +0 -0
  907. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/format_contributors.py +0 -0
  908. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/install_flash_infer_attention_score_ops_a2.sh +0 -0
  909. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/install_flash_infer_attention_score_ops_a3.sh +0 -0
  910. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/mooncake_installer.sh +0 -0
  911. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/mypy.sh +0 -0
  912. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/png-lint.sh +0 -0
  913. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/send_mm_request.py +0 -0
  914. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/send_request.py +0 -0
  915. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/shellcheck.sh +0 -0
  916. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/sphinx-lint.sh +0 -0
  917. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/vllm_bench.py +0 -0
  918. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/typos.toml +0 -0
  919. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/__init__.py +0 -0
  920. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/_cann_ops_custom/.gitkeep +0 -0
  921. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/__init__.py +0 -0
  922. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/attention_mask.py +0 -0
  923. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/attention_v1.py +0 -0
  924. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/__init__.py +0 -0
  925. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/common_cp.py +0 -0
  926. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/mla_v1.py +0 -0
  927. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/sfa_v1.py +0 -0
  928. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/utils.py +0 -0
  929. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/batch_invariant.py +0 -0
  930. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/__init__.py +0 -0
  931. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/compiler_interface.py +0 -0
  932. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/graph_fusion_pass_manager.py +0 -0
  933. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/npugraph_ex_passes/__init__.py +0 -0
  934. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/npugraph_ex_passes/add_rms_norm_quant.py +0 -0
  935. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/passes/__init__.py +0 -0
  936. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py +0 -0
  937. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/__init__.py +0 -0
  938. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/multi_block_pool.py +0 -0
  939. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/recompute_scheduler.py +0 -0
  940. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/scheduler_dynamic_batch.py +0 -0
  941. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/cpu_binding.py +0 -0
  942. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/device_allocator/__init__.py +0 -0
  943. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/device_allocator/camem.py +0 -0
  944. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/__init__.py +0 -0
  945. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/communicator.py +0 -0
  946. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_connector.py +0 -0
  947. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_manager/__init__.py +0 -0
  948. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
  949. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
  950. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
  951. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  952. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  953. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/__init__.py +0 -0
  954. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/ascend_store_connector.py +0 -0
  955. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/__init__.py +0 -0
  956. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/backend.py +0 -0
  957. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +0 -0
  958. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py +0 -0
  959. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/config_data.py +0 -0
  960. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/kv_transfer.py +0 -0
  961. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/pool_worker.py +0 -0
  962. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/mooncake_connector.py +0 -0
  963. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/mooncake_transfer_engine.py +0 -0
  964. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/parallel_state.py +0 -0
  965. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/ucm_connector.py +0 -0
  966. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/utils.py +0 -0
  967. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/__init__.py +0 -0
  968. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/adaptor/__init__.py +0 -0
  969. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
  970. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -0
  971. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/__init__.py +0 -0
  972. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -0
  973. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/eplb_utils.py +0 -0
  974. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/__init__.py +0 -0
  975. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
  976. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
  977. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -0
  978. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
  979. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -0
  980. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
  981. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/eplb_updator.py +0 -0
  982. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/utils.py +0 -0
  983. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/flash_common3_context.py +0 -0
  984. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/kv_offload/__init__.py +0 -0
  985. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/kv_offload/cpu_npu.py +0 -0
  986. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/kv_offload/npu.py +0 -0
  987. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/__init__.py +0 -0
  988. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/lora_ops.py +0 -0
  989. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/punica_npu.py +0 -0
  990. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/utils.py +0 -0
  991. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/meta_registration.py +0 -0
  992. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/__init__.py +0 -0
  993. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/__init__.py +0 -0
  994. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/executor/__init__.py +0 -0
  995. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/executor/elastic_load.py +0 -0
  996. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/executor/netloader_pg.py +0 -0
  997. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/interaction/__init__.py +0 -0
  998. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/interaction/elastic.py +0 -0
  999. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/load.py +0 -0
  1000. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/netloader.py +0 -0
  1001. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/utils.py +0 -0
  1002. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/__init__.py +0 -0
  1003. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/activation.py +0 -0
  1004. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/flashcomm2_oshard_manager.py +0 -0
  1005. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/__init__.py +0 -0
  1006. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/comm_utils.py +0 -0
  1007. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/experts_selector.py +0 -0
  1008. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/moe_comm_method.py +0 -0
  1009. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/moe_mlp.py +0 -0
  1010. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/prepare_finalize.py +0 -0
  1011. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/token_dispatcher.py +0 -0
  1012. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/layer_shard_linear.py +0 -0
  1013. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/layernorm.py +0 -0
  1014. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/linear.py +0 -0
  1015. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/linear_op.py +0 -0
  1016. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/mla.py +0 -0
  1017. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/mm_encoder_attention.py +0 -0
  1018. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/register_custom_ops.py +0 -0
  1019. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/__init__.py +0 -0
  1020. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/activation/__init__.py +0 -0
  1021. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/activation/swiglu_quant.py +0 -0
  1022. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/__init__.py +0 -0
  1023. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/matmul.py +0 -0
  1024. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/mean.py +0 -0
  1025. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/rmsnorm.py +0 -0
  1026. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/softmax.py +0 -0
  1027. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/__init__.py +0 -0
  1028. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk.py +0 -0
  1029. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk_delta_h.py +0 -0
  1030. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk_o.py +0 -0
  1031. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py +0 -0
  1032. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/cumsum.py +0 -0
  1033. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py +0 -0
  1034. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/l2norm.py +0 -0
  1035. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/layernorm_guard.py +0 -0
  1036. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/sigmoid_gating.py +0 -0
  1037. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/solve_tril.py +0 -0
  1038. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/utils.py +0 -0
  1039. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/wy_fast.py +0 -0
  1040. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fused_gdn_gating.py +0 -0
  1041. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/linearnorm/__init__.py +0 -0
  1042. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py +0 -0
  1043. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/mamba/__init__.py +0 -0
  1044. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/mamba/causal_conv1d.py +0 -0
  1045. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/reject_sample.py +0 -0
  1046. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/rope.py +0 -0
  1047. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/spec_decode/__init__.py +0 -0
  1048. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/spec_decode/utils.py +0 -0
  1049. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/triton_utils.py +0 -0
  1050. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
  1051. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/weight_prefetch.py +0 -0
  1052. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/__init__.py +0 -0
  1053. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/__init__.py +0 -0
  1054. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_balance_schedule.py +0 -0
  1055. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_core.py +0 -0
  1056. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
  1057. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_ec_connector.py +0 -0
  1058. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_kv_cache_coordinator.py +0 -0
  1059. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_lora_model_manager.py +0 -0
  1060. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_mamba_config.py +0 -0
  1061. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_message_queue.py +0 -0
  1062. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -0
  1063. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
  1064. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_vllm_config.py +0 -0
  1065. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/__init__.py +0 -0
  1066. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_bert.py +0 -0
  1067. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_deepseek.py +0 -0
  1068. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_deepseekv3.py +0 -0
  1069. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
  1070. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
  1071. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_module.py +0 -0
  1072. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
  1073. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_qwen3_next.py +0 -0
  1074. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_qwen3_next_mtp.py +0 -0
  1075. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_qwen3vl.py +0 -0
  1076. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_rejection_sampler.py +0 -0
  1077. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_rope.py +0 -0
  1078. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_triton.py +0 -0
  1079. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/profiling_config.py +0 -0
  1080. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/__init__.py +0 -0
  1081. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/compressed_tensors/__init__.py +0 -0
  1082. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/compressed_tensors/compressed_tensors.py +0 -0
  1083. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/quant_config.py +0 -0
  1084. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/utils.py +0 -0
  1085. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w4a16.py +0 -0
  1086. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -0
  1087. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w4a8_dynamic.py +0 -0
  1088. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a16.py +0 -0
  1089. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8.py +0 -0
  1090. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8_dynamic.py +0 -0
  1091. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8_pdmix.py +0 -0
  1092. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8mxfp8.py +0 -0
  1093. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/sample/__init__.py +0 -0
  1094. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/sample/rejection_sampler.py +0 -0
  1095. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/sample/sampler.py +0 -0
  1096. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/__init__.py +0 -0
  1097. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/interface.py +0 -0
  1098. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/mtp_proposer.py +0 -0
  1099. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/ngram_proposer.py +0 -0
  1100. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/suffix_proposer.py +0 -0
  1101. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/__init__.py +0 -0
  1102. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/pcp_utils.py +0 -0
  1103. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/README.md +0 -0
  1104. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/__init__.py +0 -0
  1105. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/aclgraph_utils.py +0 -0
  1106. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/attn_utils.py +0 -0
  1107. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/input_batch.py +0 -0
  1108. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/model_runner.py +0 -0
  1109. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/__init__.py +0 -0
  1110. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/gumbel.py +0 -0
  1111. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/penalties.py +0 -0
  1112. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/sampler.py +0 -0
  1113. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/states.py +0 -0
  1114. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/utils.py +0 -0
  1115. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/__init__.py +0 -0
  1116. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/xlite.py +0 -0
  1117. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/xlite_model_runner.py +0 -0
  1118. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/xlite_worker.py +0 -0
  1119. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  1120. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/entry_points.txt +0 -0
  1121. {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -32,7 +32,7 @@ COPY . /workspace/vllm-ascend/
32
32
 
33
33
  # Install req
34
34
  RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
35
- python3 -m pip install twine attrs psutil
35
+ python3 -m pip install twine
36
36
 
37
37
  # Install vllm-ascend
38
38
  RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
@@ -15,7 +15,7 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/vllm-ascend:releases-v0.13.0
18
+ FROM quay.io/ascend/vllm-ascend:main
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG AIS_BENCH_TAG="v3.0-20250930-master"
@@ -15,7 +15,7 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/vllm-ascend:releases-v0.13.0-a3
18
+ FROM quay.io/ascend/vllm-ascend:main-a3
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG AIS_BENCH_TAG="v3.0-20250930-master"
@@ -21,7 +21,3 @@ self-hosted-runner:
21
21
  - linux-aarch64-a3-0
22
22
  - linux-amd64-cpu-8-hk
23
23
  - linux-amd64-cpu-16-hk
24
- - linux-aarch64-a2b3-0
25
- - linux-aarch64-a2b3-1
26
- - linux-aarch64-a2b3-2
27
- - linux-aarch64-a2b3-4
@@ -110,6 +110,19 @@ jobs:
110
110
  fi
111
111
  cd ..
112
112
 
113
+ - name: Install custom-ops (for DeepSeek-V3.2-Exp)
114
+ if: ${{ inputs.name == 'deepseek3_2-exp-w8a8' }}
115
+ shell: bash -l {0}
116
+ run: |
117
+ wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run
118
+ chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run
119
+ ./CANN-custom_ops-sfa-linux.aarch64.run --quiet
120
+ export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH}
121
+ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH}
122
+ wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl
123
+ pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl
124
+ . /usr/local/Ascend/ascend-toolkit/set_env.sh
125
+
113
126
  - name: Run vllm-project/vllm-ascend test
114
127
  env:
115
128
  VLLM_WORKER_MULTIPROC_METHOD: spawn
@@ -121,3 +134,5 @@ jobs:
121
134
  # ignore test_dispatch_ffn_combine until the test is fixed
122
135
  pytest -sv ${{ inputs.tests }} \
123
136
  --ignore=tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py
137
+
138
+
@@ -112,10 +112,10 @@ jobs:
112
112
  update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
113
113
 
114
114
  - name: Install tensorflow (for Molmo-7B-D-0924)
115
- if: ${{ inputs.runner == 'linux-aarch64-a2b3-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
115
+ if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
116
116
  shell: bash -l {0}
117
117
  run: |
118
- pip install tensorflow==2.19.1 --no-cache-dir
118
+ pip install tensorflow --no-cache-dir
119
119
 
120
120
  - name: Resolve vllm-ascend version
121
121
  run: |
@@ -19,7 +19,7 @@ on:
19
19
  jobs:
20
20
  e2e:
21
21
  name: singlecard
22
- runs-on: linux-aarch64-a2b3-1
22
+ runs-on: ${{ inputs.runner }}-1
23
23
  container:
24
24
  image: ${{ inputs.image }}
25
25
  env:
@@ -145,9 +145,11 @@ jobs:
145
145
 
146
146
  - name: Config mirrors
147
147
  run: |
148
- sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
149
- pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
150
- pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
148
+ # FIXME: switch back to the nginx cache instead of the public PyPI mirror
149
+ # sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
150
+ # pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
151
+ # pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
152
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
151
153
  apt-get update -y
152
154
  apt install git -y
153
155
 
@@ -46,7 +46,6 @@ jobs:
46
46
  with:
47
47
  fetch-depth: 0
48
48
  persist-credentials: false
49
- ref: ${{ github.ref }}
50
49
 
51
50
  - name: Free up disk space
52
51
  uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
@@ -106,8 +105,6 @@ jobs:
106
105
  steps:
107
106
  - name: Checkout
108
107
  uses: actions/checkout@v6
109
- with:
110
- ref: ${{ github.ref }}
111
108
 
112
109
  - name: Download arm64 digests
113
110
  uses: actions/download-artifact@v7
@@ -149,9 +146,8 @@ jobs:
149
146
  # which follow the rule from vLLM with prefix v
150
147
  # TODO(yikun): the post release might be considered as latest release
151
148
  tags: |
152
- type=ref,event=branch,suffix=${{ env.SUFFIX }}
153
- type=ref,event=pr,suffix=${{ env.SUFFIX }}
154
149
  type=pep440,pattern={{raw}},suffix=${{ env.SUFFIX }}
150
+ type=schedule,pattern=main,suffix=${{ env.SUFFIX }}
155
151
  flavor:
156
152
  latest=false
157
153
 
@@ -27,7 +27,6 @@ jobs:
27
27
  VLLM_USE_MODELSCOPE: True
28
28
  SOC_VERSION: ascend910b1
29
29
  MAX_JOBS: 4
30
- COMPILE_CUSTOM_KERNELS: 0
31
30
  steps:
32
31
  - name: Install packages
33
32
  run: |
@@ -48,7 +48,7 @@ jobs:
48
48
  matrix:
49
49
  vllm_verison: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
50
50
  name: vLLM Ascend test
51
- runs-on: linux-aarch64-a2b3-1
51
+ runs-on: linux-aarch64-a2-1
52
52
  container:
53
53
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_verison }}
54
54
  steps:
@@ -19,7 +19,7 @@ jobs:
19
19
  download-models:
20
20
  if: contains(github.event.pull_request.labels.*.name, 'model-download')
21
21
  name: Download models from ModelScope
22
- runs-on: linux-aarch64-a2b3-0
22
+ runs-on: linux-aarch64-a2-0
23
23
  container:
24
24
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
25
25
 
@@ -50,22 +50,22 @@ jobs:
50
50
  matrix:
51
51
  test_config:
52
52
  - name: qwen3-8b
53
- os: linux-aarch64-a2b3-1
53
+ os: linux-aarch64-a2-1
54
54
  tests: tests/e2e/nightly/single_node/models/test_qwen3_8b.py
55
55
  - name: qwen3next
56
- os: linux-aarch64-a2b3-4
56
+ os: linux-aarch64-a2-4
57
57
  tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
58
58
  - name: qwen3-32b
59
- os: linux-aarch64-a2b3-4
59
+ os: linux-aarch64-a2-4
60
60
  tests: tests/e2e/nightly/single_node/models/test_qwen3_32b.py
61
61
  - name: qwen3-32b-in8-a2
62
- os: linux-aarch64-a2b3-4
62
+ os: linux-aarch64-a2-4
63
63
  tests: tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py
64
64
  - name: test_custom_op
65
- os: linux-aarch64-a2b3-1
65
+ os: linux-aarch64-a2-1
66
66
  tests: tests/e2e/nightly/single_node/ops/singlecard_ops
67
67
  - name: test_custom_op_multi_card
68
- os: linux-aarch64-a2b3-4
68
+ os: linux-aarch64-a2-4
69
69
  tests: tests/e2e/nightly/single_node/ops/multicard_ops_a2/
70
70
  uses: ./.github/workflows/_e2e_nightly_single_node.yaml
71
71
  with:
@@ -93,7 +93,7 @@ jobs:
93
93
  uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
94
94
  with:
95
95
  soc_version: a2
96
- runner: linux-aarch64-a2b3-0
96
+ runner: linux-aarch64-a2-0
97
97
  image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
98
98
  replicas: 1
99
99
  size: ${{ matrix.test_config.size }}
@@ -106,32 +106,35 @@ jobs:
106
106
  ${{
107
107
  github.event_name == 'schedule' ||
108
108
  github.event_name == 'workflow_dispatch' ||
109
- contains(github.event.pull_request.labels.*.name, 'accuracy-test')
109
+ (
110
+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
111
+ contains(github.event.pull_request.labels.*.name, 'ready-for-test')
112
+ )
110
113
  }}
111
114
  strategy:
112
115
  fail-fast: false
113
116
  matrix:
114
117
  test_config:
115
- - os: linux-aarch64-a2b3-1
118
+ - os: linux-aarch64-a2-1
116
119
  model_list:
117
120
  - Qwen3-8B
118
121
  - Qwen2-Audio-7B-Instruct
119
122
  - Qwen3-8B-W8A8
120
123
  - Qwen3-VL-8B-Instruct
121
124
  - Qwen2.5-Omni-7B
122
- - os: linux-aarch64-a2b3-1
125
+ - os: linux-aarch64-a2-1
123
126
  model_list:
124
127
  - ERNIE-4.5-21B-A3B-PT
125
128
  - InternVL3_5-8B-hf
126
129
  - Molmo-7B-D-0924
127
130
  - Llama-3.2-3B-Instruct
128
131
  - llava-onevision-qwen2-0.5b-ov-hf
129
- - os: linux-aarch64-a2b3-2
132
+ - os: linux-aarch64-a2-2
130
133
  model_list:
131
134
  - Qwen3-30B-A3B
132
135
  - Qwen3-VL-30B-A3B-Instruct
133
136
  - Qwen3-30B-A3B-W8A8
134
- - os: linux-aarch64-a2b3-4
137
+ - os: linux-aarch64-a2-4
135
138
  model_list:
136
139
  - Qwen3-Next-80B-A3B-Instruct
137
140
  - Qwen3-Omni-30B-A3B-Instruct
@@ -83,12 +83,6 @@ jobs:
83
83
  - name: multi-node-qwen-vl-disagg-pd
84
84
  config_file_path: Qwen3-VL-235B-disagg-pd.yaml
85
85
  size: 2
86
- - name: multi-node-kimi-k2-instruct-w8a8
87
- config_file_path: Kimi-K2-Instruct-W8A8.yaml
88
- size: 2
89
- - name: multi-node-deepseek-v3.1
90
- config_file_path: DeepSeek-V3.1-BF16.yaml
91
- size: 2
92
86
  uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
93
87
  with:
94
88
  soc_version: a3
@@ -150,15 +144,10 @@ jobs:
150
144
  - name: qwen3-next-w8a8
151
145
  os: linux-aarch64-a3-4
152
146
  tests: tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py
153
- - name: kimi-k2-thinking
154
- os: linux-aarch64-a3-16
155
- tests: tests/e2e/nightly/single_node/models/test_kimi_k2_thinking.py
156
- - name: deepseek-r1-w8a8-hbm
157
- os: linux-aarch64-a3-16
158
- tests: tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py
159
- - name: deepseek3_2-w8a8
160
- os: linux-aarch64-a3-16
161
- tests: tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py
147
+ # TODO: Replace deepseek3.2-exp with deepseek3.2 after nightly tests pass
148
+ # - name: deepseek3_2-exp-w8a8
149
+ # os: linux-aarch64-a3-16
150
+ # tests: tests/e2e/nightly/single_node/models/test_deepseek_v3_2_exp_w8a8.py
162
151
  uses: ./.github/workflows/_e2e_nightly_single_node.yaml
163
152
  with:
164
153
  vllm: v0.13.0
@@ -38,7 +38,7 @@ concurrency:
38
38
 
39
39
  jobs:
40
40
  changes:
41
- runs-on: linux-aarch64-a2b3-0
41
+ runs-on: linux-aarch64-a2-0
42
42
  if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
43
43
  outputs:
44
44
  e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
@@ -80,6 +80,6 @@ jobs:
80
80
  uses: ./.github/workflows/_e2e_test.yaml
81
81
  with:
82
82
  vllm: ${{ matrix.vllm_version }}
83
- runner: linux-aarch64-a2b3
83
+ runner: linux-aarch64-a2
84
84
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
85
85
  type: full
@@ -41,7 +41,7 @@ jobs:
41
41
  with:
42
42
  vllm: v0.13.0
43
43
  changes:
44
- runs-on: linux-aarch64-a2b3-0
44
+ runs-on: linux-aarch64-a2-0
45
45
  outputs:
46
46
  e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
47
47
  ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -84,7 +84,7 @@ jobs:
84
84
  with:
85
85
  vllm: ${{ matrix.vllm_version }}
86
86
  runner: linux-amd64-cpu-16-hk
87
- image: quay.nju.edu.cn/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11
87
+ image: quay.nju.edu.cn/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
88
88
  type: pr
89
89
 
90
90
  e2e-light:
@@ -99,6 +99,6 @@ jobs:
99
99
  uses: ./.github/workflows/_e2e_test.yaml
100
100
  with:
101
101
  vllm: ${{ matrix.vllm_version }}
102
- runner: linux-aarch64-a2b3
102
+ runner: linux-aarch64-a2
103
103
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
104
104
  type: light
@@ -11,36 +11,12 @@
11
11
  # - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1
12
12
  name: Image Build and Push
13
13
  on:
14
- pull_request:
15
- branches:
16
- - 'releases/*'
17
- paths:
18
- - '.github/workflows/schedule_image_build_and_push.yaml'
19
- - 'Dockerfile*'
20
- - 'vllm_ascend/**'
21
- - 'setup.py'
22
- - 'pyproject.toml'
23
- - 'requirements.txt'
24
- - 'cmake/**'
25
- - 'CMakeLists.txt'
26
- - 'csrc/**'
27
- types: [ labeled, synchronize ]
14
+ schedule:
15
+ # UTC+8: 8am, 12pm, 4pm, 10pm
16
+ - cron: '0 0,4,8,14 * * *'
28
17
  push:
29
- # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
- branches:
31
- - 'releases/*'
32
18
  tags:
33
19
  - 'v*'
34
- paths:
35
- - '.github/workflows/schedule_image_build_and_push.yaml'
36
- - 'Dockerfile*'
37
- - 'vllm_ascend/**'
38
- - 'setup.py'
39
- - 'pyproject.toml'
40
- - 'requirements.txt'
41
- - 'cmake/**'
42
- - 'CMakeLists.txt'
43
- - 'csrc/**'
44
20
  workflow_dispatch:
45
21
  inputs:
46
22
  tag:
@@ -49,13 +25,8 @@ on:
49
25
  default: main
50
26
  required: true
51
27
 
52
- concurrency:
53
- group: ${{ github.workflow }}-${{ github.ref_name }}
54
- cancel-in-progress: true
55
-
56
28
  jobs:
57
29
  image_build:
58
- if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'image-build') }}
59
30
  name: Image Build and Push
60
31
  strategy:
61
32
  matrix:
@@ -81,7 +52,7 @@ jobs:
81
52
  dockerfile: ${{ matrix.build_meta.dockerfile }}
82
53
  suffix: ${{ matrix.build_meta.suffix }}
83
54
  quay_username: ${{ vars.QUAY_USERNAME }}
84
- should_push: ${{ github.repository_owner == 'vllm-project' && github.event_name == 'push'}}
55
+ should_push: ${{ github.repository_owner == 'vllm-project' }}
85
56
  workflow_dispatch_tag: ${{ inputs.tag }}
86
57
  secrets:
87
58
  QUAY_PASSWORD: ${{ secrets.QUAY_PASSWORD }}
@@ -122,13 +122,7 @@ jobs:
122
122
  --exclude "libascend*.so" \
123
123
  --exclude "libtorch*.so" \
124
124
  --exclude "libopapi.so" \
125
- --exclude "liberror_manager.so" \
126
- --exclude "libruntime.so" \
127
- --exclude "libmmpa.so" \
128
- --exclude "libops_base.so" \
129
- --exclude "libopapi_math.so" \
130
- --exclude "libunified_dlog.so" \
131
- --exclude "liboptiling.so"
125
+ --exclude "liberror_manager.so"
132
126
  done
133
127
  rm -f dist/*.whl
134
128
  mv dist/repaired/*.whl dist/
@@ -34,6 +34,6 @@ jobs:
34
34
  uses: ./.github/workflows/_e2e_test.yaml
35
35
  with:
36
36
  vllm: main
37
- runner: linux-aarch64-a2b3
37
+ runner: linux-aarch64-a2
38
38
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
39
39
  type: full
@@ -73,7 +73,7 @@ RUN apt-get update -y && \
73
73
  rm -rf /var/lib/apt/lists/*
74
74
 
75
75
  # Install modelscope (for fast download), ray (for multinode) and torch-npu post version
76
- RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
76
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
77
77
  python3 -m pip cache purge
78
78
 
79
79
  RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
@@ -58,7 +58,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
58
58
  python3 -m pip cache purge
59
59
 
60
60
  # Install modelscope (for fast download), ray (for multinode) and torch-npu post version
61
- RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
61
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
62
62
  python3 -m pip cache purge
63
63
 
64
64
  RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
@@ -25,7 +25,7 @@ ENV SOC_VERSION=$SOC_VERSION \
25
25
  OMP_NUM_THREADS=1
26
26
 
27
27
  RUN yum update -y && \
28
- yum install -y patch python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
28
+ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
29
29
  rm -rf /var/cache/yum
30
30
 
31
31
  RUN pip config set global.index-url ${PIP_INDEX_URL}
@@ -48,12 +48,13 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
48
48
  source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
49
49
  source /usr/local/Ascend/nnal/atb/set_env.sh && \
50
50
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
51
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
51
52
  export SOC_VERSION=ASCEND310P3 && \
52
53
  python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
53
54
  python3 -m pip cache purge
54
55
 
55
56
  # Install modelscope (for fast download), ray (for multinode) and torch-npu post version
56
- RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
57
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
57
58
  python3 -m pip cache purge
58
59
 
59
60
  RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
@@ -72,7 +72,7 @@ RUN apt-get update -y && \
72
72
  rm -rf /var/lib/apt/lists/*
73
73
 
74
74
  # Install modelscope (for fast download), ray (for multinode) and torch-npu post version
75
- RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
75
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
76
76
  python3 -m pip cache purge
77
77
 
78
78
  RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
34
34
  SHELL ["/bin/bash", "-c"]
35
35
 
36
36
  RUN yum update -y && \
37
- yum install -y patch git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
37
+ yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
38
38
  git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
39
39
  cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
40
40
  ARCH=$(uname -m) && \
@@ -62,6 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
62
62
  source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
63
63
  source /usr/local/Ascend/nnal/atb/set_env.sh && \
64
64
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
65
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
65
66
  python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
66
67
  python3 -m pip cache purge
67
68
 
@@ -71,7 +72,7 @@ RUN yum update -y && \
71
72
  rm -rf /var/cache/yum/*
72
73
 
73
74
  # Install modelscope (for fast download), ray (for multinode) and torch-npu post version
74
- RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
75
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
75
76
  python3 -m pip cache purge
76
77
 
77
78
  RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
34
34
  SHELL ["/bin/bash", "-c"]
35
35
 
36
36
  RUN yum update -y && \
37
- yum install -y patch git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
37
+ yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
38
38
  git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
39
39
  cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
40
40
  ARCH=$(uname -m) && \
@@ -62,6 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
62
62
  source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
63
63
  source /usr/local/Ascend/nnal/atb/set_env.sh && \
64
64
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
65
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
65
66
  python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
66
67
  python3 -m pip cache purge
67
68
 
@@ -71,7 +72,7 @@ RUN yum update -y && \
71
72
  rm -rf /var/cache/yum/*
72
73
 
73
74
  # Install modelscope (for fast download), ray (for multinode) and torch-npu post version
74
- RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
75
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
75
76
  python3 -m pip cache purge
76
77
 
77
78
  RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm_ascend
3
- Version: 0.13.0
3
+ Version: 0.13.0rc2
4
4
  Summary: vLLM Ascend backend plugin
5
5
  Home-page: https://github.com/vllm-project/vllm-ascend
6
6
  Author: vLLM-Ascend team
@@ -38,9 +38,9 @@ Requires-Dist: compressed_tensors>=0.11.0
38
38
  Requires-Dist: msgpack
39
39
  Requires-Dist: quart
40
40
  Requires-Dist: numba
41
- Requires-Dist: torch-npu==2.8.0.post2
41
+ Requires-Dist: torch-npu==2.8.0
42
42
  Requires-Dist: arctic-inference==0.1.1
43
- Requires-Dist: transformers<5,>=4.57.4
43
+ Requires-Dist: transformers>=4.57.3
44
44
  Requires-Dist: fastapi<0.124.0
45
45
  Requires-Dist: triton-ascend==3.2.0
46
46
  Dynamic: author