vllm-ascend 0.11.0rc1__tar.gz → 0.11.0rc3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (631) hide show
  1. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/_accuracy_test.yaml +1 -1
  2. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/_e2e_test.yaml +2 -2
  3. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/accuracy_test.yaml +1 -1
  4. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/multi_node_test.yaml +1 -1
  5. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/nightly_benchmarks.yaml +1 -1
  6. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/release_whl.yml +7 -1
  7. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_dist.yaml +2 -2
  8. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test.yaml +1 -1
  9. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_310p.yaml +1 -1
  10. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_full.yaml +1 -1
  11. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +1 -1
  12. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_models.yaml +1 -1
  13. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_pd.yaml +2 -2
  14. vllm_ascend-0.11.0rc1/Dockerfile.a3 → vllm_ascend-0.11.0rc3/Dockerfile +18 -9
  15. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/Dockerfile.310p +2 -2
  16. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/Dockerfile.310p.openEuler +2 -2
  17. vllm_ascend-0.11.0rc1/Dockerfile → vllm_ascend-0.11.0rc3/Dockerfile.a3 +18 -10
  18. vllm_ascend-0.11.0rc1/Dockerfile.openEuler → vllm_ascend-0.11.0rc3/Dockerfile.a3.openEuler +21 -8
  19. vllm_ascend-0.11.0rc1/Dockerfile.a3.openEuler → vllm_ascend-0.11.0rc3/Dockerfile.openEuler +23 -9
  20. {vllm_ascend-0.11.0rc1/vllm_ascend.egg-info → vllm_ascend-0.11.0rc3}/PKG-INFO +2 -1
  21. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/conf.py +1 -1
  22. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/installation.md +10 -10
  23. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/index.md +1 -0
  24. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_qwen3_next.md +2 -2
  25. vllm_ascend-0.11.0rc3/docs/source/tutorials/single_node_pd_disaggregation_llmdatadist.md +181 -0
  26. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +7 -0
  27. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +10 -0
  28. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +9 -0
  29. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/pyproject.toml +1 -0
  30. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/requirements-dev.txt +1 -1
  31. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/requirements.txt +2 -0
  32. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/scripts/lws.yaml +2 -2
  33. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +3 -4
  34. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/vllm_interface/vllm_test.cfg +1 -1
  35. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_rotary_embedding.py +9 -2
  36. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w8a8.py +0 -1
  37. vllm_ascend-0.11.0rc3/tools/mooncake_installer.sh +331 -0
  38. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/_version.py +3 -3
  39. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/attention_v1.py +9 -1
  40. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/mla_v1.py +3 -1
  41. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/compilation/acl_graph.py +20 -21
  42. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/transfer_engine.py +10 -0
  43. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/envs.py +0 -5
  44. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/adaptor/vllm_adaptor.py +3 -3
  45. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_5_vl.py +68 -2
  46. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_5_vl_without_padding.py +66 -0
  47. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_vl.py +2 -2
  48. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/common_fused_moe.py +12 -17
  49. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/linear.py +1 -2
  50. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/token_dispatcher.py +4 -0
  51. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/rotary_embedding.py +5 -1
  52. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/__init__.py +1 -1
  53. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_mamba_config.py +1 -1
  54. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/__init__.py +6 -1
  55. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/platform.py +14 -1
  56. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/quant_config.py +5 -0
  57. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/eagle_proposer.py +3 -1
  58. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/interface.py +2 -1
  59. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/mtp_proposer.py +5 -2
  60. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/ngram_proposer.py +19 -15
  61. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_fused_moe.py +1 -1
  62. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_sfa.py +2 -2
  63. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/utils.py +1 -1
  64. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/utils.py +61 -1
  65. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/model_runner_v1.py +68 -31
  66. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/worker_v1.py +1 -1
  67. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3/vllm_ascend.egg-info}/PKG-INFO +2 -1
  68. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/SOURCES.txt +2 -2
  69. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/requires.txt +1 -0
  70. vllm_ascend-0.11.0rc1/.github/workflows/_e2e_nightly.yaml +0 -115
  71. vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_nightly.yaml +0 -105
  72. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.gemini/config.yaml +0 -0
  73. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/Dockerfile.buildwheel +0 -0
  74. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  75. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  76. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  77. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  78. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  79. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  80. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  81. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  82. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  83. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  84. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  85. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  86. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  87. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/actionlint.yaml +0 -0
  88. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/dependabot.yml +0 -0
  89. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/format_pr_body.sh +0 -0
  90. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/labeler.yml +0 -0
  91. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/format_pr_body.yaml +0 -0
  92. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/image_310p_openeuler.yml +0 -0
  93. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/image_310p_ubuntu.yml +0 -0
  94. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/image_a3_openeuler.yml +0 -0
  95. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/image_a3_ubuntu.yml +0 -0
  96. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/image_openeuler.yml +0 -0
  97. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/image_ubuntu.yml +0 -0
  98. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/label_merge_conflict.yml +0 -0
  99. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/labeler.yml +0 -0
  100. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/matchers/actionlint.json +0 -0
  101. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/matchers/mypy.json +0 -0
  102. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/matchers/ruff.json +0 -0
  103. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/pre-commit.yml +0 -0
  104. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/release_code.yml +0 -0
  105. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/reminder_comment.yml +0 -0
  106. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
  107. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.gitignore +0 -0
  108. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.pre-commit-config.yaml +0 -0
  109. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.readthedocs.yaml +0 -0
  110. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/CMakeLists.txt +0 -0
  111. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/CODE_OF_CONDUCT.md +0 -0
  112. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/CONTRIBUTING.md +0 -0
  113. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/DCO +0 -0
  114. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/LICENSE +0 -0
  115. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/README.md +0 -0
  116. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/README.zh.md +0 -0
  117. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/README.md +0 -0
  118. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  119. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/requirements-bench.txt +0 -0
  120. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  121. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/scripts/perf_result_template.md +0 -0
  122. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  123. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/tests/latency-tests.json +0 -0
  124. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/tests/serving-tests.json +0 -0
  125. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/benchmarks/tests/throughput-tests.json +0 -0
  126. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/cmake/utils.cmake +0 -0
  127. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/codecov.yml +0 -0
  128. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/collect_env.py +0 -0
  129. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/camem_allocator.cpp +0 -0
  130. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/bgmv_expand.cpp +0 -0
  131. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/bgmv_shrink.cpp +0 -0
  132. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  133. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  134. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/sgmv_expand.cpp +0 -0
  135. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/sgmv_shrink.cpp +0 -0
  136. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/types.h +0 -0
  137. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/kernels/utils.h +0 -0
  138. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
  139. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
  140. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
  141. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
  142. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
  143. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
  144. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
  145. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
  146. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
  147. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
  148. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
  149. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
  150. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
  151. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
  152. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
  153. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
  154. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
  155. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
  156. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
  157. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
  158. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
  159. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
  160. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
  161. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
  162. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
  163. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
  164. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/ops.h +0 -0
  165. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/torch_binding.cpp +0 -0
  166. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/torch_binding_meta.cpp +0 -0
  167. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/csrc/utils.h +0 -0
  168. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/Makefile +0 -0
  169. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/README.md +0 -0
  170. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/requirements-docs.txt +0 -0
  171. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/requirements-test.txt +0 -0
  172. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/_templates/sections/header.html +0 -0
  173. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  174. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  175. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/community/contributors.md +0 -0
  176. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/community/governance.md +0 -0
  177. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/community/user_stories/index.md +0 -0
  178. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/community/user_stories/llamafactory.md +0 -0
  179. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/community/versioning_policy.md +0 -0
  180. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/contribution/index.md +0 -0
  181. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/contribution/testing.md +0 -0
  182. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -0
  183. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -0
  184. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -0
  185. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -0
  186. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  187. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/index.md +0 -0
  188. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  189. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  190. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  191. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -0
  192. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  193. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/index.md +0 -0
  194. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  195. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
  196. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  197. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/modeling/index.md +0 -0
  198. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/index.md +0 -0
  199. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  200. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  201. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  202. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/faqs.md +0 -0
  203. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/index.md +0 -0
  204. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  205. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  206. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  207. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  208. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  209. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  210. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  211. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  212. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  213. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  214. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  215. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  216. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  217. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  218. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  219. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  220. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  221. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  222. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  223. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  224. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  225. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  226. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  227. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  228. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  229. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  230. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  231. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  232. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  233. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  234. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  235. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  236. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  237. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  238. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  239. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  240. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  241. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  242. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  243. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  244. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  245. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  246. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  247. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  248. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  249. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  250. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  251. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  252. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  253. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  254. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/quick_start.md +0 -0
  255. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi-node_dsv3.2.md +0 -0
  256. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node.md +0 -0
  257. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_kimi.md +0 -0
  258. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md +0 -0
  259. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md +0 -0
  260. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_qwen3vl.md +0 -0
  261. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_ray.md +0 -0
  262. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu.md +0 -0
  263. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_moge.md +0 -0
  264. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_quantization.md +0 -0
  265. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  266. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_node_300i.md +0 -0
  267. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu.md +0 -0
  268. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_audio.md +0 -0
  269. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_multimodal.md +0 -0
  270. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  271. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
  272. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/configuration/additional_config.md +0 -0
  273. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/configuration/env_vars.md +0 -0
  274. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/configuration/index.md +0 -0
  275. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  276. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  277. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  278. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/index.md +0 -0
  279. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/lora.md +0 -0
  280. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/quantization.md +0 -0
  281. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  282. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  283. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/release_notes.md +0 -0
  284. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/support_matrix/index.md +0 -0
  285. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  286. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  287. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/README.md +0 -0
  288. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/gen_ranktable.py +0 -0
  289. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/gen_ranktable.sh +0 -0
  290. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  291. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +0 -0
  292. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  293. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/eplb/eplb_deepseek.py +0 -0
  294. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/eplb/eplb_strategy.py +0 -0
  295. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/external_online_dp/README.md +0 -0
  296. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/external_online_dp/launch_online_dp.py +0 -0
  297. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/external_online_dp/run_dp_template.sh +0 -0
  298. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_data_parallel.py +0 -0
  299. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_disaggregated_prefill_npu.py +0 -0
  300. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_dualbatch_overlap_npu.py +0 -0
  301. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_embed.py +0 -0
  302. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_external_launcher.py +0 -0
  303. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_inference_audio_language.py +0 -0
  304. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_inference_npu.py +0 -0
  305. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_inference_npu_tp2.py +0 -0
  306. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_inference_sleep_mode_npu.py +0 -0
  307. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/offline_weight_load.py +0 -0
  308. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/prompt_embedding_inference.py +0 -0
  309. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/examples/run_dp_server.sh +0 -0
  310. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/format.sh +0 -0
  311. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/mypy.ini +0 -0
  312. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/packages.txt +0 -0
  313. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/requirements-lint.txt +0 -0
  314. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/setup.cfg +0 -0
  315. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/setup.py +0 -0
  316. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/__init__.py +0 -0
  317. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  318. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  319. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/__init__.py +0 -0
  320. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/common.sh +0 -0
  321. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/conftest.py +0 -0
  322. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  323. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  324. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/model_utils.py +0 -0
  325. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +0 -0
  326. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
  327. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +0 -0
  328. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
  329. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  330. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +0 -0
  331. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
  332. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
  333. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/accuracy.txt +0 -0
  334. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/conftest.py +0 -0
  335. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/report_template.md +0 -0
  336. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
  337. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_data_parallel.py +0 -0
  338. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_expert_parallel.py +0 -0
  339. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_external_launcher.py +0 -0
  340. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_full_graph_mode.py +0 -0
  341. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
  342. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
  343. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_offline_inference_distributed.py +0 -0
  344. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
  345. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_prefix_caching.py +0 -0
  346. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_qwen3_moe.py +0 -0
  347. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_single_request_aclgraph.py +0 -0
  348. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_torchair_graph_mode.py +0 -0
  349. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_weight_loader.py +0 -0
  350. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py +0 -0
  351. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +0 -0
  352. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -0
  353. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen3_32b.py +0 -0
  354. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen3_32b_int8.py +0 -0
  355. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/__init__.py +0 -0
  356. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/__init__.py +0 -0
  357. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +0 -0
  358. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +0 -0
  359. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/multi_node_config.py +0 -0
  360. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/utils.py +0 -0
  361. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/scripts/run.sh +0 -0
  362. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/test_multi_node.py +0 -0
  363. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -0
  364. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  365. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  366. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  367. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/prompts/example.txt +0 -0
  368. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/run_disagg_pd.sh +0 -0
  369. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/run_doctests.sh +0 -0
  370. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/__init__.py +0 -0
  371. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/__init__.py +0 -0
  372. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_bgmv_expand.py +0 -0
  373. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +0 -0
  374. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_fused_moe.py +0 -0
  375. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  376. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_mla_preprocess.py +0 -0
  377. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
  378. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +0 -0
  379. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -0
  380. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -0
  381. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_aclgraph.py +0 -0
  382. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
  383. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_ascend_scheduler.py +0 -0
  384. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_bge_model.py +0 -0
  385. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_camem.py +0 -0
  386. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_chunked.py +0 -0
  387. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_embedding.py +0 -0
  388. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_embedding_aclgraph.py +0 -0
  389. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
  390. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  391. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -0
  392. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  393. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_quantization.py +0 -0
  394. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_sampler.py +0 -0
  395. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_vlm.py +0 -0
  396. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/utils.py +0 -0
  397. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
  398. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/__init__.py +0 -0
  399. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/attention/test_attention_mask.py +0 -0
  400. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/attention/test_attention_v1.py +0 -0
  401. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/attention/test_mla_v1.py +0 -0
  402. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/base.py +0 -0
  403. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/compilation/test_acl_graph.py +0 -0
  404. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/conftest.py +0 -0
  405. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/core/test_schedule_config.py +0 -0
  406. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/core/test_scheduler.py +0 -0
  407. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/device_allocator/test_camem.py +0 -0
  408. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  409. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  410. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/test_communicator.py +0 -0
  411. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  412. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/test_parallel_state.py +0 -0
  413. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
  414. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
  415. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
  416. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
  417. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
  418. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
  419. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/test_eplb_utils.py +0 -0
  420. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/fake_weight/config.json +0 -0
  421. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  422. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_mooncake_connector.py +0 -0
  423. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -0
  424. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  425. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  426. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/utils.py +0 -0
  427. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/models/__init__.py +0 -0
  428. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/models/conftest.py +0 -0
  429. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/models/test_qwen2_5_vl.py +0 -0
  430. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
  431. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/models/test_qwen2_vl.py +0 -0
  432. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_base.py +0 -0
  433. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_decorator.py +0 -0
  434. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_layers.py +0 -0
  435. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_metadata.py +0 -0
  436. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_ms_split.py +0 -0
  437. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/expert_map.json +0 -0
  438. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_activation.py +0 -0
  439. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_comm_utils.py +0 -0
  440. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_common_fused_moe.py +0 -0
  441. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  442. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +0 -0
  443. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_fused_ops.py +0 -0
  444. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_layernorm.py +0 -0
  445. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_linear.py +0 -0
  446. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_moe_comm_method.py +0 -0
  447. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_token_dispatcher.py +0 -0
  448. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
  449. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  450. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  451. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_quant_config.py +0 -0
  452. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_utils.py +0 -0
  453. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -0
  454. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w4a8_dynamic.py +0 -0
  455. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
  456. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
  457. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/sample/test_rejection_sampler.py +0 -0
  458. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/sample/test_sampler.py +0 -0
  459. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/test_ascend_config.py +0 -0
  460. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/test_envs.py +0 -0
  461. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/test_platform.py +0 -0
  462. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/test_utils.py +0 -0
  463. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/__init__.py +0 -0
  464. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -0
  465. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +0 -0
  466. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/ops/test_torchair_fused_moe.py +0 -0
  467. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +0 -0
  468. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +0 -0
  469. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
  470. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/test_torchair_attention.py +0 -0
  471. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/test_torchair_mla.py +0 -0
  472. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/test_utils.py +0 -0
  473. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/worker/test_input_batch.py +0 -0
  474. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/worker/test_model_runner_v1.py +0 -0
  475. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tests/ut/worker/test_worker_v1.py +0 -0
  476. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/actionlint.sh +0 -0
  477. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/aisbench.py +0 -0
  478. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/check_python_src_init.py +0 -0
  479. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/check_repo.sh +0 -0
  480. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/enforce_regex_import.py +0 -0
  481. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/mypy.sh +0 -0
  482. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/png-lint.sh +0 -0
  483. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/send_mm_request.py +0 -0
  484. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/shellcheck.sh +0 -0
  485. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/tools/sphinx-lint.sh +0 -0
  486. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/typos.toml +0 -0
  487. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/__init__.py +0 -0
  488. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ascend_config.py +0 -0
  489. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ascend_forward_context.py +0 -0
  490. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/__init__.py +0 -0
  491. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/attention_mask.py +0 -0
  492. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/sfa_v1.py +0 -0
  493. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/utils.py +0 -0
  494. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/compilation/__init__.py +0 -0
  495. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/__init__.py +0 -0
  496. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/recompute_schedule_config.py +0 -0
  497. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/recompute_scheduler.py +0 -0
  498. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/schedule_config.py +0 -0
  499. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/scheduler.py +0 -0
  500. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/cpu_binding.py +0 -0
  501. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/device_allocator/__init__.py +0 -0
  502. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/device_allocator/camem.py +0 -0
  503. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/__init__.py +0 -0
  504. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/communicator.py +0 -0
  505. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_connector.py +0 -0
  506. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_manager/__init__.py +0 -0
  507. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
  508. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
  509. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
  510. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  511. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  512. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +0 -0
  513. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/__init__.py +0 -0
  514. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/config_data.py +0 -0
  515. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/kv_transfer.py +0 -0
  516. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/mooncake_engine.py +0 -0
  517. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/mooncake_store.py +0 -0
  518. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +0 -0
  519. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake_connector.py +0 -0
  520. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake_layerwise_connector.py +0 -0
  521. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/parallel_state.py +0 -0
  522. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/utils.py +0 -0
  523. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/__init__.py +0 -0
  524. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/adaptor/__init__.py +0 -0
  525. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
  526. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/__init__.py +0 -0
  527. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -0
  528. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/eplb_utils.py +0 -0
  529. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/eplb_worker.py +0 -0
  530. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/__init__.py +0 -0
  531. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
  532. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
  533. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -0
  534. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
  535. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -0
  536. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
  537. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/eplb_updator.py +0 -0
  538. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/utils.py +0 -0
  539. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/__init__.py +0 -0
  540. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/lora_ops.py +0 -0
  541. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/punica_npu.py +0 -0
  542. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/utils.py +0 -0
  543. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/meta_registration.py +0 -0
  544. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/__init__.py +0 -0
  545. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/deepseek_v3_2.py +0 -0
  546. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/layers/__init__.py +0 -0
  547. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/layers/mla.py +0 -0
  548. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/layers/sfa.py +0 -0
  549. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_5_omni_thinker.py +0 -0
  550. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen3_next.py +0 -0
  551. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/__init__.py +0 -0
  552. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/base.py +0 -0
  553. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/context.py +0 -0
  554. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/decorator.py +0 -0
  555. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/layers.py +0 -0
  556. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/metadata.py +0 -0
  557. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/ms_split.py +0 -0
  558. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/__init__.py +0 -0
  559. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/activation.py +0 -0
  560. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/attention.py +0 -0
  561. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/casual_conv1d.py +0 -0
  562. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  563. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/fla.py +0 -0
  564. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/layernorm.py +0 -0
  565. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/linear_op.py +0 -0
  566. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/__init__.py +0 -0
  567. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/comm_utils.py +0 -0
  568. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/experts_selector.py +0 -0
  569. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +0 -0
  570. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/moe_comm_method.py +0 -0
  571. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/moe_mlp.py +0 -0
  572. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/register_custom_ops.py +0 -0
  573. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/sigmoid_gating.py +0 -0
  574. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
  575. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/weight_prefetch.py +0 -0
  576. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/__init__.py +0 -0
  577. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_config.py +0 -0
  578. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_core.py +0 -0
  579. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
  580. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_message_queue.py +0 -0
  581. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -0
  582. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
  583. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_attention_layer.py +0 -0
  584. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_deepseek_mtp.py +0 -0
  585. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
  586. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_logits.py +0 -0
  587. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
  588. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
  589. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_roberta.py +0 -0
  590. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_triton.py +0 -0
  591. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_weight_loader.py +0 -0
  592. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/__init__.py +0 -0
  593. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/utils.py +0 -0
  594. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -0
  595. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w4a8_dynamic.py +0 -0
  596. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w8a8.py +0 -0
  597. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w8a8_dynamic.py +0 -0
  598. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/__init__.py +0 -0
  599. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
  600. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
  601. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/rejection_sampler.py +0 -0
  602. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/sampler.py +0 -0
  603. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/__init__.py +0 -0
  604. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/__init__.py +0 -0
  605. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/__init__.py +0 -0
  606. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/qwen2.py +0 -0
  607. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/qwen3_moe.py +0 -0
  608. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +0 -0
  609. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +0 -0
  610. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
  611. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_pangu_moe.py +0 -0
  612. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/__init__.py +0 -0
  613. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/sequence_parallel.py +0 -0
  614. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/shared_weight_layer.py +0 -0
  615. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_activation.py +0 -0
  616. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_layernorm.py +0 -0
  617. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +0 -0
  618. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py +0 -0
  619. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/quantization/__init__.py +0 -0
  620. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +0 -0
  621. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +0 -0
  622. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_attention.py +0 -0
  623. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_mla.py +0 -0
  624. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_model_runner.py +0 -0
  625. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_worker.py +0 -0
  626. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/__init__.py +0 -0
  627. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/block_table.py +0 -0
  628. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/npu_input_batch.py +0 -0
  629. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  630. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/entry_points.txt +0 -0
  631. {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -30,7 +30,7 @@ jobs:
30
30
  runs-on: ${{ inputs.runner }}
31
31
  name: ${{ inputs.model_name }} accuracy
32
32
  container:
33
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
33
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
34
34
  env:
35
35
  VLLM_USE_MODELSCOPE: True
36
36
  # 1. If version specified (work_dispatch), do specified branch accuracy test
@@ -106,8 +106,8 @@ jobs:
106
106
  # ------------------------------------ v1 spec decode test ------------------------------------ #
107
107
  pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
108
108
  pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
109
- # Fix me: OOM error
110
- #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
109
+ # Fix me: test_eagle_correctness OOM error
110
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
111
111
 
112
112
  pytest -sv tests/e2e/singlecard/ops/
113
113
 
@@ -68,5 +68,5 @@ jobs:
68
68
  with:
69
69
  vllm: v0.11.0
70
70
  runner: linux-aarch64-${{ matrix.runner }}
71
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
71
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
72
72
  model_name: ${{ matrix.model_name }}
@@ -23,7 +23,7 @@ jobs:
23
23
  # This is a runner with no NPU for k8s controller
24
24
  runs-on: linux-aarch64-a3-0
25
25
  container:
26
- image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
26
+ image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
27
27
  env:
28
28
  KUBECONFIG: /tmp/kubeconfig
29
29
  KUBECTL: /root/.cache/.kube/kubectl
@@ -56,7 +56,7 @@ jobs:
56
56
  vllm_use_v1: 1
57
57
  max-parallel: 1
58
58
  container:
59
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
59
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
60
60
  volumes:
61
61
  - /usr/local/dcmi:/usr/local/dcmi
62
62
  - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -57,7 +57,13 @@ jobs:
57
57
  - name: Print
58
58
  run: |
59
59
  lscpu
60
-
60
+
61
+ - name: Free up disk space
62
+ uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
63
+ with:
64
+ tool-cache: true
65
+ docker-images: false
66
+
61
67
  - name: Build wheel
62
68
  run: |
63
69
  ls
@@ -47,7 +47,7 @@ jobs:
47
47
  name: vLLM Ascend test
48
48
  runs-on: ${{ matrix.os }}
49
49
  container:
50
- image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
50
+ image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
51
51
  env:
52
52
  DEBIAN_FRONTEND: noninteractive
53
53
  steps:
@@ -97,4 +97,4 @@ jobs:
97
97
  VLLM_USE_MODELSCOPE: True
98
98
  run: |
99
99
  # TODO: enable more tests
100
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
100
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
@@ -145,5 +145,5 @@ jobs:
145
145
  with:
146
146
  vllm: ${{ matrix.vllm_version }}
147
147
  runner: linux-aarch64-a2
148
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
148
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
149
149
  type: light
@@ -58,7 +58,7 @@ jobs:
58
58
  runs-on: ${{ matrix.os }}
59
59
  container:
60
60
  # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
61
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-310p-ubuntu22.04-py3.11
61
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
62
62
  env:
63
63
  VLLM_LOGGING_LEVEL: ERROR
64
64
  VLLM_USE_MODELSCOPE: True
@@ -76,5 +76,5 @@ jobs:
76
76
  with:
77
77
  vllm: ${{ matrix.vllm_version }}
78
78
  runner: linux-aarch64-a2
79
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
79
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
80
80
  type: full
@@ -41,5 +41,5 @@ jobs:
41
41
  with:
42
42
  vllm: main
43
43
  runner: linux-aarch64-a2
44
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
44
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
45
45
  type: full
@@ -79,7 +79,7 @@ jobs:
79
79
  with:
80
80
  vllm: v0.11.0
81
81
  runner: linux-aarch64-${{ matrix.runner }}
82
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
82
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
83
83
  model_name: ${{ matrix.model_name }}
84
84
  upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
85
85
 
@@ -49,7 +49,7 @@ jobs:
49
49
  runs-on: linux-arm64-npu-static-8
50
50
 
51
51
  container:
52
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
52
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
53
53
  volumes:
54
54
  - /usr/local/dcmi:/usr/local/dcmi
55
55
  - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -109,4 +109,4 @@ jobs:
109
109
  - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
110
110
  run: |
111
111
  git config --global --add safe.directory/__w/vllm-ascend/vllm-ascend
112
- bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
112
+ bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
@@ -15,24 +15,33 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
18
+ FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG="v0.3.7.post2"
22
23
 
23
24
  # Define environments
24
25
  ENV DEBIAN_FRONTEND=noninteractive
25
26
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
26
27
 
27
- RUN apt-get update -y && \
28
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
29
- rm -rf /var/cache/apt/* && \
30
- rm -rf /var/lib/apt/lists/*
31
-
32
28
  WORKDIR /workspace
33
29
 
34
30
  COPY . /vllm-workspace/vllm-ascend/
35
31
 
32
+ # Install Mooncake dependencies
33
+ RUN apt-get update -y && \
34
+ apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev && \
35
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
36
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
37
+ cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
38
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/lib64 && \
39
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
40
+ make -j$(nproc) && make install && \
41
+ rm -fr /vllm-workspace/Mooncake/build && \
42
+ rm -rf /var/cache/apt/* && \
43
+ rm -rf /var/lib/apt/lists/*
44
+
36
45
  RUN pip config set global.index-url ${PIP_INDEX_URL}
37
46
 
38
47
  # Install vLLM
@@ -40,7 +49,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40
49
  ARG VLLM_TAG=v0.11.0
41
50
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
42
51
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
43
- RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
52
+ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
44
53
  python3 -m pip uninstall -y triton && \
45
54
  python3 -m pip cache purge
46
55
 
@@ -54,7 +63,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
54
63
  python3 -m pip cache purge
55
64
 
56
65
  # Install modelscope (for fast download) and ray (for multinode)
57
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
66
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
58
67
  python3 -m pip cache purge
59
68
 
60
- CMD ["/bin/bash"]
69
+ CMD ["/bin/bash"]
@@ -15,7 +15,7 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11
18
+ FROM quay.io/ascend/cann:8.3.rc2-310p-ubuntu22.04-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
@@ -40,7 +40,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40
40
  ARG VLLM_TAG=v0.11.0
41
41
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
42
42
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
43
- RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
43
+ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
44
44
  python3 -m pip uninstall -y triton && \
45
45
  python3 -m pip cache purge
46
46
 
@@ -15,7 +15,7 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11
18
+ FROM quay.io/ascend/cann:8.3.rc2-310p-openeuler24.03-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
@@ -38,7 +38,7 @@ ARG VLLM_TAG=v0.11.0
38
38
 
39
39
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
40
40
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
41
- RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
41
+ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
42
42
  python3 -m pip uninstall -y triton && \
43
43
  python3 -m pip cache purge
44
44
 
@@ -15,32 +15,40 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11
18
+ FROM quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG=v0.3.7.post2
22
23
 
24
+ COPY . /vllm-workspace/vllm-ascend/
23
25
  # Define environments
24
26
  ENV DEBIAN_FRONTEND=noninteractive
25
27
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
26
28
 
27
- RUN apt-get update -y && \
28
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
29
- rm -rf /var/cache/apt/* && \
30
- rm -rf /var/lib/apt/lists/*
29
+ RUN pip config set global.index-url ${PIP_INDEX_URL}
31
30
 
32
31
  WORKDIR /workspace
33
32
 
34
- COPY . /vllm-workspace/vllm-ascend/
35
-
36
- RUN pip config set global.index-url ${PIP_INDEX_URL}
33
+ # Install Mooncake dependencies
34
+ RUN apt-get update -y && \
35
+ apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev && \
36
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
37
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
38
+ cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
39
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/lib64 && \
40
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
41
+ make -j$(nproc) && make install && \
42
+ rm -fr /vllm-workspace/Mooncake/build && \
43
+ rm -rf /var/cache/apt/* && \
44
+ rm -rf /var/lib/apt/lists/*
37
45
 
38
46
  # Install vLLM
39
47
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40
48
  ARG VLLM_TAG=v0.11.0
41
49
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
42
50
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
43
- RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
51
+ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
44
52
  python3 -m pip uninstall -y triton && \
45
53
  python3 -m pip cache purge
46
54
 
@@ -54,7 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
54
62
  python3 -m pip cache purge
55
63
 
56
64
  # Install modelscope (for fast download) and ray (for multinode)
57
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
65
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
58
66
  python3 -m pip cache purge
59
67
 
60
68
  CMD ["/bin/bash"]
@@ -15,30 +15,43 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11
18
+ FROM quay.io/ascend/cann:8.3.rc2-a3-openeuler24.03-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG="v0.3.7.post2"
22
23
 
23
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
24
25
 
25
- RUN yum update -y && \
26
- yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
- rm -rf /var/cache/yum
28
-
29
26
  RUN pip config set global.index-url ${PIP_INDEX_URL}
30
27
 
31
28
  WORKDIR /workspace
32
29
 
33
30
  COPY . /vllm-workspace/vllm-ascend/
34
31
 
32
+ SHELL ["/bin/bash", "-c"]
33
+
34
+ RUN yum update -y && \
35
+ yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
36
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
37
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
38
+ ARCH=$(uname -m) && \
39
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
40
+ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
41
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
42
+ cd /vllm-workspace/Mooncake && \
43
+ bash mooncake_installer.sh -y && \
44
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
45
+ make -j$(nproc) && make install && \
46
+ rm -fr /vllm-workspace/Mooncake/build && \
47
+ rm -rf /var/cache/yum/*
48
+
35
49
  # Install vLLM
36
50
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
37
51
  ARG VLLM_TAG=v0.11.0
38
-
39
52
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
40
53
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
41
- RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
54
+ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
42
55
  python3 -m pip uninstall -y triton && \
43
56
  python3 -m pip cache purge
44
57
 
@@ -52,7 +65,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
52
65
  python3 -m pip cache purge
53
66
 
54
67
  # Install modelscope (for fast download) and ray (for multinode)
55
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
68
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
56
69
  python3 -m pip cache purge
57
70
 
58
71
  CMD ["/bin/bash"]
@@ -15,16 +15,14 @@
15
15
  # This file is a part of the vllm-ascend project.
16
16
  #
17
17
 
18
- FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11
18
+ FROM quay.io/ascend/cann:8.3.rc2-910b-openeuler24.03-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG="v0.3.7.post2"
22
23
 
23
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
24
25
 
25
- RUN yum update -y && \
26
- yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
- rm -rf /var/cache/yum
28
26
 
29
27
  RUN pip config set global.index-url ${PIP_INDEX_URL}
30
28
 
@@ -32,13 +30,29 @@ WORKDIR /workspace
32
30
 
33
31
  COPY . /vllm-workspace/vllm-ascend/
34
32
 
33
+ SHELL ["/bin/bash", "-c"]
34
+
35
+ RUN yum update -y && \
36
+ yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
37
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
38
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
39
+ ARCH=$(uname -m) && \
40
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
41
+ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
42
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
43
+ cd /vllm-workspace/Mooncake && \
44
+ bash mooncake_installer.sh -y && \
45
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
46
+ make -j$(nproc) && make install && \
47
+ rm -fr /vllm-workspace/Mooncake/build && \
48
+ rm -rf /var/cache/yum/*
49
+
35
50
  # Install vLLM
36
51
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
37
- ARG VLLM_TAG=v0.11.0
38
-
52
+ ARG VLLM_TAG=v0.11.2
39
53
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
40
54
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
41
- RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
55
+ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
42
56
  python3 -m pip uninstall -y triton && \
43
57
  python3 -m pip cache purge
44
58
 
@@ -52,7 +66,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
52
66
  python3 -m pip cache purge
53
67
 
54
68
  # Install modelscope (for fast download) and ray (for multinode)
55
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
69
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
56
70
  python3 -m pip cache purge
57
71
 
58
- CMD ["/bin/bash"]
72
+ CMD ["/bin/bash"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm_ascend
3
- Version: 0.11.0rc1
3
+ Version: 0.11.0rc3
4
4
  Summary: vLLM Ascend backend plugin
5
5
  Home-page: https://github.com/vllm-project/vllm-ascend
6
6
  Author: vLLM-Ascend team
@@ -37,6 +37,7 @@ Requires-Dist: msgpack
37
37
  Requires-Dist: quart
38
38
  Requires-Dist: numba
39
39
  Requires-Dist: torch-npu==2.7.1
40
+ Requires-Dist: transformers<=4.57.1
40
41
  Dynamic: author
41
42
  Dynamic: classifier
42
43
  Dynamic: description
@@ -75,7 +75,7 @@ myst_substitutions = {
75
75
  'pip_vllm_ascend_version': "0.11.0rc0",
76
76
  'pip_vllm_version': "0.11.0",
77
77
  # CANN image tag
78
- 'cann_image_tag': "8.3.rc1-910b-ubuntu22.04-py3.11",
78
+ 'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11",
79
79
  # vllm version in ci
80
80
  'ci_vllm_version': 'v0.11.0rc3',
81
81
  }
@@ -79,19 +79,19 @@ source vllm-ascend-env/bin/activate
79
79
  pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions
80
80
 
81
81
  # Download and install the CANN package.
82
- wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run
83
- chmod +x ./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run
84
- ./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run --full
85
- # https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc1_linux-aarch64.run
82
+ wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run
83
+ chmod +x ./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run
84
+ ./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run --full
85
+ # https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc2_linux-aarch64.run
86
86
 
87
87
  source /usr/local/Ascend/ascend-toolkit/set_env.sh
88
- wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run
89
- chmod +x ./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run
90
- ./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run --install
88
+ wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run
89
+ chmod +x ./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run
90
+ ./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run --install
91
91
 
92
- wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run
93
- chmod +x ./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run
94
- ./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run --install
92
+ wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run
93
+ chmod +x ./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run
94
+ ./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run --install
95
95
 
96
96
  source /usr/local/Ascend/nnal/atb/set_env.sh
97
97
  ```
@@ -8,6 +8,7 @@ single_npu_multimodal
8
8
  single_npu_audio
9
9
  single_npu_qwen3_embedding
10
10
  single_npu_qwen3_quantization
11
+ single_node_pd_disaggregation_llmdatadist
11
12
  multi_npu_qwen3_next
12
13
  multi_npu
13
14
  multi_npu_moge
@@ -51,7 +51,7 @@ Install the Ascend BiSheng toolkit:
51
51
  wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run
52
52
  chmod a+x Ascend-BiSheng-toolkit_aarch64.run
53
53
  ./Ascend-BiSheng-toolkit_aarch64.run --install
54
- source /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
54
+ source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh
55
55
  ```
56
56
 
57
57
  Install Triton Ascend:
@@ -75,7 +75,7 @@ Coming soon ...
75
75
  Please make sure you have already executed the command:
76
76
 
77
77
  ```bash
78
- source /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
78
+ source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh
79
79
  ```
80
80
 
81
81
  :::::{tab-set}