vllm-ascend 0.11.0rc2__tar.gz → 0.11.0rc3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (629)
  1. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/Dockerfile +15 -6
  2. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/Dockerfile.a3 +16 -8
  3. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/Dockerfile.a3.openEuler +19 -6
  4. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/Dockerfile.openEuler +20 -6
  5. {vllm_ascend-0.11.0rc2/vllm_ascend.egg-info → vllm_ascend-0.11.0rc3}/PKG-INFO +2 -1
  6. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/index.md +1 -0
  7. vllm_ascend-0.11.0rc3/docs/source/tutorials/single_node_pd_disaggregation_llmdatadist.md +181 -0
  8. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/pyproject.toml +1 -0
  9. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/requirements.txt +2 -0
  10. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w8a8.py +0 -1
  11. vllm_ascend-0.11.0rc3/tools/mooncake_installer.sh +331 -0
  12. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/_version.py +3 -3
  13. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/mla_v1.py +1 -1
  14. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/adaptor/vllm_adaptor.py +3 -3
  15. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_5_vl.py +68 -2
  16. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_5_vl_without_padding.py +66 -0
  17. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_vl.py +2 -2
  18. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/common_fused_moe.py +4 -17
  19. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/linear.py +1 -2
  20. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/__init__.py +1 -1
  21. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/__init__.py +6 -1
  22. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/eagle_proposer.py +3 -1
  23. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/interface.py +2 -1
  24. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/mtp_proposer.py +5 -2
  25. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/ngram_proposer.py +2 -1
  26. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_fused_moe.py +1 -1
  27. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_sfa.py +2 -2
  28. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/utils.py +1 -1
  29. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/utils.py +5 -2
  30. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/model_runner_v1.py +63 -25
  31. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/worker_v1.py +1 -1
  32. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3/vllm_ascend.egg-info}/PKG-INFO +2 -1
  33. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/SOURCES.txt +2 -0
  34. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/requires.txt +1 -0
  35. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.gemini/config.yaml +0 -0
  36. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/Dockerfile.buildwheel +0 -0
  37. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  38. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  39. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  40. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  41. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  42. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  43. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  44. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  45. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  46. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  47. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  48. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  49. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  50. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/actionlint.yaml +0 -0
  51. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/dependabot.yml +0 -0
  52. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/format_pr_body.sh +0 -0
  53. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/labeler.yml +0 -0
  54. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/_accuracy_test.yaml +0 -0
  55. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/_e2e_test.yaml +0 -0
  56. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/accuracy_test.yaml +0 -0
  57. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/format_pr_body.yaml +0 -0
  58. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/image_310p_openeuler.yml +0 -0
  59. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/image_310p_ubuntu.yml +0 -0
  60. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/image_a3_openeuler.yml +0 -0
  61. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/image_a3_ubuntu.yml +0 -0
  62. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/image_openeuler.yml +0 -0
  63. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/image_ubuntu.yml +0 -0
  64. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/label_merge_conflict.yml +0 -0
  65. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/labeler.yml +0 -0
  66. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/matchers/actionlint.json +0 -0
  67. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/matchers/mypy.json +0 -0
  68. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/matchers/ruff.json +0 -0
  69. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/multi_node_test.yaml +0 -0
  70. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/nightly_benchmarks.yaml +0 -0
  71. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/pre-commit.yml +0 -0
  72. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/release_code.yml +0 -0
  73. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/release_whl.yml +0 -0
  74. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/reminder_comment.yml +0 -0
  75. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_dist.yaml +0 -0
  76. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
  77. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test.yaml +0 -0
  78. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_310p.yaml +0 -0
  79. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_full.yaml +0 -0
  80. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +0 -0
  81. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_models.yaml +0 -0
  82. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_pd.yaml +0 -0
  83. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.gitignore +0 -0
  84. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.pre-commit-config.yaml +0 -0
  85. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/.readthedocs.yaml +0 -0
  86. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/CMakeLists.txt +0 -0
  87. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/CODE_OF_CONDUCT.md +0 -0
  88. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/CONTRIBUTING.md +0 -0
  89. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/DCO +0 -0
  90. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/Dockerfile.310p +0 -0
  91. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/Dockerfile.310p.openEuler +0 -0
  92. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/LICENSE +0 -0
  93. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/README.md +0 -0
  94. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/README.zh.md +0 -0
  95. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/README.md +0 -0
  96. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
  97. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/requirements-bench.txt +0 -0
  98. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  99. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/scripts/perf_result_template.md +0 -0
  100. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
  101. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/tests/latency-tests.json +0 -0
  102. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/tests/serving-tests.json +0 -0
  103. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/benchmarks/tests/throughput-tests.json +0 -0
  104. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/cmake/utils.cmake +0 -0
  105. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/codecov.yml +0 -0
  106. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/collect_env.py +0 -0
  107. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/camem_allocator.cpp +0 -0
  108. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/bgmv_expand.cpp +0 -0
  109. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/bgmv_shrink.cpp +0 -0
  110. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  111. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  112. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/sgmv_expand.cpp +0 -0
  113. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/sgmv_shrink.cpp +0 -0
  114. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/types.h +0 -0
  115. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/kernels/utils.h +0 -0
  116. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
  117. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
  118. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
  119. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
  120. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
  121. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
  122. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
  123. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
  124. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
  125. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
  126. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
  127. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
  128. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
  129. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
  130. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
  131. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
  132. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
  133. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
  134. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
  135. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
  136. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
  137. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
  138. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
  139. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
  140. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
  141. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
  142. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/ops.h +0 -0
  143. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/torch_binding.cpp +0 -0
  144. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/torch_binding_meta.cpp +0 -0
  145. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/csrc/utils.h +0 -0
  146. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/Makefile +0 -0
  147. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/README.md +0 -0
  148. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/requirements-docs.txt +0 -0
  149. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/requirements-test.txt +0 -0
  150. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/_templates/sections/header.html +0 -0
  151. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  152. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  153. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/community/contributors.md +0 -0
  154. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/community/governance.md +0 -0
  155. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/community/user_stories/index.md +0 -0
  156. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/community/user_stories/llamafactory.md +0 -0
  157. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/community/versioning_policy.md +0 -0
  158. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/conf.py +0 -0
  159. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/contribution/index.md +0 -0
  160. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/contribution/testing.md +0 -0
  161. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -0
  162. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -0
  163. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -0
  164. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -0
  165. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
  166. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/index.md +0 -0
  167. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  168. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  169. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  170. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -0
  171. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  172. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/index.md +0 -0
  173. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  174. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
  175. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  176. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/modeling/index.md +0 -0
  177. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/index.md +0 -0
  178. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  179. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  180. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  181. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/faqs.md +0 -0
  182. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/index.md +0 -0
  183. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/installation.md +0 -0
  184. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  185. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  186. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  187. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  188. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  189. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  190. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  191. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  192. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  193. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  194. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  195. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  196. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  197. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  198. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  199. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  200. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  201. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  202. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  203. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  204. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  205. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  206. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  207. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  208. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  209. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  210. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  211. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  212. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  213. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  214. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  215. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  216. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  217. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  218. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  219. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  220. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  221. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  222. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  223. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  224. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  225. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  226. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  227. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  228. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  229. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  230. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  231. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  232. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  233. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  234. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/quick_start.md +0 -0
  235. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi-node_dsv3.2.md +0 -0
  236. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node.md +0 -0
  237. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_kimi.md +0 -0
  238. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md +0 -0
  239. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md +0 -0
  240. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_qwen3vl.md +0 -0
  241. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_node_ray.md +0 -0
  242. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu.md +0 -0
  243. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_moge.md +0 -0
  244. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_quantization.md +0 -0
  245. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  246. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_qwen3_next.md +0 -0
  247. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_node_300i.md +0 -0
  248. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu.md +0 -0
  249. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_audio.md +0 -0
  250. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_multimodal.md +0 -0
  251. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  252. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
  253. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/configuration/additional_config.md +0 -0
  254. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/configuration/env_vars.md +0 -0
  255. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/configuration/index.md +0 -0
  256. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -0
  257. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  258. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
  259. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  260. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/index.md +0 -0
  261. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/lora.md +0 -0
  262. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/quantization.md +0 -0
  263. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  264. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  265. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/release_notes.md +0 -0
  266. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/support_matrix/index.md +0 -0
  267. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  268. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  269. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/README.md +0 -0
  270. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/gen_ranktable.py +0 -0
  271. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/gen_ranktable.sh +0 -0
  272. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +0 -0
  273. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -0
  274. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  275. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +0 -0
  276. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  277. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/eplb/eplb_deepseek.py +0 -0
  278. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/eplb/eplb_strategy.py +0 -0
  279. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/external_online_dp/README.md +0 -0
  280. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/external_online_dp/launch_online_dp.py +0 -0
  281. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/external_online_dp/run_dp_template.sh +0 -0
  282. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_data_parallel.py +0 -0
  283. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_disaggregated_prefill_npu.py +0 -0
  284. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_dualbatch_overlap_npu.py +0 -0
  285. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_embed.py +0 -0
  286. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_external_launcher.py +0 -0
  287. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_inference_audio_language.py +0 -0
  288. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_inference_npu.py +0 -0
  289. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_inference_npu_tp2.py +0 -0
  290. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_inference_sleep_mode_npu.py +0 -0
  291. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/offline_weight_load.py +0 -0
  292. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/prompt_embedding_inference.py +0 -0
  293. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/examples/run_dp_server.sh +0 -0
  294. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/format.sh +0 -0
  295. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/mypy.ini +0 -0
  296. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/packages.txt +0 -0
  297. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/requirements-dev.txt +0 -0
  298. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/requirements-lint.txt +0 -0
  299. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/setup.cfg +0 -0
  300. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/setup.py +0 -0
  301. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/__init__.py +0 -0
  302. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  303. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  304. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/__init__.py +0 -0
  305. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/common.sh +0 -0
  306. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/conftest.py +0 -0
  307. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  308. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  309. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/model_utils.py +0 -0
  310. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +0 -0
  311. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
  312. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +0 -0
  313. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
  314. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  315. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +0 -0
  316. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
  317. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
  318. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/configs/accuracy.txt +0 -0
  319. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/conftest.py +0 -0
  320. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/report_template.md +0 -0
  321. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
  322. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_data_parallel.py +0 -0
  323. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_expert_parallel.py +0 -0
  324. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_external_launcher.py +0 -0
  325. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_full_graph_mode.py +0 -0
  326. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
  327. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
  328. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_offline_inference_distributed.py +0 -0
  329. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
  330. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_prefix_caching.py +0 -0
  331. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_qwen3_moe.py +0 -0
  332. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_single_request_aclgraph.py +0 -0
  333. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_torchair_graph_mode.py +0 -0
  334. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/multicard/test_weight_loader.py +0 -0
  335. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py +0 -0
  336. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +0 -0
  337. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -0
  338. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen3_32b.py +0 -0
  339. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/models/test_qwen3_32b_int8.py +0 -0
  340. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/__init__.py +0 -0
  341. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/__init__.py +0 -0
  342. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +0 -0
  343. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +0 -0
  344. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/multi_node_config.py +0 -0
  345. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/config/utils.py +0 -0
  346. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/scripts/lws.yaml +0 -0
  347. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/scripts/run.sh +0 -0
  348. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/nightly/multi_node/test_multi_node.py +0 -0
  349. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -0
  350. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  351. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  352. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  353. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/prompts/example.txt +0 -0
  354. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/run_disagg_pd.sh +0 -0
  355. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/run_doctests.sh +0 -0
  356. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/__init__.py +0 -0
  357. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/__init__.py +0 -0
  358. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_bgmv_expand.py +0 -0
  359. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +0 -0
  360. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_fused_moe.py +0 -0
  361. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  362. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_mla_preprocess.py +0 -0
  363. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
  364. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +0 -0
  365. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -0
  366. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -0
  367. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -0
  368. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_aclgraph.py +0 -0
  369. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
  370. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_ascend_scheduler.py +0 -0
  371. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_bge_model.py +0 -0
  372. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_camem.py +0 -0
  373. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_chunked.py +0 -0
  374. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_embedding.py +0 -0
  375. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_embedding_aclgraph.py +0 -0
  376. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
  377. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  378. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -0
  379. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  380. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_quantization.py +0 -0
  381. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_sampler.py +0 -0
  382. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/singlecard/test_vlm.py +0 -0
  383. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/utils.py +0 -0
  384. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
  385. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/e2e/vllm_interface/vllm_test.cfg +0 -0
  386. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/__init__.py +0 -0
  387. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/attention/test_attention_mask.py +0 -0
  388. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/attention/test_attention_v1.py +0 -0
  389. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/attention/test_mla_v1.py +0 -0
  390. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/base.py +0 -0
  391. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/compilation/test_acl_graph.py +0 -0
  392. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/conftest.py +0 -0
  393. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/core/test_schedule_config.py +0 -0
  394. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/core/test_scheduler.py +0 -0
  395. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/device_allocator/test_camem.py +0 -0
  396. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  397. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  398. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/test_communicator.py +0 -0
  399. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
  400. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/distributed/test_parallel_state.py +0 -0
  401. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
  402. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
  403. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
  404. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
  405. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
  406. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
  407. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/eplb/core/test_eplb_utils.py +0 -0
  408. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/fake_weight/config.json +0 -0
  409. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  410. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_mooncake_connector.py +0 -0
  411. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -0
  412. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  413. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  414. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/kv_connector/utils.py +0 -0
  415. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/models/__init__.py +0 -0
  416. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/models/conftest.py +0 -0
  417. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/models/test_qwen2_5_vl.py +0 -0
  418. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
  419. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/models/test_qwen2_vl.py +0 -0
  420. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_base.py +0 -0
  421. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_decorator.py +0 -0
  422. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_layers.py +0 -0
  423. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_metadata.py +0 -0
  424. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/multistream/test_ms_split.py +0 -0
  425. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/expert_map.json +0 -0
  426. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_activation.py +0 -0
  427. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_comm_utils.py +0 -0
  428. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_common_fused_moe.py +0 -0
  429. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  430. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +0 -0
  431. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_fused_ops.py +0 -0
  432. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_layernorm.py +0 -0
  433. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_linear.py +0 -0
  434. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_moe_comm_method.py +0 -0
  435. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_rotary_embedding.py +0 -0
  436. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_token_dispatcher.py +0 -0
  437. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
  438. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  439. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  440. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_quant_config.py +0 -0
  441. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_utils.py +0 -0
  442. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -0
  443. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w4a8_dynamic.py +0 -0
  444. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
  445. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
  446. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/sample/test_rejection_sampler.py +0 -0
  447. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/sample/test_sampler.py +0 -0
  448. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/test_ascend_config.py +0 -0
  449. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/test_envs.py +0 -0
  450. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/test_platform.py +0 -0
  451. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/test_utils.py +0 -0
  452. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/__init__.py +0 -0
  453. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -0
  454. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +0 -0
  455. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/ops/test_torchair_fused_moe.py +0 -0
  456. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +0 -0
  457. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +0 -0
  458. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
  459. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/test_torchair_attention.py +0 -0
  460. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/test_torchair_mla.py +0 -0
  461. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/torchair/test_utils.py +0 -0
  462. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/worker/test_input_batch.py +0 -0
  463. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/worker/test_model_runner_v1.py +0 -0
  464. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tests/ut/worker/test_worker_v1.py +0 -0
  465. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/actionlint.sh +0 -0
  466. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/aisbench.py +0 -0
  467. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/check_python_src_init.py +0 -0
  468. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/check_repo.sh +0 -0
  469. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/enforce_regex_import.py +0 -0
  470. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/mypy.sh +0 -0
  471. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/png-lint.sh +0 -0
  472. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/send_mm_request.py +0 -0
  473. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/shellcheck.sh +0 -0
  474. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/tools/sphinx-lint.sh +0 -0
  475. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/typos.toml +0 -0
  476. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/__init__.py +0 -0
  477. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ascend_config.py +0 -0
  478. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ascend_forward_context.py +0 -0
  479. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/__init__.py +0 -0
  480. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/attention_mask.py +0 -0
  481. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/attention_v1.py +0 -0
  482. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/sfa_v1.py +0 -0
  483. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/attention/utils.py +0 -0
  484. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/compilation/__init__.py +0 -0
  485. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/compilation/acl_graph.py +0 -0
  486. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/__init__.py +0 -0
  487. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/recompute_schedule_config.py +0 -0
  488. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/recompute_scheduler.py +0 -0
  489. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/schedule_config.py +0 -0
  490. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/core/scheduler.py +0 -0
  491. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/cpu_binding.py +0 -0
  492. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/device_allocator/__init__.py +0 -0
  493. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/device_allocator/camem.py +0 -0
  494. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/__init__.py +0 -0
  495. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/communicator.py +0 -0
  496. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_connector.py +0 -0
  497. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_manager/__init__.py +0 -0
  498. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
  499. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
  500. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
  501. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  502. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  503. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +0 -0
  504. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/__init__.py +0 -0
  505. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/config_data.py +0 -0
  506. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/kv_transfer.py +0 -0
  507. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/mooncake_engine.py +0 -0
  508. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/mooncake_store.py +0 -0
  509. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +0 -0
  510. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake/transfer_engine.py +0 -0
  511. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake_connector.py +0 -0
  512. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/mooncake_layerwise_connector.py +0 -0
  513. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/parallel_state.py +0 -0
  514. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/distributed/utils.py +0 -0
  515. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/envs.py +0 -0
  516. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/__init__.py +0 -0
  517. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/adaptor/__init__.py +0 -0
  518. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
  519. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/__init__.py +0 -0
  520. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -0
  521. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/eplb_utils.py +0 -0
  522. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/eplb_worker.py +0 -0
  523. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/__init__.py +0 -0
  524. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
  525. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
  526. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -0
  527. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
  528. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -0
  529. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
  530. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/eplb_updator.py +0 -0
  531. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/eplb/utils.py +0 -0
  532. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/__init__.py +0 -0
  533. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/lora_ops.py +0 -0
  534. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/punica_npu.py +0 -0
  535. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/lora/utils.py +0 -0
  536. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/meta_registration.py +0 -0
  537. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/__init__.py +0 -0
  538. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/deepseek_v3_2.py +0 -0
  539. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/layers/__init__.py +0 -0
  540. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/layers/mla.py +0 -0
  541. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/layers/sfa.py +0 -0
  542. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen2_5_omni_thinker.py +0 -0
  543. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/models/qwen3_next.py +0 -0
  544. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/__init__.py +0 -0
  545. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/base.py +0 -0
  546. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/context.py +0 -0
  547. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/decorator.py +0 -0
  548. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/layers.py +0 -0
  549. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/metadata.py +0 -0
  550. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/multistream/ms_split.py +0 -0
  551. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/__init__.py +0 -0
  552. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/activation.py +0 -0
  553. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/attention.py +0 -0
  554. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/casual_conv1d.py +0 -0
  555. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  556. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/fla.py +0 -0
  557. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/layernorm.py +0 -0
  558. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/linear_op.py +0 -0
  559. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/__init__.py +0 -0
  560. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/comm_utils.py +0 -0
  561. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/experts_selector.py +0 -0
  562. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +0 -0
  563. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/moe_comm_method.py +0 -0
  564. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/moe_mlp.py +0 -0
  565. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/moe/token_dispatcher.py +0 -0
  566. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/register_custom_ops.py +0 -0
  567. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/rotary_embedding.py +0 -0
  568. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/sigmoid_gating.py +0 -0
  569. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
  570. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/ops/weight_prefetch.py +0 -0
  571. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/__init__.py +0 -0
  572. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_config.py +0 -0
  573. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_core.py +0 -0
  574. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
  575. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_mamba_config.py +0 -0
  576. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_message_queue.py +0 -0
  577. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -0
  578. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
  579. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_attention_layer.py +0 -0
  580. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_deepseek_mtp.py +0 -0
  581. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
  582. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_logits.py +0 -0
  583. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
  584. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
  585. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_roberta.py +0 -0
  586. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_triton.py +0 -0
  587. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/patch/worker/patch_weight_loader.py +0 -0
  588. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/platform.py +0 -0
  589. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/__init__.py +0 -0
  590. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/quant_config.py +0 -0
  591. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/utils.py +0 -0
  592. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -0
  593. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w4a8_dynamic.py +0 -0
  594. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w8a8.py +0 -0
  595. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/quantization/w8a8_dynamic.py +0 -0
  596. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/__init__.py +0 -0
  597. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
  598. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
  599. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/rejection_sampler.py +0 -0
  600. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/sample/sampler.py +0 -0
  601. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/spec_decode/__init__.py +0 -0
  602. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/__init__.py +0 -0
  603. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/__init__.py +0 -0
  604. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/qwen2.py +0 -0
  605. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/qwen3_moe.py +0 -0
  606. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +0 -0
  607. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +0 -0
  608. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
  609. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/models/torchair_pangu_moe.py +0 -0
  610. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/__init__.py +0 -0
  611. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/sequence_parallel.py +0 -0
  612. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/shared_weight_layer.py +0 -0
  613. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_activation.py +0 -0
  614. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_layernorm.py +0 -0
  615. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +0 -0
  616. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py +0 -0
  617. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/quantization/__init__.py +0 -0
  618. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +0 -0
  619. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +0 -0
  620. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_attention.py +0 -0
  621. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_mla.py +0 -0
  622. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_model_runner.py +0 -0
  623. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/torchair/torchair_worker.py +0 -0
  624. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/__init__.py +0 -0
  625. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/block_table.py +0 -0
  626. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend/worker/npu_input_batch.py +0 -0
  627. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  628. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/entry_points.txt +0 -0
  629. {vllm_ascend-0.11.0rc2 → vllm_ascend-0.11.0rc3}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -19,20 +19,29 @@ FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG="v0.3.7.post2"
22
23
 
23
24
  # Define environments
24
25
  ENV DEBIAN_FRONTEND=noninteractive
25
26
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
26
27
 
27
- RUN apt-get update -y && \
28
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
29
- rm -rf /var/cache/apt/* && \
30
- rm -rf /var/lib/apt/lists/*
31
-
32
28
  WORKDIR /workspace
33
29
 
34
30
  COPY . /vllm-workspace/vllm-ascend/
35
31
 
32
+ # Install Mooncake dependencies
33
+ RUN apt-get update -y && \
34
+ apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev && \
35
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
36
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
37
+ cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
38
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/lib64 && \
39
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
40
+ make -j$(nproc) && make install && \
41
+ rm -fr /vllm-workspace/Mooncake/build && \
42
+ rm -rf /var/cache/apt/* && \
43
+ rm -rf /var/lib/apt/lists/*
44
+
36
45
  RUN pip config set global.index-url ${PIP_INDEX_URL}
37
46
 
38
47
  # Install vLLM
@@ -54,7 +63,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
54
63
  python3 -m pip cache purge
55
64
 
56
65
  # Install modelscope (for fast download) and ray (for multinode)
57
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
66
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
58
67
  python3 -m pip cache purge
59
68
 
60
69
  CMD ["/bin/bash"]
@@ -19,21 +19,29 @@ FROM quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG=v0.3.7.post2
22
23
 
24
+ COPY . /vllm-workspace/vllm-ascend/
23
25
  # Define environments
24
26
  ENV DEBIAN_FRONTEND=noninteractive
25
27
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
26
28
 
27
- RUN apt-get update -y && \
28
- apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
29
- rm -rf /var/cache/apt/* && \
30
- rm -rf /var/lib/apt/lists/*
29
+ RUN pip config set global.index-url ${PIP_INDEX_URL}
31
30
 
32
31
  WORKDIR /workspace
33
32
 
34
- COPY . /vllm-workspace/vllm-ascend/
35
-
36
- RUN pip config set global.index-url ${PIP_INDEX_URL}
33
+ # Install Mooncake dependencies
34
+ RUN apt-get update -y && \
35
+ apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev && \
36
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
37
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
38
+ cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
39
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/lib64 && \
40
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
41
+ make -j$(nproc) && make install && \
42
+ rm -fr /vllm-workspace/Mooncake/build && \
43
+ rm -rf /var/cache/apt/* && \
44
+ rm -rf /var/lib/apt/lists/*
37
45
 
38
46
  # Install vLLM
39
47
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
@@ -54,7 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
54
62
  python3 -m pip cache purge
55
63
 
56
64
  # Install modelscope (for fast download) and ray (for multinode)
57
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
65
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
58
66
  python3 -m pip cache purge
59
67
 
60
68
  CMD ["/bin/bash"]
@@ -19,23 +19,36 @@ FROM quay.io/ascend/cann:8.3.rc2-a3-openeuler24.03-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG="v0.3.7.post2"
22
23
 
23
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
24
25
 
25
- RUN yum update -y && \
26
- yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
- rm -rf /var/cache/yum
28
-
29
26
  RUN pip config set global.index-url ${PIP_INDEX_URL}
30
27
 
31
28
  WORKDIR /workspace
32
29
 
33
30
  COPY . /vllm-workspace/vllm-ascend/
34
31
 
32
+ SHELL ["/bin/bash", "-c"]
33
+
34
+ RUN yum update -y && \
35
+ yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
36
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
37
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
38
+ ARCH=$(uname -m) && \
39
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
40
+ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
41
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
42
+ cd /vllm-workspace/Mooncake && \
43
+ bash mooncake_installer.sh -y && \
44
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
45
+ make -j$(nproc) && make install && \
46
+ rm -fr /vllm-workspace/Mooncake/build && \
47
+ rm -rf /var/cache/yum/*
48
+
35
49
  # Install vLLM
36
50
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
37
51
  ARG VLLM_TAG=v0.11.0
38
-
39
52
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
40
53
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
41
54
  RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
@@ -52,7 +65,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
52
65
  python3 -m pip cache purge
53
66
 
54
67
  # Install modelscope (for fast download) and ray (for multinode)
55
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
68
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
56
69
  python3 -m pip cache purge
57
70
 
58
71
  CMD ["/bin/bash"]
@@ -19,12 +19,10 @@ FROM quay.io/ascend/cann:8.3.rc2-910b-openeuler24.03-py3.11
19
19
 
20
20
  ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21
21
  ARG COMPILE_CUSTOM_KERNELS=1
22
+ ARG MOONCAKE_TAG="v0.3.7.post2"
22
23
 
23
24
  ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
24
25
 
25
- RUN yum update -y && \
26
- yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
27
- rm -rf /var/cache/yum
28
26
 
29
27
  RUN pip config set global.index-url ${PIP_INDEX_URL}
30
28
 
@@ -32,10 +30,26 @@ WORKDIR /workspace
32
30
 
33
31
  COPY . /vllm-workspace/vllm-ascend/
34
32
 
33
+ SHELL ["/bin/bash", "-c"]
34
+
35
+ RUN yum update -y && \
36
+ yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
37
+ git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
38
+ cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
39
+ ARCH=$(uname -m) && \
40
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
41
+ export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
42
+ export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
43
+ cd /vllm-workspace/Mooncake && \
44
+ bash mooncake_installer.sh -y && \
45
+ mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
46
+ make -j$(nproc) && make install && \
47
+ rm -fr /vllm-workspace/Mooncake/build && \
48
+ rm -rf /var/cache/yum/*
49
+
35
50
  # Install vLLM
36
51
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
37
- ARG VLLM_TAG=v0.11.0
38
-
52
+ ARG VLLM_TAG=v0.11.2
39
53
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
40
54
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
41
55
  RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
@@ -52,7 +66,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
52
66
  python3 -m pip cache purge
53
67
 
54
68
  # Install modelscope (for fast download) and ray (for multinode)
55
- RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
69
+ RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
56
70
  python3 -m pip cache purge
57
71
 
58
72
  CMD ["/bin/bash"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vllm_ascend
3
- Version: 0.11.0rc2
3
+ Version: 0.11.0rc3
4
4
  Summary: vLLM Ascend backend plugin
5
5
  Home-page: https://github.com/vllm-project/vllm-ascend
6
6
  Author: vLLM-Ascend team
@@ -37,6 +37,7 @@ Requires-Dist: msgpack
37
37
  Requires-Dist: quart
38
38
  Requires-Dist: numba
39
39
  Requires-Dist: torch-npu==2.7.1
40
+ Requires-Dist: transformers<=4.57.1
40
41
  Dynamic: author
41
42
  Dynamic: classifier
42
43
  Dynamic: description
@@ -8,6 +8,7 @@ single_npu_multimodal
8
8
  single_npu_audio
9
9
  single_npu_qwen3_embedding
10
10
  single_npu_qwen3_quantization
11
+ single_node_pd_disaggregation_llmdatadist
11
12
  multi_npu_qwen3_next
12
13
  multi_npu
13
14
  multi_npu_moge
@@ -0,0 +1,181 @@
1
+ # Prefill-Decode Disaggregation Llmdatadist Verification (Qwen2.5-VL)
2
+
3
+ ## Getting Started
4
+
5
+ vLLM-Ascend now supports prefill-decode (PD) disaggregation. This guide walks through the steps to verify this feature with constrained resources.
6
+
7
+ Using the Qwen2.5-VL-7B-Instruct model as an example, use vllm-ascend v0.11.0rc1 (with vLLM v0.11.0) on 1 Atlas 800T A2 server to deploy the "1P1D" architecture. Assume the IP address is 192.0.0.1.
8
+
9
+ ## Verify Communication Environment
10
+
11
+ ### Verification Process
12
+
13
+ 1. Single Node Verification:
14
+
15
+ Execute the following commands in sequence. The results must all be `success` and the status must be `UP`:
16
+
17
+ ```bash
18
+ # Check the remote switch ports
19
+ for i in {0..7}; do hccn_tool -i $i -lldp -g | grep Ifname; done
20
+ # Get the link status of the Ethernet ports (UP or DOWN)
21
+ for i in {0..7}; do hccn_tool -i $i -link -g ; done
22
+ # Check the network health status
23
+ for i in {0..7}; do hccn_tool -i $i -net_health -g ; done
24
+ # View the network detected IP configuration
25
+ for i in {0..7}; do hccn_tool -i $i -netdetect -g ; done
26
+ # View gateway configuration
27
+ for i in {0..7}; do hccn_tool -i $i -gateway -g ; done
28
+ # View NPU network configuration
29
+ cat /etc/hccn.conf
30
+ ```
31
+
32
+ 2. Get NPU IP Addresses
33
+
34
+ ```bash
35
+ for i in {0..7}; do hccn_tool -i $i -ip -g;done
36
+ ```
37
+
38
+ ## Generate Ranktable
39
+
40
+ The rank table is a JSON file that specifies the mapping of Ascend NPU ranks to nodes. For more details, please refer to the [vllm-ascend examples](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/README.md). Execute the following commands for reference.
41
+
42
+ ```shell
43
+ cd vllm-ascend/examples/disaggregated_prefill_v1/
44
+ bash gen_ranktable.sh --ips 192.0.0.1 \
45
+ --npus-per-node 2 --network-card-name eth0 --prefill-device-cnt 1 --decode-device-cnt 1
46
+ ```
47
+
48
+ If you want to run "2P1D", set npus-per-node to 3 and prefill-device-cnt to 2. The rank table will be generated at `/vllm-workspace/vllm-ascend/examples/disaggregated_prefill_v1/ranktable.json`.
49
+
50
+ |Parameter | Meaning |
51
+ | --- | --- |
52
+ | --ips | Each node's local IP address (prefiller nodes must be listed before decoder nodes) |
53
+ | --npus-per-node | Number of NPU chips on each node |
54
+ | --network-card-name | The physical machines' NIC |
55
+ |--prefill-device-cnt | Number of NPU chips used for prefill |
56
+ |--decode-device-cnt | Number of NPU chips used for decode |
57
+
58
+ ## Prefiller/Decoder Deployment
59
+
60
+ We can run the following scripts to launch a server on the prefiller/decoder NPU, respectively.
61
+
62
+ :::::{tab-set}
63
+
64
+ ::::{tab-item} Prefiller
65
+
66
+ ```shell
67
+ export ASCEND_RT_VISIBLE_DEVICES=0
68
+ export HCCL_IF_IP=192.0.0.1 # node ip
69
+ export GLOO_SOCKET_IFNAME="eth0" # network card name
70
+ export TP_SOCKET_IFNAME="eth0"
71
+ export HCCL_SOCKET_IFNAME="eth0"
72
+ export DISAGGREGATED_PREFILL_RANK_TABLE_PATH="/path/to/your/generated/ranktable.json"
73
+ export OMP_PROC_BIND=false
74
+ export OMP_NUM_THREADS=10
75
+ export VLLM_ASCEND_LLMDD_RPC_PORT=5959
76
+
77
+ vllm serve /model/Qwen2.5-VL-7B-Instruct \
78
+ --host 0.0.0.0 \
79
+ --port 13700 \
80
+ --tensor-parallel-size 1 \
81
+ --no-enable-prefix-caching \
82
+ --seed 1024 \
83
+ --served-model-name qwen25vl \
84
+ --max-model-len 40000 \
85
+ --max-num-batched-tokens 40000 \
86
+ --trust-remote-code \
87
+ --gpu-memory-utilization 0.9 \
88
+ --kv-transfer-config \
89
+ '{"kv_connector": "LLMDataDistCMgrConnector",
90
+ "kv_buffer_device": "npu",
91
+ "kv_role": "kv_producer",
92
+ "kv_parallel_size": 1,
93
+ "kv_port": "20001",
94
+ "engine_id": "0",
95
+ "kv_connector_module_path": "vllm_ascend.distributed.llmdatadist_c_mgr_connector"
96
+ }'
97
+ ```
98
+
99
+ ::::
100
+
101
+ ::::{tab-item} Decoder
102
+
103
+ ```shell
104
+ export ASCEND_RT_VISIBLE_DEVICES=1
105
+ export HCCL_IF_IP=192.0.0.1 # node ip
106
+ export GLOO_SOCKET_IFNAME="eth0" # network card name
107
+ export TP_SOCKET_IFNAME="eth0"
108
+ export HCCL_SOCKET_IFNAME="eth0"
109
+ export DISAGGREGATED_PREFILL_RANK_TABLE_PATH="/path/to/your/generated/ranktable.json"
110
+ export OMP_PROC_BIND=false
111
+ export OMP_NUM_THREADS=10
112
+ export VLLM_ASCEND_LLMDD_RPC_PORT=5979
113
+
114
+ vllm serve /model/Qwen2.5-VL-7B-Instruct \
115
+ --host 0.0.0.0 \
116
+ --port 13701 \
117
+ --no-enable-prefix-caching \
118
+ --tensor-parallel-size 1 \
119
+ --seed 1024 \
120
+ --served-model-name qwen25vl \
121
+ --max-model-len 40000 \
122
+ --max-num-batched-tokens 40000 \
123
+ --trust-remote-code \
124
+ --gpu-memory-utilization 0.9 \
125
+ --kv-transfer-config \
126
+ '{"kv_connector": "LLMDataDistCMgrConnector",
127
+ "kv_buffer_device": "npu",
128
+ "kv_role": "kv_consumer",
129
+ "kv_parallel_size": 1,
130
+ "kv_port": "20001",
131
+ "engine_id": "0",
132
+ "kv_connector_module_path": "vllm_ascend.distributed.llmdatadist_c_mgr_connector"
133
+ }'
134
+ ```
135
+
136
+ ::::
137
+
138
+ :::::
139
+
140
+ If you want to run "2P1D", please set ASCEND_RT_VISIBLE_DEVICES, VLLM_ASCEND_LLMDD_RPC_PORT and port to different values for each P process.
141
+
142
+ ## Example Proxy for Deployment
143
+
144
+ Run a proxy server on the same node as the prefiller service instance. The proxy program is available in the repository's examples: [load\_balance\_proxy\_server\_example.py](https://github.com/vllm-project/vllm-ascend/blob/main/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py)
145
+
146
+ ```shell
147
+ python load_balance_proxy_server_example.py \
148
+ --host 192.0.0.1 \
149
+ --port 8080 \
150
+ --prefiller-hosts 192.0.0.1 \
151
+ --prefiller-port 13700 \
152
+ --decoder-hosts 192.0.0.1 \
153
+ --decoder-ports 13701
154
+ ```
155
+
156
+ |Parameter | Meaning |
157
+ | --- | --- |
158
+ | --port | Port of proxy |
159
+ | --prefiller-port | All ports of prefill |
160
+ | --decoder-ports | All ports of decoder |
161
+
162
+ ## Verification
163
+
164
+ Check service health using the proxy server endpoint.
165
+
166
+ ```shell
167
+ curl http://192.0.0.1:8080/v1/chat/completions \
168
+ -H "Content-Type: application/json" \
169
+ -d '{
170
+ "model": "qwen25vl",
171
+ "messages": [
172
+ {"role": "system", "content": "You are a helpful assistant."},
173
+ {"role": "user", "content": [
174
+ {"type": "image_url", "image_url": {"url": "https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png"}},
175
+ {"type": "text", "text": "What is the text in the illustrate?"}
176
+ ]}
177
+ ],
178
+ "max_tokens": 100,
179
+ "temperature": 0
180
+ }'
181
+ ```
@@ -12,6 +12,7 @@ requires = [
12
12
  "scipy",
13
13
  "setuptools>=64",
14
14
  "setuptools-scm>=8",
15
+ "transformers<=4.57.1",
15
16
  "torch-npu==2.7.1",
16
17
  "torch==2.7.1",
17
18
  "torchvision",
@@ -26,3 +26,5 @@ numba
26
26
  #--pre
27
27
  #--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
28
28
  torch-npu==2.7.1
29
+
30
+ transformers<=4.57.1
@@ -326,7 +326,6 @@ class TestAscendC8KVCacheMethod(TestBase):
326
326
  self.attention_type.ENCODER = "encoder"
327
327
 
328
328
  def test_create_weights(self):
329
- """测试 create_weights 是否正确注册参数"""
330
329
  AscendC8KVCacheMethod.create_weights(self.layer)
331
330
 
332
331
  self.layer.register_parameter.assert_any_call("key_antiquant_scale",