vllm-ascend 0.10.1rc1__tar.gz → 0.10.2rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (524):
  1. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/actionlint.yaml +3 -0
  2. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/accuracy_test.yaml +2 -2
  3. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_310p_openeuler.yml +12 -0
  4. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_310p_ubuntu.yml +13 -1
  5. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_a3_openeuler.yml +12 -0
  6. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_a3_ubuntu.yml +13 -1
  7. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_openeuler.yml +12 -0
  8. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_ubuntu.yml +13 -1
  9. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/nightly_benchmarks.yaml +1 -1
  10. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/pre-commit.yml +1 -0
  11. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/reminder_comment.yml +1 -1
  12. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_dist.yaml +1 -1
  13. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test.yaml +14 -50
  14. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_310p.yaml +1 -1
  15. vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_test_full.yaml +233 -0
  16. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_pd.yaml +1 -0
  17. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/Dockerfile +1 -1
  18. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.310p +1 -1
  19. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.310p.openEuler +1 -1
  20. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.a3 +1 -1
  21. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.a3.openEuler +1 -1
  22. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.openEuler +1 -1
  23. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/PKG-INFO +3 -3
  24. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/README.md +2 -2
  25. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/README.zh.md +2 -2
  26. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/ops/ben_vocabparallelembedding.py +1 -1
  27. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/run-performance-benchmarks.sh +3 -1
  28. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/serving-tests.json +2 -1
  29. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/torch_binding.cpp +14 -17
  30. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/torch_binding_meta.cpp +4 -4
  31. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/versioning_policy.md +2 -0
  32. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/conf.py +5 -5
  33. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +20 -0
  34. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +19 -0
  35. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +21 -0
  36. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +21 -0
  37. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +4 -0
  38. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/faqs.md +1 -1
  39. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/installation.md +1 -0
  40. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/index.md +2 -0
  41. vllm_ascend-0.10.2rc1/docs/source/tutorials/multi_node_pd_disaggregation.md +244 -0
  42. vllm_ascend-0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
  43. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/additional_config.md +3 -0
  44. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/release_notes.md +41 -0
  45. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/README.md +4 -4
  46. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/gen_ranktable.py +43 -29
  47. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/gen_ranktable.sh +10 -1
  48. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +1 -0
  49. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/requirements-dev.txt +1 -1
  50. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_offline_inference_distributed.py +48 -0
  51. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_prefix_caching.py +22 -20
  52. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_qwen3_moe.py +0 -1
  53. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_bgmv_expand.py +2 -2
  54. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +1 -1
  55. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_fused_moe.py +86 -18
  56. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_rotary_embedding.py +3 -3
  57. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +1 -1
  58. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +18 -2
  59. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_ascend_scheduler.py +23 -0
  60. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_vlm.py +11 -6
  61. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_attention_v1.py +14 -37
  62. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_mla_v1.py +4 -2
  63. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/core/test_schedule_config.py +18 -25
  64. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/core/test_scheduler.py +237 -350
  65. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/test_parallel_state.py +7 -3
  66. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_mooncake_connector.py +40 -0
  67. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/utils.py +21 -46
  68. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_deepseek_v2.py +0 -1
  69. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_5_vl.py +52 -0
  70. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_activation.py +12 -1
  71. vllm_ascend-0.10.2rc1/tests/ut/ops/test_ascend_forwad_context.py +22 -0
  72. vllm_ascend-0.10.2rc1/tests/ut/ops/test_comm_utils.py +98 -0
  73. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_common_fused_moe.py +37 -1
  74. vllm_ascend-0.10.2rc1/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +218 -0
  75. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_fused_ops.py +113 -12
  76. vllm_ascend-0.10.2rc1/tests/ut/ops/test_layernorm.py +93 -0
  77. vllm_ascend-0.10.2rc1/tests/ut/ops/test_linear.py +105 -0
  78. vllm_ascend-0.10.2rc1/tests/ut/ops/test_moe_comm_method.py +212 -0
  79. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_rotary_embedding.py +96 -36
  80. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_token_dispatcher.py +80 -67
  81. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_vocab_parallel_embedding.py +9 -1
  82. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_quant_config.py +8 -19
  83. vllm_ascend-0.10.2rc1/tests/ut/quantization/test_utils.py +62 -0
  84. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_w4a8_dynamic.py +13 -2
  85. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_w8a8.py +3 -3
  86. vllm_ascend-0.10.2rc1/tests/ut/quantization/test_w8a8_dynamic.py +69 -0
  87. vllm_ascend-0.10.2rc1/tests/ut/sample/logits_processor/test_builtin.py +40 -0
  88. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_ascend_config.py +24 -0
  89. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_platform.py +2 -33
  90. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_utils.py +20 -5
  91. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +12 -6
  92. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/ops/test_torchair_fused_moe.py +1 -7
  93. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +26 -27
  94. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/test_torchair_mla.py +20 -8
  95. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/test_utils.py +0 -13
  96. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/worker/test_input_batch.py +1 -1
  97. vllm_ascend-0.10.2rc1/tests/ut/worker/test_model_runner_v1.py +94 -0
  98. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/worker/test_worker_v1.py +11 -2
  99. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/_version.py +3 -3
  100. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ascend_config.py +18 -0
  101. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ascend_forward_context.py +49 -10
  102. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/attention_v1.py +61 -51
  103. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/mla_v1.py +3 -1
  104. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/utils.py +7 -0
  105. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/compilation/acl_graph.py +7 -6
  106. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/schedule_config.py +4 -8
  107. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/scheduler.py +117 -84
  108. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +26 -46
  109. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/mooncake_connector.py +6 -4
  110. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/parallel_state.py +26 -1
  111. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/envs.py +20 -0
  112. {vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/lora_ops.py +13 -12
  113. {vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/punica_npu.py +6 -3
  114. vllm_ascend-0.10.2rc1/vllm_ascend/lora/utils.py +77 -0
  115. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/meta_registration.py +7 -6
  116. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/__init__.py +7 -10
  117. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_v2.py +28 -60
  118. vllm_ascend-0.10.2rc1/vllm_ascend/models/layers/mla.py +144 -0
  119. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_5_vl.py +60 -3
  120. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen3_moe.py +2 -6
  121. vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3_next.py +1361 -0
  122. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/__init__.py +10 -8
  123. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/activation.py +2 -0
  124. vllm_ascend-0.10.2rc1/vllm_ascend/ops/casual_conv1d.py +597 -0
  125. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/common_fused_moe.py +165 -252
  126. vllm_ascend-0.10.2rc1/vllm_ascend/ops/fla.py +381 -0
  127. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/fused_moe.py +9 -11
  128. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/layernorm.py +32 -1
  129. vllm_ascend-0.10.2rc1/vllm_ascend/ops/linear.py +626 -0
  130. {vllm_ascend-0.10.1rc1/vllm_ascend/ops → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/comm_utils.py +52 -1
  131. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +240 -0
  132. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/moe_comm_method.py +298 -0
  133. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/moe_mlp.py +111 -58
  134. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/token_dispatcher.py +35 -118
  135. vllm_ascend-0.10.2rc1/vllm_ascend/ops/register_custom_ops.py +192 -0
  136. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/rotary_embedding.py +60 -44
  137. vllm_ascend-0.10.2rc1/vllm_ascend/ops/sigmoid_gating.py +403 -0
  138. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +1 -0
  139. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/__init__.py +16 -11
  140. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +2 -0
  141. vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +97 -0
  142. vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_shared_fused_moe.py +21 -0
  143. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +0 -2
  144. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/platform.py +40 -8
  145. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/quant_config.py +29 -28
  146. vllm_ascend-0.10.2rc1/vllm_ascend/quantization/utils.py +83 -0
  147. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/w4a8_dynamic.py +1 -1
  148. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/w8a8.py +1 -1
  149. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/w8a8_dynamic.py +13 -178
  150. vllm_ascend-0.10.2rc1/vllm_ascend/sample/logits_processor/__init__.py +50 -0
  151. vllm_ascend-0.10.2rc1/vllm_ascend/sample/logits_processor/builtin.py +35 -0
  152. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/sample/sampler.py +9 -21
  153. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/__init__.py +33 -0
  154. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/eagle_proposer.py +644 -0
  155. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/interface.py +51 -0
  156. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/mtp_proposer.py +630 -0
  157. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/ngram_proposer.py +65 -0
  158. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/models/qwen3_moe.py +2 -1
  159. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +13 -6
  160. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/communication_op.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_activation.py +37 -25
  161. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/ops/torchair_fused_moe.py +2 -4
  162. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_layernorm.py +51 -0
  163. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +8 -15
  164. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1 -3
  165. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/torchair_attention.py +10 -2
  166. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/torchair_mla.py +5 -7
  167. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/torchair_model_runner.py +47 -18
  168. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/utils.py +23 -9
  169. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/utils.py +102 -41
  170. vllm_ascend-0.10.2rc1/vllm_ascend/worker/block_table.py +313 -0
  171. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/model_runner_v1.py +1010 -849
  172. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/npu_input_batch.py +44 -31
  173. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/worker_v1.py +27 -10
  174. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/PKG-INFO +3 -3
  175. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/SOURCES.txt +45 -25
  176. vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_moe_comm.py +0 -175
  177. vllm_ascend-0.10.1rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +0 -139
  178. vllm_ascend-0.10.1rc1/tests/ut/ops/test_layernorm.py +0 -53
  179. vllm_ascend-0.10.1rc1/tests/ut/ops/test_linear.py +0 -363
  180. vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +0 -167
  181. vllm_ascend-0.10.1rc1/tests/ut/quantization/test_func_wrapper.py +0 -134
  182. vllm_ascend-0.10.1rc1/tests/ut/quantization/test_quantizer.py +0 -145
  183. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/moe_comm_method.py +0 -556
  184. vllm_ascend-0.10.1rc1/vllm_ascend/distributed/tensor_parallel.py +0 -248
  185. vllm_ascend-0.10.1rc1/vllm_ascend/models/pangu_moe.py +0 -1106
  186. vllm_ascend-0.10.1rc1/vllm_ascend/ops/linear.py +0 -309
  187. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +0 -147
  188. vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_lora_embedding.py +0 -29
  189. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/func_wrapper.py +0 -184
  190. vllm_ascend-0.10.1rc1/vllm_ascend/quantization/quantizer.py +0 -311
  191. vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_quantizer.py +0 -29
  192. vllm_ascend-0.10.1rc1/vllm_ascend/worker/__init__.py +0 -0
  193. vllm_ascend-0.10.1rc1/vllm_ascend/worker/eagle_proposer_v1.py +0 -398
  194. vllm_ascend-0.10.1rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -439
  195. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.gemini/config.yaml +0 -0
  196. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/Dockerfile.buildwheel +0 -0
  197. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  198. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  199. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  200. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  201. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  202. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  203. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  204. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  205. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
  206. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  207. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
  208. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  209. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  210. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/dependabot.yml +0 -0
  211. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/format_pr_body.sh +0 -0
  212. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/labeler.yml +0 -0
  213. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/format_pr_body.yaml +0 -0
  214. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/label_merge_conflict.yml +0 -0
  215. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/labeler.yml +0 -0
  216. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/actionlint.json +0 -0
  217. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/mypy.json +0 -0
  218. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/ruff.json +0 -0
  219. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/release_code.yml +0 -0
  220. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/release_whl.yml +0 -0
  221. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
  222. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.gitignore +0 -0
  223. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.pre-commit-config.yaml +0 -0
  224. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/.readthedocs.yaml +0 -0
  225. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/CMakeLists.txt +0 -0
  226. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/CODE_OF_CONDUCT.md +0 -0
  227. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/CONTRIBUTING.md +0 -0
  228. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/DCO +0 -0
  229. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/LICENSE +0 -0
  230. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/README.md +0 -0
  231. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/requirements-bench.txt +0 -0
  232. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  233. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/perf_result_template.md +0 -0
  234. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/latency-tests.json +0 -0
  235. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/throughput-tests.json +0 -0
  236. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/cmake/utils.cmake +0 -0
  237. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/codecov.yml +0 -0
  238. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/collect_env.py +0 -0
  239. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/camem_allocator.cpp +0 -0
  240. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/bgmv_expand.cpp +0 -0
  241. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/bgmv_shrink.cpp +0 -0
  242. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  243. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
  244. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/sgmv_expand.cpp +0 -0
  245. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/sgmv_shrink.cpp +0 -0
  246. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/types.h +0 -0
  247. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/utils.h +0 -0
  248. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/ops.h +0 -0
  249. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/csrc/utils.h +0 -0
  250. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/Makefile +0 -0
  251. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/README.md +0 -0
  252. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/requirements-docs.txt +0 -0
  253. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/requirements-test.txt +0 -0
  254. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/_templates/sections/header.html +0 -0
  255. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  256. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  257. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/contributors.md +0 -0
  258. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/governance.md +0 -0
  259. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/user_stories/index.md +0 -0
  260. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
  261. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/contribution/index.md +0 -0
  262. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/contribution/testing.md +0 -0
  263. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
  264. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  265. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  266. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  267. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
  268. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/index.md +0 -0
  269. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  270. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
  271. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  272. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/index.md +0 -0
  273. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/index.md +0 -0
  274. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  275. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  276. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  277. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/index.md +0 -0
  278. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  279. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  280. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  281. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  282. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  283. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  284. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  285. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  286. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  287. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  288. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  289. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  290. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  291. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  292. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  293. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  294. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  295. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  296. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  297. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  298. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  299. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  300. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  301. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  302. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  303. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  304. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  305. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  306. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  307. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  308. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  309. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  310. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  311. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  312. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  313. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  314. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  315. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  316. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  317. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  318. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  319. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  320. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  321. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  322. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  323. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  324. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  325. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  326. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  327. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  328. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/quick_start.md +0 -0
  329. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_node.md +0 -0
  330. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_node_kimi.md +0 -0
  331. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu.md +0 -0
  332. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_moge.md +0 -0
  333. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_quantization.md +0 -0
  334. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  335. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_node_300i.md +0 -0
  336. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu.md +0 -0
  337. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_audio.md +0 -0
  338. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_multimodal.md +0 -0
  339. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  340. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
  341. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
  342. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/index.md +0 -0
  343. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  344. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  345. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
  346. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/lora.md +0 -0
  347. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/quantization.md +0 -0
  348. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  349. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  350. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
  351. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  352. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  353. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
  354. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  355. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/eplb/eplb_deepseek.py +0 -0
  356. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/eplb/eplb_strategy.py +0 -0
  357. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/external_online_dp/README.md +0 -0
  358. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/external_online_dp/launch_online_dp.py +0 -0
  359. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/external_online_dp/run_dp_template.sh +0 -0
  360. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_data_parallel.py +0 -0
  361. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_disaggregated_prefill_npu.py +0 -0
  362. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_dualbatch_overlap_npu.py +0 -0
  363. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_embed.py +0 -0
  364. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_external_launcher.py +0 -0
  365. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_audio_language.py +0 -0
  366. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_npu.py +0 -0
  367. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_npu_tp2.py +0 -0
  368. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_sleep_mode_npu.py +0 -0
  369. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/prompt_embedding_inference.py +0 -0
  370. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/examples/run_dp_server.sh +0 -0
  371. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/format.sh +0 -0
  372. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/mypy.ini +0 -0
  373. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/packages.txt +0 -0
  374. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/pyproject.toml +0 -0
  375. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/requirements-lint.txt +0 -0
  376. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/requirements.txt +0 -0
  377. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/setup.cfg +0 -0
  378. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/setup.py +0 -0
  379. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/__init__.py +0 -0
  380. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
  381. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  382. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/__init__.py +0 -0
  383. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/common.sh +0 -0
  384. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/conftest.py +0 -0
  385. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  386. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  387. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/model_utils.py +0 -0
  388. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +0 -0
  389. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
  390. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
  391. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +0 -0
  392. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/configs/accuracy.txt +0 -0
  393. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/conftest.py +0 -0
  394. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/report_template.md +0 -0
  395. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
  396. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_data_parallel.py +0 -0
  397. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_expert_parallel.py +0 -0
  398. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_external_launcher.py +0 -0
  399. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
  400. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
  401. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
  402. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_torchair_graph_mode.py +0 -0
  403. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -0
  404. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  405. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  406. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  407. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/prompts/example.txt +0 -0
  408. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  409. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/run_doctests.sh +0 -0
  410. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/__init__.py +0 -0
  411. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/__init__.py +0 -0
  412. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  413. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -0
  414. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -0
  415. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_aclgraph.py +0 -0
  416. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_camem.py +0 -0
  417. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_chunked.py +0 -0
  418. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_embedding.py +0 -0
  419. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
  420. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
  421. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
  422. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_quantization.py +0 -0
  423. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_sampler.py +0 -0
  424. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/utils.py +0 -0
  425. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/__init__.py +0 -0
  426. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_attention_mask.py +0 -0
  427. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/base.py +0 -0
  428. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/conftest.py +0 -0
  429. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/device_allocator/test_camem.py +0 -0
  430. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  431. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  432. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/test_communicator.py +0 -0
  433. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/fake_weight/config.json +0 -0
  434. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  435. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
  436. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
  437. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/__init__.py +0 -0
  438. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_deepseek_mtp.py +0 -0
  439. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
  440. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_vl.py +0 -0
  441. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen3_moe.py +0 -0
  442. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_base.py +0 -0
  443. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_decorator.py +0 -0
  444. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_layers.py +0 -0
  445. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_metadata.py +0 -0
  446. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_ms_split.py +0 -0
  447. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/expert_map.json +0 -0
  448. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  449. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  450. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  451. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/sample/test_rejection_sampler.py +0 -0
  452. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/sample/test_sampler.py +0 -0
  453. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_envs.py +0 -0
  454. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/__init__.py +0 -0
  455. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -0
  456. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +0 -0
  457. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
  458. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/actionlint.sh +0 -0
  459. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/check_python_src_init.py +0 -0
  460. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/check_repo.sh +0 -0
  461. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/enforce_regex_import.py +0 -0
  462. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/mypy.sh +0 -0
  463. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/png-lint.sh +0 -0
  464. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/shellcheck.sh +0 -0
  465. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/tools/sphinx-lint.sh +0 -0
  466. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/typos.toml +0 -0
  467. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/__init__.py +0 -0
  468. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/__init__.py +0 -0
  469. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/attention_mask.py +0 -0
  470. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/compilation/__init__.py +0 -0
  471. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/__init__.py +0 -0
  472. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/device_allocator/__init__.py +0 -0
  473. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/device_allocator/camem.py +0 -0
  474. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/__init__.py +0 -0
  475. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/communicator.py +0 -0
  476. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
  477. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  478. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  479. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/lora/__init__.py +0 -0
  480. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_dbo.py +0 -0
  481. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_mtp.py +0 -0
  482. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_v3.py +0 -0
  483. {vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/models/layers}/__init__.py +0 -0
  484. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -0
  485. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_vl.py +0 -0
  486. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen3.py +0 -0
  487. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/__init__.py +0 -0
  488. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/base.py +0 -0
  489. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/context.py +0 -0
  490. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/decorator.py +0 -0
  491. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/layers.py +0 -0
  492. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/metadata.py +0 -0
  493. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/ms_split.py +0 -0
  494. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/attention.py +0 -0
  495. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  496. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/__init__.py +0 -0
  497. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/experts_selector.py +0 -0
  498. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/sequence_parallel.py +0 -0
  499. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/platform/__init__.py +0 -0
  500. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -0
  501. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/platform/patch_main/__init__.py +0 -0
  502. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/__init__.py +0 -0
  503. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  504. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_logits.py +0 -0
  505. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  506. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  507. {vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.10.2rc1/vllm_ascend/quantization}/__init__.py +0 -0
  508. {vllm_ascend-0.10.1rc1/vllm_ascend/quantization → vllm_ascend-0.10.2rc1/vllm_ascend/sample}/__init__.py +0 -0
  509. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/sample/rejection_sampler.py +0 -0
  510. {vllm_ascend-0.10.1rc1/vllm_ascend/sample → vllm_ascend-0.10.2rc1/vllm_ascend/torchair}/__init__.py +0 -0
  511. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models}/__init__.py +0 -0
  512. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/models/qwen2.py +0 -0
  513. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +0 -0
  514. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
  515. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/models/torchair_pangu_moe.py +0 -0
  516. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops}/__init__.py +0 -0
  517. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization}/__init__.py +0 -0
  518. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +0 -0
  519. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/torchair_worker.py +0 -0
  520. {vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization → vllm_ascend-0.10.2rc1/vllm_ascend/worker}/__init__.py +0 -0
  521. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  522. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  523. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/requires.txt +0 -0
  524. {vllm_ascend-0.10.1rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -15,3 +15,6 @@ self-hosted-runner:
15
15
  - linux-aarch64-a3-2
16
16
  - linux-aarch64-a3-4
17
17
  - linux-aarch64-a3-8
18
+ - linux-amd64-cpu-0
19
+ - linux-amd64-cpu-8
20
+ - linux-amd64-cpu-16
@@ -112,7 +112,7 @@ jobs:
112
112
  uses: actions/checkout@v4
113
113
  with:
114
114
  repository: vllm-project/vllm
115
- ref: v0.10.1.1
115
+ ref: v0.10.2
116
116
  path: ./vllm-empty
117
117
 
118
118
  - name: Install vllm-project/vllm from source
@@ -303,7 +303,7 @@ jobs:
303
303
  git push -f origin "${{ env.BRANCH_NAME }}"
304
304
 
305
305
  - name: Create PR in upstream via API
306
- uses: actions/github-script@v7
306
+ uses: actions/github-script@v8
307
307
  with:
308
308
  github-token: ${{ secrets.PAT_TOKEN }}
309
309
  script: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -43,17 +44,28 @@ on:
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
45
46
 
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
46
52
  jobs:
47
53
  build:
48
54
  name: vllm-ascend image build
55
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
56
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
57
  runs-on: >-
50
58
  ${{
51
59
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
52
60
  'ubuntu-latest' ||
53
61
  'ubuntu-24.04-arm'
54
62
  }}
63
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
55
64
  steps:
56
65
  - uses: actions/checkout@v4
66
+ with:
67
+ fetch-depth: 0
68
+ persist-credentials: false
57
69
 
58
70
  - name: Print
59
71
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -43,17 +44,28 @@ on:
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
45
46
 
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
46
52
  jobs:
47
53
  build:
48
54
  name: vllm-ascend image build
55
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
56
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
57
  runs-on: >-
50
58
  ${{
51
59
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
52
60
  'ubuntu-latest' ||
53
61
  'ubuntu-24.04-arm'
54
62
  }}
63
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
55
64
  steps:
56
65
  - uses: actions/checkout@v4
66
+ with:
67
+ fetch-depth: 0
68
+ persist-credentials: false
57
69
 
58
70
  - name: Print
59
71
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -24,6 +24,7 @@ on:
24
24
  - 'cmake/**'
25
25
  - 'CMakeLists.txt'
26
26
  - 'csrc/**'
27
+ types: [ labeled ]
27
28
  push:
28
29
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
29
30
  branches:
@@ -42,17 +43,28 @@ on:
42
43
  - 'CMakeLists.txt'
43
44
  - 'csrc/**'
44
45
 
46
+ # only cancel in-progress runs of the same workflow
47
+ concurrency:
48
+ group: ${{ github.workflow }}-${{ github.ref }}
49
+ cancel-in-progress: true
50
+
45
51
  jobs:
46
52
  build:
47
53
  name: vllm-ascend image build
54
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
55
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
48
56
  runs-on: >-
49
57
  ${{
50
58
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
51
59
  'ubuntu-latest' ||
52
60
  'ubuntu-24.04-arm'
53
61
  }}
62
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
54
63
  steps:
55
64
  - uses: actions/checkout@v4
65
+ with:
66
+ fetch-depth: 0
67
+ persist-credentials: false
56
68
 
57
69
  - name: Print
58
70
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -51,7 +51,7 @@ jobs:
51
51
  strategy:
52
52
  matrix:
53
53
  include:
54
- - vllm_branch: v0.10.1.1
54
+ - vllm_branch: v0.10.2
55
55
  vllm_ascend_branch: main
56
56
  vllm_use_v1: 1
57
57
  max-parallel: 1
@@ -35,3 +35,4 @@ jobs:
35
35
  SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
36
36
  with:
37
37
  extra_args: --all-files --hook-stage manual
38
+
@@ -9,7 +9,7 @@ jobs:
9
9
  runs-on: ubuntu-latest
10
10
  steps:
11
11
  - name: Remind to run full CI on PR
12
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
12
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
13
13
  with:
14
14
  script: |
15
15
  github.rest.issues.createComment({
@@ -43,7 +43,7 @@ jobs:
43
43
  strategy:
44
44
  matrix:
45
45
  os: [linux-aarch64-a3-8]
46
- vllm_version: [v0.10.1.1, main]
46
+ vllm_version: [v0.10.2]
47
47
  name: vLLM Ascend test
48
48
  runs-on: ${{ matrix.os }}
49
49
  container:
@@ -25,7 +25,6 @@ on:
25
25
  branches:
26
26
  - 'main'
27
27
  - '*-dev'
28
-
29
28
  # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
30
29
  # declared as "shell: bash -el {0}" on steps that need to be properly activated.
31
30
  # It's used to activate ascend-toolkit environment variables.
@@ -44,6 +43,7 @@ jobs:
44
43
  uses: ./.github/workflows/pre-commit.yml
45
44
 
46
45
  changes:
46
+ if: github.event_name == 'pull_request'
47
47
  runs-on: ubuntu-latest
48
48
  outputs:
49
49
  e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
@@ -68,6 +68,7 @@ jobs:
68
68
  - 'packages.txt'
69
69
  ut_tracker:
70
70
  - 'tests/ut/**'
71
+
71
72
  ut:
72
73
  needs: [lint, changes]
73
74
  name: unit test
@@ -81,7 +82,7 @@ jobs:
81
82
  VLLM_USE_MODELSCOPE: True
82
83
  strategy:
83
84
  matrix:
84
- vllm_version: [v0.10.1.1, main]
85
+ vllm_version: [v0.10.2]
85
86
  steps:
86
87
  - name: Install packages
87
88
  run: |
@@ -129,16 +130,16 @@ jobs:
129
130
  name: vllm-ascend
130
131
  verbose: true
131
132
 
132
- e2e:
133
+ e2e-light:
133
134
  needs: [lint, changes]
134
135
  # only trigger e2e test after lint passed and the change is e2e related with pull request.
135
- if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
136
+ if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
136
137
  strategy:
137
138
  max-parallel: 2
138
139
  matrix:
139
140
  os: [linux-aarch64-a2-1]
140
- vllm_version: [v0.10.1.1, main]
141
- name: singlecard e2e test
141
+ vllm_version: [v0.10.2]
142
+ name: singlecard e2e test - light
142
143
  runs-on: ${{ matrix.os }}
143
144
  container:
144
145
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -191,38 +192,19 @@ jobs:
191
192
  VLLM_WORKER_MULTIPROC_METHOD: spawn
192
193
  VLLM_USE_MODELSCOPE: True
193
194
  run: |
194
- # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
195
- # the test separately.
196
-
197
195
  pytest -sv tests/e2e/singlecard/test_aclgraph.py
198
- pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
199
- pytest -sv tests/e2e/singlecard/test_camem.py
200
- pytest -sv tests/e2e/singlecard/test_chunked.py
201
- pytest -sv tests/e2e/singlecard/test_embedding.py
202
- pytest -sv tests/e2e/singlecard/test_guided_decoding.py
203
- # TODO: Fix lora accuracy error
204
- pytest -sv tests/e2e/singlecard/test_ilama_lora.py
205
- pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
206
196
  pytest -sv tests/e2e/singlecard/test_quantization.py
207
- pytest -sv tests/e2e/singlecard/test_sampler.py
208
- pytest -sv tests/e2e/singlecard/test_vlm.py
209
-
210
- # ------------------------------------ v1 spec decode test ------------------------------------ #
211
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
212
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
213
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
197
+ pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
214
198
 
215
- pytest -sv tests/e2e/singlecard/ops/
216
-
217
- e2e-2-cards:
218
- needs: [e2e]
219
- if: ${{ needs.e2e.result == 'success' }}
199
+ e2e-2-cards-light:
200
+ needs: [e2e-light]
201
+ if: ${{ needs.e2e-light.result == 'success' }}
220
202
  strategy:
221
203
  max-parallel: 2
222
204
  matrix:
223
205
  os: [linux-aarch64-a2-2]
224
- vllm_version: [v0.10.1.1, main]
225
- name: multicard e2e test
206
+ vllm_version: [v0.10.2]
207
+ name: multicard e2e test - light
226
208
  runs-on: ${{ matrix.os }}
227
209
  container:
228
210
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -275,22 +257,4 @@ jobs:
275
257
  VLLM_WORKER_MULTIPROC_METHOD: spawn
276
258
  VLLM_USE_MODELSCOPE: True
277
259
  run: |
278
- pytest -sv tests/e2e/multicard/test_data_parallel.py
279
- pytest -sv tests/e2e/multicard/test_expert_parallel.py
280
- # external_launcher test is not stable enough. Fix it later
281
- # pytest -sv tests/e2e/multicard/test_external_launcher.py
282
- pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
283
- pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
284
-
285
- # To avoid oom, we need to run the test in a single process.
286
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
287
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
288
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
289
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
290
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
291
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
292
-
293
- #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
294
- pytest -sv tests/e2e/multicard/test_prefix_caching.py
295
- pytest -sv tests/e2e/multicard/test_qwen3_moe.py
296
- pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
260
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
@@ -53,7 +53,7 @@ jobs:
53
53
  max-parallel: 2
54
54
  matrix:
55
55
  os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
56
- vllm_version: [v0.10.1.1, main]
56
+ vllm_version: [v0.10.2]
57
57
  name: 310p e2e test
58
58
  runs-on: ${{ matrix.os }}
59
59
  container:
@@ -0,0 +1,233 @@
1
+ #
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
+ #
17
+ name: 'test-full'
18
+
19
+ on:
20
+ pull_request:
21
+ branches:
22
+ - 'main'
23
+ - '*-dev'
24
+ types: [ labeled, synchronize ]
25
+
26
+ # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
27
+ # declared as "shell: bash -el {0}" on steps that need to be properly activated.
28
+ # It's used to activate ascend-toolkit environment variables.
29
+ defaults:
30
+ run:
31
+ shell: bash -el {0}
32
+
33
+ # only cancel in-progress runs of the same workflow
34
+ # and ignore the lint / 1 card / 4 cards test type
35
+ concurrency:
36
+ group: ${{ github.workflow }}-${{ github.ref }}
37
+ cancel-in-progress: true
38
+
39
+ jobs:
40
+ changes:
41
+ runs-on: ubuntu-latest
42
+ if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
43
+ outputs:
44
+ e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
45
+ ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ - uses: dorny/paths-filter@v3
49
+ id: filter
50
+ with:
51
+ filters: |
52
+ e2e_tracker:
53
+ - '.github/workflows/vllm_ascend_test.yaml'
54
+ - 'vllm_ascend/**'
55
+ - 'csrc/**'
56
+ - 'cmake/**'
57
+ - 'tests/e2e/**'
58
+ - 'CMakeLists.txt'
59
+ - 'setup.py'
60
+ - 'requirements.txt'
61
+ - 'requirements-dev.txt'
62
+ - 'requirements-lint.txt'
63
+ - 'packages.txt'
64
+ ut_tracker:
65
+ - 'tests/ut/**'
66
+
67
+ e2e-full:
68
+ # only trigger the full test when the pull request carries both the 'ready' and 'ready-for-test' labels (gated by the `changes` job)
69
+ needs: [changes]
70
+ if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
71
+ strategy:
72
+ max-parallel: 2
73
+ matrix:
74
+ os: [linux-aarch64-a2-1]
75
+ vllm_version: [v0.10.2]
76
+ name: singlecard e2e test - full
77
+ runs-on: ${{ matrix.os }}
78
+ container:
79
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
80
+ env:
81
+ VLLM_LOGGING_LEVEL: ERROR
82
+ VLLM_USE_MODELSCOPE: True
83
+ steps:
84
+ - name: Check npu and CANN info
85
+ run: |
86
+ npu-smi info
87
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
88
+
89
+ - name: Config mirrors
90
+ run: |
91
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
92
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
93
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
94
+ apt-get update -y
95
+ apt install git -y
96
+
97
+ - name: Checkout vllm-project/vllm-ascend repo
98
+ uses: actions/checkout@v4
99
+
100
+ - name: Install system dependencies
101
+ run: |
102
+ apt-get -y install `cat packages.txt`
103
+ apt-get -y install gcc g++ cmake libnuma-dev
104
+
105
+ - name: Checkout vllm-project/vllm repo
106
+ uses: actions/checkout@v4
107
+ with:
108
+ repository: vllm-project/vllm
109
+ ref: ${{ matrix.vllm_version }}
110
+ path: ./vllm-empty
111
+
112
+ - name: Install vllm-project/vllm from source
113
+ working-directory: ./vllm-empty
114
+ run: |
115
+ VLLM_TARGET_DEVICE=empty pip install -e .
116
+
117
+ - name: Install vllm-project/vllm-ascend
118
+ env:
119
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
120
+ run: |
121
+ pip install -r requirements-dev.txt
122
+ pip install -v -e .
123
+
124
+ - name: Run e2e test
125
+ env:
126
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
127
+ VLLM_USE_MODELSCOPE: True
128
+ run: |
129
+ # We found that running the aclgraph tests in a batch causes an AclmdlRICaptureBegin error, so we run
130
+ # each test separately.
131
+
132
+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
133
+ pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
134
+ pytest -sv tests/e2e/singlecard/test_camem.py
135
+ pytest -sv tests/e2e/singlecard/test_chunked.py
136
+ pytest -sv tests/e2e/singlecard/test_embedding.py
137
+ pytest -sv tests/e2e/singlecard/test_guided_decoding.py
138
+ #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
139
+ pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
140
+ pytest -sv tests/e2e/singlecard/test_quantization.py
141
+ pytest -sv tests/e2e/singlecard/test_sampler.py
142
+ pytest -sv tests/e2e/singlecard/test_vlm.py
143
+
144
+ # ------------------------------------ v1 spec decode test ------------------------------------ #
145
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
146
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
147
+ pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
148
+
149
+ pytest -sv tests/e2e/singlecard/ops/
150
+
151
+ e2e-2-cards-full:
152
+ # only trigger the full test when the pull request carries both the 'ready' and 'ready-for-test' labels (gated by the `changes` job)
153
+ needs: [changes]
154
+ if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
155
+ strategy:
156
+ max-parallel: 2
157
+ matrix:
158
+ os: [linux-aarch64-a2-2]
159
+ vllm_version: [v0.10.2]
160
+ name: multicard e2e test - full
161
+ runs-on: ${{ matrix.os }}
162
+ container:
163
+ image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
164
+ env:
165
+ VLLM_LOGGING_LEVEL: ERROR
166
+ VLLM_USE_MODELSCOPE: True
167
+ steps:
168
+ - name: Check npu and CANN info
169
+ run: |
170
+ npu-smi info
171
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
172
+
173
+ - name: Config mirrors
174
+ run: |
175
+ sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
176
+ pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
177
+ pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
178
+ apt-get update -y
179
+ apt install git -y
180
+
181
+ - name: Checkout vllm-project/vllm-ascend repo
182
+ uses: actions/checkout@v4
183
+
184
+ - name: Install system dependencies
185
+ run: |
186
+ apt-get -y install `cat packages.txt`
187
+ apt-get -y install gcc g++ cmake libnuma-dev
188
+
189
+ - name: Checkout vllm-project/vllm repo
190
+ uses: actions/checkout@v4
191
+ with:
192
+ repository: vllm-project/vllm
193
+ ref: ${{ matrix.vllm_version }}
194
+ path: ./vllm-empty
195
+
196
+ - name: Install vllm-project/vllm from source
197
+ working-directory: ./vllm-empty
198
+ run: |
199
+ VLLM_TARGET_DEVICE=empty pip install -e .
200
+
201
+ - name: Install vllm-project/vllm-ascend
202
+ env:
203
+ PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
204
+ run: |
205
+ pip install -r requirements-dev.txt
206
+ pip install -v -e .
207
+
208
+ - name: Run vllm-project/vllm-ascend test
209
+ env:
210
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
211
+ VLLM_USE_MODELSCOPE: True
212
+ run: |
213
+ pytest -sv tests/e2e/multicard/test_data_parallel.py
214
+ pytest -sv tests/e2e/multicard/test_expert_parallel.py
215
+ # external_launcher test is not stable enough. Fix it later
216
+ # pytest -sv tests/e2e/multicard/test_external_launcher.py
217
+ pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
218
+ #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
219
+
220
+ # To avoid oom, we need to run the test in a single process.
221
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
222
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
223
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
224
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
225
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
226
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
227
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
228
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
229
+
230
+ #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
231
+ pytest -sv tests/e2e/multicard/test_prefix_caching.py
232
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py
233
+ pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
@@ -108,4 +108,5 @@ jobs:
108
108
 
109
109
  - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
110
110
  run: |
111
+ git config --global --add safe.directory /__w/vllm-ascend/vllm-ascend
111
112
  bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
37
37
 
38
38
  # Install vLLM
39
39
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40
- ARG VLLM_TAG=v0.10.1.1
40
+ ARG VLLM_TAG=v0.10.2
41
41
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
42
42
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
43
43
  RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
37
37
 
38
38
  # Install vLLM
39
39
  ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
40
- ARG VLLM_TAG=v0.10.1.1
40
+ ARG VLLM_TAG=v0.10.2
41
41
  RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
42
42
  # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
43
43
  RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \