vllm-ascend 0.10.0rc1__tar.gz → 0.10.2rc1__tar.gz

This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (548) hide show
  1. vllm_ascend-0.10.2rc1/.gemini/config.yaml +6 -0
  2. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +1 -1
  3. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
  4. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/actionlint.yaml +7 -0
  5. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/accuracy_test.yaml +14 -13
  6. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/format_pr_body.yaml +1 -1
  7. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_310p_openeuler.yml +12 -0
  8. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_310p_ubuntu.yml +13 -1
  9. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_a3_openeuler.yml +12 -0
  10. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_a3_ubuntu.yml +13 -1
  11. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_openeuler.yml +12 -0
  12. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_ubuntu.yml +13 -1
  13. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/nightly_benchmarks.yaml +1 -1
  14. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/pre-commit.yml +1 -0
  15. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/release_code.yml +1 -1
  16. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/release_whl.yml +1 -1
  17. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/reminder_comment.yml +1 -1
  18. vllm_ascend-0.10.0rc1/.github/workflows/vllm_ascend_test_long_term.yaml → vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_dist.yaml +25 -27
  19. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test.yaml +17 -49
  20. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_310p.yaml +4 -4
  21. vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_test_full.yaml +233 -0
  22. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_pd.yaml +1 -0
  23. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile +1 -1
  24. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.310p +1 -1
  25. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.310p.openEuler +3 -2
  26. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.a3 +1 -1
  27. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.a3.openEuler +3 -2
  28. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.openEuler +3 -2
  29. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/PKG-INFO +7 -6
  30. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/README.md +6 -5
  31. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/README.zh.md +6 -4
  32. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/ops/ben_vocabparallelembedding.py +1 -1
  33. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/run-performance-benchmarks.sh +3 -1
  34. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/serving-tests.json +2 -1
  35. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/codecov.yml +2 -4
  36. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/bgmv_expand.cpp +7 -7
  37. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/bgmv_shrink.cpp +7 -7
  38. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -5
  39. vllm_ascend-0.10.2rc1/csrc/kernels/sgmv_expand.cpp +389 -0
  40. vllm_ascend-0.10.2rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
  41. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/ops.h +36 -0
  42. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/torch_binding.cpp +121 -16
  43. vllm_ascend-0.10.2rc1/csrc/torch_binding_meta.cpp +102 -0
  44. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/utils.h +0 -12
  45. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/_templates/sections/header.html +1 -1
  46. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/contributors.md +15 -1
  47. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/versioning_policy.md +21 -1
  48. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/conf.py +5 -5
  49. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +20 -0
  50. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +19 -0
  51. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +21 -0
  52. vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +21 -0
  53. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +4 -0
  54. vllm_ascend-0.10.2rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +237 -0
  55. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/index.md +1 -0
  56. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/faqs.md +34 -8
  57. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/installation.md +2 -1
  58. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/index.md +2 -0
  59. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_node.md +11 -8
  60. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_node_kimi.md +1 -1
  61. vllm_ascend-0.10.2rc1/docs/source/tutorials/multi_node_pd_disaggregation.md +244 -0
  62. vllm_ascend-0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
  63. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_qwen3_quantization.md +4 -2
  64. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/additional_config.md +7 -0
  65. vllm_ascend-0.10.2rc1/docs/source/user_guide/feature_guide/lora.md +23 -0
  66. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/quantization.md +5 -5
  67. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/release_notes.md +447 -280
  68. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/README.md +12 -12
  69. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/gen_ranktable.py +43 -29
  70. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/gen_ranktable.sh +10 -1
  71. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +32 -3
  72. vllm_ascend-0.10.2rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
  73. vllm_ascend-0.10.2rc1/examples/external_online_dp/README.md +38 -0
  74. vllm_ascend-0.10.2rc1/examples/external_online_dp/launch_online_dp.py +97 -0
  75. vllm_ascend-0.10.2rc1/examples/external_online_dp/run_dp_template.sh +46 -0
  76. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_audio_language.py +1 -1
  77. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/pyproject.toml +0 -2
  78. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/requirements-dev.txt +1 -1
  79. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/requirements.txt +0 -2
  80. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/setup.py +1 -1
  81. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e/310p}/test_offline_inference_310p.py +1 -1
  82. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/conftest.py +25 -113
  83. vllm_ascend-0.10.2rc1/tests/e2e/model_utils.py +74 -0
  84. vllm_ascend-0.10.2rc1/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +13 -0
  85. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/conftest.py +21 -22
  86. vllm_ascend-0.10.2rc1/tests/e2e/models/report_template.md +21 -0
  87. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/test_lm_eval_correctness.py +14 -9
  88. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_data_parallel.py +1 -1
  89. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_expert_parallel.py +4 -2
  90. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_external_launcher.py +38 -0
  91. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -2
  92. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_ilama_lora_tp2.py +3 -2
  93. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_offline_inference_distributed.py +57 -117
  94. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_pipeline_parallel.py +0 -1
  95. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_prefix_caching.py +22 -20
  96. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_qwen3_moe.py +35 -6
  97. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_torchair_graph_mode.py +62 -2
  98. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_bgmv_expand.py +8 -3
  99. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +7 -2
  100. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_fused_moe.py +181 -22
  101. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_rotary_embedding.py +153 -2
  102. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +5 -1
  103. vllm_ascend-0.10.2rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +92 -0
  104. vllm_ascend-0.10.2rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +85 -0
  105. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +31 -32
  106. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_aclgraph.py +14 -33
  107. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_ascend_scheduler.py +29 -6
  108. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_camem.py +25 -14
  109. vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_chunked.py +81 -0
  110. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_embedding.py +17 -36
  111. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_guided_decoding.py +5 -4
  112. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_ilama_lora.py +3 -1
  113. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +9 -0
  114. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/quant/test_w8a8.py → vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_quantization.py +5 -12
  115. vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_sampler.py +49 -0
  116. vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_vlm.py +94 -0
  117. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_attention_mask.py +49 -72
  118. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_attention_v1.py +107 -55
  119. vllm_ascend-0.10.2rc1/tests/ut/attention/test_mla_v1.py +633 -0
  120. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/core/test_schedule_config.py +67 -24
  121. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/core/test_scheduler.py +120 -53
  122. vllm_ascend-0.10.2rc1/tests/ut/distributed/test_communicator.py +89 -0
  123. vllm_ascend-0.10.2rc1/tests/ut/distributed/test_parallel_state.py +48 -0
  124. vllm_ascend-0.10.2rc1/tests/ut/kv_connector/test_mooncake_connector.py +1038 -0
  125. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +8 -15
  126. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +10 -16
  127. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/utils.py +27 -31
  128. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_deepseek_mtp.py +16 -1
  129. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_deepseek_v2.py +23 -48
  130. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_5_vl.py +52 -0
  131. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_5_vl_without_padding.py +24 -0
  132. vllm_ascend-0.10.2rc1/tests/ut/models/test_qwen3_moe.py +98 -0
  133. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_activation.py +12 -1
  134. vllm_ascend-0.10.2rc1/tests/ut/ops/test_ascend_forwad_context.py +22 -0
  135. vllm_ascend-0.10.2rc1/tests/ut/ops/test_comm_utils.py +98 -0
  136. vllm_ascend-0.10.2rc1/tests/ut/ops/test_common_fused_moe.py +105 -0
  137. vllm_ascend-0.10.2rc1/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +218 -0
  138. vllm_ascend-0.10.2rc1/tests/ut/ops/test_fused_ops.py +842 -0
  139. vllm_ascend-0.10.2rc1/tests/ut/ops/test_layernorm.py +93 -0
  140. vllm_ascend-0.10.2rc1/tests/ut/ops/test_linear.py +105 -0
  141. vllm_ascend-0.10.2rc1/tests/ut/ops/test_moe_comm_method.py +212 -0
  142. vllm_ascend-0.10.2rc1/tests/ut/ops/test_rotary_embedding.py +378 -0
  143. vllm_ascend-0.10.2rc1/tests/ut/ops/test_token_dispatcher.py +619 -0
  144. vllm_ascend-0.10.2rc1/tests/ut/ops/test_vocab_parallel_embedding.py +240 -0
  145. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_quant_config.py +12 -23
  146. vllm_ascend-0.10.2rc1/tests/ut/quantization/test_utils.py +62 -0
  147. vllm_ascend-0.10.2rc1/tests/ut/quantization/test_w4a8_dynamic.py +177 -0
  148. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_w8a8.py +68 -44
  149. vllm_ascend-0.10.2rc1/tests/ut/quantization/test_w8a8_dynamic.py +69 -0
  150. vllm_ascend-0.10.2rc1/tests/ut/sample/logits_processor/test_builtin.py +40 -0
  151. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/sample/test_rejection_sampler.py +4 -2
  152. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_ascend_config.py +82 -3
  153. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_envs.py +8 -7
  154. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_platform.py +156 -67
  155. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_utils.py +29 -79
  156. vllm_ascend-0.10.2rc1/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +195 -0
  157. vllm_ascend-0.10.2rc1/tests/ut/torchair/models/test_torchair_deepseek_v2.py +331 -0
  158. vllm_ascend-0.10.0rc1/tests/ut/ops/test_fused_ops.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/ops/test_torchair_fused_moe.py +404 -377
  159. vllm_ascend-0.10.0rc1/tests/ut/ops/test_rotary_embedding.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +331 -314
  160. vllm_ascend-0.10.2rc1/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +176 -0
  161. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w8a8_dynamic.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +75 -75
  162. vllm_ascend-0.10.0rc1/tests/ut/attention/test_mla_v1.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/test_torchair_mla.py +266 -129
  163. vllm_ascend-0.10.2rc1/tests/ut/torchair/test_utils.py +136 -0
  164. vllm_ascend-0.10.2rc1/tests/ut/worker/test_input_batch.py +372 -0
  165. vllm_ascend-0.10.2rc1/tests/ut/worker/test_model_runner_v1.py +94 -0
  166. vllm_ascend-0.10.2rc1/tests/ut/worker/test_worker_v1.py +1152 -0
  167. vllm_ascend-0.10.2rc1/vllm_ascend/_version.py +34 -0
  168. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ascend_config.py +52 -2
  169. vllm_ascend-0.10.2rc1/vllm_ascend/ascend_forward_context.py +177 -0
  170. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/attention_mask.py +35 -46
  171. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/attention_v1.py +269 -132
  172. vllm_ascend-0.10.2rc1/vllm_ascend/attention/mla_v1.py +1052 -0
  173. vllm_ascend-0.10.2rc1/vllm_ascend/attention/utils.py +102 -0
  174. vllm_ascend-0.10.2rc1/vllm_ascend/compilation/acl_graph.py +186 -0
  175. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/schedule_config.py +14 -8
  176. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/scheduler.py +98 -14
  177. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/__init__.py +4 -0
  178. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/communicator.py +0 -21
  179. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +32 -52
  180. vllm_ascend-0.10.2rc1/vllm_ascend/distributed/mooncake_connector.py +1072 -0
  181. vllm_ascend-0.10.2rc1/vllm_ascend/distributed/parallel_state.py +144 -0
  182. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/envs.py +37 -32
  183. {vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/lora_ops.py +21 -20
  184. {vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/punica_npu.py +17 -14
  185. vllm_ascend-0.10.2rc1/vllm_ascend/lora/utils.py +77 -0
  186. vllm_ascend-0.10.2rc1/vllm_ascend/meta_registration.py +105 -0
  187. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/__init__.py +10 -13
  188. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_mtp.py +1 -1
  189. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_v2.py +85 -110
  190. vllm_ascend-0.10.2rc1/vllm_ascend/models/layers/mla.py +144 -0
  191. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_5_vl.py +63 -14
  192. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +20 -24
  193. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen3_moe.py +2 -1
  194. vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3_next.py +1361 -0
  195. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/ms_split.py +1 -1
  196. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/__init__.py +19 -10
  197. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/activation.py +2 -0
  198. vllm_ascend-0.10.2rc1/vllm_ascend/ops/casual_conv1d.py +597 -0
  199. vllm_ascend-0.10.2rc1/vllm_ascend/ops/common_fused_moe.py +444 -0
  200. vllm_ascend-0.10.2rc1/vllm_ascend/ops/fla.py +381 -0
  201. vllm_ascend-0.10.2rc1/vllm_ascend/ops/fused_moe.py +585 -0
  202. vllm_ascend-0.10.2rc1/vllm_ascend/ops/layernorm.py +116 -0
  203. vllm_ascend-0.10.2rc1/vllm_ascend/ops/linear.py +626 -0
  204. {vllm_ascend-0.10.0rc1/vllm_ascend/ops → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/comm_utils.py +52 -1
  205. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/experts_selector.py +283 -0
  206. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +240 -0
  207. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/moe_comm_method.py +298 -0
  208. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/moe_mlp.py +252 -0
  209. vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/token_dispatcher.py +726 -0
  210. vllm_ascend-0.10.2rc1/vllm_ascend/ops/register_custom_ops.py +192 -0
  211. vllm_ascend-0.10.2rc1/vllm_ascend/ops/rotary_embedding.py +355 -0
  212. vllm_ascend-0.10.2rc1/vllm_ascend/ops/sigmoid_gating.py +403 -0
  213. vllm_ascend-0.10.2rc1/vllm_ascend/ops/vocab_parallel_embedding.py +255 -0
  214. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/__init__.py +16 -11
  215. {vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_common → vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform}/__init__.py +2 -2
  216. vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/__init__.py +20 -0
  217. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +2 -2
  218. vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +97 -0
  219. vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_shared_fused_moe.py +21 -0
  220. {vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_0_10_0 → vllm_ascend-0.10.2rc1/vllm_ascend/patch/worker}/__init__.py +2 -1
  221. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +1 -1
  222. vllm_ascend-0.10.2rc1/vllm_ascend/patch/worker/patch_common/patch_logits.py +26 -0
  223. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/platform.py +124 -35
  224. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/quant_config.py +38 -34
  225. vllm_ascend-0.10.2rc1/vllm_ascend/quantization/utils.py +83 -0
  226. vllm_ascend-0.10.2rc1/vllm_ascend/quantization/w4a8_dynamic.py +394 -0
  227. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/w8a8.py +2 -122
  228. vllm_ascend-0.10.2rc1/vllm_ascend/quantization/w8a8_dynamic.py +288 -0
  229. vllm_ascend-0.10.2rc1/vllm_ascend/sample/logits_processor/__init__.py +50 -0
  230. vllm_ascend-0.10.2rc1/vllm_ascend/sample/logits_processor/builtin.py +35 -0
  231. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/sample/rejection_sampler.py +99 -48
  232. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/sample/sampler.py +11 -2
  233. vllm_ascend-0.10.0rc1/vllm_ascend/torchair/torchair_model_runner.py → vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/__init__.py +13 -9
  234. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/eagle_proposer.py +644 -0
  235. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/interface.py +51 -0
  236. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/mtp_proposer.py +630 -0
  237. vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/ngram_proposer.py +65 -0
  238. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/qwen2.py +364 -0
  239. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/qwen3_moe.py +538 -0
  240. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
  241. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1056 -0
  242. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_pyhccl.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_deepseek_v3.py +11 -12
  243. vllm_ascend-0.10.0rc1/vllm_ascend/models/pangu_moe.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_pangu_moe.py +1119 -1117
  244. vllm_ascend-0.10.0rc1/vllm_ascend/distributed/communication_op.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_activation.py +37 -25
  245. vllm_ascend-0.10.0rc1/vllm_ascend/ops/fused_moe.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_fused_moe.py +1319 -1557
  246. vllm_ascend-0.10.0rc1/vllm_ascend/ops/layernorm.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_layernorm.py +51 -86
  247. vllm_ascend-0.10.0rc1/vllm_ascend/ops/rotary_embedding.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +365 -292
  248. vllm_ascend-0.10.0rc1/vllm_ascend/quantization/w4a8_dynamic.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +439 -396
  249. vllm_ascend-0.10.0rc1/vllm_ascend/quantization/w8a8_dynamic.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1033 -1033
  250. vllm_ascend-0.10.0rc1/vllm_ascend/attention/attention_v1_torchair.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/torchair_attention.py +86 -122
  251. vllm_ascend-0.10.0rc1/vllm_ascend/attention/mla_v1.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/torchair_mla.py +224 -184
  252. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/torchair_model_runner.py +475 -0
  253. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/torchair_worker.py +4 -2
  254. vllm_ascend-0.10.2rc1/vllm_ascend/torchair/utils.py +219 -0
  255. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/utils.py +160 -48
  256. vllm_ascend-0.10.2rc1/vllm_ascend/worker/__init__.py +0 -0
  257. vllm_ascend-0.10.2rc1/vllm_ascend/worker/block_table.py +313 -0
  258. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/model_runner_v1.py +1530 -1277
  259. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/npu_input_batch.py +163 -87
  260. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/worker_v1.py +46 -30
  261. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/PKG-INFO +7 -6
  262. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/SOURCES.txt +101 -45
  263. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/requires.txt +0 -1
  264. vllm_ascend-0.10.0rc1/docs/source/user_guide/feature_guide/lora.md +0 -8
  265. vllm_ascend-0.10.0rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +0 -167
  266. vllm_ascend-0.10.0rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +0 -115
  267. vllm_ascend-0.10.0rc1/tests/e2e/model_utils.py +0 -274
  268. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_dynamic_npugraph_batchsize.py +0 -59
  269. vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_pyhccl_distributed.py +0 -121
  270. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/compile/test_simple.py +0 -118
  271. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/report_template.md +0 -24
  272. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/sample/test_rejection_sampler.py +0 -608
  273. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -90
  274. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_chunked.py +0 -67
  275. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_offline_inference.py +0 -166
  276. vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_sampler.py +0 -109
  277. vllm_ascend-0.10.0rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +0 -139
  278. vllm_ascend-0.10.0rc1/tests/ut/models/test_qwen3_moe.py +0 -46
  279. vllm_ascend-0.10.0rc1/tests/ut/ops/test_token_dispatcher.py +0 -65
  280. vllm_ascend-0.10.0rc1/tests/ut/ops/test_vocab_parallel_embedding.py +0 -299
  281. vllm_ascend-0.10.0rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +0 -167
  282. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_func_wrapper.py +0 -134
  283. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_quantizer.py +0 -145
  284. vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +0 -109
  285. vllm_ascend-0.10.0rc1/tests/ut/torchair/test_utils.py +0 -28
  286. vllm_ascend-0.10.0rc1/tests/ut/worker/test_input_batch.py +0 -161
  287. vllm_ascend-0.10.0rc1/tests/ut/worker/test_worker_v1.py +0 -1
  288. vllm_ascend-0.10.0rc1/vllm_ascend/_version.py +0 -21
  289. vllm_ascend-0.10.0rc1/vllm_ascend/ascend_forward_context.py +0 -114
  290. vllm_ascend-0.10.0rc1/vllm_ascend/compilation/piecewise_backend.py +0 -225
  291. vllm_ascend-0.10.0rc1/vllm_ascend/distributed/parallel_state.py +0 -48
  292. vllm_ascend-0.10.0rc1/vllm_ascend/distributed/tensor_parallel.py +0 -248
  293. vllm_ascend-0.10.0rc1/vllm_ascend/ops/cache.py +0 -35
  294. vllm_ascend-0.10.0rc1/vllm_ascend/ops/common_fused_moe.py +0 -115
  295. vllm_ascend-0.10.0rc1/vllm_ascend/ops/moe_dispatcher/token_dispatcher.py +0 -453
  296. vllm_ascend-0.10.0rc1/vllm_ascend/ops/vocab_parallel_embedding.py +0 -74
  297. vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/__init__.py +0 -25
  298. vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_main/__init__.py +0 -16
  299. vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/__init__.py +0 -26
  300. vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_0_10_0/patch_sampler_gather_logprobs.py +0 -87
  301. vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +0 -145
  302. vllm_ascend-0.10.0rc1/vllm_ascend/quantization/func_wrapper.py +0 -184
  303. vllm_ascend-0.10.0rc1/vllm_ascend/quantization/quantizer.py +0 -311
  304. vllm_ascend-0.10.0rc1/vllm_ascend/torchair/utils.py +0 -98
  305. vllm_ascend-0.10.0rc1/vllm_ascend/worker/eagle_proposer_v1.py +0 -384
  306. vllm_ascend-0.10.0rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -400
  307. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/Dockerfile.buildwheel +0 -0
  308. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
  309. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
  310. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
  311. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
  312. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
  313. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
  314. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
  315. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
  316. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
  317. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  318. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  319. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/dependabot.yml +0 -0
  320. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/format_pr_body.sh +0 -0
  321. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/labeler.yml +0 -0
  322. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/label_merge_conflict.yml +0 -0
  323. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/labeler.yml +0 -0
  324. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/actionlint.json +0 -0
  325. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/mypy.json +0 -0
  326. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/ruff.json +0 -0
  327. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
  328. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.gitignore +0 -0
  329. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.pre-commit-config.yaml +0 -0
  330. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.readthedocs.yaml +0 -0
  331. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/CMakeLists.txt +0 -0
  332. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/CODE_OF_CONDUCT.md +0 -0
  333. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/CONTRIBUTING.md +0 -0
  334. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/DCO +0 -0
  335. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/LICENSE +0 -0
  336. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/README.md +0 -0
  337. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/requirements-bench.txt +0 -0
  338. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
  339. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/perf_result_template.md +0 -0
  340. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/latency-tests.json +0 -0
  341. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/throughput-tests.json +0 -0
  342. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/cmake/utils.cmake +0 -0
  343. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/collect_env.py +0 -0
  344. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/camem_allocator.cpp +0 -0
  345. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
  346. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/types.h +0 -0
  347. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/utils.h +0 -0
  348. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/Makefile +0 -0
  349. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/README.md +0 -0
  350. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/requirements-docs.txt +0 -0
  351. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/requirements-test.txt +0 -0
  352. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
  353. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/assets/multi_node_dp_kimi.png +0 -0
  354. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/governance.md +0 -0
  355. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/user_stories/index.md +0 -0
  356. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
  357. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/contribution/index.md +0 -0
  358. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/contribution/testing.md +0 -0
  359. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
  360. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
  361. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
  362. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
  363. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/patch.md +0 -0
  364. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
  365. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
  366. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/index.md +0 -0
  367. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/index.md +0 -0
  368. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
  369. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
  370. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
  371. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/index.md +0 -0
  372. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
  373. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
  374. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
  375. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
  376. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
  377. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
  378. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
  379. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
  380. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
  381. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
  382. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
  383. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
  384. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
  385. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
  386. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
  387. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
  388. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
  389. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
  390. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
  391. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
  392. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
  393. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
  394. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
  395. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
  396. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
  397. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
  398. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
  399. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
  400. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
  401. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
  402. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
  403. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
  404. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
  405. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
  406. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
  407. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
  408. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
  409. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
  410. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
  411. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
  412. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
  413. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
  414. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
  415. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
  416. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
  417. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
  418. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
  419. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
  420. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
  421. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
  422. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/quick_start.md +0 -0
  423. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu.md +0 -0
  424. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_moge.md +0 -0
  425. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_quantization.md +0 -0
  426. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
  427. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_node_300i.md +0 -0
  428. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu.md +0 -0
  429. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_audio.md +0 -0
  430. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_multimodal.md +0 -0
  431. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
  432. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
  433. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/index.md +0 -0
  434. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
  435. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
  436. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
  437. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
  438. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
  439. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
  440. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
  441. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
  442. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
  443. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/eplb/eplb_deepseek.py +0 -0
  444. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/eplb/eplb_strategy.py +0 -0
  445. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_data_parallel.py +0 -0
  446. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_disaggregated_prefill_npu.py +0 -0
  447. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_dualbatch_overlap_npu.py +0 -0
  448. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_embed.py +0 -0
  449. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_external_launcher.py +0 -0
  450. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_npu.py +0 -0
  451. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_npu_tp2.py +0 -0
  452. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_sleep_mode_npu.py +0 -0
  453. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/prompt_embedding_inference.py +0 -0
  454. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/run_dp_server.sh +0 -0
  455. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/format.sh +0 -0
  456. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/mypy.ini +0 -0
  457. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/packages.txt +0 -0
  458. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/requirements-lint.txt +0 -0
  459. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/setup.cfg +0 -0
  460. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/__init__.py +0 -0
  461. /vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_offline_inference_310p.py → /vllm_ascend-0.10.2rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
  462. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/__init__.py +0 -0
  463. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/common.sh +0 -0
  464. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
  465. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
  466. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
  467. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/Qwen3-30B-A3B.yaml +0 -0
  468. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/Qwen3-8B-Base.yaml +0 -0
  469. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/accuracy.txt +0 -0
  470. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -0
  471. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
  472. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
  473. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
  474. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/prompts/example.txt +0 -0
  475. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/run_disagg_pd.sh +0 -0
  476. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/run_doctests.sh +0 -0
  477. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/__init__.py +0 -0
  478. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard/compile → vllm_ascend-0.10.2rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
  479. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
  480. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/utils.py +0 -0
  481. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard/ops → vllm_ascend-0.10.2rc1/tests/ut}/__init__.py +0 -0
  482. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/base.py +0 -0
  483. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/conftest.py +0 -0
  484. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/device_allocator/test_camem.py +0 -0
  485. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
  486. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
  487. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/fake_weight/config.json +0 -0
  488. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
  489. {vllm_ascend-0.10.0rc1/tests/e2e/singlecard/sample → vllm_ascend-0.10.2rc1/tests/ut/models}/__init__.py +0 -0
  490. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_vl.py +0 -0
  491. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_base.py +0 -0
  492. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_decorator.py +0 -0
  493. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_layers.py +0 -0
  494. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_metadata.py +0 -0
  495. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_ms_split.py +0 -0
  496. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/expert_map.json +0 -0
  497. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_expert_load_balancer.py +0 -0
  498. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
  499. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
  500. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/sample/test_sampler.py +0 -0
  501. {vllm_ascend-0.10.0rc1/tests/ut → vllm_ascend-0.10.2rc1/tests/ut/torchair}/__init__.py +0 -0
  502. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/actionlint.sh +0 -0
  503. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/check_python_src_init.py +0 -0
  504. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/check_repo.sh +0 -0
  505. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/enforce_regex_import.py +0 -0
  506. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/mypy.sh +0 -0
  507. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/png-lint.sh +0 -0
  508. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/shellcheck.sh +0 -0
  509. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/sphinx-lint.sh +0 -0
  510. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/typos.toml +0 -0
  511. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/__init__.py +0 -0
  512. {vllm_ascend-0.10.0rc1/tests/ut/models → vllm_ascend-0.10.2rc1/vllm_ascend/attention}/__init__.py +0 -0
  513. {vllm_ascend-0.10.0rc1/tests/ut/torchair → vllm_ascend-0.10.2rc1/vllm_ascend/compilation}/__init__.py +0 -0
  514. {vllm_ascend-0.10.0rc1/vllm_ascend/attention → vllm_ascend-0.10.2rc1/vllm_ascend/core}/__init__.py +0 -0
  515. {vllm_ascend-0.10.0rc1/vllm_ascend/compilation → vllm_ascend-0.10.2rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
  516. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/device_allocator/camem.py +0 -0
  517. {vllm_ascend-0.10.0rc1/vllm_ascend/core → vllm_ascend-0.10.2rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
  518. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
  519. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
  520. {vllm_ascend-0.10.0rc1/vllm_ascend/device_allocator → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/__init__.py +0 -0
  521. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_dbo.py +0 -0
  522. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_v3.py +0 -0
  523. {vllm_ascend-0.10.0rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.10.2rc1/vllm_ascend/models/layers}/__init__.py +0 -0
  524. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_vl.py +0 -0
  525. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen3.py +0 -0
  526. {vllm_ascend-0.10.0rc1/vllm_ascend/lora → vllm_ascend-0.10.2rc1/vllm_ascend/multistream}/__init__.py +0 -0
  527. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/base.py +0 -0
  528. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/context.py +0 -0
  529. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/decorator.py +0 -0
  530. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/layers.py +0 -0
  531. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/metadata.py +0 -0
  532. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/attention.py +0 -0
  533. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
  534. {vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/__init__.py +0 -0
  535. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/sequence_parallel.py +0 -0
  536. {vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_0_10_0 → vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
  537. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
  538. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
  539. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
  540. {vllm_ascend-0.10.0rc1/vllm_ascend/multistream → vllm_ascend-0.10.2rc1/vllm_ascend/quantization}/__init__.py +0 -0
  541. {vllm_ascend-0.10.0rc1/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.10.2rc1/vllm_ascend/sample}/__init__.py +0 -0
  542. {vllm_ascend-0.10.0rc1/vllm_ascend/quantization → vllm_ascend-0.10.2rc1/vllm_ascend/torchair}/__init__.py +0 -0
  543. {vllm_ascend-0.10.0rc1/vllm_ascend/sample → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models}/__init__.py +0 -0
  544. {vllm_ascend-0.10.0rc1/vllm_ascend/torchair → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops}/__init__.py +0 -0
  545. {vllm_ascend-0.10.0rc1/vllm_ascend/worker → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization}/__init__.py +0 -0
  546. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
  547. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
  548. {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ # https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
2
+ have_fun: false # Just review the code
3
+ code_review:
4
+ comment_severity_threshold: HIGH # Reduce quantity of comments
5
+ pull_request_opened:
6
+ summary: false # Don't summarize the PR in a separate comment
@@ -40,7 +40,7 @@ body:
40
40
  attributes:
41
41
  label: Any Other Things.
42
42
  description: >
43
- Any other things you would like to mention.
43
+ Any other things you would like to mention, such as feature branch request.
44
44
  validations:
45
45
  required: false
46
46
  - type: markdown
@@ -30,6 +30,8 @@ body:
30
30
 
31
31
  - [ ] Add release note to docs/source/user_guide/release_notes.md
32
32
 
33
+ - [ ] Update release version in README.md and README.zh.md
34
+
33
35
  - [ ] Update version info in docs/source/community/versioning_policy.md
34
36
 
35
37
  - [ ] Update contributor info in docs/source/community/contributors.md
@@ -11,3 +11,10 @@ self-hosted-runner:
11
11
  - linux-aarch64-310p-2
12
12
  - linux-aarch64-310p-4
13
13
  - ubuntu-24.04-arm
14
+ - linux-aarch64-a3-1
15
+ - linux-aarch64-a3-2
16
+ - linux-aarch64-a3-4
17
+ - linux-aarch64-a3-8
18
+ - linux-amd64-cpu-0
19
+ - linux-amd64-cpu-8
20
+ - linux-amd64-cpu-16
@@ -70,6 +70,8 @@ jobs:
70
70
  runner: linux-aarch64-a2-1
71
71
  - model_name: Qwen3-30B-A3B
72
72
  runner: linux-aarch64-a2-2
73
+ - model_name: DeepSeek-V2-Lite
74
+ runner: linux-aarch64-a2-2
73
75
  fail-fast: false
74
76
 
75
77
  name: ${{ matrix.model_name }} accuracy
@@ -110,7 +112,7 @@ jobs:
110
112
  uses: actions/checkout@v4
111
113
  with:
112
114
  repository: vllm-project/vllm
113
- ref: v0.10.0
115
+ ref: v0.10.2
114
116
  path: ./vllm-empty
115
117
 
116
118
  - name: Install vllm-project/vllm from source
@@ -200,9 +202,8 @@ jobs:
200
202
  markdown_name="${model_base_name}"
201
203
  echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
202
204
  mkdir -p ./benchmarks/accuracy
203
- pytest -sv ./tests/e2e/singlecard/models/test_lm_eval_correctness.py \
204
- --config ./tests/e2e/singlecard/models/configs/${{ matrix.model_name }}.yaml \
205
- --report_output ./benchmarks/accuracy/${model_base_name}.md
205
+ pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
206
+ --config ./tests/e2e/models/configs/${{ matrix.model_name }}.yaml
206
207
 
207
208
  - name: Generate step summary
208
209
  if: ${{ always() }}
@@ -225,14 +226,14 @@ jobs:
225
226
 
226
227
  outputs:
227
228
  model_name: ${{ steps.set_output.outputs.model_name }}
228
-
229
+ vllm_ascend_version: ${{ env.GHA_VLLM_ASCEND_VERSION }}
230
+
229
231
  create_pr:
230
232
  runs-on: ubuntu-latest
231
233
  needs: accuracy_tests
232
234
  if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
233
235
  env:
234
236
  UPSTREAM_REPO: vllm-project/vllm-ascend
235
-
236
237
  steps:
237
238
  - name: Checkout repository
238
239
  uses: actions/checkout@v4
@@ -257,10 +258,10 @@ jobs:
257
258
  TIMESTAMP=$(date +%Y%m%d%H%M%S)
258
259
  BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
259
260
  echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
260
- git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
261
+ git checkout -B "${BRANCH_NAME}" upstream/main
261
262
 
262
263
  - name: Download only current run reports
263
- uses: actions/download-artifact@v4
264
+ uses: actions/download-artifact@v5
264
265
  with:
265
266
  path: ./docs/source/developer_guide/evaluation/accuracy_report
266
267
  pattern: report-*
@@ -298,11 +299,11 @@ jobs:
298
299
  GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
299
300
  run: |
300
301
  git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
301
- git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
302
+ git commit -s -m "[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}"
302
303
  git push -f origin "${{ env.BRANCH_NAME }}"
303
304
 
304
305
  - name: Create PR in upstream via API
305
- uses: actions/github-script@v7
306
+ uses: actions/github-script@v8
306
307
  with:
307
308
  github-token: ${{ secrets.PAT_TOKEN }}
308
309
  script: |
@@ -310,9 +311,9 @@ jobs:
310
311
  owner: 'vllm-project',
311
312
  repo: 'vllm-ascend',
312
313
  head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
313
- base: '${{ github.event.inputs.vllm-ascend-version }}',
314
- title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
315
- body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)
314
+ base: 'main',
315
+ title: `[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}`,
316
+ body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base, DeepSeek-V2-Lite)
316
317
 
317
318
  - [Workflow run][1]
318
319
 
@@ -46,7 +46,7 @@ jobs:
46
46
  echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
47
47
 
48
48
  - name: Checkout repository
49
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
49
+ uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
50
50
 
51
51
  - name: Set up Python
52
52
  uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -43,17 +44,28 @@ on:
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
45
46
 
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
46
52
  jobs:
47
53
  build:
48
54
  name: vllm-ascend image build
55
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
56
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
57
  runs-on: >-
50
58
  ${{
51
59
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
52
60
  'ubuntu-latest' ||
53
61
  'ubuntu-24.04-arm'
54
62
  }}
63
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
55
64
  steps:
56
65
  - uses: actions/checkout@v4
66
+ with:
67
+ fetch-depth: 0
68
+ persist-credentials: false
57
69
 
58
70
  - name: Print
59
71
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -43,17 +44,28 @@ on:
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
45
46
 
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
46
52
  jobs:
47
53
  build:
48
54
  name: vllm-ascend image build
55
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
56
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
57
  runs-on: >-
50
58
  ${{
51
59
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
52
60
  'ubuntu-latest' ||
53
61
  'ubuntu-24.04-arm'
54
62
  }}
63
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
55
64
  steps:
56
65
  - uses: actions/checkout@v4
66
+ with:
67
+ fetch-depth: 0
68
+ persist-credentials: false
57
69
 
58
70
  - name: Print
59
71
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -24,6 +24,7 @@ on:
24
24
  - 'cmake/**'
25
25
  - 'CMakeLists.txt'
26
26
  - 'csrc/**'
27
+ types: [ labeled ]
27
28
  push:
28
29
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
29
30
  branches:
@@ -42,17 +43,28 @@ on:
42
43
  - 'CMakeLists.txt'
43
44
  - 'csrc/**'
44
45
 
46
+ # only cancel in-progress runs of the same workflow
47
+ concurrency:
48
+ group: ${{ github.workflow }}-${{ github.ref }}
49
+ cancel-in-progress: true
50
+
45
51
  jobs:
46
52
  build:
47
53
  name: vllm-ascend image build
54
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
55
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
48
56
  runs-on: >-
49
57
  ${{
50
58
  github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
51
59
  'ubuntu-latest' ||
52
60
  'ubuntu-24.04-arm'
53
61
  }}
62
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
54
63
  steps:
55
64
  - uses: actions/checkout@v4
65
+ with:
66
+ fetch-depth: 0
67
+ persist-credentials: false
56
68
 
57
69
  - name: Print
58
70
  run: |
@@ -25,6 +25,7 @@ on:
25
25
  - 'cmake/**'
26
26
  - 'CMakeLists.txt'
27
27
  - 'csrc/**'
28
+ types: [ labeled ]
28
29
  push:
29
30
  # Publish image when tagging, the Dockerfile in tag will be build as tag image
30
31
  branches:
@@ -42,14 +43,25 @@ on:
42
43
  - 'cmake/**'
43
44
  - 'CMakeLists.txt'
44
45
  - 'csrc/**'
46
+
47
+ # only cancel in-progress runs of the same workflow
48
+ concurrency:
49
+ group: ${{ github.workflow }}-${{ github.ref }}
50
+ cancel-in-progress: true
51
+
45
52
  jobs:
46
53
 
47
54
  build:
48
55
  name: vllm-ascend image build
56
+ # Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
57
+ # Push event or PR with both 'ready' and 'ready-for-test' labels
49
58
  runs-on: ubuntu-latest
50
-
59
+ if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
51
60
  steps:
52
61
  - uses: actions/checkout@v4
62
+ with:
63
+ fetch-depth: 0
64
+ persist-credentials: false
53
65
 
54
66
  - name: Print
55
67
  run: |
@@ -51,7 +51,7 @@ jobs:
51
51
  strategy:
52
52
  matrix:
53
53
  include:
54
- - vllm_branch: v0.10.0
54
+ - vllm_branch: v0.10.2
55
55
  vllm_ascend_branch: main
56
56
  vllm_use_v1: 1
57
57
  max-parallel: 1
@@ -35,3 +35,4 @@ jobs:
35
35
  SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
36
36
  with:
37
37
  extra_args: --all-files --hook-stage manual
38
+
@@ -43,7 +43,7 @@ jobs:
43
43
  matrix:
44
44
  python-version: ["3.11"]
45
45
  steps:
46
- - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
46
+ - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
47
47
 
48
48
  - name: Print
49
49
  run: |
@@ -52,7 +52,7 @@ jobs:
52
52
  ) }}
53
53
  runs-on: ${{ matrix.os }}
54
54
  steps:
55
- - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
55
+ - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
56
56
 
57
57
  - name: Print
58
58
  run: |
@@ -9,7 +9,7 @@ jobs:
9
9
  runs-on: ubuntu-latest
10
10
  steps:
11
11
  - name: Remind to run full CI on PR
12
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
12
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
13
13
  with:
14
14
  script: |
15
15
  github.rest.issues.createComment({
@@ -1,6 +1,5 @@
1
1
  #
2
2
  # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
- # This file is a part of the vllm-ascend project.
4
3
  #
5
4
  # Licensed under the Apache License, Version 2.0 (the "License");
6
5
  # you may not use this file except in compliance with the License.
@@ -13,13 +12,14 @@
13
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
13
  # See the License for the specific language governing permissions and
15
14
  # limitations under the License.
15
+ # This file is a part of the vllm-ascend project.
16
16
  #
17
- name: 'e2e test / long-term-test'
17
+
18
+ name: 'e2e test / a3-test'
18
19
 
19
20
  on:
20
- schedule:
21
- # Runs at 23:00 UTC (7:00 AM Beijing) every day
22
- - cron: '0 23 * * *'
21
+ workflow_call:
22
+
23
23
  pull_request:
24
24
  types: [ labeled ]
25
25
 
@@ -31,26 +31,25 @@ defaults:
31
31
  shell: bash -el {0}
32
32
 
33
33
  # only cancel in-progress runs of the same workflow
34
+ # and ignore the lint / 8 cards test type
34
35
  concurrency:
35
36
  group: ${{ github.workflow }}-${{ github.ref }}
36
37
  cancel-in-progress: true
37
38
 
38
39
  jobs:
39
- long-term-test:
40
- # long-term-test will be triggered when tag 'long-term-test' & 'ready-for-test' or schedule job
41
- if: ${{ contains(github.event.pull_request.labels.*.name, 'long-term-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
40
+ e2e:
41
+ # only trigger e2e test after lint passed and the change is e2e related with pull request.
42
+ if: ${{ contains(github.event.pull_request.labels.*.name, 'dist-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'workflow_dispatch' }}
42
43
  strategy:
43
- max-parallel: 2
44
44
  matrix:
45
- os: [linux-aarch64-a2-1, linux-aarch64-a2-2]
46
- vllm_version: [main, v0.10.0]
47
- name: vLLM Ascend long term test
45
+ os: [linux-aarch64-a3-8]
46
+ vllm_version: [v0.10.2]
47
+ name: vLLM Ascend test
48
48
  runs-on: ${{ matrix.os }}
49
49
  container:
50
- image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
50
+ image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
51
51
  env:
52
- VLLM_LOGGING_LEVEL: ERROR
53
- VLLM_USE_MODELSCOPE: True
52
+ DEBIAN_FRONTEND: noninteractive
54
53
  steps:
55
54
  - name: Check npu and CANN info
56
55
  run: |
@@ -59,11 +58,11 @@ jobs:
59
58
 
60
59
  - name: Config mirrors
61
60
  run: |
62
- sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
63
- pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
64
- pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
61
+ sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
62
+ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
65
63
  apt-get update -y
66
64
  apt install git -y
65
+ git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
67
66
 
68
67
  - name: Checkout vllm-project/vllm-ascend repo
69
68
  uses: actions/checkout@v4
@@ -86,17 +85,16 @@ jobs:
86
85
  VLLM_TARGET_DEVICE=empty pip install -e .
87
86
 
88
87
  - name: Install vllm-project/vllm-ascend
89
- env:
90
- PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
91
88
  run: |
89
+ export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
90
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
92
91
  pip install -r requirements-dev.txt
93
92
  pip install -v -e .
94
93
 
95
- - name: Run vllm-project/vllm-ascend long term test
94
+ - name: Run vllm-project/vllm-ascend test for V1 Engine
95
+ env:
96
+ VLLM_WORKER_MULTIPROC_METHOD: spawn
97
+ VLLM_USE_MODELSCOPE: True
96
98
  run: |
97
- if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
98
- pytest -sv tests/e2e/long_term/accuracy/accuracy_singlecard.py
99
- else
100
- # accuracy test multi card
101
- pytest -sv tests/e2e/long_term/accuracy/accuracy_multicard.py
102
- fi
99
+ # TODO: enable more tests
100
+ pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
@@ -25,7 +25,6 @@ on:
25
25
  branches:
26
26
  - 'main'
27
27
  - '*-dev'
28
-
29
28
  # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
30
29
  # declared as "shell: bash -el {0}" on steps that need to be properly activated.
31
30
  # It's used to activate ascend-toolkit environment variables.
@@ -44,6 +43,7 @@ jobs:
44
43
  uses: ./.github/workflows/pre-commit.yml
45
44
 
46
45
  changes:
46
+ if: github.event_name == 'pull_request'
47
47
  runs-on: ubuntu-latest
48
48
  outputs:
49
49
  e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
@@ -68,6 +68,7 @@ jobs:
68
68
  - 'packages.txt'
69
69
  ut_tracker:
70
70
  - 'tests/ut/**'
71
+
71
72
  ut:
72
73
  needs: [lint, changes]
73
74
  name: unit test
@@ -81,7 +82,7 @@ jobs:
81
82
  VLLM_USE_MODELSCOPE: True
82
83
  strategy:
83
84
  matrix:
84
- vllm_version: [main, v0.10.0]
85
+ vllm_version: [v0.10.2]
85
86
  steps:
86
87
  - name: Install packages
87
88
  run: |
@@ -117,7 +118,7 @@ jobs:
117
118
  TORCH_DEVICE_BACKEND_AUTOLOAD: 0
118
119
  run: |
119
120
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
120
- pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut
121
+ pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut --ignore=tests/ut/test_platform.py --ignore=tests/ut/ops/test_vocab_parallel_embedding.py
121
122
 
122
123
  - name: Upload coverage to Codecov
123
124
  if: ${{ matrix.vllm_version == 'main' }}
@@ -129,16 +130,16 @@ jobs:
129
130
  name: vllm-ascend
130
131
  verbose: true
131
132
 
132
- e2e:
133
+ e2e-light:
133
134
  needs: [lint, changes]
134
135
  # only trigger e2e test after lint passed and the change is e2e related with pull request.
135
- if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
136
+ if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
136
137
  strategy:
137
138
  max-parallel: 2
138
139
  matrix:
139
140
  os: [linux-aarch64-a2-1]
140
- vllm_version: [main, v0.10.0]
141
- name: singlecard e2e test
141
+ vllm_version: [v0.10.2]
142
+ name: singlecard e2e test - light
142
143
  runs-on: ${{ matrix.os }}
143
144
  container:
144
145
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -191,37 +192,19 @@ jobs:
191
192
  VLLM_WORKER_MULTIPROC_METHOD: spawn
192
193
  VLLM_USE_MODELSCOPE: True
193
194
  run: |
194
- pytest -sv tests/e2e/singlecard/test_offline_inference.py
195
- pytest -sv tests/e2e/singlecard/test_ilama_lora.py
196
- pytest -sv tests/e2e/singlecard/test_guided_decoding.py
197
- pytest -sv tests/e2e/singlecard/test_camem.py
198
- pytest -sv tests/e2e/singlecard/test_embedding.py
199
-
200
- # ------------------------------------ v1 spec decode test ------------------------------------ #
201
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
202
- # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
203
- pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
195
+ pytest -sv tests/e2e/singlecard/test_aclgraph.py
196
+ pytest -sv tests/e2e/singlecard/test_quantization.py
197
+ pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
204
198
 
205
- # All other tests, ignore: 310p test, accuracy test.
206
- pytest -sv tests/e2e/singlecard/ \
207
- --ignore=tests/e2e/singlecard/test_offline_inference.py \
208
- --ignore=tests/e2e/singlecard/test_ilama_lora.py \
209
- --ignore=tests/e2e/singlecard/test_guided_decoding.py \
210
- --ignore=tests/e2e/singlecard/test_camem.py \
211
- --ignore=tests/e2e/singlecard/test_embedding.py \
212
- --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
213
- --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py \
214
- --ignore=tests/e2e/singlecard/test_offline_inference_310p.py \
215
- --ignore=tests/e2e/singlecard/models/test_lm_eval_correctness.py
216
- e2e-2-cards:
217
- needs: [e2e]
218
- if: ${{ needs.e2e.result == 'success' }}
199
+ e2e-2-cards-light:
200
+ needs: [e2e-light]
201
+ if: ${{ needs.e2e-light.result == 'success' }}
219
202
  strategy:
220
203
  max-parallel: 2
221
204
  matrix:
222
205
  os: [linux-aarch64-a2-2]
223
- vllm_version: [main, v0.10.0]
224
- name: multicard e2e test
206
+ vllm_version: [v0.10.2]
207
+ name: multicard e2e test - light
225
208
  runs-on: ${{ matrix.os }}
226
209
  container:
227
210
  image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -274,19 +257,4 @@ jobs:
274
257
  VLLM_WORKER_MULTIPROC_METHOD: spawn
275
258
  VLLM_USE_MODELSCOPE: True
276
259
  run: |
277
- pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
278
- # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
279
- # To avoid oom, we need to run the test in a single process.
280
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
281
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
282
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
283
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
284
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_alltoallv
285
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
286
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
287
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
288
- pytest -sv tests/e2e/multicard/test_data_parallel.py
289
- pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
290
- --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
291
- --ignore=tests/e2e/multicard/test_data_parallel.py \
292
- --ignore=tests/e2e/multicard/test_offline_inference_310p.py
260
+ pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
@@ -53,7 +53,7 @@ jobs:
53
53
  max-parallel: 2
54
54
  matrix:
55
55
  os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
56
- vllm_version: [main, v0.10.0]
56
+ vllm_version: [v0.10.2]
57
57
  name: 310p e2e test
58
58
  runs-on: ${{ matrix.os }}
59
59
  container:
@@ -111,7 +111,7 @@ jobs:
111
111
  PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
112
112
  run: |
113
113
  if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
114
- pytest -sv tests/e2e/singlecard/test_offline_inference_310p.py
114
+ pytest -sv tests/e2e/310p/test_offline_inference_310p.py
115
115
  else
116
- pytest -sv tests/e2e/multicard/test_offline_inference_310p.py
117
- fi
116
+ pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
117
+ fi