vllm-ascend 0.11.0rc1__tar.gz → 0.11.0rc2__tar.gz
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/_accuracy_test.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/_e2e_test.yaml +2 -2
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/accuracy_test.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/multi_node_test.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/nightly_benchmarks.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/release_whl.yml +7 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_dist.yaml +2 -2
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_310p.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_full.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_models.yaml +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_pd.yaml +2 -2
- vllm_ascend-0.11.0rc1/Dockerfile.a3 → vllm_ascend-0.11.0rc2/Dockerfile +3 -3
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/Dockerfile.310p +2 -2
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/Dockerfile.310p.openEuler +2 -2
- vllm_ascend-0.11.0rc1/Dockerfile → vllm_ascend-0.11.0rc2/Dockerfile.a3 +2 -2
- vllm_ascend-0.11.0rc1/Dockerfile.openEuler → vllm_ascend-0.11.0rc2/Dockerfile.a3.openEuler +2 -2
- vllm_ascend-0.11.0rc1/Dockerfile.a3.openEuler → vllm_ascend-0.11.0rc2/Dockerfile.openEuler +3 -3
- {vllm_ascend-0.11.0rc1/vllm_ascend.egg-info → vllm_ascend-0.11.0rc2}/PKG-INFO +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/conf.py +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/installation.md +10 -10
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_npu_qwen3_next.md +2 -2
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +7 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +10 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +9 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/requirements-dev.txt +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/scripts/lws.yaml +2 -2
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +3 -4
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/vllm_interface/vllm_test.cfg +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_rotary_embedding.py +9 -2
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/_version.py +3 -3
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/attention/attention_v1.py +9 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/attention/mla_v1.py +2 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/compilation/acl_graph.py +20 -21
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/transfer_engine.py +10 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/envs.py +0 -5
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/common_fused_moe.py +8 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/token_dispatcher.py +4 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/rotary_embedding.py +5 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_mamba_config.py +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/platform.py +14 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/quant_config.py +5 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/spec_decode/ngram_proposer.py +17 -14
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/utils.py +57 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/worker/model_runner_v1.py +5 -6
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2/vllm_ascend.egg-info}/PKG-INFO +1 -1
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/SOURCES.txt +0 -2
- vllm_ascend-0.11.0rc1/.github/workflows/_e2e_nightly.yaml +0 -115
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_nightly.yaml +0 -105
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.gemini/config.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/Dockerfile.buildwheel +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/actionlint.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/format_pr_body.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/labeler.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/format_pr_body.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/image_310p_openeuler.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/image_310p_ubuntu.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/image_a3_openeuler.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/image_a3_ubuntu.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/image_openeuler.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/image_ubuntu.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/label_merge_conflict.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/labeler.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/matchers/ruff.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/pre-commit.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/release_code.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/reminder_comment.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.gitignore +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.pre-commit-config.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/CMakeLists.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/CODE_OF_CONDUCT.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/CONTRIBUTING.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/DCO +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/LICENSE +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/README.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/README.zh.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/README.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/requirements-bench.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/scripts/perf_result_template.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/tests/latency-tests.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/tests/serving-tests.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/benchmarks/tests/throughput-tests.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/codecov.yml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/collect_env.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/bgmv_expand.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/bgmv_shrink.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/sgmv_expand.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/sgmv_shrink.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/kernels/utils.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/ops.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/torch_binding.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/torch_binding_meta.cpp +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/csrc/utils.h +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/Makefile +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/README.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/requirements-docs.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/requirements-test.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/_templates/sections/header.html +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/assets/multi_node_dp_kimi.png +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/community/contributors.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/community/governance.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/community/user_stories/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/community/user_stories/llamafactory.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/community/versioning_policy.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/contribution/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/contribution/testing.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/feature_guide/patch.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/modeling/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/performance/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/faqs.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/quick_start.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi-node_dsv3.2.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_node.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_node_kimi.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_node_qwen3vl.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_node_ray.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_npu.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_npu_moge.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_npu_quantization.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/single_node_300i.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/single_npu.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/single_npu_audio.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/single_npu_multimodal.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/configuration/additional_config.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/configuration/env_vars.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/configuration/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/lora.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/quantization.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/release_notes.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/support_matrix/index.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/README.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/gen_ranktable.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/gen_ranktable.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/eplb/eplb_deepseek.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/eplb/eplb_strategy.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/external_online_dp/README.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/external_online_dp/launch_online_dp.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/external_online_dp/run_dp_template.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_data_parallel.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_disaggregated_prefill_npu.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_dualbatch_overlap_npu.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_embed.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_external_launcher.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_inference_audio_language.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_inference_npu.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_inference_npu_tp2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_inference_sleep_mode_npu.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/offline_weight_load.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/prompt_embedding_inference.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/examples/run_dp_server.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/format.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/mypy.ini +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/packages.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/pyproject.toml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/requirements-lint.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/requirements.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/setup.cfg +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/setup.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/common.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/conftest.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/model_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/configs/accuracy.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/conftest.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/report_template.md +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_data_parallel.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_expert_parallel.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_external_launcher.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_full_graph_mode.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_offline_inference_distributed.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_prefix_caching.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_qwen3_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_single_request_aclgraph.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_torchair_graph_mode.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/multicard/test_weight_loader.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/models/test_qwen3_32b.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/models/test_qwen3_32b_int8.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/config/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/config/multi_node_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/config/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/scripts/run.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/test_multi_node.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/prompts/example.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/run_disagg_pd.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/run_doctests.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_bgmv_expand.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_fused_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_mla_preprocess.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_aclgraph.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_ascend_scheduler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_bge_model.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_camem.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_chunked.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_embedding_aclgraph.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_quantization.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_sampler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/singlecard/test_vlm.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/attention/test_attention_mask.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/attention/test_attention_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/attention/test_mla_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/base.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/compilation/test_acl_graph.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/conftest.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/core/test_schedule_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/core/test_scheduler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/device_allocator/test_camem.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/test_communicator.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/distributed/test_parallel_state.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/eplb/core/test_eplb_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/fake_weight/config.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_mooncake_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/kv_connector/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/models/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/models/conftest.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/models/test_qwen2_5_vl.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/models/test_qwen2_vl.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_base.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_decorator.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_layers.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_metadata.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/multistream/test_ms_split.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/expert_map.json +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_activation.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_comm_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_common_fused_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_expert_load_balancer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_fused_ops.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_layernorm.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_linear.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_moe_comm_method.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_token_dispatcher.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_quant_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_w4a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_w8a8.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/sample/test_rejection_sampler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/sample/test_sampler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/test_ascend_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/test_envs.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/test_platform.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/test_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/ops/test_torchair_fused_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/test_torchair_attention.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/test_torchair_mla.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/torchair/test_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/worker/test_input_batch.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/worker/test_model_runner_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/ut/worker/test_worker_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/aisbench.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/check_python_src_init.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/enforce_regex_import.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/mypy.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/send_mm_request.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/shellcheck.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/typos.toml +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ascend_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ascend_forward_context.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/attention/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/attention/attention_mask.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/attention/sfa_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/attention/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/compilation/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/core/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/core/recompute_schedule_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/core/recompute_scheduler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/core/schedule_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/core/scheduler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/cpu_binding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/device_allocator/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/device_allocator/camem.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/cpu_offload_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/cpu_offload_manager/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/config_data.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/kv_transfer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/mooncake_engine.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/mooncake_store.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/mooncake_layerwise_connector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/parallel_state.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/distributed/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/adaptor/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/eplb_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/eplb_worker.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/eplb_updator.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/eplb/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/lora_ops.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/punica_npu.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/lora/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/meta_registration.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/deepseek_v3_2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/layers/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/layers/mla.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/layers/sfa.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/qwen2_5_omni_thinker.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/qwen2_5_vl.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/qwen2_vl.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/models/qwen3_next.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/base.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/context.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/decorator.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/layers.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/metadata.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/multistream/ms_split.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/activation.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/attention.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/casual_conv1d.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/expert_load_balancer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/fla.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/layernorm.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/linear.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/linear_op.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/comm_utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/experts_selector.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/moe_comm_method.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/moe/moe_mlp.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/register_custom_ops.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/sigmoid_gating.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/ops/weight_prefetch.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_config.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_core.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_message_queue.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_attention_layer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_deepseek_mtp.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_logits.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_roberta.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_triton.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/patch/worker/patch_weight_loader.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/w4a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/w8a8.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/quantization/w8a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/rejection_sampler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/sample/sampler.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/spec_decode/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/spec_decode/eagle_proposer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/spec_decode/interface.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/spec_decode/mtp_proposer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/qwen2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/qwen3_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/models/torchair_pangu_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/sequence_parallel.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/shared_weight_layer.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_activation.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_fused_moe.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_layernorm.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/quantization/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/torchair_attention.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/torchair_mla.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/torchair_model_runner.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/torchair_sfa.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/torchair_worker.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/torchair/utils.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/worker/__init__.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/worker/block_table.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/worker/npu_input_batch.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend/worker/worker_v1.py +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/requires.txt +0 -0
- {vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/vllm_ascend.egg-info/top_level.txt +0 -0
@@ -30,7 +30,7 @@ jobs:
     runs-on: ${{ inputs.runner }}
     name: ${{ inputs.model_name }} accuracy
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
     env:
       VLLM_USE_MODELSCOPE: True
       # 1. If version specified (work_dispatch), do specified branch accuracy test
@@ -106,8 +106,8 @@ jobs:
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          # Fix me: OOM error
-
+          # Fix me: test_eagle_correctness OOM error
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

           pytest -sv tests/e2e/singlecard/ops/

@@ -68,5 +68,5 @@ jobs:
     with:
       vllm: v0.11.0
       runner: linux-aarch64-${{ matrix.runner }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       model_name: ${{ matrix.model_name }}
@@ -23,7 +23,7 @@ jobs:
     # This is a runner with no NPU for k8s controller
     runs-on: linux-aarch64-a3-0
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
     env:
       KUBECONFIG: /tmp/kubeconfig
       KUBECTL: /root/.cache/.kube/kubectl
@@ -56,7 +56,7 @@ jobs:
       vllm_use_v1: 1
       max-parallel: 1
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
     volumes:
       - /usr/local/dcmi:/usr/local/dcmi
       - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -57,7 +57,13 @@ jobs:
       - name: Print
         run: |
           lscpu
-
+
+      - name: Free up disk space
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+        with:
+          tool-cache: true
+          docker-images: false
+
       - name: Build wheel
         run: |
           ls
@@ -47,7 +47,7 @@ jobs:
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
     env:
       DEBIAN_FRONTEND: noninteractive
     steps:
@@ -97,4 +97,4 @@ jobs:
           VLLM_USE_MODELSCOPE: True
         run: |
           # TODO: enable more tests
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
@@ -145,5 +145,5 @@ jobs:
     with:
       vllm: ${{ matrix.vllm_version }}
       runner: linux-aarch64-a2
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: light
{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_310p.yaml
RENAMED
@@ -58,7 +58,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
     env:
       VLLM_LOGGING_LEVEL: ERROR
       VLLM_USE_MODELSCOPE: True
{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_full.yaml
RENAMED
@@ -76,5 +76,5 @@ jobs:
     with:
       vllm: ${{ matrix.vllm_version }}
       runner: linux-aarch64-a2
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: full
@@ -41,5 +41,5 @@ jobs:
     with:
       vllm: main
       runner: linux-aarch64-a2
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: full
{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/.github/workflows/vllm_ascend_test_models.yaml
RENAMED
@@ -79,7 +79,7 @@ jobs:
     with:
       vllm: v0.11.0
       runner: linux-aarch64-${{ matrix.runner }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       model_name: ${{ matrix.model_name }}
       upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}

@@ -49,7 +49,7 @@ jobs:
     runs-on: linux-arm64-npu-static-8

     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       volumes:
         - /usr/local/dcmi:/usr/local/dcmi
         - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -109,4 +109,4 @@ jobs:
       - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
         run: |
          git config --global --add safe.directory/__w/vllm-ascend/vllm-ascend
-          bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
+          bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.3.
+FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -40,7 +40,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

@@ -57,4 +57,4 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
 RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
     python3 -m pip cache purge

-CMD ["/bin/bash"]
+CMD ["/bin/bash"]
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.3.
+FROM quay.io/ascend/cann:8.3.rc2-310p-ubuntu22.04-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -40,7 +40,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.3.
+FROM quay.io/ascend/cann:8.3.rc2-310p-openeuler24.03-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -38,7 +38,7 @@ ARG VLLM_TAG=v0.11.0

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.3.
+FROM quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -40,7 +40,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.3.
+FROM quay.io/ascend/cann:8.3.rc2-a3-openeuler24.03-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -38,7 +38,7 @@ ARG VLLM_TAG=v0.11.0

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.3.
+FROM quay.io/ascend/cann:8.3.rc2-910b-openeuler24.03-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -38,7 +38,7 @@ ARG VLLM_TAG=v0.11.0

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

@@ -55,4 +55,4 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
 RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
     python3 -m pip cache purge

-CMD ["/bin/bash"]
+CMD ["/bin/bash"]
@@ -75,7 +75,7 @@ myst_substitutions = {
     'pip_vllm_ascend_version': "0.11.0rc0",
     'pip_vllm_version': "0.11.0",
     # CANN image tag
-    'cann_image_tag': "8.3.
+    'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11",
     # vllm version in ci
     'ci_vllm_version': 'v0.11.0rc3',
 }
@@ -79,19 +79,19 @@ source vllm-ascend-env/bin/activate
 pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions

 # Download and install the CANN package.
-wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.
-chmod +x ./Ascend-cann-toolkit_8.3.
-./Ascend-cann-toolkit_8.3.
-# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.
+wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run
+./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run --full
+# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc2_linux-aarch64.run

 source /usr/local/Ascend/ascend-toolkit/set_env.sh
-wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.
-chmod +x ./Ascend-cann-kernels-910b_8.3.
-./Ascend-cann-kernels-910b_8.3.
+wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run
+./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run --install

-wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.
-chmod +x ./Ascend-cann-nnal_8.3.
-./Ascend-cann-nnal_8.3.
+wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run
+./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run --install

 source /usr/local/Ascend/nnal/atb/set_env.sh
 ```
{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/docs/source/tutorials/multi_npu_qwen3_next.md
RENAMED
@@ -51,7 +51,7 @@ Install the Ascend BiSheng toolkit:
 wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run
 chmod a+x Ascend-BiSheng-toolkit_aarch64.run
 ./Ascend-BiSheng-toolkit_aarch64.run --install
-source /usr/local/Ascend/8.3.
+source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh
 ```

 Install Triton Ascend:
@@ -75,7 +75,7 @@ Coming soon ...
 Please make sure you have already executed the command:

 ```bash
-source /usr/local/Ascend/8.3.
+source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh
 ```

 :::::{tab-set}
@@ -12,6 +12,13 @@ Expert balancing for MoE models in LLM serving is essential for optimal performa
 - Adaptive Scaling: Automatically adjusts to workload fluctuations while maintaining stable performance.
 - Fault Tolerance: Redundant expert placement ensures system resilience during hardware failures.

+## Support Scenarios
+
+### Models:
+DeepseekV3/V3.1/R1、Qwen3-MOE
+### MOE QuantType:
+W8A8-dynamic
+
 ## How to Use EPLB

 ### Dynamic EPLB
@@ -88,6 +88,7 @@ import argparse
 import asyncio
 import functools
 import heapq
+import ipaddress
 import os
 import sys
 import threading
@@ -116,6 +117,12 @@ class ServerState:
         self.host = host
         self.port = port
         self.url = f'http://{host}:{port}/v1'
+        try:
+            ip = ipaddress.ip_address(self.host)
+            if isinstance(ip, ipaddress.IPv6Address):
+                self.url = f'http://[{host}]:{port}/v1'
+        except Exception:
+            pass
         self.client = httpx.AsyncClient(timeout=None,
                                         base_url=self.url,
                                         limits=httpx.Limits(
@@ -356,6 +363,9 @@ async def send_request_to_service(client: httpx.AsyncClient,
     req_data = req_data.copy()
     req_data["stream"] = False
     req_data["max_tokens"] = 1
+    req_data["min_tokens"] = 1
+    if "max_completion_tokens" in req_data:
+        req_data["max_completion_tokens"] = 1
     if "stream_options" in req_data:
         del req_data["stream_options"]
     headers = {
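As context for the `ipaddress` change above: an IPv6 literal has to be wrapped in square brackets to form a valid URL authority, while hostnames and IPv4 addresses are left untouched. A minimal standalone sketch of the same check (the helper name and the `/v1` suffix are illustrative, not part of the proxy script):

import ipaddress

def base_url(host: str, port: int) -> str:
    # Bracket IPv6 literals ("::1" -> "[::1]") so the URL parses correctly.
    try:
        if isinstance(ipaddress.ip_address(host), ipaddress.IPv6Address):
            return f'http://[{host}]:{port}/v1'
    except ValueError:
        pass  # not an IP literal (e.g. a hostname); use the plain form
    return f'http://{host}:{port}/v1'

# base_url("::1", 8000)       -> 'http://[::1]:8000/v1'
# base_url("127.0.0.1", 8000) -> 'http://127.0.0.1:8000/v1'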
@@ -88,6 +88,7 @@ import argparse
 import asyncio
 import functools
 import heapq
+import ipaddress
 import json
 import os
 import sys
@@ -118,6 +119,12 @@ class ServerState:
         self.host = host
         self.port = port
         self.url = f'http://{host}:{port}/v1'
+        try:
+            ip = ipaddress.ip_address(self.host)
+            if isinstance(ip, ipaddress.IPv6Address):
+                self.url = f'http://[{host}]:{port}/v1'
+        except Exception:
+            pass
         self.client = httpx.AsyncClient(timeout=None,
                                         base_url=self.url,
                                         limits=httpx.Limits(
@@ -366,6 +373,8 @@ async def send_request_to_service(client: httpx.AsyncClient,
     req_data["stream"] = False
     req_data["max_tokens"] = 1
     req_data["min_tokens"] = 1
+    if "max_completion_tokens" in req_data:
+        req_data["max_completion_tokens"] = 1
     if "stream_options" in req_data:
         del req_data["stream_options"]
     headers = {
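Both proxy examples clamp the request that is forwarded to the prefill instance so it generates exactly one token; the new lines also cap `max_completion_tokens`, which chat-completions clients may send instead of `max_tokens`. A hedged, standalone sketch of the same mutation on a plain request dict (for illustration only, not the scripts' exact helper):

def clamp_for_prefill(req_data: dict) -> dict:
    # Copy so the decode-side request keeps its original sampling limits.
    req_data = req_data.copy()
    req_data["stream"] = False
    req_data["max_tokens"] = 1
    req_data["min_tokens"] = 1
    if "max_completion_tokens" in req_data:  # chat-completions style field
        req_data["max_completion_tokens"] = 1
    if "stream_options" in req_data:
        del req_data["stream_options"]
    return req_data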
{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc2}/tests/e2e/nightly/multi_node/scripts/lws.yaml
RENAMED
@@ -15,7 +15,7 @@ spec:
     spec:
       containers:
         - name: vllm-leader
-          image: m.daocloud.io/quay.io/ascend/cann:8.3.
+          image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
           env:
             - name: WORKSPACE
               value: "/root/workspace"
@@ -70,7 +70,7 @@ spec:
     spec:
       containers:
         - name: vllm-worker
-          image: m.daocloud.io/quay.io/ascend/cann:8.3.
+          image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
           env:
             - name: WORKSPACE
               value: "/root/workspace"
@@ -13,7 +13,7 @@ from tests.e2e.conftest import VllmRunner
 @pytest.fixture
 def test_prompts():
     prompt_types = ["repeat", "sentence"]
-    num_prompts =
+    num_prompts = 100
     prompts = []

     random.seed(0)
@@ -70,7 +70,6 @@ def test_ngram_correctness(
     Compare the outputs of a original LLM and a speculative LLM
     should be the same when using ngram speculative decoding.
     '''
-    pytest.skip("Not current support for the test.")
     ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
@@ -96,7 +95,7 @@ def test_ngram_correctness(

     # Heuristic: expect at least 70% of the prompts to match exactly
     # Upon failure, inspect the outputs to check for inaccuracy.
-    assert matches > int(0.
+    assert matches > int(0.66 * len(ref_outputs))


 @pytest.mark.parametrize("use_eagle3", [False, True], ids=["eagle", "eagle3"])
@@ -110,7 +109,7 @@ def test_eagle_correctness(
     Compare the outputs of a original LLM and a speculative LLM
     should be the same when using eagle speculative decoding.
     '''
-
+    pytest.skip("exist OOM error")
     ref_llm = LLM(model=model_name, max_model_len=2048, enforce_eager=False)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
@@ -1,2 +1,2 @@
 # Base docker image used to build the vllm-ascend e2e test image, which is built in the vLLM repository
-BASE_IMAGE_NAME="quay.io/ascend/cann:8.3.
+BASE_IMAGE_NAME="quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
@@ -7,6 +7,7 @@ from transformers.configuration_utils import PretrainedConfig
 from vllm.config import ModelConfig, VllmConfig
 from vllm.model_executor.layers.rotary_embedding import (
     DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding)
+from vllm.platforms import CpuArchEnum

 from tests.ut.base import TestBase
 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
@@ -424,11 +425,14 @@ class TestAscendMRotaryEmbedding(unittest.TestCase):
         return vllm_config

     @patch('torch_npu.npu_mrope')
+    @patch('vllm_ascend.platform.NPUPlatform.get_cpu_architecture')
     @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
     @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
     @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
     @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
-    def test_forward_oot_1d_positions(self, mock_npu_mrope):
+    def test_forward_oot_1d_positions(self, mock_cpu_arc, mock_npu_mrope):
+        mock_cpu_arc.return_value = CpuArchEnum.ARM
+
         mock_npu_mrope.return_value = (torch.zeros_like(self.query),
                                        torch.zeros_like(self.key))

@@ -443,11 +447,14 @@ class TestAscendMRotaryEmbedding(unittest.TestCase):
         self.assertEqual(result_q.shape, self.query.shape)

     @patch('torch_npu.npu_mrope')
+    @patch('vllm_ascend.platform.NPUPlatform.get_cpu_architecture')
     @patch('vllm.config.ModelConfig.__post_init__', MagicMock())
     @patch('vllm.config.VllmConfig.__post_init__', MagicMock())
     @patch('vllm.distributed.parallel_state._DP', MagicMock(world_size=1))
     @patch('vllm.distributed.parallel_state._TP', MagicMock(world_size=1))
-    def test_forward_oot_2d_positions(self, mock_npu_mrope):
+    def test_forward_oot_2d_positions(self, mock_cpu_arc, mock_npu_mrope):
+        mock_cpu_arc.return_value = CpuArchEnum.ARM
+
         mock_npu_mrope.return_value = (torch.zeros_like(self.query),
                                        torch.zeros_like(self.key))

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.11.
-__version_tuple__ = version_tuple = (0, 11, 0, '
+__version__ = version = '0.11.0rc2'
+__version_tuple__ = version_tuple = (0, 11, 0, 'rc2')

-__commit_id__ = commit_id = '
+__commit_id__ = commit_id = 'ga2e4c3fe7'
@@ -115,7 +115,7 @@ class AscendAttentionBackend(AttentionBackend):

     @staticmethod
     def get_supported_block_size() -> list[int]:
-        return [
+        return [128]


 class AscendAttentionState(Enum):
@@ -191,6 +191,14 @@ class AscendAttentionMetadataBuilder:
         self.max_num_blocks_per_req = cdiv(
             self.model_config.max_model_len,
             AscendAttentionBackend.get_supported_block_size()[0])
+        self.speculative_config = vllm_config.speculative_config
+        self.decode_threshold = 1
+        if self.speculative_config:
+            spec_token_num = self.speculative_config.num_speculative_tokens
+            self.decode_threshold += spec_token_num
+            assert self.decode_threshold <= 16, f"decode_threshold exceeded \
+                npu_fused_infer_attention_score TND layout's limit of 16, \
+                got {self.decode_threshold}"

     def reorder_batch(self, input_batch,
                       scheduler_output: "SchedulerOutput") -> bool:
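The new builder fields above tie the decode path to speculative decoding: a decode step may schedule up to 1 + num_speculative_tokens tokens per request, and the assertion guards the 16-token TND-layout limit of `npu_fused_infer_attention_score` quoted in the message. A small illustrative calculation (the concrete numbers below are assumptions, not values from the source):

# Assumed example: a speculative decoder proposing 3 draft tokens per step.
num_speculative_tokens = 3
decode_threshold = 1 + num_speculative_tokens      # 4 tokens scheduled per decode step
assert decode_threshold <= 16                      # TND layout limit cited in the diff

# Block bookkeeping from the same builder: with the single supported block
# size of 128, an assumed 32768-token max_model_len needs ceil(32768 / 128) = 256 blocks.
max_model_len, block_size = 32768, 128
max_num_blocks_per_req = -(-max_model_len // block_size)   # ceiling division -> 256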
@@ -1166,6 +1166,8 @@ class AscendMLAImpl(MLAAttentionImpl):
                 dim=-1,
             )
             q_c = self.q_a_layernorm(q_c)
+            # allgather need contiguous data
+            kv_no_split = kv_no_split.contiguous()
         else:
             q_c = hidden_states
             kv_no_split = self.kv_a_proj_with_mqa(hidden_states)[0]
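The `.contiguous()` call added above matters because the tensor being gathered appears to come from a split along the last dimension (the `dim=-1` context just above), so it is typically a strided view, and collective ops such as allgather expect a dense buffer. A minimal sketch of the pattern in plain PyTorch (shapes are arbitrary, for illustration only):

import torch

x = torch.randn(4, 8)
q_c, kv_no_split = x.split([5, 3], dim=-1)   # both halves are strided views into x
assert not kv_no_split.is_contiguous()

kv_no_split = kv_no_split.contiguous()       # densify before handing to a collective
assert kv_no_split.is_contiguous()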
@@ -213,26 +213,24 @@ def update_attn_params(update_stream, forward_context, runtime_shape):
     ) = param
     seq_lens = forward_context.attn_metadata[key].seq_lens

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    with torch.npu.stream(update_stream):
+    # When using FULL_DECODE_ONLY, there are some rare bugs for FULL_DECODE_ONLY
+    # mode with GQA. This is triggered by getting workspace for _npu_paged_attention
+    # in torch_npu. On some rare cases, _npu_paged_attention with smaller seq_lens
+    # might encounter a bigger workspace, while currently we use max_model_len to
+    # calculate max workspace in capturing. So additional get_workspace is added
+    # here to avoid such bugs.
+    # TODO(Angazenn): we will remove this once _npu_paged_attention is fully
+    # replaced by npu_fused_infer_attention_score which does not contain such bugs.
+    workspace = torch_npu._npu_paged_attention_get_workspace(
+        query=query,
+        key_cache=key_cache,
+        value_cache=value_cache,
+        num_kv_heads=num_kv_heads,
+        num_heads=num_heads,
+        scale_value=scale,
+        block_table=block_table,
+        context_lens=seq_lens,
+        out=output)
     torch.npu.graph_task_update_begin(update_stream, handle)
     torch_npu._npu_paged_attention(query=query,
                                    key_cache=key_cache,
@@ -280,7 +278,8 @@ def update_mla_attn_params(update_stream, forward_context, runtime_shape,
     else:
         seq_lens_list = seq_lens_list + [0] * (runtime_shape -
                                                len(seq_lens_list))
-
+
+    torch.npu.graph_task_update_begin(update_stream, handle)

     torch_npu.npu_fused_infer_attention_score.out(
         q_nope,