vllm-ascend 0.13.0__tar.gz → 0.13.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/Dockerfile.buildwheel +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/Dockerfile.nightly.a2 +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/Dockerfile.nightly.a3 +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/actionlint.yaml +0 -4
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_single_node.yaml +15 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_single_node_models.yaml +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_test.yaml +6 -4
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_schedule_image_build.yaml +1 -5
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_unit_test.yaml +0 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/labled_doctest.yaml +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/labled_download_model.yaml +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/nightly_test_a2.yaml +15 -12
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/nightly_test_a3.yaml +4 -15
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/pr_test_full.yaml +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/pr_test_light.yaml +3 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_image_build_and_push.yaml +4 -33
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_release_code_and_wheel.yml +1 -7
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_test_vllm_main.yaml +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.310p +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.310p.openEuler +3 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.a3 +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.a3.openEuler +3 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/Dockerfile.openEuler +3 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/PKG-INFO +3 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp +54 -71
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +1 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/_templates/sections/header.html +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/versioning_policy.md +0 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/conf.py +3 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3_reranker.md +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/release_notes.md +45 -155
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/pyproject.toml +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/requirements.txt +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/setup.py +1 -5
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml +1 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py +1 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-A2.yaml +14 -19
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-EPLB.yaml +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +11 -13
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8.yaml +5 -20
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml +1 -2
- vllm_ascend-0.13.0rc2/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-Exp-bf16.yaml +51 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B-A2.yaml +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B.yaml +1 -4
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-EPLB.yaml +1 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-longseq.yaml +1 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8.yaml +1 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +1 -1
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_kimi_k2_thinking.py → vllm_ascend-0.13.0rc2/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_exp_w8a8.py +105 -110
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_next.py +2 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +14 -9
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py +1 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py +1 -4
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_aclgraph_accuracy.py +24 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_layernorm.py +0 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_token_dispatcher.py +0 -23
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_platform.py +4 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_utils.py +0 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/aisbench.py +0 -5
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/_version.py +3 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ascend_config.py +1 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ascend_forward_context.py +0 -4
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/attention_cp.py +8 -17
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/mla_cp.py +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/acl_graph.py +2 -41
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +10 -11
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/pool_scheduler.py +0 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/mooncake_layerwise_connector.py +2 -19
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/envs.py +0 -7
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/eplb_worker.py +0 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/fused_moe.py +1 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/rotary_embedding.py +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_kv_cache_utils.py +4 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_set_cudagraph_sizes.py +0 -2
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_model_runner.py +1 -1
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/platform.py +32 -15
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/eagle_proposer.py +4 -20
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/utils.py +55 -16
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/block_table.py +22 -44
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/model_runner_v1.py +6 -17
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/npu_input_batch.py +0 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/worker.py +5 -12
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/PKG-INFO +3 -3
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/SOURCES.txt +2 -5
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/requires.txt +2 -2
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-V3.1-BF16.yaml +0 -82
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Kimi-K2-Instruct-W8A8.yaml +0 -79
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py +0 -123
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py +0 -108
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.gemini/config.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/labeler.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_multi_node.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_pre_commit.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/bot_merge_conflict.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/bot_pr_create.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/labled_test_310.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/misc/model_list.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/pr_close_cancel_job.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_codecov_refresh.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_nightly_image_build.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_test_benchmarks.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.gitignore +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.gitmodules +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.pre-commit-config.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/CODE_OF_CONDUCT.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/CONTRIBUTING.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/DCO +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/LICENSE +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/README.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/README.zh.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/README.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/requirements-bench.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/scripts/perf_result_template.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/tests/latency-tests.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/tests/serving-tests.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/benchmarks/tests/throughput-tests.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/codecov.yml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/collect_env.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUBridge.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUBridge.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUStorageImpl.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/NPUStorageImpl.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/aclnn_torch_adapter/op_api_common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_infershape.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_host/error_log.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_merge_n.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_multi_n.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_single_n.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_split_d.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/reduce_common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/add_rms_norm_bias/op_kernel/rms_norm_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/common_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/build.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/build_aclnn.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/config.cmake +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/func.cmake +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/intf.cmake +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/intf_pub.cmake +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/modules/Findalog.cmake +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/cmake/scripts/prepare.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/error_log.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/hcom_topo_info.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_host/tiling_args.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/tiling_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/const_args.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_l0c_to_gm_custom.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/dispatch_policy_custom.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/get_tensor_addr.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/layout3d.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_ffn_combine/op_kernel/utils/select_helper.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/dispatch_policy.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_binary.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_muls.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad_preload_async_with_callback_resident_a.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/dispatch_policy.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/dispatch_layout.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_host/dispatch_layout_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_kernel/dispatch_layout.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_kernel/dispatch_layout.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/dispatch_layout/op_kernel/dispatch_layout_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/bgmv_expand.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/bgmv_shrink.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/math_utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/sgmv_expand.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/sgmv_shrink.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/kernels/utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_workspace.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aic_kernel.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aiv_kernel.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_nq.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_qdown.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/moe_combine_normal.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_combine_normal/op_kernel/moe_combine_normal_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/error_log.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/math_util.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_infershape.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_base.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/error_log.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_apt.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_e_k_fullload.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_generalized.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_without_group.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling_def.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/error_log.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_key.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_templates_registry.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_type.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_gating_top_k/tiling_base/tiling_util.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_infershape.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling_base.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_expert_tokens_count.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_droppad_static_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_dynamic_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out_droppad.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_sort_multi_core.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_static_quant.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out_performance.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_performance.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad_dynamic.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_actual_expert.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core_performance.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_one_core.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/moe_init_routing_custom/op_kernel/moe_init_routing_custom.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/notify_dispatch.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_host/notify_dispatch_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_kernel/notify_dispatch.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_kernel/notify_dispatch.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/notify_dispatch/op_kernel/notify_dispatch_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/ops.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/torch_binding.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/torch_binding_meta.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/CMakeLists.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/aclnn_util.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/error/ops_error.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/fallback.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/fallback_comm.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/comm_args.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/data_copy.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/dropmask.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/pse.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/sync_collectives.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/kernel/util.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/log/inner/dfx_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/log/ops_log.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/data_copy_transpose_tiling.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/tiling_base.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/tiling_templates_registry.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/inc/tiling/tiling_type.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils/src/fallback_comm.cpp +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/csrc/utils.h +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/Makefile +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/README.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/requirements-docs.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/requirements-test.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/blocktable.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/chunkedprefill.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/dcp-decode.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/dcp-prefill.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/head-tail-style.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/overview.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/pcp-decode.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/cp/pcp-prefill.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/deployment.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/disaggregated_prefill_pull.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/disaggregated_prefill_push.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/eplb.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/multi_node_dp_kimi.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/get_quant_method.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_algorithm_overview.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_method_base_class.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_method_call_flow.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/quantization/quant_methods_overview.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/assets/workflow.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/contributors.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/governance.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/user_stories/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/community/user_stories/llamafactory.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/contribution/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/contribution/multi_node_test.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/contribution/testing.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_ais_bench.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/context_parallel.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/patch.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/feature_guide/quantization.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/faqs.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/installation.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ACL_Graph.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/KV_Cache_Pool_Guide.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ModelRunner_prepare_inputs.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/add_custom_aclnn_op.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/context_parallel.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/disaggregated_prefill.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/eplb_swift_balancer.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/quantization.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/310p.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-R1.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.1.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.2.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/GLM4.x.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Kimi-K2-Thinking.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/PaddleOCR-VL.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen-VL-Dense.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-7B.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-Omni.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-235B-A22B.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-30B-A3B.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-32B-W4A4.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-8B-W4A8.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Coder-30B-A3B.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Dense.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Next.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Omni-30B-A3B-Thinking.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-VL-235B-A22B-Instruct.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_embedding.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_reranker.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_multi_node.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_single_node.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_colocated_mooncake_multi_instance.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_multi_node.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_single_node.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/ray.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/quick_start.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/310p.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/DeepSeek-R1.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/DeepSeek-V3.1.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/DeepSeek-V3.2.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/GLM4.x.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Kimi-K2-Thinking.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/PaddleOCR-VL.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen-VL-Dense.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen2.5-7B.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen2.5-Omni.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-235B-A22B.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-30B-A3B.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-32B-W4A4.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-8B-W4A8.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Coder-30B-A3B.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Dense.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Next.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-Omni-30B-A3B-Thinking.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3-VL-235B-A22B-Instruct.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/Qwen3_embedding.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/long_sequence_context_parallel_multi_node.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/long_sequence_context_parallel_single_node.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/pd_colocated_mooncake_multi_instance.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/tutorials/ray.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/configuration/additional_config.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/configuration/env_vars.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/configuration/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/deployment_guide/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/deployment_guide/using_volcano_kthena.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/Fine_grained_TP.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/Multi_Token_Prediction.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/context_parallel.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/dynamic_batch.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/external_dp.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/layer_sharding.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/netloader_flowchart.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/kv_pool.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/large_scale_ep.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/layer_sharding.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/lora.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/netloader.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/quantization.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/speculative_decoding.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/feature_guide/ucm_deployment.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/support_matrix/index.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/chat_templates/template_qwen2_audio.jinja +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/eplb/eplb_deepseek.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/eplb/eplb_strategy.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/README.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/dp_load_balance_proxy_server.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/launch_online_dp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/external_online_dp/run_dp_template.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_data_parallel.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_disaggregated_prefill_npu.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_embed.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_external_launcher.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_audio_language.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_npu.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_npu_long_seq.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_npu_tp2.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_inference_sleep_mode_npu.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/offline_weight_load.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/prompt_embed_inference.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/prompt_embedding_inference.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/quantization/llm-compressor/w8a8_int8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/examples/run_dp_server.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/format.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/mypy.ini +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/packages.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/requirements-dev.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/requirements-lint.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/setup.cfg +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/common.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/conftest.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/model_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Llama-3.2-3B-Instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Molmo-7B-D-0924.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-8B-W8A8.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-Omni-30B-A3B-Instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/accuracy.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/gemma-3-4b-it.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/internlm3-8b-instruct.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/configs/llava-onevision-qwen2-0.5b-ov-hf.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/conftest.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/report_template.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_data_parallel.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_expert_parallel.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_external_launcher.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_full_graph_mode.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_offline_inference_distributed.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_offline_weight_load.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_pipeline_parallel.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_prefix_caching.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_quantization.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_qwen3_moe.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_shared_expert_dp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/2-cards/test_single_request_aclgraph.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_basic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/long_sequence/test_mtp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/test_kimi_k2.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/multicard/4-cards/test_qwen3_next.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-235B-disagg-pd.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/config/Qwen3-VL-235B-disagg-pd.yaml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/multi_node_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/run.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/test_multi_node.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/multi_node/scripts/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8_eplb.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_glm4_5.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_mtpx_deepseek_r1_0528_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_prefix_cache_deepseek_r1_0528_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_prefix_cache_qwen3_32b_int8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_32b.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_235b_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_30b_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_32b.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8_a3_feature_stack3.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_8b.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/models/test_qwq_32b.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_rotary_embedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/prompts/example.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/run_doctests.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/compile/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/compile/backend.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/model_runner_v2/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/model_runner_v2/test_basic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/test_classification.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/test_embedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/pooling/test_scoring.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/spec_decode/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_aclgraph_mem.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_async_scheduling.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_batch_invariant.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_camem.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_cpu_offloading.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_guided_decoding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_models.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_quantization.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_vlm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/test_xlite.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/singlecard/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/e2e/vllm_interface/vllm_test.cfg +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_attention_cp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_attention_mask.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_attention_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_mla_cp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_mla_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/test_sfa_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/attention/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/base.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/compilation/test_acl_graph.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/compilation/test_add_rms_norm_quant.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/conftest.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/core/test_scheduler_dynamic_batch.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/device_allocator/test_camem.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/mooncake/test_config_data.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/test_communicator.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/distributed/test_parallel_state.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/expert_map.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/eplb/core/test_eplb_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/fake_weight/config.json +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_mooncake_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/kv_connector/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader_elastic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader_load.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/model_loader/netloader/test_netloader_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_activation.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_comm_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_fused_moe.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_linear.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_mla.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_moe_comm_method.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_moe_mlp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_prepare_finalize.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_rotary_embedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_quant_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w4a16.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w4a8_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w8a16.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/sample/test_rejection_sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/sample/test_sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/spec_decode/test_eagle_proposer.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/spec_decode/test_mtp_proposer.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_ascend_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/test_envs.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/worker/test_block_table.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/worker/test_pcp_manager.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tests/ut/worker/test_worker_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/check_python_src_init.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/collect_user_first_contribution.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/enforce_regex_import.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/format_contributors.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/install_flash_infer_attention_score_ops_a2.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/install_flash_infer_attention_score_ops_a3.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/mooncake_installer.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/mypy.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/send_mm_request.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/send_request.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/shellcheck.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/tools/vllm_bench.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/typos.toml +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/_cann_ops_custom/.gitkeep +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/attention_mask.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/attention_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/context_parallel/common_cp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/mla_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/sfa_v1.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/attention/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/batch_invariant.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/compiler_interface.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/graph_fusion_pass_manager.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/npugraph_ex_passes/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/npugraph_ex_passes/add_rms_norm_quant.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/passes/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/multi_block_pool.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/recompute_scheduler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/core/scheduler_dynamic_batch.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/cpu_binding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/device_allocator/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/device_allocator/camem.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_manager/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/ascend_store_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/backend.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/config_data.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/kv_transfer.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/kvpool/pool_worker.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/mooncake_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/mooncake_transfer_engine.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/parallel_state.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/ucm_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/distributed/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/adaptor/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/eplb_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/eplb_updator.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/eplb/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/flash_common3_context.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/kv_offload/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/kv_offload/cpu_npu.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/kv_offload/npu.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/lora_ops.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/punica_npu.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/lora/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/meta_registration.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/executor/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/executor/elastic_load.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/executor/netloader_pg.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/interaction/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/interaction/elastic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/load.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/netloader.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/model_loader/netloader/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/activation.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/flashcomm2_oshard_manager.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/comm_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/experts_selector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/moe_comm_method.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/moe_mlp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/prepare_finalize.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/fused_moe/token_dispatcher.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/layer_shard_linear.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/layernorm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/linear.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/linear_op.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/mla.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/mm_encoder_attention.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/register_custom_ops.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/activation/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/activation/swiglu_quant.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/matmul.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/mean.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/rmsnorm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/batch_invariant/softmax.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk_delta_h.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk_o.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/cumsum.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/l2norm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/layernorm_guard.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/sigmoid_gating.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/solve_tril.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fla/wy_fast.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/fused_gdn_gating.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/linearnorm/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/mamba/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/mamba/causal_conv1d.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/reject_sample.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/rope.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/spec_decode/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/spec_decode/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/triton/triton_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/ops/weight_prefetch.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_balance_schedule.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_core.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_ec_connector.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_kv_cache_coordinator.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_lora_model_manager.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_mamba_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_message_queue.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/platform/patch_vllm_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_bert.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_deepseek.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_deepseekv3.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_module.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_qwen3_next.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_qwen3_next_mtp.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_qwen3vl.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_rejection_sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_rope.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/patch/worker/patch_triton.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/profiling_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/compressed_tensors/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/compressed_tensors/compressed_tensors.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/quant_config.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w4a16.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w4a8_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a16.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8_dynamic.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8_pdmix.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/quantization/w8a8mxfp8.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/sample/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/sample/rejection_sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/sample/sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/interface.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/mtp_proposer.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/ngram_proposer.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/spec_decode/suffix_proposer.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/pcp_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/README.md +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/aclgraph_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/attn_utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/input_batch.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/model_runner.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/gumbel.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/penalties.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/sample/sampler.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/states.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/worker/v2/utils.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/__init__.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/xlite.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/xlite_model_runner.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend/xlite/xlite_worker.py +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -32,7 +32,7 @@ COPY . /workspace/vllm-ascend/
|
|
|
32
32
|
|
|
33
33
|
# Install req
|
|
34
34
|
RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
|
|
35
|
-
python3 -m pip install twine
|
|
35
|
+
python3 -m pip install twine
|
|
36
36
|
|
|
37
37
|
# Install vllm-ascend
|
|
38
38
|
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# This file is a part of the vllm-ascend project.
|
|
16
16
|
#
|
|
17
17
|
|
|
18
|
-
FROM quay.io/ascend/vllm-ascend:
|
|
18
|
+
FROM quay.io/ascend/vllm-ascend:main
|
|
19
19
|
|
|
20
20
|
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
|
|
21
21
|
ARG AIS_BENCH_TAG="v3.0-20250930-master"
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# This file is a part of the vllm-ascend project.
|
|
16
16
|
#
|
|
17
17
|
|
|
18
|
-
FROM quay.io/ascend/vllm-ascend:
|
|
18
|
+
FROM quay.io/ascend/vllm-ascend:main-a3
|
|
19
19
|
|
|
20
20
|
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
|
|
21
21
|
ARG AIS_BENCH_TAG="v3.0-20250930-master"
|
{vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_single_node.yaml
RENAMED
|
@@ -110,6 +110,19 @@ jobs:
|
|
|
110
110
|
fi
|
|
111
111
|
cd ..
|
|
112
112
|
|
|
113
|
+
- name: Install custom-ops (for DeepSeek-V3.2-Exp)
|
|
114
|
+
if: ${{ inputs.name == 'deepseek3_2-exp-w8a8' }}
|
|
115
|
+
shell: bash -l {0}
|
|
116
|
+
run: |
|
|
117
|
+
wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/CANN-custom_ops-sfa-linux.aarch64.run
|
|
118
|
+
chmod +x ./CANN-custom_ops-sfa-linux.aarch64.run
|
|
119
|
+
./CANN-custom_ops-sfa-linux.aarch64.run --quiet
|
|
120
|
+
export ASCEND_CUSTOM_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize:${ASCEND_CUSTOM_OPP_PATH}
|
|
121
|
+
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/vendors/customize/op_api/lib/:${LD_LIBRARY_PATH}
|
|
122
|
+
wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/a3/custom_ops-1.0-cp311-cp311-linux_aarch64.whl
|
|
123
|
+
pip install custom_ops-1.0-cp311-cp311-linux_aarch64.whl
|
|
124
|
+
. /usr/local/Ascend/ascend-toolkit/set_env.sh
|
|
125
|
+
|
|
113
126
|
- name: Run vllm-project/vllm-ascend test
|
|
114
127
|
env:
|
|
115
128
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
@@ -121,3 +134,5 @@ jobs:
|
|
|
121
134
|
# ignore test_dispatch_ffn_combine until the test is fixed
|
|
122
135
|
pytest -sv ${{ inputs.tests }} \
|
|
123
136
|
--ignore=tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py
|
|
137
|
+
|
|
138
|
+
|
{vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/_e2e_nightly_single_node_models.yaml
RENAMED
|
@@ -112,10 +112,10 @@ jobs:
|
|
|
112
112
|
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20
|
|
113
113
|
|
|
114
114
|
- name: Install tensorflow (for Molmo-7B-D-0924)
|
|
115
|
-
if: ${{ inputs.runner == 'linux-aarch64-
|
|
115
|
+
if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
|
|
116
116
|
shell: bash -l {0}
|
|
117
117
|
run: |
|
|
118
|
-
pip install tensorflow
|
|
118
|
+
pip install tensorflow --no-cache-dir
|
|
119
119
|
|
|
120
120
|
- name: Resolve vllm-ascend version
|
|
121
121
|
run: |
|
|
@@ -19,7 +19,7 @@ on:
|
|
|
19
19
|
jobs:
|
|
20
20
|
e2e:
|
|
21
21
|
name: singlecard
|
|
22
|
-
runs-on:
|
|
22
|
+
runs-on: ${{ inputs.runner }}-1
|
|
23
23
|
container:
|
|
24
24
|
image: ${{ inputs.image }}
|
|
25
25
|
env:
|
|
@@ -145,9 +145,11 @@ jobs:
|
|
|
145
145
|
|
|
146
146
|
- name: Config mirrors
|
|
147
147
|
run: |
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
pip config set global.
|
|
148
|
+
# Fix me: use nginx cache rather than the pypi
|
|
149
|
+
# sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
150
|
+
# pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
151
|
+
# pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
152
|
+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
|
151
153
|
apt-get update -y
|
|
152
154
|
apt install git -y
|
|
153
155
|
|
|
@@ -46,7 +46,6 @@ jobs:
|
|
|
46
46
|
with:
|
|
47
47
|
fetch-depth: 0
|
|
48
48
|
persist-credentials: false
|
|
49
|
-
ref: ${{ github.ref }}
|
|
50
49
|
|
|
51
50
|
- name: Free up disk space
|
|
52
51
|
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
|
@@ -106,8 +105,6 @@ jobs:
|
|
|
106
105
|
steps:
|
|
107
106
|
- name: Checkout
|
|
108
107
|
uses: actions/checkout@v6
|
|
109
|
-
with:
|
|
110
|
-
ref: ${{ github.ref }}
|
|
111
108
|
|
|
112
109
|
- name: Download arm64 digests
|
|
113
110
|
uses: actions/download-artifact@v7
|
|
@@ -149,9 +146,8 @@ jobs:
|
|
|
149
146
|
# which follow the rule from vLLM with prefix v
|
|
150
147
|
# TODO(yikun): the post release might be considered as latest release
|
|
151
148
|
tags: |
|
|
152
|
-
type=ref,event=branch,suffix=${{ env.SUFFIX }}
|
|
153
|
-
type=ref,event=pr,suffix=${{ env.SUFFIX }}
|
|
154
149
|
type=pep440,pattern={{raw}},suffix=${{ env.SUFFIX }}
|
|
150
|
+
type=schedule,pattern=main,suffix=${{ env.SUFFIX }}
|
|
155
151
|
flavor:
|
|
156
152
|
latest=false
|
|
157
153
|
|
|
@@ -48,7 +48,7 @@ jobs:
|
|
|
48
48
|
matrix:
|
|
49
49
|
vllm_verison: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
|
|
50
50
|
name: vLLM Ascend test
|
|
51
|
-
runs-on: linux-aarch64-
|
|
51
|
+
runs-on: linux-aarch64-a2-1
|
|
52
52
|
container:
|
|
53
53
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_verison }}
|
|
54
54
|
steps:
|
|
@@ -19,7 +19,7 @@ jobs:
|
|
|
19
19
|
download-models:
|
|
20
20
|
if: contains(github.event.pull_request.labels.*.name, 'model-download')
|
|
21
21
|
name: Download models from ModelScope
|
|
22
|
-
runs-on: linux-aarch64-
|
|
22
|
+
runs-on: linux-aarch64-a2-0
|
|
23
23
|
container:
|
|
24
24
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
|
|
25
25
|
|
|
@@ -50,22 +50,22 @@ jobs:
|
|
|
50
50
|
matrix:
|
|
51
51
|
test_config:
|
|
52
52
|
- name: qwen3-8b
|
|
53
|
-
os: linux-aarch64-
|
|
53
|
+
os: linux-aarch64-a2-1
|
|
54
54
|
tests: tests/e2e/nightly/single_node/models/test_qwen3_8b.py
|
|
55
55
|
- name: qwen3next
|
|
56
|
-
os: linux-aarch64-
|
|
56
|
+
os: linux-aarch64-a2-4
|
|
57
57
|
ests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
|
|
58
58
|
- name: qwen3-32b
|
|
59
|
-
os: linux-aarch64-
|
|
59
|
+
os: linux-aarch64-a2-4
|
|
60
60
|
tests: tests/e2e/nightly/single_node/models/test_qwen3_32b.py
|
|
61
61
|
- name: qwen3-32b-in8-a2
|
|
62
|
-
os: linux-aarch64-
|
|
62
|
+
os: linux-aarch64-a2-4
|
|
63
63
|
tests: tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py
|
|
64
64
|
- name: test_custom_op
|
|
65
|
-
os: linux-aarch64-
|
|
65
|
+
os: linux-aarch64-a2-1
|
|
66
66
|
tests: tests/e2e/nightly/single_node/ops/singlecard_ops
|
|
67
67
|
- name: test_custom_op_multi_card
|
|
68
|
-
os: linux-aarch64-
|
|
68
|
+
os: linux-aarch64-a2-4
|
|
69
69
|
tests: tests/e2e/nightly/single_node/ops/multicard_ops_a2/
|
|
70
70
|
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
|
|
71
71
|
with:
|
|
@@ -93,7 +93,7 @@ jobs:
|
|
|
93
93
|
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
|
94
94
|
with:
|
|
95
95
|
soc_version: a2
|
|
96
|
-
runner: linux-aarch64-
|
|
96
|
+
runner: linux-aarch64-a2-0
|
|
97
97
|
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
|
|
98
98
|
replicas: 1
|
|
99
99
|
size: ${{ matrix.test_config.size }}
|
|
@@ -106,32 +106,35 @@ jobs:
|
|
|
106
106
|
${{
|
|
107
107
|
github.event_name == 'schedule' ||
|
|
108
108
|
github.event_name == 'workflow_dispatch' ||
|
|
109
|
-
|
|
109
|
+
(
|
|
110
|
+
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
|
|
111
|
+
contains(github.event.pull_request.labels.*.name, 'ready-for-test')
|
|
112
|
+
)
|
|
110
113
|
}}
|
|
111
114
|
strategy:
|
|
112
115
|
fail-fast: false
|
|
113
116
|
matrix:
|
|
114
117
|
test_config:
|
|
115
|
-
- os: linux-aarch64-
|
|
118
|
+
- os: linux-aarch64-a2-1
|
|
116
119
|
model_list:
|
|
117
120
|
- Qwen3-8B
|
|
118
121
|
- Qwen2-Audio-7B-Instruct
|
|
119
122
|
- Qwen3-8B-W8A8
|
|
120
123
|
- Qwen3-VL-8B-Instruct
|
|
121
124
|
- Qwen2.5-Omni-7B
|
|
122
|
-
- os: linux-aarch64-
|
|
125
|
+
- os: linux-aarch64-a2-1
|
|
123
126
|
model_list:
|
|
124
127
|
- ERNIE-4.5-21B-A3B-PT
|
|
125
128
|
- InternVL3_5-8B-hf
|
|
126
129
|
- Molmo-7B-D-0924
|
|
127
130
|
- Llama-3.2-3B-Instruct
|
|
128
131
|
- llava-onevision-qwen2-0.5b-ov-hf
|
|
129
|
-
- os: linux-aarch64-
|
|
132
|
+
- os: linux-aarch64-a2-2
|
|
130
133
|
model_list:
|
|
131
134
|
- Qwen3-30B-A3B
|
|
132
135
|
- Qwen3-VL-30B-A3B-Instruct
|
|
133
136
|
- Qwen3-30B-A3B-W8A8
|
|
134
|
-
- os: linux-aarch64-
|
|
137
|
+
- os: linux-aarch64-a2-4
|
|
135
138
|
model_list:
|
|
136
139
|
- Qwen3-Next-80B-A3B-Instruct
|
|
137
140
|
- Qwen3-Omni-30B-A3B-Instruct
|
|
@@ -83,12 +83,6 @@ jobs:
|
|
|
83
83
|
- name: multi-node-qwen-vl-disagg-pd
|
|
84
84
|
config_file_path: Qwen3-VL-235B-disagg-pd.yaml
|
|
85
85
|
size: 2
|
|
86
|
-
- name: multi-node-kimi-k2-instruct-w8a8
|
|
87
|
-
config_file_path: Kimi-K2-Instruct-W8A8.yaml
|
|
88
|
-
size: 2
|
|
89
|
-
- name: multi-node-deepseek-v3.1
|
|
90
|
-
config_file_path: DeepSeek-V3.1-BF16.yaml
|
|
91
|
-
size: 2
|
|
92
86
|
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
|
93
87
|
with:
|
|
94
88
|
soc_version: a3
|
|
@@ -150,15 +144,10 @@ jobs:
|
|
|
150
144
|
- name: qwen3-next-w8a8
|
|
151
145
|
os: linux-aarch64-a3-4
|
|
152
146
|
tests: tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
os: linux-aarch64-a3-16
|
|
158
|
-
tests: tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py
|
|
159
|
-
- name: deepseek3_2-w8a8
|
|
160
|
-
os: linux-aarch64-a3-16
|
|
161
|
-
tests: tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py
|
|
147
|
+
# TODO: Replace deepseek3.2-exp with deepseek3.2 after nightly tests pass
|
|
148
|
+
# - name: deepseek3_2-exp-w8a8
|
|
149
|
+
# os: linux-aarch64-a3-16
|
|
150
|
+
# tests: tests/e2e/nightly/single_node/models/test_deepseek_v3_2_exp_w8a8.py
|
|
162
151
|
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
|
|
163
152
|
with:
|
|
164
153
|
vllm: v0.13.0
|
|
@@ -38,7 +38,7 @@ concurrency:
|
|
|
38
38
|
|
|
39
39
|
jobs:
|
|
40
40
|
changes:
|
|
41
|
-
runs-on: linux-aarch64-
|
|
41
|
+
runs-on: linux-aarch64-a2-0
|
|
42
42
|
if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
|
|
43
43
|
outputs:
|
|
44
44
|
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
|
@@ -80,6 +80,6 @@ jobs:
|
|
|
80
80
|
uses: ./.github/workflows/_e2e_test.yaml
|
|
81
81
|
with:
|
|
82
82
|
vllm: ${{ matrix.vllm_version }}
|
|
83
|
-
runner: linux-aarch64-
|
|
83
|
+
runner: linux-aarch64-a2
|
|
84
84
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
|
|
85
85
|
type: full
|
|
@@ -41,7 +41,7 @@ jobs:
|
|
|
41
41
|
with:
|
|
42
42
|
vllm: v0.13.0
|
|
43
43
|
changes:
|
|
44
|
-
runs-on: linux-aarch64-
|
|
44
|
+
runs-on: linux-aarch64-a2-0
|
|
45
45
|
outputs:
|
|
46
46
|
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
|
47
47
|
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
|
|
@@ -84,7 +84,7 @@ jobs:
|
|
|
84
84
|
with:
|
|
85
85
|
vllm: ${{ matrix.vllm_version }}
|
|
86
86
|
runner: linux-amd64-cpu-16-hk
|
|
87
|
-
image: quay.nju.edu.cn/ascend/cann:8.
|
|
87
|
+
image: quay.nju.edu.cn/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
|
|
88
88
|
type: pr
|
|
89
89
|
|
|
90
90
|
e2e-light:
|
|
@@ -99,6 +99,6 @@ jobs:
|
|
|
99
99
|
uses: ./.github/workflows/_e2e_test.yaml
|
|
100
100
|
with:
|
|
101
101
|
vllm: ${{ matrix.vllm_version }}
|
|
102
|
-
runner: linux-aarch64-
|
|
102
|
+
runner: linux-aarch64-a2
|
|
103
103
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11
|
|
104
104
|
type: light
|
{vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_image_build_and_push.yaml
RENAMED
|
@@ -11,36 +11,12 @@
|
|
|
11
11
|
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3 / vllm-ascend:v1.2.3rc1
|
|
12
12
|
name: Image Build and Push
|
|
13
13
|
on:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
paths:
|
|
18
|
-
- '.github/workflows/schedule_image_build_and_push.yaml'
|
|
19
|
-
- 'Dockerfile*'
|
|
20
|
-
- 'vllm_ascend/**'
|
|
21
|
-
- 'setup.py'
|
|
22
|
-
- 'pyproject.toml'
|
|
23
|
-
- 'requirements.txt'
|
|
24
|
-
- 'cmake/**'
|
|
25
|
-
- 'CMakeLists.txt'
|
|
26
|
-
- 'csrc/**'
|
|
27
|
-
types: [ labeled, synchronize ]
|
|
14
|
+
schedule:
|
|
15
|
+
# UTC+8: 8am, 12pm, 16pm, 22pm
|
|
16
|
+
- cron: '0 0,4,8,14 * * *'
|
|
28
17
|
push:
|
|
29
|
-
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
30
|
-
branches:
|
|
31
|
-
- 'releases/*'
|
|
32
18
|
tags:
|
|
33
19
|
- 'v*'
|
|
34
|
-
paths:
|
|
35
|
-
- '.github/workflows/schedule_image_build_and_push.yaml'
|
|
36
|
-
- 'Dockerfile*'
|
|
37
|
-
- 'vllm_ascend/**'
|
|
38
|
-
- 'setup.py'
|
|
39
|
-
- 'pyproject.toml'
|
|
40
|
-
- 'requirements.txt'
|
|
41
|
-
- 'cmake/**'
|
|
42
|
-
- 'CMakeLists.txt'
|
|
43
|
-
- 'csrc/**'
|
|
44
20
|
workflow_dispatch:
|
|
45
21
|
inputs:
|
|
46
22
|
tag:
|
|
@@ -49,13 +25,8 @@ on:
|
|
|
49
25
|
default: main
|
|
50
26
|
required: true
|
|
51
27
|
|
|
52
|
-
concurrency:
|
|
53
|
-
group: ${{ github.workflow }}-${{ github.ref_name }}
|
|
54
|
-
cancel-in-progress: true
|
|
55
|
-
|
|
56
28
|
jobs:
|
|
57
29
|
image_build:
|
|
58
|
-
if: ${{ github.event_name == 'push' || contains(github.event.pull_request.labels.*.name, 'image-build') }}
|
|
59
30
|
name: Image Build and Push
|
|
60
31
|
strategy:
|
|
61
32
|
matrix:
|
|
@@ -81,7 +52,7 @@ jobs:
|
|
|
81
52
|
dockerfile: ${{ matrix.build_meta.dockerfile }}
|
|
82
53
|
suffix: ${{ matrix.build_meta.suffix }}
|
|
83
54
|
quay_username: ${{ vars.QUAY_USERNAME }}
|
|
84
|
-
should_push: ${{ github.repository_owner == 'vllm-project'
|
|
55
|
+
should_push: ${{ github.repository_owner == 'vllm-project' }}
|
|
85
56
|
workflow_dispatch_tag: ${{ inputs.tag }}
|
|
86
57
|
secrets:
|
|
87
58
|
QUAY_PASSWORD: ${{ secrets.QUAY_PASSWORD }}
|
{vllm_ascend-0.13.0 → vllm_ascend-0.13.0rc2}/.github/workflows/schedule_release_code_and_wheel.yml
RENAMED
|
@@ -122,13 +122,7 @@ jobs:
|
|
|
122
122
|
--exclude "libascend*.so" \
|
|
123
123
|
--exclude "libtorch*.so" \
|
|
124
124
|
--exclude "libopapi.so" \
|
|
125
|
-
--exclude "liberror_manager.so" \
|
|
126
|
-
--exclude "libruntime.so" \
|
|
127
|
-
--exclude "libmmpa.so" \
|
|
128
|
-
--exclude "libops_base.so" \
|
|
129
|
-
--exclude "libopapi_math.so" \
|
|
130
|
-
--exclude "libunified_dlog.so" \
|
|
131
|
-
--exclude "liboptiling.so"
|
|
125
|
+
--exclude "liberror_manager.so"
|
|
132
126
|
done
|
|
133
127
|
rm -f dist/*.whl
|
|
134
128
|
mv dist/repaired/*.whl dist/
|
|
@@ -73,7 +73,7 @@ RUN apt-get update -y && \
|
|
|
73
73
|
rm -rf /var/lib/apt/lists/*
|
|
74
74
|
|
|
75
75
|
# Install modelscope (for fast download), ray (for multinode) and torch-npu post version
|
|
76
|
-
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
|
76
|
+
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
77
77
|
python3 -m pip cache purge
|
|
78
78
|
|
|
79
79
|
RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
|
|
@@ -58,7 +58,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
|
58
58
|
python3 -m pip cache purge
|
|
59
59
|
|
|
60
60
|
# Install modelscope (for fast download), ray (for multinode) and torch-npu post version
|
|
61
|
-
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
|
61
|
+
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
62
62
|
python3 -m pip cache purge
|
|
63
63
|
|
|
64
64
|
RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
|
|
@@ -25,7 +25,7 @@ ENV SOC_VERSION=$SOC_VERSION \
|
|
|
25
25
|
OMP_NUM_THREADS=1
|
|
26
26
|
|
|
27
27
|
RUN yum update -y && \
|
|
28
|
-
yum install -y
|
|
28
|
+
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
|
|
29
29
|
rm -rf /var/cache/yum
|
|
30
30
|
|
|
31
31
|
RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|
@@ -48,12 +48,13 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
|
48
48
|
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
49
49
|
source /usr/local/Ascend/nnal/atb/set_env.sh && \
|
|
50
50
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
|
|
51
|
+
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
|
|
51
52
|
export SOC_VERSION=ASCEND310P3 && \
|
|
52
53
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
|
53
54
|
python3 -m pip cache purge
|
|
54
55
|
|
|
55
56
|
# Install modelscope (for fast download), ray (for multinode) and torch-npu post version
|
|
56
|
-
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
|
57
|
+
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
57
58
|
python3 -m pip cache purge
|
|
58
59
|
|
|
59
60
|
RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
|
|
@@ -72,7 +72,7 @@ RUN apt-get update -y && \
|
|
|
72
72
|
rm -rf /var/lib/apt/lists/*
|
|
73
73
|
|
|
74
74
|
# Install modelscope (for fast download), ray (for multinode) and torch-npu post version
|
|
75
|
-
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
|
75
|
+
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
76
76
|
python3 -m pip cache purge
|
|
77
77
|
|
|
78
78
|
RUN echo "export LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
|
|
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|
|
34
34
|
SHELL ["/bin/bash", "-c"]
|
|
35
35
|
|
|
36
36
|
RUN yum update -y && \
|
|
37
|
-
yum install -y
|
|
37
|
+
yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
|
|
38
38
|
git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
|
|
39
39
|
cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
|
|
40
40
|
ARCH=$(uname -m) && \
|
|
@@ -62,6 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
|
62
62
|
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
63
63
|
source /usr/local/Ascend/nnal/atb/set_env.sh && \
|
|
64
64
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
|
|
65
|
+
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
|
|
65
66
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
|
66
67
|
python3 -m pip cache purge
|
|
67
68
|
|
|
@@ -71,7 +72,7 @@ RUN yum update -y && \
|
|
|
71
72
|
rm -rf /var/cache/yum/*
|
|
72
73
|
|
|
73
74
|
# Install modelscope (for fast download), ray (for multinode) and torch-npu post version
|
|
74
|
-
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
|
75
|
+
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
75
76
|
python3 -m pip cache purge
|
|
76
77
|
|
|
77
78
|
RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
|
|
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
|
|
|
34
34
|
SHELL ["/bin/bash", "-c"]
|
|
35
35
|
|
|
36
36
|
RUN yum update -y && \
|
|
37
|
-
yum install -y
|
|
37
|
+
yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
|
|
38
38
|
git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
|
|
39
39
|
cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
|
|
40
40
|
ARCH=$(uname -m) && \
|
|
@@ -62,6 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
|
62
62
|
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
63
63
|
source /usr/local/Ascend/nnal/atb/set_env.sh && \
|
|
64
64
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
|
|
65
|
+
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
|
|
65
66
|
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
|
|
66
67
|
python3 -m pip cache purge
|
|
67
68
|
|
|
@@ -71,7 +72,7 @@ RUN yum update -y && \
|
|
|
71
72
|
rm -rf /var/cache/yum/*
|
|
72
73
|
|
|
73
74
|
# Install modelscope (for fast download), ray (for multinode) and torch-npu post version
|
|
74
|
-
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
|
|
75
|
+
RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' 'torch-npu==2.8.0.post1' -i https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
75
76
|
python3 -m pip cache purge
|
|
76
77
|
|
|
77
78
|
RUN echo "export LD_PRELOAD=/usr/lib64/libjemalloc.so.2:$LD_PRELOAD" >> ~/.bashrc
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vllm_ascend
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.13.0rc2
|
|
4
4
|
Summary: vLLM Ascend backend plugin
|
|
5
5
|
Home-page: https://github.com/vllm-project/vllm-ascend
|
|
6
6
|
Author: vLLM-Ascend team
|
|
@@ -38,9 +38,9 @@ Requires-Dist: compressed_tensors>=0.11.0
|
|
|
38
38
|
Requires-Dist: msgpack
|
|
39
39
|
Requires-Dist: quart
|
|
40
40
|
Requires-Dist: numba
|
|
41
|
-
Requires-Dist: torch-npu==2.8.0
|
|
41
|
+
Requires-Dist: torch-npu==2.8.0
|
|
42
42
|
Requires-Dist: arctic-inference==0.1.1
|
|
43
|
-
Requires-Dist: transformers
|
|
43
|
+
Requires-Dist: transformers>=4.57.3
|
|
44
44
|
Requires-Dist: fastapi<0.124.0
|
|
45
45
|
Requires-Dist: triton-ascend==3.2.0
|
|
46
46
|
Dynamic: author
|