vllm-ascend 0.9.0rc2__tar.gz → 0.11.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm_ascend-0.11.0rc1/.gemini/config.yaml +6 -0
- vllm_ascend-0.11.0rc1/.github/Dockerfile.buildwheel +45 -0
- vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/110-user-story.yml +37 -0
- vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/750-RFC.yml +49 -0
- vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml +104 -0
- vllm_ascend-0.11.0rc1/.github/PULL_REQUEST_TEMPLATE.md +27 -0
- vllm_ascend-0.11.0rc1/.github/actionlint.yaml +21 -0
- vllm_ascend-0.11.0rc1/.github/format_pr_body.sh +59 -0
- vllm_ascend-0.11.0rc1/.github/workflows/_accuracy_test.yaml +175 -0
- vllm_ascend-0.11.0rc1/.github/workflows/_e2e_nightly.yaml +115 -0
- vllm_ascend-0.11.0rc1/.github/workflows/_e2e_test.yaml +199 -0
- vllm_ascend-0.11.0rc1/.github/workflows/accuracy_test.yaml +72 -0
- vllm_ascend-0.11.0rc1/.github/workflows/format_pr_body.yaml +57 -0
- vllm_ascend-0.11.0rc1/.github/workflows/image_310p_openeuler.yml +135 -0
- vllm_ascend-0.11.0rc1/.github/workflows/image_310p_ubuntu.yml +131 -0
- vllm_ascend-0.11.0rc1/.github/workflows/image_a3_openeuler.yml +135 -0
- vllm_ascend-0.11.0rc1/.github/workflows/image_a3_ubuntu.yml +131 -0
- vllm_ascend-0.11.0rc1/.github/workflows/image_openeuler.yml +134 -0
- vllm_ascend-0.11.0rc1/.github/workflows/image_ubuntu.yml +131 -0
- vllm_ascend-0.11.0rc1/.github/workflows/label_merge_conflict.yml +20 -0
- vllm_ascend-0.11.0rc1/.github/workflows/labeler.yml +18 -0
- vllm_ascend-0.11.0rc1/.github/workflows/multi_node_test.yaml +118 -0
- vllm_ascend-0.11.0rc1/.github/workflows/nightly_benchmarks.yaml +206 -0
- vllm_ascend-0.11.0rc1/.github/workflows/pre-commit.yml +43 -0
- vllm_ascend-0.11.0rc1/.github/workflows/release_code.yml +75 -0
- vllm_ascend-0.11.0rc1/.github/workflows/release_whl.yml +119 -0
- vllm_ascend-0.11.0rc1/.github/workflows/reminder_comment.yml +26 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_dist.yaml +100 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_doctest.yaml +87 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test.yaml +149 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_310p.yaml +117 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_full.yaml +80 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +45 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_models.yaml +177 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_nightly.yaml +105 -0
- vllm_ascend-0.11.0rc1/.github/workflows/vllm_ascend_test_pd.yaml +112 -0
- vllm_ascend-0.11.0rc1/.gitignore +204 -0
- vllm_ascend-0.11.0rc1/.pre-commit-config.yaml +151 -0
- vllm_ascend-0.11.0rc1/CMakeLists.txt +111 -0
- vllm_ascend-0.11.0rc1/CODE_OF_CONDUCT.md +127 -0
- vllm_ascend-0.11.0rc1/CONTRIBUTING.md +3 -0
- vllm_ascend-0.11.0rc1/Dockerfile +60 -0
- vllm_ascend-0.11.0rc1/Dockerfile.310p +61 -0
- vllm_ascend-0.11.0rc1/Dockerfile.310p.openEuler +59 -0
- vllm_ascend-0.11.0rc1/Dockerfile.a3 +60 -0
- vllm_ascend-0.11.0rc1/Dockerfile.a3.openEuler +58 -0
- vllm_ascend-0.11.0rc1/Dockerfile.openEuler +58 -0
- vllm_ascend-0.11.0rc1/PKG-INFO +142 -0
- vllm_ascend-0.11.0rc1/README.md +91 -0
- vllm_ascend-0.11.0rc1/README.zh.md +90 -0
- vllm_ascend-0.11.0rc1/benchmarks/README.md +175 -0
- vllm_ascend-0.11.0rc1/benchmarks/ops/ben_vocabparallelembedding.py +158 -0
- vllm_ascend-0.11.0rc1/benchmarks/requirements-bench.txt +4 -0
- vllm_ascend-0.11.0rc1/benchmarks/scripts/convert_json_to_markdown.py +188 -0
- vllm_ascend-0.11.0rc1/benchmarks/scripts/perf_result_template.md +31 -0
- vllm_ascend-0.11.0rc1/benchmarks/scripts/run-performance-benchmarks.sh +323 -0
- vllm_ascend-0.11.0rc1/benchmarks/tests/latency-tests.json +23 -0
- vllm_ascend-0.11.0rc1/benchmarks/tests/serving-tests.json +78 -0
- vllm_ascend-0.11.0rc1/benchmarks/tests/throughput-tests.json +38 -0
- vllm_ascend-0.11.0rc1/codecov.yml +28 -0
- vllm_ascend-0.11.0rc1/csrc/camem_allocator.cpp +347 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/bgmv_expand.cpp +369 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/bgmv_shrink.cpp +252 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/get_masked_input_and_mask_kernel.cpp +378 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/pos_encoding_kernels.cpp +372 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/sgmv_expand.cpp +389 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
- vllm_ascend-0.11.0rc1/csrc/kernels/utils.h +51 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_host/mla_preprocess.h +698 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +95 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/common.h +25 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/common_func.h +121 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/hardware.h +36 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterator.h +92 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +162 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +89 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +228 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +42 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +71 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +39 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +36 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +310 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +44 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +395 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/layout.h +18 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/mem.h +82 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/mma.h +67 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +38 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/simd.h +274 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/kernel/utils.h +69 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess.h +114 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +295 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +2914 -0
- vllm_ascend-0.11.0rc1/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +2503 -0
- vllm_ascend-0.11.0rc1/csrc/ops.h +161 -0
- vllm_ascend-0.11.0rc1/csrc/torch_binding.cpp +514 -0
- vllm_ascend-0.11.0rc1/csrc/torch_binding_meta.cpp +136 -0
- vllm_ascend-0.11.0rc1/csrc/utils.h +31 -0
- vllm_ascend-0.11.0rc1/docs/Makefile +25 -0
- vllm_ascend-0.11.0rc1/docs/README.md +24 -0
- vllm_ascend-0.11.0rc1/docs/requirements-docs.txt +10 -0
- vllm_ascend-0.11.0rc1/docs/requirements-test.txt +2 -0
- vllm_ascend-0.11.0rc1/docs/source/_templates/sections/header.html +58 -0
- vllm_ascend-0.11.0rc1/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- vllm_ascend-0.11.0rc1/docs/source/assets/multi_node_dp_kimi.png +0 -0
- vllm_ascend-0.11.0rc1/docs/source/community/contributors.md +171 -0
- vllm_ascend-0.11.0rc1/docs/source/community/governance.md +48 -0
- vllm_ascend-0.11.0rc1/docs/source/community/user_stories/index.md +19 -0
- vllm_ascend-0.11.0rc1/docs/source/community/user_stories/llamafactory.md +19 -0
- vllm_ascend-0.11.0rc1/docs/source/community/versioning_policy.md +135 -0
- vllm_ascend-0.11.0rc1/docs/source/conf.py +142 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/contribution/index.md +111 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/contribution/testing.md +285 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +20 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +19 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +21 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +21 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/accuracy_report/index.md +10 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/index.md +10 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/using_evalscope.md +175 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +300 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/evaluation/using_opencompass.md +123 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/ACL_Graph.md +102 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +260 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/index.md +11 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/feature_guide/patch.md +75 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/modeling/adding_a_new_model.md +258 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/modeling/index.md +10 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/index.md +9 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/optimization_and_tuning.md +183 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/performance_benchmark.md +194 -0
- vllm_ascend-0.11.0rc1/docs/source/developer_guide/performance/profile_execute_duration.md +40 -0
- vllm_ascend-0.11.0rc1/docs/source/faqs.md +216 -0
- vllm_ascend-0.11.0rc1/docs/source/index.md +71 -0
- vllm_ascend-0.11.0rc1/docs/source/installation.md +287 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +1647 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +204 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +103 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +87 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +624 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +187 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +26 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +26 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +112 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +65 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +83 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +33 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +333 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +29 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +32 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +26 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +88 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +81 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +479 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +79 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +293 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +149 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +29 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +192 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +62 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +86 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +82 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +71 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +110 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +107 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +77 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +99 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +70 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +286 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +28 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +30 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +121 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +30 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +58 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +183 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +156 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +220 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +1660 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +30 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +264 -0
- vllm_ascend-0.11.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +214 -0
- vllm_ascend-0.11.0rc1/docs/source/quick_start.md +185 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/index.md +24 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi-node_dsv3.2.md +405 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node.md +212 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_kimi.md +158 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_llmdatadist.md +244 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md +616 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_qwen3vl.md +165 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_node_ray.md +182 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu.md +107 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_moge.md +242 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_quantization.md +137 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_qwen3_moe.md +109 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/single_node_300i.md +408 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu.md +202 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_audio.md +122 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_multimodal.md +192 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
- vllm_ascend-0.11.0rc1/docs/source/tutorials/single_npu_qwen3_quantization.md +133 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/configuration/additional_config.md +116 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/configuration/env_vars.md +9 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/configuration/index.md +10 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +91 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/graph_mode.md +78 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/index.md +14 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/lora.md +23 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/quantization.md +125 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/sleep_mode.md +114 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/feature_guide/structured_output.md +163 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/release_notes.md +689 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/support_matrix/index.md +10 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/support_matrix/supported_features.md +45 -0
- vllm_ascend-0.11.0rc1/docs/source/user_guide/support_matrix/supported_models.md +83 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/README.md +242 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/gen_ranktable.py +141 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/gen_ranktable.sh +88 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +576 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +657 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +278 -0
- vllm_ascend-0.11.0rc1/examples/disaggregated_prefill_v1/run_server.sh +32 -0
- vllm_ascend-0.11.0rc1/examples/eplb/eplb_deepseek.py +205 -0
- vllm_ascend-0.11.0rc1/examples/eplb/eplb_strategy.py +186 -0
- vllm_ascend-0.11.0rc1/examples/external_online_dp/README.md +38 -0
- vllm_ascend-0.11.0rc1/examples/external_online_dp/launch_online_dp.py +97 -0
- vllm_ascend-0.11.0rc1/examples/external_online_dp/run_dp_template.sh +46 -0
- vllm_ascend-0.11.0rc1/examples/offline_data_parallel.py +257 -0
- vllm_ascend-0.11.0rc1/examples/offline_disaggregated_prefill_npu.py +147 -0
- vllm_ascend-0.11.0rc1/examples/offline_dualbatch_overlap_npu.py +52 -0
- vllm_ascend-0.11.0rc1/examples/offline_embed.py +58 -0
- vllm_ascend-0.11.0rc1/examples/offline_external_launcher.py +330 -0
- vllm_ascend-0.11.0rc1/examples/offline_inference_audio_language.py +105 -0
- vllm_ascend-0.11.0rc1/examples/offline_inference_npu.py +51 -0
- vllm_ascend-0.11.0rc1/examples/offline_inference_npu_tp2.py +55 -0
- vllm_ascend-0.11.0rc1/examples/offline_inference_sleep_mode_npu.py +57 -0
- vllm_ascend-0.11.0rc1/examples/offline_weight_load.py +326 -0
- vllm_ascend-0.11.0rc1/examples/prompt_embedding_inference.py +88 -0
- vllm_ascend-0.11.0rc1/examples/run_dp_server.sh +32 -0
- vllm_ascend-0.11.0rc1/format.sh +44 -0
- vllm_ascend-0.11.0rc1/mypy.ini +16 -0
- vllm_ascend-0.11.0rc1/pyproject.toml +35 -0
- vllm_ascend-0.11.0rc1/requirements-dev.txt +20 -0
- vllm_ascend-0.11.0rc1/requirements-lint.txt +9 -0
- vllm_ascend-0.11.0rc1/requirements.txt +28 -0
- vllm_ascend-0.11.0rc1/setup.py +399 -0
- vllm_ascend-0.11.0rc1/tests/e2e/310p/test_offline_inference_310p.py +72 -0
- vllm_ascend-0.11.0rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +62 -0
- vllm_ascend-0.11.0rc1/tests/e2e/common.sh +74 -0
- vllm_ascend-0.11.0rc1/tests/e2e/conftest.py +589 -0
- vllm_ascend-0.11.0rc1/tests/e2e/doctests/001-quickstart-test.sh +64 -0
- vllm_ascend-0.11.0rc1/tests/e2e/doctests/002-pip-binary-installation-test.sh +74 -0
- vllm_ascend-0.11.0rc1/tests/e2e/model_utils.py +74 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +17 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +11 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml +10 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +10 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +20 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-8B-Base.yaml +15 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-8B.yaml +11 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +12 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/configs/accuracy.txt +8 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/conftest.py +72 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/report_template.md +34 -0
- vllm_ascend-0.11.0rc1/tests/e2e/models/test_lm_eval_correctness.py +157 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_data_parallel.py +73 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_expert_parallel.py +42 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_external_launcher.py +243 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_full_graph_mode.py +72 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +86 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_ilama_lora_tp2.py +23 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_offline_inference_distributed.py +228 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_pipeline_parallel.py +47 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_prefix_caching.py +148 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_qwen3_moe.py +103 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_single_request_aclgraph.py +84 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_torchair_graph_mode.py +227 -0
- vllm_ascend-0.11.0rc1/tests/e2e/multicard/test_weight_loader.py +109 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py +106 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +110 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +104 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen3_32b.py +99 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/models/test_qwen3_32b_int8.py +118 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +126 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +76 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/multi_node_config.py +207 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config/utils.py +95 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/scripts/lws.yaml +132 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/scripts/run.sh +145 -0
- vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/test_multi_node.py +30 -0
- vllm_ascend-0.11.0rc1/tests/e2e/pd_disaggreate/run_edge_case_test.sh +139 -0
- vllm_ascend-0.11.0rc1/tests/e2e/pd_disaggreate/test_edge_cases.py +81 -0
- vllm_ascend-0.11.0rc1/tests/e2e/prompts/example.txt +8 -0
- vllm_ascend-0.11.0rc1/tests/e2e/run_doctests.sh +32 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_bgmv_expand.py +46 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_bgmv_shrink.py +45 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_fused_moe.py +341 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +37 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_mla_preprocess.py +108 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_rotary_embedding.py +351 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +98 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +110 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +106 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +148 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_aclgraph.py +203 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_aclgraph_mem.py +100 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_ascend_scheduler.py +113 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_bge_model.py +49 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_camem.py +99 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_chunked.py +82 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_embedding.py +49 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_embedding_aclgraph.py +55 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_guided_decoding.py +153 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_ilama_lora.py +62 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +103 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_profile_execute_duration.py +71 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_quantization.py +35 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_sampler.py +49 -0
- vllm_ascend-0.11.0rc1/tests/e2e/singlecard/test_vlm.py +124 -0
- vllm_ascend-0.11.0rc1/tests/e2e/utils.py +106 -0
- vllm_ascend-0.11.0rc1/tests/e2e/vllm_interface/singlecard/test_sampler.py +36 -0
- vllm_ascend-0.11.0rc1/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
- vllm_ascend-0.11.0rc1/tests/ut/attention/test_attention_mask.py +95 -0
- vllm_ascend-0.11.0rc1/tests/ut/attention/test_attention_v1.py +702 -0
- vllm_ascend-0.11.0rc1/tests/ut/attention/test_mla_v1.py +675 -0
- vllm_ascend-0.11.0rc1/tests/ut/base.py +44 -0
- vllm_ascend-0.11.0rc1/tests/ut/compilation/test_acl_graph.py +720 -0
- vllm_ascend-0.11.0rc1/tests/ut/conftest.py +26 -0
- vllm_ascend-0.11.0rc1/tests/ut/core/test_schedule_config.py +148 -0
- vllm_ascend-0.11.0rc1/tests/ut/core/test_scheduler.py +807 -0
- vllm_ascend-0.11.0rc1/tests/ut/device_allocator/test_camem.py +188 -0
- vllm_ascend-0.11.0rc1/tests/ut/distributed/device_communicators/test_pyhccl.py +84 -0
- vllm_ascend-0.11.0rc1/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +173 -0
- vllm_ascend-0.11.0rc1/tests/ut/distributed/test_communicator.py +89 -0
- vllm_ascend-0.11.0rc1/tests/ut/distributed/test_parallel_state.py +58 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/adaptor/test_abstract_adaptor.py +73 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_abstract.py +31 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +98 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +99 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/core/policy/test_policy_factor.py +23 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +116 -0
- vllm_ascend-0.11.0rc1/tests/ut/eplb/core/test_eplb_utils.py +225 -0
- vllm_ascend-0.11.0rc1/tests/ut/fake_weight/config.json +28 -0
- vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_llmdatadist_connector.py +96 -0
- vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_mooncake_connector.py +1139 -0
- vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +924 -0
- vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_remote_decode_lifecycle.py +169 -0
- vllm_ascend-0.11.0rc1/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +239 -0
- vllm_ascend-0.11.0rc1/tests/ut/kv_connector/utils.py +208 -0
- vllm_ascend-0.11.0rc1/tests/ut/models/conftest.py +100 -0
- vllm_ascend-0.11.0rc1/tests/ut/models/test_qwen2_5_vl.py +492 -0
- vllm_ascend-0.11.0rc1/tests/ut/models/test_qwen2_5_vl_without_padding.py +422 -0
- vllm_ascend-0.11.0rc1/tests/ut/models/test_qwen2_vl.py +200 -0
- vllm_ascend-0.11.0rc1/tests/ut/multistream/test_base.py +32 -0
- vllm_ascend-0.11.0rc1/tests/ut/multistream/test_decorator.py +47 -0
- vllm_ascend-0.11.0rc1/tests/ut/multistream/test_layers.py +198 -0
- vllm_ascend-0.11.0rc1/tests/ut/multistream/test_metadata.py +246 -0
- vllm_ascend-0.11.0rc1/tests/ut/multistream/test_ms_split.py +147 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/expert_map.json +17 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_activation.py +72 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_comm_utils.py +98 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_common_fused_moe.py +56 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_expert_load_balancer.py +141 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +271 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_fused_ops.py +597 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_layernorm.py +156 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_linear.py +147 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_moe_comm_method.py +230 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_rotary_embedding.py +462 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_token_dispatcher.py +515 -0
- vllm_ascend-0.11.0rc1/tests/ut/ops/test_vocab_parallel_embedding.py +240 -0
- vllm_ascend-0.11.0rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +119 -0
- vllm_ascend-0.11.0rc1/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +77 -0
- vllm_ascend-0.11.0rc1/tests/ut/quantization/test_quant_config.py +233 -0
- vllm_ascend-0.11.0rc1/tests/ut/quantization/test_utils.py +62 -0
- vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +246 -0
- vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +303 -0
- vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w8a8.py +977 -0
- vllm_ascend-0.11.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +69 -0
- vllm_ascend-0.11.0rc1/tests/ut/sample/logits_processor/test_builtin.py +40 -0
- vllm_ascend-0.11.0rc1/tests/ut/sample/test_rejection_sampler.py +203 -0
- vllm_ascend-0.11.0rc1/tests/ut/sample/test_sampler.py +32 -0
- vllm_ascend-0.11.0rc1/tests/ut/test_ascend_config.py +362 -0
- vllm_ascend-0.11.0rc1/tests/ut/test_envs.py +62 -0
- vllm_ascend-0.11.0rc1/tests/ut/test_platform.py +765 -0
- vllm_ascend-0.11.0rc1/tests/ut/test_utils.py +381 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +197 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/models/test_torchair_deepseek_v2.py +357 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/ops/test_torchair_fused_moe.py +422 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +331 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +296 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +129 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/test_torchair_attention.py +95 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/test_torchair_mla.py +850 -0
- vllm_ascend-0.11.0rc1/tests/ut/torchair/test_utils.py +164 -0
- vllm_ascend-0.11.0rc1/tests/ut/worker/test_input_batch.py +372 -0
- vllm_ascend-0.11.0rc1/tests/ut/worker/test_model_runner_v1.py +111 -0
- vllm_ascend-0.11.0rc1/tests/ut/worker/test_worker_v1.py +1240 -0
- vllm_ascend-0.11.0rc1/tools/aisbench.py +227 -0
- vllm_ascend-0.11.0rc1/tools/check_python_src_init.py +76 -0
- vllm_ascend-0.11.0rc1/tools/enforce_regex_import.py +104 -0
- vllm_ascend-0.11.0rc1/tools/mypy.sh +40 -0
- vllm_ascend-0.11.0rc1/tools/send_mm_request.py +49 -0
- vllm_ascend-0.11.0rc1/tools/shellcheck.sh +45 -0
- vllm_ascend-0.11.0rc1/typos.toml +177 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/__init__.py +33 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/_version.py +34 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ascend_config.py +310 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ascend_forward_context.py +211 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/attention/attention_mask.py +96 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/attention/attention_v1.py +719 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/attention/mla_v1.py +1323 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/attention/sfa_v1.py +988 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/attention/utils.py +180 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/compilation/acl_graph.py +344 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/core/recompute_schedule_config.py +39 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/core/recompute_scheduler.py +1392 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/core/schedule_config.py +108 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/core/scheduler.py +587 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/cpu_binding.py +330 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/device_allocator/camem.py +278 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/__init__.py +40 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_connector.py +471 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +202 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_manager/metadata.py +269 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +994 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/config_data.py +449 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/kv_transfer.py +282 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/mooncake_engine.py +621 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/mooncake_store.py +126 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +492 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake/transfer_engine.py +28 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake_connector.py +1263 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake_layerwise_connector.py +1153 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/parallel_state.py +196 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/distributed/utils.py +61 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/envs.py +188 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/adaptor/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/adaptor/abstract_adaptor.py +44 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/adaptor/vllm_adaptor.py +289 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +138 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/eplb_utils.py +190 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/eplb_worker.py +440 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_abstract.py +42 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +389 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +771 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_factory.py +33 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_flashlb.py +651 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/core/policy/policy_random.py +30 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/eplb_updator.py +209 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/eplb/utils.py +77 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/lora/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/lora/lora_ops.py +113 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/lora/punica_npu.py +356 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/lora/utils.py +110 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/meta_registration.py +105 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/__init__.py +48 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/deepseek_v3_2.py +633 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/layers/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/layers/mla.py +193 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/layers/sfa.py +233 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_5_omni_thinker.py +54 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_5_vl.py +562 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_5_vl_without_padding.py +605 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen2_vl.py +369 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/models/qwen3_next.py +676 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/multistream/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/multistream/decorator.py +22 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/multistream/ms_split.py +247 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/__init__.py +57 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/activation.py +44 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/attention.py +309 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/casual_conv1d.py +539 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/common_fused_moe.py +469 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/expert_load_balancer.py +117 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/fla.py +299 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/layernorm.py +213 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/linear.py +467 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/linear_op.py +531 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/comm_utils.py +113 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/experts_selector.py +277 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +520 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/moe_comm_method.py +273 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/moe_mlp.py +258 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/moe/token_dispatcher.py +726 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/register_custom_ops.py +315 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/rotary_embedding.py +431 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/sigmoid_gating.py +384 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/vocab_parallel_embedding.py +255 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/ops/weight_prefetch.py +112 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/__init__.py +174 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/__init__.py +30 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_config.py +234 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_core.py +68 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_distributed.py +115 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_mamba_config.py +96 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_message_queue.py +164 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_multiproc_executor.py +151 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/platform/patch_sched_yield.py +13 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/__init__.py +32 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_attention_layer.py +92 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_deepseek_mtp.py +94 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_distributed.py +115 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_logits.py +26 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_multimodal_merge.py +58 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_roberta.py +88 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_triton.py +16 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker/patch_weight_loader.py +41 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/platform.py +418 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/quant_config.py +474 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/utils.py +87 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +193 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w4a8_dynamic.py +490 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w8a8.py +674 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/quantization/w8a8_dynamic.py +284 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/sample/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/sample/logits_processor/__init__.py +50 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/sample/logits_processor/builtin.py +35 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/sample/rejection_sampler.py +504 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/sample/sampler.py +74 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/__init__.py +33 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/eagle_proposer.py +661 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/interface.py +53 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/mtp_proposer.py +672 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/spec_decode/ngram_proposer.py +68 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/qwen2.py +363 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/qwen3_moe.py +537 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1301 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_deepseek_v3.py +28 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/models/torchair_pangu_moe.py +1118 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/sequence_parallel.py +120 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/shared_weight_layer.py +245 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_activation.py +37 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_fused_moe.py +1429 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_layernorm.py +78 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +365 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py +38 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/quantization/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +501 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1080 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_attention.py +463 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_mla.py +1310 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_model_runner.py +557 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_sfa.py +1333 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/torchair_worker.py +63 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/torchair/utils.py +275 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/utils.py +764 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/worker/__init__.py +0 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/worker/block_table.py +312 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/worker/model_runner_v1.py +3674 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/worker/npu_input_batch.py +842 -0
- vllm_ascend-0.11.0rc1/vllm_ascend/worker/worker_v1.py +442 -0
- vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/PKG-INFO +142 -0
- vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/SOURCES.txt +627 -0
- vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/entry_points.txt +6 -0
- vllm_ascend-0.11.0rc1/vllm_ascend.egg-info/requires.txt +19 -0
- vllm_ascend-0.9.0rc2/.github/Dockerfile.buildwheel +0 -48
- vllm_ascend-0.9.0rc2/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -37
- vllm_ascend-0.9.0rc2/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -49
- vllm_ascend-0.9.0rc2/.github/PULL_REQUEST_TEMPLATE.md +0 -28
- vllm_ascend-0.9.0rc2/.github/actionlint.yaml +0 -8
- vllm_ascend-0.9.0rc2/.github/workflows/accuracy_report.yaml +0 -202
- vllm_ascend-0.9.0rc2/.github/workflows/accuracy_test.yaml +0 -255
- vllm_ascend-0.9.0rc2/.github/workflows/actionlint.yml +0 -53
- vllm_ascend-0.9.0rc2/.github/workflows/image_openeuler.yml +0 -99
- vllm_ascend-0.9.0rc2/.github/workflows/image_ubuntu.yml +0 -98
- vllm_ascend-0.9.0rc2/.github/workflows/label_merge_conflict.yml +0 -21
- vllm_ascend-0.9.0rc2/.github/workflows/labeler.yml +0 -18
- vllm_ascend-0.9.0rc2/.github/workflows/nightly_benchmarks.yaml +0 -193
- vllm_ascend-0.9.0rc2/.github/workflows/release_code.yml +0 -87
- vllm_ascend-0.9.0rc2/.github/workflows/release_whl.yml +0 -95
- vllm_ascend-0.9.0rc2/.github/workflows/shellcheck.yml +0 -49
- vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_doctest.yaml +0 -102
- vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test.yaml +0 -222
- vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test_long_term.yaml +0 -106
- vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test_pd.yaml +0 -106
- vllm_ascend-0.9.0rc2/.gitignore +0 -198
- vllm_ascend-0.9.0rc2/CMakeLists.txt +0 -100
- vllm_ascend-0.9.0rc2/CODE_OF_CONDUCT.md +0 -128
- vllm_ascend-0.9.0rc2/Dockerfile +0 -59
- vllm_ascend-0.9.0rc2/Dockerfile.openEuler +0 -56
- vllm_ascend-0.9.0rc2/PKG-INFO +0 -100
- vllm_ascend-0.9.0rc2/README.md +0 -79
- vllm_ascend-0.9.0rc2/README.zh.md +0 -78
- vllm_ascend-0.9.0rc2/benchmarks/README.md +0 -57
- vllm_ascend-0.9.0rc2/benchmarks/requirements-bench.txt +0 -5
- vllm_ascend-0.9.0rc2/benchmarks/scripts/convert_json_to_markdown.py +0 -183
- vllm_ascend-0.9.0rc2/benchmarks/scripts/patch_benchmark_dataset.py +0 -68
- vllm_ascend-0.9.0rc2/benchmarks/scripts/perf_result_template.md +0 -31
- vllm_ascend-0.9.0rc2/benchmarks/scripts/run-performance-benchmarks.sh +0 -323
- vllm_ascend-0.9.0rc2/benchmarks/scripts/run_accuracy.py +0 -226
- vllm_ascend-0.9.0rc2/benchmarks/tests/latency-tests.json +0 -13
- vllm_ascend-0.9.0rc2/benchmarks/tests/serving-tests.json +0 -53
- vllm_ascend-0.9.0rc2/benchmarks/tests/throughput-tests.json +0 -27
- vllm_ascend-0.9.0rc2/csrc/camem_allocator.cpp +0 -338
- vllm_ascend-0.9.0rc2/csrc/kernels/advance_step.cpp +0 -241
- vllm_ascend-0.9.0rc2/csrc/kernels/pos_encoding_kernels.cpp +0 -367
- vllm_ascend-0.9.0rc2/csrc/kernels/utils.h +0 -49
- vllm_ascend-0.9.0rc2/csrc/ops.h +0 -61
- vllm_ascend-0.9.0rc2/csrc/torch_binding.cpp +0 -204
- vllm_ascend-0.9.0rc2/csrc/utils.h +0 -43
- vllm_ascend-0.9.0rc2/docs/Makefile +0 -21
- vllm_ascend-0.9.0rc2/docs/README.md +0 -23
- vllm_ascend-0.9.0rc2/docs/requirements-docs.txt +0 -9
- vllm_ascend-0.9.0rc2/docs/requirements-test.txt +0 -2
- vllm_ascend-0.9.0rc2/docs/source/_templates/sections/header.html +0 -58
- vllm_ascend-0.9.0rc2/docs/source/community/contributors.md +0 -84
- vllm_ascend-0.9.0rc2/docs/source/community/governance.md +0 -48
- vllm_ascend-0.9.0rc2/docs/source/conf.py +0 -135
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/contributing.md +0 -113
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -6
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/index.md +0 -17
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/performance_benchmark.md +0 -187
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/profile_execute_duration.md +0 -34
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/using_evalscope.md +0 -173
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -62
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation/using_opencompass.md +0 -120
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/versioning_policy.md +0 -106
- vllm_ascend-0.9.0rc2/docs/source/faqs.md +0 -125
- vllm_ascend-0.9.0rc2/docs/source/index.md +0 -76
- vllm_ascend-0.9.0rc2/docs/source/installation.md +0 -274
- vllm_ascend-0.9.0rc2/docs/source/quick_start.md +0 -169
- vllm_ascend-0.9.0rc2/docs/source/tutorials/index.md +0 -11
- vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_node.md +0 -195
- vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_npu.md +0 -107
- vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_npu_quantization.md +0 -134
- vllm_ascend-0.9.0rc2/docs/source/tutorials/single_npu.md +0 -133
- vllm_ascend-0.9.0rc2/docs/source/tutorials/single_npu_multimodal.md +0 -191
- vllm_ascend-0.9.0rc2/docs/source/user_guide/additional_config.md +0 -76
- vllm_ascend-0.9.0rc2/docs/source/user_guide/env_vars.md +0 -9
- vllm_ascend-0.9.0rc2/docs/source/user_guide/graph_mode.md +0 -82
- vllm_ascend-0.9.0rc2/docs/source/user_guide/release.template.md +0 -13
- vllm_ascend-0.9.0rc2/docs/source/user_guide/release_notes.md +0 -243
- vllm_ascend-0.9.0rc2/docs/source/user_guide/supported_models.md +0 -52
- vllm_ascend-0.9.0rc2/docs/source/user_guide/suppoted_features.md +0 -49
- vllm_ascend-0.9.0rc2/docs/source/user_stories/example.md +0 -15
- vllm_ascend-0.9.0rc2/docs/source/user_stories/index.md +0 -22
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -138
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/dp_proxy.py +0 -463
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/find_device_ips.py +0 -67
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -193
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/run_decode_server.sh +0 -37
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/run_prefill_server.sh +0 -37
- vllm_ascend-0.9.0rc2/examples/dp_offline/data_parallel.py +0 -85
- vllm_ascend-0.9.0rc2/examples/dp_offline/run_dp.sh +0 -19
- vllm_ascend-0.9.0rc2/examples/offline_disaggregated_prefill_npu.py +0 -140
- vllm_ascend-0.9.0rc2/examples/offline_distributed_inference_npu.py +0 -44
- vllm_ascend-0.9.0rc2/examples/offline_dualbatch_overlap_npu.py +0 -51
- vllm_ascend-0.9.0rc2/examples/offline_inference_audio_language.py +0 -126
- vllm_ascend-0.9.0rc2/examples/offline_inference_npu.py +0 -39
- vllm_ascend-0.9.0rc2/examples/offline_inference_npu_v1.py +0 -49
- vllm_ascend-0.9.0rc2/examples/offline_multi_step_custom_ops.py +0 -53
- vllm_ascend-0.9.0rc2/examples/prompt_embedding_inference.py +0 -83
- vllm_ascend-0.9.0rc2/examples/run_dp_server.sh +0 -30
- vllm_ascend-0.9.0rc2/format.sh +0 -343
- vllm_ascend-0.9.0rc2/mypy.ini +0 -13
- vllm_ascend-0.9.0rc2/pyproject.toml +0 -23
- vllm_ascend-0.9.0rc2/pytest.ini +0 -68
- vllm_ascend-0.9.0rc2/requirements-dev.txt +0 -12
- vllm_ascend-0.9.0rc2/requirements-lint.txt +0 -15
- vllm_ascend-0.9.0rc2/requirements.txt +0 -23
- vllm_ascend-0.9.0rc2/setup.py +0 -368
- vllm_ascend-0.9.0rc2/tests/conftest.py +0 -359
- vllm_ascend-0.9.0rc2/tests/e2e/common.sh +0 -51
- vllm_ascend-0.9.0rc2/tests/e2e/doctests/001-quickstart-test.sh +0 -55
- vllm_ascend-0.9.0rc2/tests/e2e/run_doctests.sh +0 -27
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/__init__.py +0 -18
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/conftest.py +0 -28
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/conftest.py +0 -212
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_medusa_correctness.py +0 -445
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mlp_correctness.py +0 -560
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mtp_correctness.py +0 -455
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_ngram_correctness.py +0 -404
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py +0 -92
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_v1_spec_decode.py +0 -155
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_dynamic_spec_decode.py +0 -105
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_multi_step_worker.py +0 -846
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_ngram_worker.py +0 -237
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_spec_decode_worker.py +0 -958
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_utils.py +0 -165
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/utils.py +0 -317
- vllm_ascend-0.9.0rc2/tests/long_term/test_accuracy.py +0 -111
- vllm_ascend-0.9.0rc2/tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -71
- vllm_ascend-0.9.0rc2/tests/model_utils.py +0 -274
- vllm_ascend-0.9.0rc2/tests/multicard/test_dynamic_npugraph_batchsize.py +0 -57
- vllm_ascend-0.9.0rc2/tests/multicard/test_ilama_lora_tp2.py +0 -21
- vllm_ascend-0.9.0rc2/tests/multicard/test_offline_inference_distributed.py +0 -97
- vllm_ascend-0.9.0rc2/tests/multicard/test_pyhccl_distributed.py +0 -110
- vllm_ascend-0.9.0rc2/tests/singlecard/compile/test_simple.py +0 -134
- vllm_ascend-0.9.0rc2/tests/singlecard/ops/test_fused_moe.py +0 -100
- vllm_ascend-0.9.0rc2/tests/singlecard/ops/test_multi_step.py +0 -190
- vllm_ascend-0.9.0rc2/tests/singlecard/ops/test_rotary_embedding.py +0 -198
- vllm_ascend-0.9.0rc2/tests/singlecard/sample/test_rejection_sampler.py +0 -611
- vllm_ascend-0.9.0rc2/tests/singlecard/test_aclgraph.py +0 -95
- vllm_ascend-0.9.0rc2/tests/singlecard/test_ascend_config.py +0 -189
- vllm_ascend-0.9.0rc2/tests/singlecard/test_camem.py +0 -85
- vllm_ascend-0.9.0rc2/tests/singlecard/test_guided_decoding.py +0 -175
- vllm_ascend-0.9.0rc2/tests/singlecard/test_ilama_lora.py +0 -60
- vllm_ascend-0.9.0rc2/tests/singlecard/test_offline_inference.py +0 -129
- vllm_ascend-0.9.0rc2/tests/singlecard/test_profile_execute_duration.py +0 -62
- vllm_ascend-0.9.0rc2/tests/singlecard/test_prompt_embedding.py +0 -259
- vllm_ascend-0.9.0rc2/tests/singlecard/test_pyhccl.py +0 -29
- vllm_ascend-0.9.0rc2/tests/singlecard/test_sampler.py +0 -147
- vllm_ascend-0.9.0rc2/tests/singlecard/test_scheduler.py +0 -404
- vllm_ascend-0.9.0rc2/tests/utils.py +0 -199
- vllm_ascend-0.9.0rc2/tools/mypy.sh +0 -36
- vllm_ascend-0.9.0rc2/tools/shellcheck.sh +0 -41
- vllm_ascend-0.9.0rc2/vllm_ascend/__init__.py +0 -31
- vllm_ascend-0.9.0rc2/vllm_ascend/_version.py +0 -21
- vllm_ascend-0.9.0rc2/vllm_ascend/ascend_config.py +0 -163
- vllm_ascend-0.9.0rc2/vllm_ascend/attention/attention.py +0 -1301
- vllm_ascend-0.9.0rc2/vllm_ascend/attention/attention_v1.py +0 -409
- vllm_ascend-0.9.0rc2/vllm_ascend/attention/mla_v1.py +0 -937
- vllm_ascend-0.9.0rc2/vllm_ascend/compilation/piecewise_backend.py +0 -231
- vllm_ascend-0.9.0rc2/vllm_ascend/core/schedule_config.py +0 -74
- vllm_ascend-0.9.0rc2/vllm_ascend/core/scheduler.py +0 -407
- vllm_ascend-0.9.0rc2/vllm_ascend/device_allocator/camem.py +0 -278
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/__init__.py +0 -27
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -209
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/simple_connector.py +0 -376
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -209
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer/utils.py +0 -40
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/llmdatadist_connector.py +0 -470
- vllm_ascend-0.9.0rc2/vllm_ascend/distributed/parallel_state.py +0 -77
- vllm_ascend-0.9.0rc2/vllm_ascend/envs.py +0 -149
- vllm_ascend-0.9.0rc2/vllm_ascend/lora/punica_wrapper/punica_npu.py +0 -346
- vllm_ascend-0.9.0rc2/vllm_ascend/models/__init__.py +0 -49
- vllm_ascend-0.9.0rc2/vllm_ascend/models/deepseek_dbo.py +0 -1118
- vllm_ascend-0.9.0rc2/vllm_ascend/models/deepseek_mtp.py +0 -200
- vllm_ascend-0.9.0rc2/vllm_ascend/models/deepseek_v2.py +0 -728
- vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen2_5_vl.py +0 -487
- vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -273
- vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen2_vl.py +0 -352
- vllm_ascend-0.9.0rc2/vllm_ascend/models/qwen3_moe.py +0 -35
- vllm_ascend-0.9.0rc2/vllm_ascend/multistream/decorator.py +0 -26
- vllm_ascend-0.9.0rc2/vllm_ascend/multistream/ms_split.py +0 -245
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/__init__.py +0 -49
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/activation.py +0 -37
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/attention.py +0 -305
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/cache.py +0 -35
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/common_fused_moe.py +0 -69
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/expert_load_balancer.py +0 -99
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/fused_moe.py +0 -1211
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/layernorm.py +0 -40
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/rotary_embedding.py +0 -279
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/vocab_parallel_embedding.py +0 -67
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/__init__.py +0 -213
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/__init__.py +0 -25
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/__init__.py +0 -17
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/patch_distributed.py +0 -116
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_common/__init__.py +0 -18
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -99
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_main/__init__.py +0 -16
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/__init__.py +0 -26
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_0_9_0/__init__.py +0 -16
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/__init__.py +0 -26
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -49
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -70
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -107
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_sampler.py +0 -101
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +0 -155
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -38
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_main/__init__.py +0 -16
- vllm_ascend-0.9.0rc2/vllm_ascend/platform.py +0 -309
- vllm_ascend-0.9.0rc2/vllm_ascend/quantization/func_wrapper.py +0 -151
- vllm_ascend-0.9.0rc2/vllm_ascend/quantization/quant_config.py +0 -339
- vllm_ascend-0.9.0rc2/vllm_ascend/quantization/quantizer.py +0 -287
- vllm_ascend-0.9.0rc2/vllm_ascend/quantization/w8a8.py +0 -115
- vllm_ascend-0.9.0rc2/vllm_ascend/quantization/w8a8_dynamic.py +0 -753
- vllm_ascend-0.9.0rc2/vllm_ascend/sample/rejection_sampler.py +0 -456
- vllm_ascend-0.9.0rc2/vllm_ascend/utils.py +0 -229
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/__init__.py +0 -17
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/cache_engine.py +0 -83
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/draft_model_runner.py +0 -319
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/model_runner.py +0 -1607
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/model_runner_v1.py +0 -1916
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/mtp_proposer_v1.py +0 -225
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/multi_step_runner.py +0 -737
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/multi_step_worker.py +0 -194
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/pooling_model_runner.py +0 -186
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/worker.py +0 -573
- vllm_ascend-0.9.0rc2/vllm_ascend/worker/worker_v1.py +0 -313
- vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/PKG-INFO +0 -100
- vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/SOURCES.txt +0 -286
- vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/entry_points.txt +0 -5
- vllm_ascend-0.9.0rc2/vllm_ascend.egg-info/requires.txt +0 -18
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/labeler.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.github/workflows/matchers/ruff.json +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/DCO +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/LICENSE +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/collect_env.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/packages.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/setup.cfg +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode → vllm_ascend-0.11.0rc1/tests}/e2e/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests/singlecard → vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests/singlecard/compile → vllm_ascend-0.11.0rc1/tests/e2e/nightly/multi_node/config}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tests/e2e/run_disagg_pd.sh +0 -0
- {vllm_ascend-0.9.0rc2/tests/singlecard/ops → vllm_ascend-0.11.0rc1/tests/e2e/singlecard}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests/singlecard/sample → vllm_ascend-0.11.0rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/attention → vllm_ascend-0.11.0rc1/tests/ut}/__init__.py +0 -0
- /vllm_ascend-0.9.0rc2/vllm_ascend/compilation/__init__.py → /vllm_ascend-0.11.0rc1/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/core → vllm_ascend-0.11.0rc1/tests/ut/models}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/device_allocator → vllm_ascend-0.11.0rc1/tests/ut/torchair}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/distributed/device_communicators → vllm_ascend-0.11.0rc1/vllm_ascend/attention}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.11.0rc1/vllm_ascend/compilation}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/lora → vllm_ascend-0.11.0rc1/vllm_ascend/core}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.11.0rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/multistream → vllm_ascend-0.11.0rc1/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/quantization → vllm_ascend-0.11.0rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/sample → vllm_ascend-0.11.0rc1/vllm_ascend/distributed/mooncake}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/base.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/context.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/layers.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend/multistream/metadata.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common → vllm_ascend-0.11.0rc1/vllm_ascend/patch/worker}/patch_minicpm.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.11.0rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
|
|
2
|
+
have_fun: false # Just review the code
|
|
3
|
+
code_review:
|
|
4
|
+
comment_severity_threshold: HIGH # Reduce quantity of comments
|
|
5
|
+
pull_request_opened:
|
|
6
|
+
summary: false # Don't summarize the PR in a separate comment
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
ARG PY_VERSION=3.11
|
|
18
|
+
FROM quay.io/ascend/manylinux:8.2.rc1-910b-manylinux_2_28-py${PY_VERSION}
|
|
19
|
+
|
|
20
|
+
ARG COMPILE_CUSTOM_KERNELS=1
|
|
21
|
+
|
|
22
|
+
# Define environments
|
|
23
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
24
|
+
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
|
|
25
|
+
RUN yum update -y && \
|
|
26
|
+
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
|
|
27
|
+
rm -rf /var/cache/yum
|
|
28
|
+
|
|
29
|
+
WORKDIR /workspace
|
|
30
|
+
|
|
31
|
+
COPY . /workspace/vllm-ascend/
|
|
32
|
+
|
|
33
|
+
# Install req
|
|
34
|
+
RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
|
|
35
|
+
python3 -m pip install twine
|
|
36
|
+
|
|
37
|
+
# Install vllm-ascend
|
|
38
|
+
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
39
|
+
source /usr/local/Ascend/nnal/atb/set_env.sh && \
|
|
40
|
+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
|
|
41
|
+
cd vllm-ascend && \
|
|
42
|
+
python3 setup.py bdist_wheel && \
|
|
43
|
+
ls -l dist
|
|
44
|
+
|
|
45
|
+
CMD ["/bin/bash"]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: 📚 User Story
|
|
2
|
+
description: Apply for a user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
|
|
3
|
+
title: "[User Story]: "
|
|
4
|
+
labels: ["user-story"]
|
|
5
|
+
|
|
6
|
+
body:
|
|
7
|
+
- type: textarea
|
|
8
|
+
attributes:
|
|
9
|
+
label: 📚 Title
|
|
10
|
+
description: >
|
|
11
|
+
A clear title about what your user story is about.
|
|
12
|
+
validations:
|
|
13
|
+
required: true
|
|
14
|
+
- type: textarea
|
|
15
|
+
attributes:
|
|
16
|
+
label: About / Introduction
|
|
17
|
+
description: >
|
|
18
|
+
A brief introduction about the background of your use case, like your scenario, hardware size etc.
|
|
19
|
+
- type: textarea
|
|
20
|
+
attributes:
|
|
21
|
+
label: Business Challenges
|
|
22
|
+
description: >
|
|
23
|
+
Tell us what kind of challenges you faced in this user story.
|
|
24
|
+
- type: textarea
|
|
25
|
+
attributes:
|
|
26
|
+
label: Solving challenges with vLLM Ascend and benefits
|
|
27
|
+
description: >
|
|
28
|
+
Tell us how vLLM Ascend helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc., and what kind of benefits you get from using vLLM Ascend.
|
|
29
|
+
- type: textarea
|
|
30
|
+
attributes:
|
|
31
|
+
label: Extra Info
|
|
32
|
+
description: >
|
|
33
|
+
Any extra information you want to include in this story.
|
|
34
|
+
- type: markdown
|
|
35
|
+
attributes:
|
|
36
|
+
value: >
|
|
37
|
+
Thanks for contributing 🎉!
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: 💬 Request for comments (RFC).
|
|
2
|
+
description: Ask for feedback on major architectural changes or design choices.
|
|
3
|
+
title: "[RFC]: "
|
|
4
|
+
labels: ["RFC"]
|
|
5
|
+
|
|
6
|
+
body:
|
|
7
|
+
- type: markdown
|
|
8
|
+
attributes:
|
|
9
|
+
value: >
|
|
10
|
+
#### Please take a look at previous [RFCs](https://github.com/vllm-project/vllm-ascend/issues?q=label%3ARFC+sort%3Aupdated-desc) for reference.
|
|
11
|
+
- type: textarea
|
|
12
|
+
attributes:
|
|
13
|
+
label: Motivation.
|
|
14
|
+
description: >
|
|
15
|
+
The motivation of the RFC.
|
|
16
|
+
validations:
|
|
17
|
+
required: true
|
|
18
|
+
- type: textarea
|
|
19
|
+
attributes:
|
|
20
|
+
label: Proposed Change.
|
|
21
|
+
description: >
|
|
22
|
+
The proposed change of the RFC.
|
|
23
|
+
validations:
|
|
24
|
+
required: true
|
|
25
|
+
- type: textarea
|
|
26
|
+
attributes:
|
|
27
|
+
label: Feedback Period.
|
|
28
|
+
description: >
|
|
29
|
+
The feedback period of the RFC. Usually at least one week.
|
|
30
|
+
validations:
|
|
31
|
+
required: false
|
|
32
|
+
- type: textarea
|
|
33
|
+
attributes:
|
|
34
|
+
label: CC List.
|
|
35
|
+
description: >
|
|
36
|
+
The list of people you want to CC.
|
|
37
|
+
validations:
|
|
38
|
+
required: false
|
|
39
|
+
- type: textarea
|
|
40
|
+
attributes:
|
|
41
|
+
label: Any Other Things.
|
|
42
|
+
description: >
|
|
43
|
+
Any other things you would like to mention, such as feature branch request.
|
|
44
|
+
validations:
|
|
45
|
+
required: false
|
|
46
|
+
- type: markdown
|
|
47
|
+
attributes:
|
|
48
|
+
value: >
|
|
49
|
+
Thanks for contributing 🎉!
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
name: Release Checklist
|
|
2
|
+
description: Generate a release checklist issue when preparing a new release. (Used by the release team)
|
|
3
|
+
title: "[Release]: Release checklist for v"
|
|
4
|
+
|
|
5
|
+
body:
|
|
6
|
+
- type: textarea
|
|
7
|
+
attributes:
|
|
8
|
+
description: >
|
|
9
|
+
Brief info for the new release.
|
|
10
|
+
label: Release Checklist
|
|
11
|
+
value: >
|
|
12
|
+
**Release Version**:
|
|
13
|
+
|
|
14
|
+
**Release Branch**:
|
|
15
|
+
|
|
16
|
+
**Release Date**:
|
|
17
|
+
|
|
18
|
+
**Release Manager**:
|
|
19
|
+
- type: textarea
|
|
20
|
+
attributes:
|
|
21
|
+
description: >
|
|
22
|
+
Release notes.
|
|
23
|
+
label: Prepare Release Note
|
|
24
|
+
value: >
|
|
25
|
+
- [ ] Create a new issue for release feedback
|
|
26
|
+
|
|
27
|
+
- [ ] Upgrade vllm version to the new version for CI and Dockerfile
|
|
28
|
+
|
|
29
|
+
- [ ] Write the release note PR.
|
|
30
|
+
|
|
31
|
+
- [ ] Update the feedback issue link in docs/source/faqs.md
|
|
32
|
+
|
|
33
|
+
- [ ] Add release note to docs/source/user_guide/release_notes.md
|
|
34
|
+
|
|
35
|
+
- [ ] Update release version in README.md and README.zh.md
|
|
36
|
+
|
|
37
|
+
- [ ] Update version info in docs/source/community/versioning_policy.md
|
|
38
|
+
|
|
39
|
+
- [ ] Update contributor info in docs/source/community/contributors.md
|
|
40
|
+
|
|
41
|
+
- [ ] Update package version in docs/conf.py
|
|
42
|
+
- type: textarea
|
|
43
|
+
attributes:
|
|
44
|
+
description: >
|
|
45
|
+
Make sure the code is merged.
|
|
46
|
+
label: PR need Merge
|
|
47
|
+
value: >
|
|
48
|
+
- [ ] PR link1
|
|
49
|
+
|
|
50
|
+
- [ ] PR link2
|
|
51
|
+
|
|
52
|
+
- [ ] ...
|
|
53
|
+
- type: textarea
|
|
54
|
+
attributes:
|
|
55
|
+
description: >
|
|
56
|
+
Make sure the new Feature/Function is tested
|
|
57
|
+
label: Functional Test
|
|
58
|
+
value: >
|
|
59
|
+
- [ ] Feature1
|
|
60
|
+
|
|
61
|
+
- [ ] Bug1
|
|
62
|
+
|
|
63
|
+
- [ ] ...
|
|
64
|
+
- type: textarea
|
|
65
|
+
attributes:
|
|
66
|
+
description: >
|
|
67
|
+
Make sure the doc is updated.
|
|
68
|
+
label: Doc Test
|
|
69
|
+
value: >
|
|
70
|
+
- [ ] Tutorial is updated.
|
|
71
|
+
|
|
72
|
+
- [ ] User Guide is updated.
|
|
73
|
+
|
|
74
|
+
- [ ] Developer Guide is updated.
|
|
75
|
+
- type: textarea
|
|
76
|
+
attributes:
|
|
77
|
+
description: >
|
|
78
|
+
Make sure the artifacts are ready.
|
|
79
|
+
label: Prepare Artifacts
|
|
80
|
+
value: >
|
|
81
|
+
- [ ] Docker image is ready.
|
|
82
|
+
|
|
83
|
+
- [ ] Wheel package is ready.
|
|
84
|
+
- type: textarea
|
|
85
|
+
attributes:
|
|
86
|
+
description: >
|
|
87
|
+
Start to release.
|
|
88
|
+
label: Release Step
|
|
89
|
+
value: >
|
|
90
|
+
- [ ] Release note PR is merged.
|
|
91
|
+
|
|
92
|
+
- [ ] Post the release on GitHub release page.
|
|
93
|
+
|
|
94
|
+
- [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
|
|
95
|
+
|
|
96
|
+
- [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
|
|
97
|
+
|
|
98
|
+
- [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
|
|
99
|
+
|
|
100
|
+
- [ ] Upload 310p wheel to GitHub release page
|
|
101
|
+
|
|
102
|
+
- [ ] Broadcast the release news (by message, blog, etc.)
|
|
103
|
+
|
|
104
|
+
- [ ] Close this issue
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
<!-- Thanks for sending a pull request!
|
|
2
|
+
|
|
3
|
+
BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html
|
|
4
|
+
|
|
5
|
+
-->
|
|
6
|
+
### What this PR does / why we need it?
|
|
7
|
+
<!--
|
|
8
|
+
- Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue.
|
|
9
|
+
If possible, please consider writing useful notes for better and faster reviews in your PR.
|
|
10
|
+
|
|
11
|
+
- Please clarify why the changes are needed. For instance, the use case and bug description.
|
|
12
|
+
|
|
13
|
+
- Fixes #
|
|
14
|
+
-->
|
|
15
|
+
|
|
16
|
+
### Does this PR introduce _any_ user-facing change?
|
|
17
|
+
<!--
|
|
18
|
+
Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes.
|
|
19
|
+
Documentation-only updates are not considered user-facing changes.
|
|
20
|
+
-->
|
|
21
|
+
|
|
22
|
+
### How was this patch tested?
|
|
23
|
+
<!--
|
|
24
|
+
CI passed with newly added/existing tests.
|
|
25
|
+
If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
|
|
26
|
+
If tests were not added, please describe why they were not added and/or why it was difficult to add.
|
|
27
|
+
-->
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
self-hosted-runner:
|
|
2
|
+
# Labels of self-hosted runner in array of strings.
|
|
3
|
+
labels:
|
|
4
|
+
- linux-aarch64-a2-0
|
|
5
|
+
- linux-aarch64-a2-1
|
|
6
|
+
- linux-aarch64-a2-2
|
|
7
|
+
- linux-aarch64-a2-4
|
|
8
|
+
- linux-aarch64-a2-8
|
|
9
|
+
- linux-arm64-npu-static-8
|
|
10
|
+
- linux-aarch64-310p-1
|
|
11
|
+
- linux-aarch64-310p-2
|
|
12
|
+
- linux-aarch64-310p-4
|
|
13
|
+
- ubuntu-24.04-arm
|
|
14
|
+
- linux-aarch64-a3-1
|
|
15
|
+
- linux-aarch64-a3-2
|
|
16
|
+
- linux-aarch64-a3-4
|
|
17
|
+
- linux-aarch64-a3-8
|
|
18
|
+
- linux-amd64-cpu-0
|
|
19
|
+
- linux-amd64-cpu-8
|
|
20
|
+
- linux-amd64-cpu-16
|
|
21
|
+
- linux-aarch64-a3-0
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
# Adapted from vllm/.github/scripts/cleanup_pr_body.sh
|
|
17
|
+
|
|
18
|
+
#!/bin/bash
|
|
19
|
+
|
|
20
|
+
set -eux
|
|
21
|
+
|
|
22
|
+
# ensure exactly 3 arguments are passed
|
|
23
|
+
if [ "$#" -ne 3 ]; then
|
|
24
|
+
echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
|
|
25
|
+
exit 1
|
|
26
|
+
fi
|
|
27
|
+
|
|
28
|
+
PR_NUMBER=$1
|
|
29
|
+
VLLM_VERSION=$2
|
|
30
|
+
VLLM_COMMIT=$3
|
|
31
|
+
OLD=/tmp/orig_pr_body.txt
|
|
32
|
+
NEW=/tmp/new_pr_body.txt
|
|
33
|
+
FINAL=/tmp/final_pr_body.txt
|
|
34
|
+
|
|
35
|
+
gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
|
|
36
|
+
cp "${OLD}" "${NEW}"
|
|
37
|
+
|
|
38
|
+
# Remove notes in pr description and add vLLM version and commit
|
|
39
|
+
sed -i '/<!--/,/-->/d' "${NEW}"
|
|
40
|
+
sed -i '/- vLLM .*$/d' "${NEW}"
|
|
41
|
+
{
|
|
42
|
+
echo ""
|
|
43
|
+
echo "- vLLM version: $VLLM_VERSION"
|
|
44
|
+
echo "- vLLM main: $VLLM_COMMIT"
|
|
45
|
+
} >> "${NEW}"
|
|
46
|
+
|
|
47
|
+
# Remove redundant empty lines
|
|
48
|
+
uniq "${NEW}" > "${FINAL}"
|
|
49
|
+
|
|
50
|
+
# Run this only if ${FINAL} is different from ${OLD}
|
|
51
|
+
if ! cmp -s "${OLD}" "${FINAL}"; then
|
|
52
|
+
echo
|
|
53
|
+
echo "Updating PR body:"
|
|
54
|
+
echo
|
|
55
|
+
cat "${NEW}"
|
|
56
|
+
gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
|
|
57
|
+
else
|
|
58
|
+
echo "No changes needed"
|
|
59
|
+
fi
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
name: 'accuracy test'
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_call:
|
|
5
|
+
inputs:
|
|
6
|
+
vllm:
|
|
7
|
+
required: true
|
|
8
|
+
type: string
|
|
9
|
+
vllm-ascend:
|
|
10
|
+
required: false
|
|
11
|
+
type: string
|
|
12
|
+
default: main
|
|
13
|
+
runner:
|
|
14
|
+
required: true
|
|
15
|
+
type: string
|
|
16
|
+
image:
|
|
17
|
+
required: true
|
|
18
|
+
type: string
|
|
19
|
+
model_name:
|
|
20
|
+
required: true
|
|
21
|
+
type: string
|
|
22
|
+
upload:
|
|
23
|
+
required: false
|
|
24
|
+
type: boolean
|
|
25
|
+
default: false
|
|
26
|
+
|
|
27
|
+
jobs:
|
|
28
|
+
accuracy_tests:
|
|
29
|
+
|
|
30
|
+
runs-on: ${{ inputs.runner }}
|
|
31
|
+
name: ${{ inputs.model_name }} accuracy
|
|
32
|
+
container:
|
|
33
|
+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
|
|
34
|
+
env:
|
|
35
|
+
VLLM_USE_MODELSCOPE: True
|
|
36
|
+
# 1. If version specified (workflow_dispatch), do specified branch accuracy test
|
|
37
|
+
# 2. If no version (labeled PR), do accuracy test by default ref:
|
|
38
|
+
# The branch, tag or SHA to checkout. When checking out the repository that
|
|
39
|
+
# triggered a workflow, this defaults to the reference or SHA for that event.
|
|
40
|
+
# Otherwise, uses the default branch.
|
|
41
|
+
GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
|
|
42
|
+
|
|
43
|
+
steps:
|
|
44
|
+
- name: Checkout repository
|
|
45
|
+
uses: actions/checkout@v4
|
|
46
|
+
|
|
47
|
+
- name: Set model name as output
|
|
48
|
+
id: set_output
|
|
49
|
+
run: |
|
|
50
|
+
echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
|
|
51
|
+
|
|
52
|
+
- name: Config mirrors
|
|
53
|
+
run: |
|
|
54
|
+
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
55
|
+
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
56
|
+
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
57
|
+
apt-get update -y
|
|
58
|
+
apt install git -y
|
|
59
|
+
|
|
60
|
+
- name: Install system dependencies
|
|
61
|
+
run: |
|
|
62
|
+
apt-get -y install `cat packages.txt`
|
|
63
|
+
apt-get -y install gcc g++ cmake libnuma-dev
|
|
64
|
+
|
|
65
|
+
- name: Checkout vllm-project/vllm repo
|
|
66
|
+
uses: actions/checkout@v4
|
|
67
|
+
with:
|
|
68
|
+
repository: vllm-project/vllm
|
|
69
|
+
ref: ${{ inputs.vllm }}
|
|
70
|
+
path: ./vllm-empty
|
|
71
|
+
|
|
72
|
+
- name: Install vllm-project/vllm from source
|
|
73
|
+
working-directory: ./vllm-empty
|
|
74
|
+
run: |
|
|
75
|
+
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
76
|
+
|
|
77
|
+
- name: Resolve vllm-ascend version
|
|
78
|
+
run: |
|
|
79
|
+
VERSION_INPUT="${{ inputs.vllm-ascend }}"
|
|
80
|
+
|
|
81
|
+
if [[ "$VERSION_INPUT" == "latest" ]]; then
|
|
82
|
+
TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
|
|
83
|
+
LATEST_TAG=$(echo "$TAGS" | head -n1)
|
|
84
|
+
if [[ -z "$LATEST_TAG" ]]; then
|
|
85
|
+
RESOLVED_VERSION="main"
|
|
86
|
+
else
|
|
87
|
+
RESOLVED_VERSION="$LATEST_TAG"
|
|
88
|
+
fi
|
|
89
|
+
else
|
|
90
|
+
RESOLVED_VERSION="$VERSION_INPUT"
|
|
91
|
+
fi
|
|
92
|
+
echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
|
|
93
|
+
|
|
94
|
+
- name: Checkout vllm-project/vllm-ascend repo
|
|
95
|
+
uses: actions/checkout@v4
|
|
96
|
+
with:
|
|
97
|
+
repository: vllm-project/vllm-ascend
|
|
98
|
+
path: ./vllm-ascend
|
|
99
|
+
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
|
|
100
|
+
|
|
101
|
+
- name: Install vllm-project/vllm-ascend
|
|
102
|
+
working-directory: ./vllm-ascend
|
|
103
|
+
env:
|
|
104
|
+
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
105
|
+
run: |
|
|
106
|
+
pip install -r requirements-dev.txt
|
|
107
|
+
pip install -v -e .
|
|
108
|
+
|
|
109
|
+
- name: Get vLLM commit hash and URL
|
|
110
|
+
working-directory: ./vllm-empty
|
|
111
|
+
run: |
|
|
112
|
+
VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
|
|
113
|
+
echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
|
|
114
|
+
|
|
115
|
+
- name: Get vLLM-Ascend commit hash and URL
|
|
116
|
+
working-directory: ./vllm-ascend
|
|
117
|
+
run: |
|
|
118
|
+
VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
|
|
119
|
+
echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
|
|
120
|
+
|
|
121
|
+
- name: Collect version info
|
|
122
|
+
run: |
|
|
123
|
+
for dir in /usr/local/Ascend/ascend-toolkit/*; do
|
|
124
|
+
dname=$(basename "$dir")
|
|
125
|
+
if [ "$dname" != "latest" ]; then
|
|
126
|
+
TOOLKIT_DIR="$dname"
|
|
127
|
+
break
|
|
128
|
+
fi
|
|
129
|
+
done
|
|
130
|
+
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
|
|
131
|
+
GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
|
|
132
|
+
| head -n1 \
|
|
133
|
+
| cut -d'=' -f2 \
|
|
134
|
+
| tr -d '"')
|
|
135
|
+
{
|
|
136
|
+
echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
|
|
137
|
+
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
|
|
138
|
+
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
|
|
139
|
+
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
|
|
140
|
+
} >> "$GITHUB_ENV"
|
|
141
|
+
|
|
142
|
+
- name: Run accuracy test
|
|
143
|
+
id: report
|
|
144
|
+
env:
|
|
145
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
146
|
+
VLLM_USE_MODELSCOPE: True
|
|
147
|
+
VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
|
|
148
|
+
VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
|
|
149
|
+
VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
|
|
150
|
+
VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
|
|
151
|
+
CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
|
|
152
|
+
TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
|
|
153
|
+
TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
|
|
154
|
+
run: |
|
|
155
|
+
model_base_name=$(basename ${{ inputs.model_name }})
|
|
156
|
+
markdown_name="${model_base_name}"
|
|
157
|
+
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
|
|
158
|
+
mkdir -p ./benchmarks/accuracy
|
|
159
|
+
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
|
|
160
|
+
--config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
|
|
161
|
+
|
|
162
|
+
- name: Generate step summary
|
|
163
|
+
if: ${{ always() }}
|
|
164
|
+
run: |
|
|
165
|
+
cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
|
|
166
|
+
|
|
167
|
+
- name: Upload Report
|
|
168
|
+
if: ${{ inputs.upload == true }}
|
|
169
|
+
uses: actions/upload-artifact@v4
|
|
170
|
+
with:
|
|
171
|
+
name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
|
|
172
|
+
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
|
|
173
|
+
if-no-files-found: warn
|
|
174
|
+
retention-days: 90
|
|
175
|
+
overwrite: true
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
name: 'e2e nightly test'
|
|
19
|
+
|
|
20
|
+
on:
|
|
21
|
+
workflow_call:
|
|
22
|
+
inputs:
|
|
23
|
+
vllm:
|
|
24
|
+
required: true
|
|
25
|
+
type: string
|
|
26
|
+
runner:
|
|
27
|
+
required: true
|
|
28
|
+
type: string
|
|
29
|
+
image:
|
|
30
|
+
required: false
|
|
31
|
+
type: string
|
|
32
|
+
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
|
|
33
|
+
tests:
|
|
34
|
+
required: true
|
|
35
|
+
type: string
|
|
36
|
+
|
|
37
|
+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
38
|
+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
|
39
|
+
# It's used to activate ascend-toolkit environment variables.
|
|
40
|
+
defaults:
|
|
41
|
+
run:
|
|
42
|
+
shell: bash -el {0}
|
|
43
|
+
|
|
44
|
+
# only cancel in-progress runs of the same workflow
|
|
45
|
+
# and ignore the lint / 1 card / 4 cards test type
|
|
46
|
+
concurrency:
|
|
47
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
48
|
+
cancel-in-progress: true
|
|
49
|
+
|
|
50
|
+
jobs:
|
|
51
|
+
e2e-nightly:
|
|
52
|
+
name: e2e-nightly
|
|
53
|
+
runs-on: ${{ inputs.runner }}
|
|
54
|
+
container:
|
|
55
|
+
image: ${{ inputs.image }}
|
|
56
|
+
env:
|
|
57
|
+
VLLM_USE_MODELSCOPE: True
|
|
58
|
+
steps:
|
|
59
|
+
- name: Check npu and CANN info
|
|
60
|
+
run: |
|
|
61
|
+
npu-smi info
|
|
62
|
+
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
63
|
+
|
|
64
|
+
- name: Config mirrors
|
|
65
|
+
run: |
|
|
66
|
+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
|
67
|
+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
|
68
|
+
apt-get update -y
|
|
69
|
+
apt install git -y
|
|
70
|
+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
|
|
71
|
+
|
|
72
|
+
- name: Checkout vllm-project/vllm-ascend repo
|
|
73
|
+
uses: actions/checkout@v4
|
|
74
|
+
|
|
75
|
+
- name: Install system dependencies
|
|
76
|
+
run: |
|
|
77
|
+
apt-get -y install `cat packages.txt`
|
|
78
|
+
apt-get -y install gcc g++ cmake libnuma-dev
|
|
79
|
+
|
|
80
|
+
- name: Checkout vllm-project/vllm repo
|
|
81
|
+
uses: actions/checkout@v4
|
|
82
|
+
with:
|
|
83
|
+
repository: vllm-project/vllm
|
|
84
|
+
ref: ${{ inputs.vllm }}
|
|
85
|
+
path: ./vllm-empty
|
|
86
|
+
|
|
87
|
+
- name: Install vllm-project/vllm from source
|
|
88
|
+
working-directory: ./vllm-empty
|
|
89
|
+
run: |
|
|
90
|
+
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
91
|
+
|
|
92
|
+
- name: Install vllm-project/vllm-ascend
|
|
93
|
+
env:
|
|
94
|
+
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
95
|
+
run: |
|
|
96
|
+
pip install -r requirements-dev.txt
|
|
97
|
+
pip install -v -e .
|
|
98
|
+
|
|
99
|
+
- name: Checkout aisbench repo and Install aisbench
|
|
100
|
+
run: |
|
|
101
|
+
git clone https://gitee.com/aisbench/benchmark.git
|
|
102
|
+
cd benchmark
|
|
103
|
+
git checkout v3.0-20250930-master
|
|
104
|
+
pip3 install -e ./
|
|
105
|
+
pip3 install -r requirements/api.txt
|
|
106
|
+
pip3 install -r requirements/extra.txt
|
|
107
|
+
|
|
108
|
+
- name: Run vllm-project/vllm-ascend test
|
|
109
|
+
env:
|
|
110
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
111
|
+
VLLM_USE_MODELSCOPE: True
|
|
112
|
+
VLLM_CI_RUNNER: ${{ inputs.runner }}
|
|
113
|
+
run: |
|
|
114
|
+
# TODO: enable more tests
|
|
115
|
+
pytest -sv ${{ inputs.tests }}
|