vllm-ascend 0.9.2rc1__tar.gz → 0.10.1rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm_ascend-0.10.1rc1/.gemini/config.yaml +6 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/Dockerfile.buildwheel +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -1
- vllm_ascend-0.10.1rc1/.github/actionlint.yaml +17 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/format_pr_body.sh +6 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/accuracy_test.yaml +74 -141
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/format_pr_body.yaml +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_310p_openeuler.yml +16 -7
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_310p_ubuntu.yml +13 -4
- vllm_ascend-0.10.1rc1/.github/workflows/image_a3_openeuler.yml +123 -0
- vllm_ascend-0.10.1rc1/.github/workflows/image_a3_ubuntu.yml +119 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_openeuler.yml +12 -4
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/image_ubuntu.yml +10 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/nightly_benchmarks.yaml +8 -6
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/pre-commit.yml +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/release_code.yml +2 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/release_whl.yml +3 -2
- vllm_ascend-0.10.1rc1/.github/workflows/reminder_comment.yml +26 -0
- vllm_ascend-0.9.2rc1/.github/workflows/vllm_ascend_test_long_term.yaml → vllm_ascend-0.10.1rc1/.github/workflows/vllm_ascend_dist.yaml +22 -25
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/vllm_ascend_doctest.yaml +4 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/vllm_ascend_test.yaml +77 -107
- vllm_ascend-0.10.1rc1/.github/workflows/vllm_ascend_test_310p.yaml +117 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/vllm_ascend_test_pd.yaml +5 -5
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.gitignore +4 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.pre-commit-config.yaml +11 -5
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/CODE_OF_CONDUCT.md +0 -1
- vllm_ascend-0.10.1rc1/CONTRIBUTING.md +3 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile +3 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile.310p +3 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile.310p.openEuler +4 -3
- vllm_ascend-0.10.1rc1/Dockerfile.a3 +60 -0
- vllm_ascend-0.10.1rc1/Dockerfile.a3.openEuler +58 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/Dockerfile.openEuler +4 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/PKG-INFO +17 -6
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/README.md +16 -5
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/README.zh.md +15 -4
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/README.md +44 -35
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/requirements-bench.txt +0 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/scripts/perf_result_template.md +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/scripts/run-performance-benchmarks.sh +0 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/codecov.yml +2 -4
- vllm_ascend-0.10.1rc1/csrc/kernels/bgmv_expand.cpp +369 -0
- vllm_ascend-0.10.1rc1/csrc/kernels/bgmv_shrink.cpp +252 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -5
- vllm_ascend-0.10.1rc1/csrc/kernels/sgmv_expand.cpp +389 -0
- vllm_ascend-0.10.1rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/ops.h +64 -12
- vllm_ascend-0.10.1rc1/csrc/torch_binding.cpp +428 -0
- vllm_ascend-0.10.1rc1/csrc/torch_binding_meta.cpp +102 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/utils.h +0 -12
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/Makefile +4 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/README.md +6 -5
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/requirements-docs.txt +1 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/_templates/sections/header.html +1 -1
- vllm_ascend-0.10.1rc1/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- vllm_ascend-0.10.1rc1/docs/source/assets/multi_node_dp_kimi.png +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/contributors.md +37 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/governance.md +2 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/user_stories/llamafactory.md +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/versioning_policy.md +21 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/conf.py +8 -8
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/contribution/testing.md +5 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +3 -1
- vllm_ascend-0.10.1rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +300 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +4 -1
- vllm_ascend-0.10.1rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +237 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/feature_guide/index.md +1 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/feature_guide/patch.md +8 -5
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/modeling/adding_a_new_model.md +1 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/performance/index.md +1 -0
- vllm_ascend-0.10.1rc1/docs/source/developer_guide/performance/optimization_and_tuning.md +183 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/performance/performance_benchmark.md +7 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/performance/profile_execute_duration.md +2 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/faqs.md +41 -8
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/installation.md +15 -14
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +1647 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +204 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +103 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +87 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +624 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +187 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +237 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +26 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +26 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +112 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +65 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +83 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +33 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +248 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +333 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +29 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +32 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +26 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +88 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +81 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +479 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +79 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +293 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +149 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +29 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +192 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +62 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +86 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +82 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +71 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +110 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +107 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +77 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +99 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +70 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +290 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +28 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +30 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +121 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +30 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +58 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +183 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +156 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +220 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +1660 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +30 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +264 -0
- vllm_ascend-0.10.1rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +214 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/quick_start.md +14 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/index.md +2 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_node.md +29 -19
- vllm_ascend-0.10.1rc1/docs/source/tutorials/multi_node_kimi.md +153 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu_moge.md +109 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu_quantization.md +6 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_node_300i.md +78 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu.md +4 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu_audio.md +2 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu_multimodal.md +3 -5
- vllm_ascend-0.10.1rc1/docs/source/tutorials/single_npu_qwen3_quantization.md +133 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/configuration/additional_config.md +4 -2
- vllm_ascend-0.10.1rc1/docs/source/user_guide/feature_guide/lora.md +23 -0
- vllm_ascend-0.10.1rc1/docs/source/user_guide/feature_guide/quantization.md +125 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -1
- vllm_ascend-0.10.1rc1/docs/source/user_guide/release_notes.md +624 -0
- vllm_ascend-0.10.1rc1/docs/source/user_guide/support_matrix/supported_features.md +45 -0
- vllm_ascend-0.10.1rc1/docs/source/user_guide/support_matrix/supported_models.md +79 -0
- vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/README.md +246 -0
- vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/gen_ranktable.py +122 -0
- vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/gen_ranktable.sh +79 -0
- vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +546 -0
- vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
- vllm_ascend-0.10.1rc1/examples/disaggregated_prefill_v1/run_server.sh +32 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/eplb/eplb_strategy.py +3 -0
- vllm_ascend-0.10.1rc1/examples/external_online_dp/README.md +38 -0
- vllm_ascend-0.10.1rc1/examples/external_online_dp/launch_online_dp.py +97 -0
- vllm_ascend-0.10.1rc1/examples/external_online_dp/run_dp_template.sh +46 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_data_parallel.py +19 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_disaggregated_prefill_npu.py +18 -11
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_dualbatch_overlap_npu.py +3 -2
- vllm_ascend-0.10.1rc1/examples/offline_embed.py +58 -0
- vllm_ascend-0.10.1rc1/examples/offline_external_launcher.py +287 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_inference_audio_language.py +33 -12
- vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v0.py → vllm_ascend-0.10.1rc1/examples/offline_inference_npu.py +28 -21
- vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v1.py → vllm_ascend-0.10.1rc1/examples/offline_inference_npu_tp2.py +6 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/offline_inference_sleep_mode_npu.py +5 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/prompt_embedding_inference.py +5 -0
- vllm_ascend-0.10.1rc1/examples/run_dp_server.sh +32 -0
- vllm_ascend-0.10.1rc1/pyproject.toml +34 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/requirements-dev.txt +7 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/requirements-lint.txt +1 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/requirements.txt +3 -6
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/setup.py +1 -1
- vllm_ascend-0.10.1rc1/tests/e2e/310p/test_offline_inference_310p.py +72 -0
- vllm_ascend-0.10.1rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +62 -0
- {vllm_ascend-0.9.2rc1/tests → vllm_ascend-0.10.1rc1/tests/e2e}/conftest.py +27 -114
- vllm_ascend-0.10.1rc1/tests/e2e/model_utils.py +74 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +13 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +8 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +18 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/configs/Qwen3-8B-Base.yaml +13 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/configs/accuracy.txt +3 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/conftest.py +72 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/report_template.md +21 -0
- vllm_ascend-0.10.1rc1/tests/e2e/models/test_lm_eval_correctness.py +153 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_data_parallel.py +3 -2
- vllm_ascend-0.10.1rc1/tests/e2e/multicard/test_expert_parallel.py +32 -0
- vllm_ascend-0.10.1rc1/tests/e2e/multicard/test_external_launcher.py +187 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +19 -15
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_ilama_lora_tp2.py +4 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_offline_inference_distributed.py +48 -65
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_pipeline_parallel.py +7 -4
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_prefix_caching.py +2 -8
- vllm_ascend-0.10.1rc1/tests/e2e/multicard/test_qwen3_moe.py +104 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/multicard/test_torchair_graph_mode.py +84 -21
- vllm_ascend-0.10.1rc1/tests/e2e/pd_disaggreate/run_edge_case_test.sh +141 -0
- vllm_ascend-0.10.1rc1/tests/e2e/pd_disaggreate/test_edge_cases.py +81 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_bgmv_expand.py +46 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_bgmv_shrink.py +45 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_fused_moe.py +284 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops/test_moe_comm.py +175 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/ops/test_rotary_embedding.py +152 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +7 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +76 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +85 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +50 -59
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_aclgraph.py +75 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_ascend_scheduler.py +88 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_camem.py +26 -15
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_chunked.py +81 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_embedding.py +17 -36
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_guided_decoding.py +6 -21
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_ilama_lora.py +4 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +9 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_quantization.py +35 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_sampler.py +49 -0
- vllm_ascend-0.10.1rc1/tests/e2e/singlecard/test_vlm.py +89 -0
- vllm_ascend-0.10.1rc1/tests/e2e/utils.py +106 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/attention/test_attention_mask.py +49 -72
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/attention/test_attention_v1.py +159 -78
- vllm_ascend-0.10.1rc1/tests/ut/attention/test_mla_v1.py +631 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/base.py +18 -5
- vllm_ascend-0.10.1rc1/tests/ut/conftest.py +26 -0
- vllm_ascend-0.10.1rc1/tests/ut/core/test_schedule_config.py +167 -0
- vllm_ascend-0.10.1rc1/tests/ut/core/test_scheduler.py +898 -0
- vllm_ascend-0.10.1rc1/tests/ut/device_allocator/test_camem.py +188 -0
- vllm_ascend-0.10.1rc1/tests/ut/distributed/device_communicators/test_pyhccl.py +84 -0
- vllm_ascend-0.10.1rc1/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +173 -0
- vllm_ascend-0.10.1rc1/tests/ut/distributed/test_communicator.py +89 -0
- vllm_ascend-0.10.1rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +139 -0
- vllm_ascend-0.10.1rc1/tests/ut/distributed/test_parallel_state.py +44 -0
- vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_llmdatadist_connector.py +96 -0
- vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_mooncake_connector.py +998 -0
- vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_remote_decode_lifecycle.py +169 -0
- vllm_ascend-0.10.1rc1/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +239 -0
- vllm_ascend-0.10.1rc1/tests/ut/kv_connector/utils.py +233 -0
- vllm_ascend-0.10.1rc1/tests/ut/models/test_deepseek_mtp.py +195 -0
- vllm_ascend-0.10.1rc1/tests/ut/models/test_deepseek_v2.py +295 -0
- vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen2_5_vl.py +424 -0
- vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen2_5_vl_without_padding.py +422 -0
- vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen2_vl.py +200 -0
- vllm_ascend-0.10.1rc1/tests/ut/models/test_qwen3_moe.py +98 -0
- vllm_ascend-0.10.1rc1/tests/ut/multistream/test_base.py +32 -0
- vllm_ascend-0.10.1rc1/tests/ut/multistream/test_decorator.py +47 -0
- vllm_ascend-0.10.1rc1/tests/ut/multistream/test_layers.py +198 -0
- vllm_ascend-0.10.1rc1/tests/ut/multistream/test_metadata.py +246 -0
- vllm_ascend-0.10.1rc1/tests/ut/multistream/test_ms_split.py +147 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_activation.py +61 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_common_fused_moe.py +69 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_fused_ops.py +741 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_layernorm.py +53 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_linear.py +363 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_rotary_embedding.py +318 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_token_dispatcher.py +606 -0
- vllm_ascend-0.10.1rc1/tests/ut/ops/test_vocab_parallel_embedding.py +232 -0
- vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +112 -0
- vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +167 -0
- vllm_ascend-0.10.1rc1/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +77 -0
- vllm_ascend-0.10.1rc1/tests/ut/quantization/test_func_wrapper.py +134 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/quantization/test_quant_config.py +6 -4
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/quantization/test_quantizer.py +23 -0
- vllm_ascend-0.10.1rc1/tests/ut/quantization/test_w4a8_dynamic.py +166 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/quantization/test_w8a8.py +68 -44
- vllm_ascend-0.10.1rc1/tests/ut/sample/test_rejection_sampler.py +203 -0
- vllm_ascend-0.10.1rc1/tests/ut/sample/test_sampler.py +32 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/test_ascend_config.py +173 -79
- vllm_ascend-0.10.1rc1/tests/ut/test_envs.py +62 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/test_platform.py +177 -180
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/test_utils.py +101 -105
- vllm_ascend-0.10.1rc1/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +195 -0
- vllm_ascend-0.10.1rc1/tests/ut/torchair/models/test_torchair_deepseek_v2.py +325 -0
- vllm_ascend-0.10.1rc1/tests/ut/torchair/ops/test_torchair_fused_moe.py +410 -0
- vllm_ascend-0.9.2rc1/tests/ut/ops/test_rotary_embedding.py → vllm_ascend-0.10.1rc1/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +332 -315
- vllm_ascend-0.10.1rc1/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +176 -0
- vllm_ascend-0.10.1rc1/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +75 -0
- vllm_ascend-0.10.1rc1/tests/ut/torchair/test_torchair_mla.py +817 -0
- vllm_ascend-0.10.1rc1/tests/ut/torchair/test_utils.py +149 -0
- vllm_ascend-0.10.1rc1/tests/ut/worker/test_input_batch.py +372 -0
- vllm_ascend-0.10.1rc1/tests/ut/worker/test_worker_v1.py +1143 -0
- vllm_ascend-0.10.1rc1/tools/check_python_src_init.py +76 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/__init__.py +0 -4
- vllm_ascend-0.10.1rc1/vllm_ascend/_version.py +34 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ascend_config.py +215 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ascend_forward_context.py +138 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/attention/attention_mask.py +35 -46
- vllm_ascend-0.10.1rc1/vllm_ascend/attention/attention_v1.py +604 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/attention/mla_v1.py +1050 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/attention/utils.py +95 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/compilation/acl_graph.py +185 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/core/schedule_config.py +10 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/core/scheduler.py +100 -70
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/device_allocator/camem.py +3 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/__init__.py +5 -4
- vllm_ascend-0.10.1rc1/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +894 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/distributed/moe_comm_method.py +556 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/distributed/mooncake_connector.py +1070 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/distributed/parallel_state.py +119 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/distributed/tensor_parallel.py +248 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/envs.py +44 -33
- vllm_ascend-0.10.1rc1/vllm_ascend/lora/punica_wrapper/lora_ops.py +112 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/lora/punica_wrapper/punica_npu.py +43 -25
- vllm_ascend-0.10.1rc1/vllm_ascend/meta_registration.py +104 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/__init__.py +10 -7
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/deepseek_dbo.py +12 -40
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/deepseek_mtp.py +27 -10
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/deepseek_v2.py +105 -97
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_pyhccl.py → vllm_ascend-0.10.1rc1/vllm_ascend/models/deepseek_v3.py +10 -12
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/pangu_moe.py +18 -35
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/qwen2_5_vl.py +9 -5
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +103 -3
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/models/qwen2_vl.py +2 -2
- vllm_ascend-0.10.1rc1/vllm_ascend/models/qwen3.py +156 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/models/qwen3_moe.py +393 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/decorator.py +0 -4
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/ms_split.py +9 -7
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/__init__.py +9 -2
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/activation.py +14 -14
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/attention.py +19 -15
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/comm_utils.py +62 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/common_fused_moe.py +531 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/fused_moe.py +587 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/layernorm.py +85 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers/experts_selector.py +283 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers/moe_mlp.py +199 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/linear.py +309 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher/token_dispatcher.py +809 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/rotary_embedding.py +339 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/sequence_parallel.py +120 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/ops/vocab_parallel_embedding.py +254 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/__init__.py +19 -60
- {vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_0_9_2 → vllm_ascend-0.10.1rc1/vllm_ascend/patch/platform}/__init__.py +3 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +2 -24
- {vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_main → vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker}/__init__.py +3 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +3 -6
- vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +147 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_logits.py +26 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/patch/worker/patch_common/patch_lora_embedding.py +29 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/platform.py +114 -76
- vllm_ascend-0.10.1rc1/vllm_ascend/quantization/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/func_wrapper.py +33 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/quant_config.py +46 -10
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/quantizer.py +31 -20
- vllm_ascend-0.10.1rc1/vllm_ascend/quantization/w4a8_dynamic.py +394 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/quantization/w8a8.py +19 -130
- vllm_ascend-0.10.1rc1/vllm_ascend/quantization/w8a8_dynamic.py +453 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/sample/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/sample/rejection_sampler.py +100 -52
- vllm_ascend-0.10.1rc1/vllm_ascend/sample/sampler.py +86 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/__init__.py +0 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/__init__.py +0 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/qwen2.py +364 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/qwen3_moe.py +537 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1049 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_deepseek_v3.py +28 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/models/torchair_pangu_moe.py +1119 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops/__init__.py +0 -0
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/fused_moe.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops/torchair_fused_moe.py +1321 -1453
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/rotary_embedding.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +372 -292
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/__init__.py +0 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_quantizer.py +29 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +439 -0
- vllm_ascend-0.9.2rc1/vllm_ascend/quantization/w8a8_dynamic.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1035 -830
- vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1_torchair.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_attention.py +97 -148
- vllm_ascend-0.9.2rc1/vllm_ascend/attention/mla_v1.py → vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_mla.py +395 -301
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_model_runner.py +446 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/torchair_worker.py +63 -0
- vllm_ascend-0.10.1rc1/vllm_ascend/torchair/utils.py +205 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/utils.py +160 -165
- vllm_ascend-0.10.1rc1/vllm_ascend/worker/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/eagle_proposer_v1.py +31 -19
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/model_runner_v1.py +1203 -691
- vllm_ascend-0.10.1rc1/vllm_ascend/worker/mtp_proposer_v1.py +439 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/npu_input_batch.py +156 -92
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/worker/worker_v1.py +56 -59
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/PKG-INFO +17 -6
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/SOURCES.txt +201 -67
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/requires.txt +3 -4
- vllm_ascend-0.9.2rc1/.github/actionlint.yaml +0 -8
- vllm_ascend-0.9.2rc1/benchmarks/scripts/patch_benchmark_dataset.py +0 -79
- vllm_ascend-0.9.2rc1/benchmarks/scripts/run_accuracy.py +0 -313
- vllm_ascend-0.9.2rc1/csrc/kernels/advance_step.cpp +0 -241
- vllm_ascend-0.9.2rc1/csrc/torch_binding.cpp +0 -320
- vllm_ascend-0.9.2rc1/docs/source/assets/multi_node_dp.png +0 -0
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -62
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/lora.md +0 -8
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/quantization.md +0 -106
- vllm_ascend-0.9.2rc1/docs/source/user_guide/release_notes.md +0 -310
- vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -49
- vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_models.md +0 -52
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -138
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/dp_proxy.py +0 -463
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/find_device_ips.py +0 -69
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -193
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/run_decode_server.sh +0 -37
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/run_prefill_server.sh +0 -37
- vllm_ascend-0.9.2rc1/examples/offline_distributed_inference_npu.py +0 -44
- vllm_ascend-0.9.2rc1/examples/offline_embed.py +0 -53
- vllm_ascend-0.9.2rc1/examples/offline_multi_step_custom_ops.py +0 -50
- vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16.sh +0 -23
- vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16_benmark.sh +0 -57
- vllm_ascend-0.9.2rc1/examples/run_dp_server.sh +0 -30
- vllm_ascend-0.9.2rc1/pyproject.toml +0 -25
- vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +0 -261
- vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +0 -115
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_dynamic_npugraph_batchsize.py +0 -57
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_pyhccl_distributed.py +0 -110
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile/test_simple.py +0 -118
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py +0 -728
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py +0 -46
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py +0 -60
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_fused_moe.py +0 -100
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_multi_step.py +0 -190
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/sample/test_rejection_sampler.py +0 -608
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -94
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_aclgraph.py +0 -99
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_chunked.py +0 -74
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_offline_inference.py +0 -129
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_prompt_embedding.py +0 -259
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_sampler.py +0 -109
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_scheduler.py +0 -390
- vllm_ascend-0.9.2rc1/tests/model_utils.py +0 -274
- vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_buffer.py +0 -71
- vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_connector.py +0 -146
- vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_pipe.py +0 -145
- vllm_ascend-0.9.2rc1/tests/ut/distributed/test_parallel_state.py +0 -208
- vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -27
- vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_sampler.py +0 -46
- vllm_ascend-0.9.2rc1/tests/ut/worker/test_input_batch.py +0 -162
- vllm_ascend-0.9.2rc1/tests/ut/worker/test_pooling_model_runner.py +0 -355
- vllm_ascend-0.9.2rc1/tests/ut/worker/test_worker_v1.py +0 -1
- vllm_ascend-0.9.2rc1/tests/utils.py +0 -236
- vllm_ascend-0.9.2rc1/vllm_ascend/_version.py +0 -21
- vllm_ascend-0.9.2rc1/vllm_ascend/ascend_config.py +0 -171
- vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention.py +0 -1228
- vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1.py +0 -478
- vllm_ascend-0.9.2rc1/vllm_ascend/compilation/piecewise_backend.py +0 -225
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -209
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/simple_connector.py +0 -379
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -209
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer/utils.py +0 -40
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/llmdatadist_connector.py +0 -470
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/parallel_state.py +0 -77
- vllm_ascend-0.9.2rc1/vllm_ascend/models/qwen3_moe.py +0 -35
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/cache.py +0 -35
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/common_fused_moe.py +0 -112
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/layernorm.py +0 -49
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/vocab_parallel_embedding.py +0 -67
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/__init__.py +0 -25
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/__init__.py +0 -26
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -91
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_sampler.py +0 -83
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +0 -157
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -38
- vllm_ascend-0.9.2rc1/vllm_ascend/pool/__init__.py +0 -16
- vllm_ascend-0.9.2rc1/vllm_ascend/pool/metadata.py +0 -32
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/__init__.py +0 -17
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/cache_engine.py +0 -83
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/draft_model_runner.py +0 -320
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/model_runner.py +0 -1607
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -188
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/multi_step_runner.py +0 -737
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/multi_step_worker.py +0 -194
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/pooling_model_runner.py +0 -186
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/worker.py +0 -579
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/labeler.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/label_merge_conflict.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/labeler.yml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/workflows/matchers/ruff.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/CMakeLists.txt +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/DCO +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/LICENSE +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/tests/latency-tests.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/tests/serving-tests.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/benchmarks/tests/throughput-tests.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/collect_env.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/csrc/kernels/utils.h +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/requirements-test.txt +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/community/user_stories/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/contribution/index.md +1 -1
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/developer_guide/modeling/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/configuration/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/examples/eplb/eplb_deepseek.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/format.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/mypy.ini +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/packages.txt +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/setup.cfg +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/tests/e2e/singlecard → vllm_ascend-0.10.1rc1/tests/e2e}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/common.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/prompts/example.txt +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/run_disagg_pd.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/run_doctests.sh +0 -0
- {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile → vllm_ascend-0.10.1rc1/tests/e2e/singlecard}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core → vllm_ascend-0.10.1rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
- {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops → vllm_ascend-0.10.1rc1/tests/ut}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/fake_weight/config.json +0 -0
- {vllm_ascend-0.9.2rc1/tests/e2e/singlecard/sample → vllm_ascend-0.10.1rc1/tests/ut/models}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/ops/expert_map.json +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tests/ut/ops/test_expert_load_balancer.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/attention → vllm_ascend-0.10.1rc1/tests/ut/torchair}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/enforce_regex_import.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/mypy.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/shellcheck.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/typos.toml +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/compilation → vllm_ascend-0.10.1rc1/vllm_ascend/attention}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/core → vllm_ascend-0.10.1rc1/vllm_ascend/compilation}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/device_allocator → vllm_ascend-0.10.1rc1/vllm_ascend/core}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.10.1rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/communication_op.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/distributed/kv_transfer → vllm_ascend-0.10.1rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/lora/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/lora/punica_wrapper/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/base.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/context.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/layers.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/multistream/metadata.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/quantization → vllm_ascend-0.10.1rc1/vllm_ascend/ops/layers}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/sample → vllm_ascend-0.10.1rc1/vllm_ascend/ops/moe_dispatcher}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_0_9_2 → vllm_ascend-0.10.1rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
|
|
2
|
+
have_fun: false # Just review the code
|
|
3
|
+
code_review:
|
|
4
|
+
comment_severity_threshold: HIGH # Reduce quantity of comments
|
|
5
|
+
pull_request_opened:
|
|
6
|
+
summary: false # Don't summarize the PR in a separate comment
|
{vllm_ascend-0.9.2rc1 → vllm_ascend-0.10.1rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml
RENAMED
|
@@ -30,6 +30,8 @@ body:
|
|
|
30
30
|
|
|
31
31
|
- [ ] Add release note to docs/source/user_guide/release_notes.md
|
|
32
32
|
|
|
33
|
+
- [ ] Update release version in README.md and README.zh.md
|
|
34
|
+
|
|
33
35
|
- [ ] Update version info in docs/source/community/versioning_policy.md
|
|
34
36
|
|
|
35
37
|
- [ ] Update contributor info in docs/source/community/contributors.md
|
|
@@ -25,4 +25,3 @@ CI passed with new added/existing test.
|
|
|
25
25
|
If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
|
|
26
26
|
If tests were not added, please describe why they were not added and/or why it was difficult to add.
|
|
27
27
|
-->
|
|
28
|
-
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
self-hosted-runner:
|
|
2
|
+
# Labels of self-hosted runner in array of strings.
|
|
3
|
+
labels:
|
|
4
|
+
- linux-aarch64-a2-0
|
|
5
|
+
- linux-aarch64-a2-1
|
|
6
|
+
- linux-aarch64-a2-2
|
|
7
|
+
- linux-aarch64-a2-4
|
|
8
|
+
- linux-aarch64-a2-8
|
|
9
|
+
- linux-arm64-npu-static-8
|
|
10
|
+
- linux-aarch64-310p-1
|
|
11
|
+
- linux-aarch64-310p-2
|
|
12
|
+
- linux-aarch64-310p-4
|
|
13
|
+
- ubuntu-24.04-arm
|
|
14
|
+
- linux-aarch64-a3-1
|
|
15
|
+
- linux-aarch64-a3-2
|
|
16
|
+
- linux-aarch64-a3-4
|
|
17
|
+
- linux-aarch64-a3-8
|
|
@@ -30,6 +30,7 @@ VLLM_VERSION=$2
|
|
|
30
30
|
VLLM_COMMIT=$3
|
|
31
31
|
OLD=/tmp/orig_pr_body.txt
|
|
32
32
|
NEW=/tmp/new_pr_body.txt
|
|
33
|
+
FINAL=/tmp/final_pr_body.txt
|
|
33
34
|
|
|
34
35
|
gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
|
|
35
36
|
cp "${OLD}" "${NEW}"
|
|
@@ -41,16 +42,18 @@ sed -i '/- vLLM .*$/d' "${NEW}"
|
|
|
41
42
|
echo ""
|
|
42
43
|
echo "- vLLM version: $VLLM_VERSION"
|
|
43
44
|
echo "- vLLM main: $VLLM_COMMIT"
|
|
44
|
-
echo ""
|
|
45
45
|
} >> "${NEW}"
|
|
46
46
|
|
|
47
|
+
# Remove redundant empty lines
|
|
48
|
+
uniq "${NEW}" > "${FINAL}"
|
|
49
|
+
|
|
47
50
|
# Run this only if ${NEW} is different than ${OLD}
|
|
48
|
-
if ! cmp -s "${OLD}" "${NEW}"; then
|
|
51
|
+
if ! cmp -s "${OLD}" "${FINAL}"; then
|
|
49
52
|
echo
|
|
50
53
|
echo "Updating PR body:"
|
|
51
54
|
echo
|
|
52
55
|
cat "${NEW}"
|
|
53
|
-
gh pr edit --body-file "${NEW}" "${PR_NUMBER}"
|
|
56
|
+
gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
|
|
54
57
|
else
|
|
55
58
|
echo "No changes needed"
|
|
56
59
|
fi
|
|
@@ -29,35 +29,15 @@ on:
|
|
|
29
29
|
types: [ labeled ]
|
|
30
30
|
workflow_dispatch:
|
|
31
31
|
inputs:
|
|
32
|
-
vllm-version:
|
|
33
|
-
description: 'vllm version:'
|
|
34
|
-
required: true
|
|
35
|
-
type: choice
|
|
36
|
-
# Please also update this when bump matched version
|
|
37
|
-
# Current supported vLLM versions
|
|
38
|
-
options:
|
|
39
|
-
- main
|
|
40
|
-
- v0.9.2
|
|
41
|
-
- v0.9.1
|
|
42
|
-
- v0.7.3
|
|
43
32
|
vllm-ascend-version:
|
|
44
|
-
description: 'vllm-ascend version:'
|
|
33
|
+
description: 'vllm-ascend:'
|
|
45
34
|
required: true
|
|
46
35
|
type: choice
|
|
36
|
+
# Current supported vLLM versions
|
|
47
37
|
options:
|
|
38
|
+
- latest
|
|
48
39
|
- main
|
|
49
|
-
|
|
50
|
-
- v0.7.3-dev
|
|
51
|
-
models:
|
|
52
|
-
description: 'model:'
|
|
53
|
-
required: true
|
|
54
|
-
type: choice
|
|
55
|
-
options:
|
|
56
|
-
- all
|
|
57
|
-
- Qwen/Qwen2.5-VL-7B-Instruct
|
|
58
|
-
- Qwen/Qwen3-8B-Base
|
|
59
|
-
- Qwen/Qwen3-30B-A3B
|
|
60
|
-
default: 'all'
|
|
40
|
+
default: main
|
|
61
41
|
|
|
62
42
|
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
63
43
|
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
|
@@ -76,58 +56,29 @@ jobs:
|
|
|
76
56
|
# test will be triggered when tag '*-accuracy-test' & 'ready-for-test' or workflow_dispatch job
|
|
77
57
|
if: >-
|
|
78
58
|
${{
|
|
79
|
-
|
|
80
|
-
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
|
|
81
|
-
contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
|
|
82
|
-
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
|
|
59
|
+
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
|
|
83
60
|
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
|
|
84
61
|
github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
|
85
62
|
}}
|
|
86
|
-
runs-on:
|
|
87
|
-
${{
|
|
88
|
-
(matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
|
|
89
|
-
'linux-arm64-npu-2'
|
|
90
|
-
}}
|
|
63
|
+
runs-on: ${{ matrix.runner }}
|
|
91
64
|
strategy:
|
|
92
65
|
matrix:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
model_name: ${{ fromJSON(
|
|
103
|
-
(github.event_name == 'schedule' &&
|
|
104
|
-
'["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
|
|
105
|
-
(github.event.inputs.models == 'all' &&
|
|
106
|
-
'["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
|
|
107
|
-
(github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
|
|
108
|
-
'["Qwen/Qwen3-30B-A3B"]') ||
|
|
109
|
-
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
|
|
110
|
-
'["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
|
|
111
|
-
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
|
|
112
|
-
'["Qwen/Qwen3-8B-Base"]') ||
|
|
113
|
-
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
|
|
114
|
-
'["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
|
|
115
|
-
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
|
|
116
|
-
'["Qwen/Qwen3-8B-Base"]' ||
|
|
117
|
-
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
|
|
118
|
-
'["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
|
|
119
|
-
contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
|
|
120
|
-
'["Qwen/Qwen3-30B-A3B"]'
|
|
121
|
-
) }}
|
|
122
|
-
|
|
66
|
+
include:
|
|
67
|
+
- model_name: Qwen3-8B-Base
|
|
68
|
+
runner: linux-aarch64-a2-1
|
|
69
|
+
- model_name: Qwen2.5-VL-7B-Instruct
|
|
70
|
+
runner: linux-aarch64-a2-1
|
|
71
|
+
- model_name: Qwen3-30B-A3B
|
|
72
|
+
runner: linux-aarch64-a2-2
|
|
73
|
+
- model_name: DeepSeek-V2-Lite
|
|
74
|
+
runner: linux-aarch64-a2-2
|
|
123
75
|
fail-fast: false
|
|
76
|
+
|
|
124
77
|
name: ${{ matrix.model_name }} accuracy
|
|
125
78
|
container:
|
|
126
|
-
image:
|
|
79
|
+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
|
127
80
|
env:
|
|
128
|
-
DATASET_SOURCE: ModelScope
|
|
129
81
|
VLLM_USE_MODELSCOPE: True
|
|
130
|
-
USE_MODELSCOPE_HUB: 1
|
|
131
82
|
# 1. If version specified (work_dispatch), do specified branch accuracy test
|
|
132
83
|
# 2. If no version (labeled PR), do accuracy test by default ref:
|
|
133
84
|
# The branch, tag or SHA to checkout. When checking out the repository that
|
|
@@ -139,18 +90,18 @@ jobs:
|
|
|
139
90
|
- name: Checkout repository
|
|
140
91
|
uses: actions/checkout@v4
|
|
141
92
|
|
|
142
|
-
- name:
|
|
93
|
+
- name: Set model name as output
|
|
94
|
+
id: set_output
|
|
143
95
|
run: |
|
|
144
|
-
|
|
145
|
-
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
96
|
+
echo "model_name=${{ matrix.model_name }}" >> $GITHUB_OUTPUT
|
|
146
97
|
|
|
147
98
|
- name: Config mirrors
|
|
148
99
|
run: |
|
|
149
|
-
sed -
|
|
150
|
-
pip config set global.index-url
|
|
100
|
+
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
101
|
+
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
102
|
+
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
151
103
|
apt-get update -y
|
|
152
104
|
apt install git -y
|
|
153
|
-
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
|
|
154
105
|
|
|
155
106
|
- name: Install system dependencies
|
|
156
107
|
run: |
|
|
@@ -161,13 +112,30 @@ jobs:
|
|
|
161
112
|
uses: actions/checkout@v4
|
|
162
113
|
with:
|
|
163
114
|
repository: vllm-project/vllm
|
|
115
|
+
ref: v0.10.1.1
|
|
164
116
|
path: ./vllm-empty
|
|
165
|
-
# Please also update this when bump matched version
|
|
166
|
-
ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
|
|
167
117
|
|
|
168
118
|
- name: Install vllm-project/vllm from source
|
|
169
119
|
working-directory: ./vllm-empty
|
|
170
|
-
run:
|
|
120
|
+
run: |
|
|
121
|
+
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
122
|
+
|
|
123
|
+
- name: Resolve vllm-ascend version
|
|
124
|
+
run: |
|
|
125
|
+
VERSION_INPUT="${{ github.event.inputs.vllm-ascend-version }}"
|
|
126
|
+
|
|
127
|
+
if [[ "$VERSION_INPUT" == "latest" ]]; then
|
|
128
|
+
TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
|
|
129
|
+
LATEST_TAG=$(echo "$TAGS" | head -n1)
|
|
130
|
+
if [[ -z "$LATEST_TAG" ]]; then
|
|
131
|
+
RESOLVED_VERSION="main"
|
|
132
|
+
else
|
|
133
|
+
RESOLVED_VERSION="$LATEST_TAG"
|
|
134
|
+
fi
|
|
135
|
+
else
|
|
136
|
+
RESOLVED_VERSION="$VERSION_INPUT"
|
|
137
|
+
fi
|
|
138
|
+
echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV
|
|
171
139
|
|
|
172
140
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
173
141
|
uses: actions/checkout@v4
|
|
@@ -182,8 +150,8 @@ jobs:
|
|
|
182
150
|
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
183
151
|
run: |
|
|
184
152
|
pip install -r requirements-dev.txt
|
|
185
|
-
pip install -v -e .
|
|
186
|
-
|
|
153
|
+
pip install -v -e .
|
|
154
|
+
|
|
187
155
|
- name: Get vLLM commit hash and URL
|
|
188
156
|
working-directory: ./vllm-empty
|
|
189
157
|
run: |
|
|
@@ -196,15 +164,6 @@ jobs:
|
|
|
196
164
|
VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
|
|
197
165
|
echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
|
|
198
166
|
|
|
199
|
-
- name: Print resolved hashes
|
|
200
|
-
run: |
|
|
201
|
-
echo "vLLM : ${{ env.VLLM_COMMIT }}"
|
|
202
|
-
echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
|
|
203
|
-
|
|
204
|
-
- name: Install lm-eval, ray, and datasets
|
|
205
|
-
run: |
|
|
206
|
-
pip install lm-eval==0.4.8
|
|
207
|
-
|
|
208
167
|
- name: Collect version info
|
|
209
168
|
run: |
|
|
210
169
|
for dir in /usr/local/Ascend/ascend-toolkit/*; do
|
|
@@ -224,39 +183,27 @@ jobs:
|
|
|
224
183
|
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
|
|
225
184
|
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
|
|
226
185
|
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
|
|
227
|
-
echo "GHA_VLLM_ASCEND_VERSION=${{ github.event.inputs.vllm-ascend-version || github.ref }}"
|
|
228
186
|
} >> "$GITHUB_ENV"
|
|
229
|
-
|
|
230
|
-
- name: Print versions
|
|
231
|
-
run: |
|
|
232
|
-
echo "CANN: ${{ env.GHA_CANN_VERSION }}"
|
|
233
|
-
echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
|
|
234
|
-
echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
|
|
235
|
-
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
|
|
236
|
-
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
|
|
237
187
|
|
|
238
|
-
- name: Run
|
|
188
|
+
- name: Run accuracy test
|
|
239
189
|
id: report
|
|
240
|
-
working-directory: ./benchmarks
|
|
241
190
|
env:
|
|
242
|
-
|
|
191
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
192
|
+
VLLM_USE_MODELSCOPE: True
|
|
193
|
+
VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
|
|
194
|
+
VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
|
|
195
|
+
VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
|
|
196
|
+
VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
|
|
197
|
+
CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
|
|
198
|
+
TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
|
|
199
|
+
TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
|
|
243
200
|
run: |
|
|
244
201
|
model_base_name=$(basename ${{ matrix.model_name }})
|
|
245
202
|
markdown_name="${model_base_name}"
|
|
246
|
-
echo "markdown_name=$markdown_name"
|
|
247
203
|
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
|
|
248
|
-
mkdir -p ./accuracy
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
--model "${{ matrix.model_name }}" \
|
|
252
|
-
--output "./accuracy/${markdown_name}.md" \
|
|
253
|
-
--vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
|
|
254
|
-
--cann_version "${{ env.GHA_CANN_VERSION }}" \
|
|
255
|
-
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
|
|
256
|
-
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
|
|
257
|
-
--vllm_version "${{ env.GHA_VLLM_VERSION }}" \
|
|
258
|
-
--vllm_commit "${{ env.VLLM_COMMIT }}" \
|
|
259
|
-
--vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
|
|
204
|
+
mkdir -p ./benchmarks/accuracy
|
|
205
|
+
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
|
|
206
|
+
--config ./tests/e2e/models/configs/${{ matrix.model_name }}.yaml
|
|
260
207
|
|
|
261
208
|
- name: Generate step summary
|
|
262
209
|
if: ${{ always() }}
|
|
@@ -268,19 +215,7 @@ jobs:
|
|
|
268
215
|
SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
|
|
269
216
|
echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
|
|
270
217
|
|
|
271
|
-
- name: Check report first line for failure
|
|
272
|
-
id: check_report
|
|
273
|
-
run: |
|
|
274
|
-
REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
|
|
275
|
-
echo "Scanning $REPORT_PATH for ❌ …"
|
|
276
|
-
if grep -q '❌' "$REPORT_PATH"; then
|
|
277
|
-
echo "contains_fail=true" >> $GITHUB_OUTPUT
|
|
278
|
-
else
|
|
279
|
-
echo "contains_fail=false" >> $GITHUB_OUTPUT
|
|
280
|
-
fi
|
|
281
|
-
|
|
282
218
|
- name: Upload Report
|
|
283
|
-
if: ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
|
|
284
219
|
uses: actions/upload-artifact@v4
|
|
285
220
|
with:
|
|
286
221
|
name: "report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
|
|
@@ -289,10 +224,14 @@ jobs:
|
|
|
289
224
|
retention-days: 90
|
|
290
225
|
overwrite: true
|
|
291
226
|
|
|
227
|
+
outputs:
|
|
228
|
+
model_name: ${{ steps.set_output.outputs.model_name }}
|
|
229
|
+
vllm_ascend_version: ${{ env.GHA_VLLM_ASCEND_VERSION }}
|
|
230
|
+
|
|
292
231
|
create_pr:
|
|
293
232
|
runs-on: ubuntu-latest
|
|
294
233
|
needs: accuracy_tests
|
|
295
|
-
if: ${{ github.event_name == 'workflow_dispatch' }}
|
|
234
|
+
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
|
|
296
235
|
env:
|
|
297
236
|
UPSTREAM_REPO: vllm-project/vllm-ascend
|
|
298
237
|
steps:
|
|
@@ -302,7 +241,7 @@ jobs:
|
|
|
302
241
|
repository: vllm-ascend-ci/vllm-ascend
|
|
303
242
|
token: ${{ secrets.PAT_TOKEN }}
|
|
304
243
|
ref: main
|
|
305
|
-
|
|
244
|
+
|
|
306
245
|
- name: Add upstream remote
|
|
307
246
|
run: |
|
|
308
247
|
git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
|
|
@@ -319,10 +258,10 @@ jobs:
|
|
|
319
258
|
TIMESTAMP=$(date +%Y%m%d%H%M%S)
|
|
320
259
|
BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
|
|
321
260
|
echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
|
|
322
|
-
git checkout -B "${BRANCH_NAME}" upstream
|
|
261
|
+
git checkout -B "${BRANCH_NAME}" upstream/main
|
|
323
262
|
|
|
324
263
|
- name: Download only current run reports
|
|
325
|
-
uses: actions/download-artifact@
|
|
264
|
+
uses: actions/download-artifact@v5
|
|
326
265
|
with:
|
|
327
266
|
path: ./docs/source/developer_guide/evaluation/accuracy_report
|
|
328
267
|
pattern: report-*
|
|
@@ -334,7 +273,7 @@ jobs:
|
|
|
334
273
|
find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
|
|
335
274
|
find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
|
|
336
275
|
find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
|
|
337
|
-
|
|
276
|
+
|
|
338
277
|
- name: Update accuracy_report/index.md
|
|
339
278
|
run: |
|
|
340
279
|
REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
|
|
@@ -360,7 +299,7 @@ jobs:
|
|
|
360
299
|
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
|
|
361
300
|
run: |
|
|
362
301
|
git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
|
|
363
|
-
git commit -s -m "[Doc] Update accuracy reports for ${{
|
|
302
|
+
git commit -s -m "[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}"
|
|
364
303
|
git push -f origin "${{ env.BRANCH_NAME }}"
|
|
365
304
|
|
|
366
305
|
- name: Create PR in upstream via API
|
|
@@ -372,18 +311,12 @@ jobs:
|
|
|
372
311
|
owner: 'vllm-project',
|
|
373
312
|
repo: 'vllm-ascend',
|
|
374
313
|
head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
|
|
375
|
-
base: '
|
|
376
|
-
title: `[Doc] Update accuracy reports for ${{
|
|
377
|
-
body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
|
|
378
|
-
${{
|
|
379
|
-
github.event.inputs.models == 'all'
|
|
380
|
-
&& 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
|
|
381
|
-
|| github.event.inputs.models
|
|
382
|
-
}}
|
|
383
|
-
|
|
384
|
-
- [Workflow run][1]
|
|
314
|
+
base: 'main',
|
|
315
|
+
title: `[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}`,
|
|
316
|
+
body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base, DeepSeek-V2-Lite)
|
|
385
317
|
|
|
386
|
-
|
|
318
|
+
- [Workflow run][1]
|
|
319
|
+
|
|
320
|
+
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
|
|
387
321
|
});
|
|
388
322
|
core.info(`Created PR #${pr.data.number}`);
|
|
389
|
-
|
|
@@ -46,7 +46,7 @@ jobs:
|
|
|
46
46
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
|
47
47
|
|
|
48
48
|
- name: Checkout repository
|
|
49
|
-
uses: actions/checkout@
|
|
49
|
+
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
|
|
50
50
|
|
|
51
51
|
- name: Set up Python
|
|
52
52
|
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
|
|
@@ -6,10 +6,10 @@ name: 'image / openEuler / 310p'
|
|
|
6
6
|
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
|
7
7
|
# 2. branches push trigger image publish
|
|
8
8
|
# - is for branch/dev/nightly image
|
|
9
|
-
# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
|
|
9
|
+
# - commits are merge into main/*-dev ==> vllm-ascend:main-310p-openeuler / vllm-ascend:*-dev-310p-openeuler
|
|
10
10
|
# 3. tags push trigger image publish
|
|
11
11
|
# - is for final release image
|
|
12
|
-
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-openeuler
|
|
12
|
+
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p-openeuler / vllm-ascend:v1.2.3rc1-310p-openeuler
|
|
13
13
|
on:
|
|
14
14
|
pull_request:
|
|
15
15
|
branches:
|
|
@@ -33,9 +33,15 @@ on:
|
|
|
33
33
|
tags:
|
|
34
34
|
- 'v*'
|
|
35
35
|
paths:
|
|
36
|
-
- '.github/workflows/
|
|
36
|
+
- '.github/workflows/image_310p_openeuler.yml'
|
|
37
37
|
- 'Dockerfile.310p.openEuler'
|
|
38
38
|
- 'vllm_ascend/**'
|
|
39
|
+
- 'setup.py'
|
|
40
|
+
- 'pyproject.toml'
|
|
41
|
+
- 'requirements.txt'
|
|
42
|
+
- 'cmake/**'
|
|
43
|
+
- 'CMakeLists.txt'
|
|
44
|
+
- 'csrc/**'
|
|
39
45
|
|
|
40
46
|
jobs:
|
|
41
47
|
build:
|
|
@@ -63,16 +69,18 @@ jobs:
|
|
|
63
69
|
# Note for test case
|
|
64
70
|
# https://github.com/marketplace/actions/docker-metadata-action#typeref
|
|
65
71
|
# 1. branch job pulish per main/*-dev branch commits
|
|
66
|
-
# 2. main and dev pull_request is build only, so the tag pr-N-openeuler is fine
|
|
72
|
+
# 2. main and dev pull_request is build only, so the tag pr-N-310p-openeuler is fine
|
|
67
73
|
# 3. only pep440 matched tag will be published:
|
|
68
|
-
# - v0.7.1 --> v0.7.1-openeuler
|
|
69
|
-
# - pre/post/dev: v0.7.1rc1-openeuler/v0.7.1rc1-openeuler/v0.7.1rc1.dev1-openeuler/v0.7.1.post1-openeuler, no latest
|
|
74
|
+
# - v0.7.1 --> v0.7.1-310p-openeuler
|
|
75
|
+
# - pre/post/dev: v0.7.1rc1-310p-openeuler/v0.7.1rc1-310p-openeuler/v0.7.1rc1.dev1-310p-openeuler/v0.7.1.post1-310p-openeuler, no latest
|
|
70
76
|
# which follow the rule from vLLM with prefix v
|
|
71
77
|
# TODO(yikun): the post release might be considered as latest release
|
|
72
78
|
tags: |
|
|
73
79
|
type=ref,event=branch,suffix=-310p-openeuler
|
|
74
|
-
type=ref,event=pr,suffix=-openeuler
|
|
80
|
+
type=ref,event=pr,suffix=-310p-openeuler
|
|
75
81
|
type=pep440,pattern={{raw}},suffix=-310p-openeuler
|
|
82
|
+
flavor:
|
|
83
|
+
latest=false
|
|
76
84
|
|
|
77
85
|
- name: Free up disk space
|
|
78
86
|
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
|
@@ -112,3 +120,4 @@ jobs:
|
|
|
112
120
|
file: Dockerfile.310p.openEuler
|
|
113
121
|
build-args: |
|
|
114
122
|
PIP_INDEX_URL=https://pypi.org/simple
|
|
123
|
+
provenance: false
|
|
@@ -6,10 +6,10 @@ name: 'image / Ubuntu / 310p'
|
|
|
6
6
|
# - push: ${{ github.event_name != 'pull_request' }} ==> false
|
|
7
7
|
# 2. branches push trigger image publish
|
|
8
8
|
# - is for branch/dev/nightly image
|
|
9
|
-
# - commits are merge into main/*-dev ==> vllm-ascend:main / vllm-ascend:*-dev
|
|
9
|
+
# - commits are merge into main/*-dev ==> vllm-ascend:main-310p / vllm-ascend:*-dev-310p
|
|
10
10
|
# 3. tags push trigger image publish
|
|
11
11
|
# - is for final release image
|
|
12
|
-
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3
|
|
12
|
+
# - Publish when tag with v* (pep440 version) ===> vllm-ascend:v1.2.3-310p / vllm-ascend:v1.2.3rc1-310p
|
|
13
13
|
on:
|
|
14
14
|
pull_request:
|
|
15
15
|
branches:
|
|
@@ -36,6 +36,12 @@ on:
|
|
|
36
36
|
- '.github/workflows/image_310p_ubuntu.yml'
|
|
37
37
|
- 'Dockerfile.310p'
|
|
38
38
|
- 'vllm_ascend/**'
|
|
39
|
+
- 'setup.py'
|
|
40
|
+
- 'pyproject.toml'
|
|
41
|
+
- 'requirements.txt'
|
|
42
|
+
- 'cmake/**'
|
|
43
|
+
- 'CMakeLists.txt'
|
|
44
|
+
- 'csrc/**'
|
|
39
45
|
jobs:
|
|
40
46
|
|
|
41
47
|
build:
|
|
@@ -61,14 +67,16 @@ jobs:
|
|
|
61
67
|
# 1. branch job pulish per main/*-dev branch commits
|
|
62
68
|
# 2. main and dev pull_request is build only, so the tag pr-N is fine
|
|
63
69
|
# 3. only pep440 matched tag will be published:
|
|
64
|
-
# - v0.7.1 --> v0.7.1
|
|
65
|
-
# - pre/post/dev: v0.7.1rc1/v0.7.1rc1/v0.7.1rc1.dev1/v0.7.1.post1, no latest
|
|
70
|
+
# - v0.7.1 --> v0.7.1-310p
|
|
71
|
+
# - pre/post/dev: v0.7.1rc1-310p/v0.7.1rc1-310p/v0.7.1rc1.dev1-310p/v0.7.1.post1-310p, no latest
|
|
66
72
|
# which follow the rule from vLLM with prefix v
|
|
67
73
|
# TODO(yikun): the post release might be considered as latest release
|
|
68
74
|
tags: |
|
|
69
75
|
type=ref,event=branch,suffix=-310p
|
|
70
76
|
type=ref,event=pr,suffix=-310p
|
|
71
77
|
type=pep440,pattern={{raw}},suffix=-310p
|
|
78
|
+
flavor:
|
|
79
|
+
latest=false
|
|
72
80
|
|
|
73
81
|
- name: Free up disk space
|
|
74
82
|
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
|
@@ -108,3 +116,4 @@ jobs:
|
|
|
108
116
|
tags: ${{ steps.meta.outputs.tags }}
|
|
109
117
|
build-args: |
|
|
110
118
|
PIP_INDEX_URL=https://pypi.org/simple
|
|
119
|
+
provenance: false
|