vllm-ascend 0.9.0rc2__tar.gz → 0.9.2rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/Dockerfile.buildwheel +5 -8
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +1 -1
- vllm_ascend-0.9.2rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml +100 -0
- vllm_ascend-0.9.2rc1/.github/format_pr_body.sh +56 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/accuracy_test.yaml +167 -33
- vllm_ascend-0.9.2rc1/.github/workflows/format_pr_body.yaml +63 -0
- vllm_ascend-0.9.2rc1/.github/workflows/image_310p_openeuler.yml +114 -0
- vllm_ascend-0.9.2rc1/.github/workflows/image_310p_ubuntu.yml +110 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/image_openeuler.yml +21 -6
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/image_ubuntu.yml +16 -4
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/nightly_benchmarks.yaml +30 -19
- vllm_ascend-0.9.2rc1/.github/workflows/pre-commit.yml +37 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/release_code.yml +0 -12
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/release_whl.yml +43 -20
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_doctest.yaml +8 -25
- vllm_ascend-0.9.2rc1/.github/workflows/vllm_ascend_test.yaml +326 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test_long_term.yaml +7 -10
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/vllm_ascend_test_pd.yaml +8 -3
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.gitignore +2 -0
- vllm_ascend-0.9.2rc1/.pre-commit-config.yaml +141 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/CMakeLists.txt +0 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/Dockerfile +3 -2
- vllm_ascend-0.9.2rc1/Dockerfile.310p +61 -0
- vllm_ascend-0.9.2rc1/Dockerfile.310p.openEuler +58 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/Dockerfile.openEuler +3 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/PKG-INFO +6 -5
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/README.md +5 -4
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/README.zh.md +5 -4
- vllm_ascend-0.9.2rc1/benchmarks/README.md +166 -0
- vllm_ascend-0.9.2rc1/benchmarks/ops/ben_vocabparallelembedding.py +158 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/convert_json_to_markdown.py +54 -49
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/patch_benchmark_dataset.py +22 -11
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/run-performance-benchmarks.sh +21 -22
- vllm_ascend-0.9.2rc1/benchmarks/scripts/run_accuracy.py +313 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/tests/latency-tests.json +10 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/tests/serving-tests.json +26 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/tests/throughput-tests.json +11 -0
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/__init__.py → vllm_ascend-0.9.2rc1/codecov.yml +14 -2
- vllm_ascend-0.9.2rc1/csrc/kernels/get_masked_input_and_mask_kernel.cpp +378 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/pos_encoding_kernels.cpp +15 -5
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/utils.h +3 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/ops.h +14 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/torch_binding.cpp +116 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/requirements-test.txt +1 -1
- vllm_ascend-0.9.2rc1/docs/source/assets/multi_node_dp.png +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/community/contributors.md +18 -0
- vllm_ascend-0.9.2rc1/docs/source/community/user_stories/index.md +19 -0
- vllm_ascend-0.9.2rc1/docs/source/community/user_stories/llamafactory.md +19 -0
- {vllm_ascend-0.9.0rc2/docs/source/developer_guide → vllm_ascend-0.9.2rc1/docs/source/community}/versioning_policy.md +6 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/conf.py +11 -4
- vllm_ascend-0.9.0rc2/docs/source/developer_guide/contributing.md → vllm_ascend-0.9.2rc1/docs/source/developer_guide/contribution/index.md +44 -46
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/contribution/testing.md +280 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/index.md +2 -9
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/feature_guide/index.md +9 -0
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/feature_guide/patch.md +82 -0
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/adding_a_new_model.md +258 -0
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +3 -0
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/modeling/index.md +10 -0
- vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance/index.md +8 -0
- {vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation → vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance}/profile_execute_duration.md +5 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/faqs.md +51 -11
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/index.md +9 -14
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/installation.md +21 -14
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/quick_start.md +4 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/index.md +5 -0
- vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_node.md +197 -0
- vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_npu_moge.md +136 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu_quantization.md +1 -1
- vllm_ascend-0.9.2rc1/docs/source/tutorials/multi_npu_qwen3_moe.md +109 -0
- vllm_ascend-0.9.2rc1/docs/source/tutorials/single_node_300i.md +330 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_npu.md +68 -3
- vllm_ascend-0.9.2rc1/docs/source/tutorials/single_npu_audio.md +122 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/single_npu_multimodal.md +6 -3
- vllm_ascend-0.9.2rc1/docs/source/tutorials/single_npu_qwen3_embedding.md +99 -0
- {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration}/additional_config.md +20 -15
- {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration}/env_vars.md +1 -1
- vllm_ascend-0.9.2rc1/docs/source/user_guide/configuration/index.md +10 -0
- {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide}/graph_mode.md +10 -14
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/index.md +13 -0
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/lora.md +8 -0
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/quantization.md +106 -0
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/sleep_mode.md +115 -0
- vllm_ascend-0.9.2rc1/docs/source/user_guide/feature_guide/structured_output.md +163 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/user_guide/release_notes.md +68 -1
- vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/index.md +10 -0
- {vllm_ascend-0.9.0rc2/docs/source/user_guide → vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix}/supported_models.md +1 -1
- vllm_ascend-0.9.2rc1/examples/disaggregated_prefill/find_device_ips.py +69 -0
- vllm_ascend-0.9.2rc1/examples/eplb/eplb_deepseek.py +205 -0
- vllm_ascend-0.9.2rc1/examples/eplb/eplb_strategy.py +183 -0
- vllm_ascend-0.9.2rc1/examples/offline_data_parallel.py +241 -0
- vllm_ascend-0.9.2rc1/examples/offline_embed.py +53 -0
- vllm_ascend-0.9.2rc1/examples/offline_inference_audio_language.py +84 -0
- vllm_ascend-0.9.0rc2/examples/offline_inference_npu.py → vllm_ascend-0.9.2rc1/examples/offline_inference_npu_v0.py +5 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_inference_npu_v1.py +5 -4
- vllm_ascend-0.9.2rc1/examples/offline_inference_sleep_mode_npu.py +54 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_multi_step_custom_ops.py +0 -3
- vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16.sh +23 -0
- vllm_ascend-0.9.2rc1/examples/run_dp_attention_etp16_benmark.sh +57 -0
- vllm_ascend-0.9.2rc1/format.sh +44 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/mypy.ini +3 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/pyproject.toml +3 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/requirements-dev.txt +3 -0
- vllm_ascend-0.9.2rc1/requirements-lint.txt +8 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/requirements.txt +8 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/setup.py +31 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/conftest.py +164 -5
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/common.sh +24 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/doctests/001-quickstart-test.sh +11 -2
- vllm_ascend-0.9.2rc1/tests/e2e/doctests/002-pip-binary-installation-test.sh +62 -0
- vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +261 -0
- vllm_ascend-0.9.0rc2/tests/long_term/test_accuracy.py → vllm_ascend-0.9.2rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +15 -11
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_data_parallel.py +72 -0
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +82 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_ilama_lora_tp2.py +4 -3
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_offline_inference_distributed.py +88 -16
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_pipeline_parallel.py +43 -0
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_prefix_caching.py +152 -0
- vllm_ascend-0.9.2rc1/tests/e2e/multicard/test_torchair_graph_mode.py +161 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +2 -0
- vllm_ascend-0.9.2rc1/tests/e2e/prompts/example.txt +8 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/run_doctests.sh +6 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/compile/test_simple.py +8 -24
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py +728 -0
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler_e2e.py +46 -0
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core/ascend_scheduler/test_chunk_prefill.py +60 -0
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +37 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/test_rotary_embedding.py +3 -1
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +91 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/sample/test_rejection_sampler.py +19 -22
- {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1}/test_v1_mtp_correctness.py +2 -0
- {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/spec_decode_v1}/test_v1_spec_decode.py +12 -6
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_aclgraph.py +5 -1
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_chunked.py +74 -0
- vllm_ascend-0.9.2rc1/tests/e2e/singlecard/test_embedding.py +68 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_guided_decoding.py +21 -31
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_ilama_lora.py +3 -3
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_offline_inference.py +2 -2
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_sampler.py +109 -147
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_scheduler.py +40 -54
- vllm_ascend-0.9.2rc1/tests/ut/attention/test_attention_mask.py +156 -0
- vllm_ascend-0.9.2rc1/tests/ut/attention/test_attention_v1.py +497 -0
- vllm_ascend-0.9.2rc1/tests/ut/base.py +31 -0
- vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_buffer.py +71 -0
- vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_connector.py +146 -0
- vllm_ascend-0.9.2rc1/tests/ut/distributed/kv_transfer/test_simple_pipe.py +145 -0
- vllm_ascend-0.9.2rc1/tests/ut/distributed/test_parallel_state.py +208 -0
- vllm_ascend-0.9.2rc1/tests/ut/fake_weight/config.json +28 -0
- vllm_ascend-0.9.2rc1/tests/ut/ops/expert_map.json +17 -0
- vllm_ascend-0.9.2rc1/tests/ut/ops/test_expert_load_balancer.py +141 -0
- vllm_ascend-0.9.2rc1/tests/ut/ops/test_rotary_embedding.py +315 -0
- vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_distributed.py +27 -0
- vllm_ascend-0.9.2rc1/tests/ut/patch/worker/patch_common/test_patch_sampler.py +46 -0
- vllm_ascend-0.9.2rc1/tests/ut/quantization/test_quant_config.py +230 -0
- vllm_ascend-0.9.2rc1/tests/ut/quantization/test_quantizer.py +122 -0
- vllm_ascend-0.9.2rc1/tests/ut/quantization/test_w8a8.py +906 -0
- vllm_ascend-0.9.2rc1/tests/ut/test_ascend_config.py +267 -0
- vllm_ascend-0.9.2rc1/tests/ut/test_platform.py +717 -0
- vllm_ascend-0.9.2rc1/tests/ut/test_utils.py +355 -0
- vllm_ascend-0.9.2rc1/tests/ut/worker/test_input_batch.py +162 -0
- vllm_ascend-0.9.2rc1/tests/ut/worker/test_pooling_model_runner.py +355 -0
- vllm_ascend-0.9.2rc1/tests/ut/worker/test_worker_v1.py +1 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/utils.py +37 -0
- vllm_ascend-0.9.2rc1/tools/enforce_regex_import.py +104 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/mypy.sh +5 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/shellcheck.sh +4 -0
- vllm_ascend-0.9.2rc1/typos.toml +177 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/_version.py +2 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ascend_config.py +20 -12
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/attention.py +34 -107
- vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_mask.py +104 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/attention_v1.py +74 -5
- vllm_ascend-0.9.2rc1/vllm_ascend/attention/attention_v1_torchair.py +503 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/mla_v1.py +375 -85
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/compilation/piecewise_backend.py +1 -7
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/scheduler.py +177 -76
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/device_allocator/camem.py +1 -2
- vllm_ascend-0.9.2rc1/vllm_ascend/distributed/communication_op.py +25 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_connector.py +5 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/envs.py +21 -21
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/__init__.py +12 -3
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_dbo.py +120 -164
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_mtp.py +2 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/deepseek_v2.py +399 -138
- vllm_ascend-0.9.2rc1/vllm_ascend/models/pangu_moe.py +1123 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/activation.py +6 -1
- vllm_ascend-0.9.2rc1/vllm_ascend/ops/common_fused_moe.py +112 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/fused_moe.py +346 -104
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/layernorm.py +11 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/rotary_embedding.py +17 -4
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/vocab_parallel_embedding.py +2 -2
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/__init__.py +17 -85
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/__init__.py +2 -2
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_common/patch_distributed.py +137 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/__init__.py +2 -2
- vllm_ascend-0.9.2rc1/vllm_ascend/patch/worker/patch_0_9_2/__init__.py +16 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +0 -1
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py +0 -16
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_sampler.py +83 -101
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py +7 -5
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/platform.py +17 -19
- vllm_ascend-0.9.2rc1/vllm_ascend/pool/__init__.py +16 -0
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/conftest.py → vllm_ascend-0.9.2rc1/vllm_ascend/pool/metadata.py +14 -10
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/quant_config.py +8 -26
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/quantizer.py +14 -1
- vllm_ascend-0.9.2rc1/vllm_ascend/quantization/w8a8.py +758 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/w8a8_dynamic.py +176 -99
- vllm_ascend-0.9.2rc1/vllm_ascend/utils.py +563 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/draft_model_runner.py +7 -6
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/eagle_proposer_v1.py +386 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/model_runner_v1.py +890 -435
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/mtp_proposer_v1.py +1 -38
- vllm_ascend-0.9.2rc1/vllm_ascend/worker/npu_input_batch.py +757 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/worker.py +13 -7
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/worker_v1.py +80 -36
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/PKG-INFO +6 -5
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/SOURCES.txt +128 -65
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/requires.txt +2 -1
- vllm_ascend-0.9.0rc2/.github/workflows/accuracy_report.yaml +0 -202
- vllm_ascend-0.9.0rc2/.github/workflows/actionlint.yml +0 -53
- vllm_ascend-0.9.0rc2/.github/workflows/shellcheck.yml +0 -49
- vllm_ascend-0.9.0rc2/.github/workflows/vllm_ascend_test.yaml +0 -222
- vllm_ascend-0.9.0rc2/benchmarks/README.md +0 -57
- vllm_ascend-0.9.0rc2/benchmarks/scripts/run_accuracy.py +0 -226
- vllm_ascend-0.9.0rc2/docs/source/tutorials/multi_node.md +0 -195
- vllm_ascend-0.9.0rc2/docs/source/user_guide/release.template.md +0 -13
- vllm_ascend-0.9.0rc2/docs/source/user_stories/example.md +0 -15
- vllm_ascend-0.9.0rc2/docs/source/user_stories/index.md +0 -22
- vllm_ascend-0.9.0rc2/examples/disaggregated_prefill/find_device_ips.py +0 -67
- vllm_ascend-0.9.0rc2/examples/dp_offline/data_parallel.py +0 -85
- vllm_ascend-0.9.0rc2/examples/dp_offline/run_dp.sh +0 -19
- vllm_ascend-0.9.0rc2/examples/offline_inference_audio_language.py +0 -126
- vllm_ascend-0.9.0rc2/format.sh +0 -343
- vllm_ascend-0.9.0rc2/pytest.ini +0 -68
- vllm_ascend-0.9.0rc2/requirements-lint.txt +0 -15
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/conftest.py +0 -212
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_medusa_correctness.py +0 -445
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mlp_correctness.py +0 -560
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_mtp_correctness.py +0 -455
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e/test_ngram_correctness.py +0 -404
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_dynamic_spec_decode.py +0 -105
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_multi_step_worker.py +0 -846
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_ngram_worker.py +0 -237
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_spec_decode_worker.py +0 -958
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/test_utils.py +0 -165
- vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/utils.py +0 -317
- vllm_ascend-0.9.0rc2/tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py +0 -71
- vllm_ascend-0.9.0rc2/tests/singlecard/test_ascend_config.py +0 -189
- vllm_ascend-0.9.0rc2/vllm_ascend/ops/common_fused_moe.py +0 -69
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/__init__.py +0 -17
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_0_9_0/patch_distributed.py +0 -116
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -99
- vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_common/patch_eagle.py +0 -70
- vllm_ascend-0.9.0rc2/vllm_ascend/quantization/w8a8.py +0 -115
- vllm_ascend-0.9.0rc2/vllm_ascend/utils.py +0 -229
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/actionlint.yaml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/labeler.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/label_merge_conflict.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/labeler.yml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.github/workflows/matchers/ruff.json +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/CODE_OF_CONDUCT.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/DCO +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/LICENSE +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/requirements-bench.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/benchmarks/scripts/perf_result_template.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/collect_env.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/advance_step.cpp +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/csrc/utils.h +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/Makefile +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/README.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/requirements-docs.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/_templates/sections/header.html +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/community/governance.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
- {vllm_ascend-0.9.0rc2/docs/source/developer_guide/evaluation → vllm_ascend-0.9.2rc1/docs/source/developer_guide/performance}/performance_benchmark.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/docs/source/tutorials/multi_npu.md +0 -0
- vllm_ascend-0.9.0rc2/docs/source/user_guide/suppoted_features.md → vllm_ascend-0.9.2rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/disaggregated_prefill_offline.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/dp_proxy.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/run_decode_server.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/disaggregated_prefill/run_prefill_server.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_disaggregated_prefill_npu.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_distributed_inference_npu.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/offline_dualbatch_overlap_npu.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/prompt_embedding_inference.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/examples/run_dp_server.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/packages.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/setup.cfg +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_dynamic_npugraph_batchsize.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/multicard/test_pyhccl_distributed.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/e2e/run_disagg_pd.sh +0 -0
- {vllm_ascend-0.9.0rc2/tests/long_term/spec_decode/e2e → vllm_ascend-0.9.2rc1/tests/e2e/singlecard}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests/singlecard → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/compile}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests/singlecard/compile → vllm_ascend-0.9.2rc1/tests/e2e/singlecard/core}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/test_fused_moe.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/ops/test_multi_step.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/sample/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_camem.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_profile_execute_duration.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_prompt_embedding.py +0 -0
- {vllm_ascend-0.9.0rc2/tests → vllm_ascend-0.9.2rc1/tests/e2e}/singlecard/test_pyhccl.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tests/model_utils.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/attention/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/compilation/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/core/schedule_config.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/device_allocator/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_buffer.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/simple_pipe.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/kv_transfer/utils.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/llmdatadist_connector.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/distributed/parallel_state.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/punica_wrapper/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/lora/punica_wrapper/punica_npu.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_5_vl.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen2_vl.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/models/qwen3_moe.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/base.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/context.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/decorator.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/layers.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/metadata.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/multistream/ms_split.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/attention.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/cache.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/patch/platform/patch_main → vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_0_9_2}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/platform/patch_common/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2/vllm_ascend/patch/worker/patch_0_9_0 → vllm_ascend-0.9.2rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_common/patch_utils.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/quantization/func_wrapper.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/sample/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/sample/rejection_sampler.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/__init__.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/cache_engine.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/model_runner.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/multi_step_runner.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/multi_step_worker.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend/worker/pooling_model_runner.py +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.2rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -15,17 +15,16 @@
|
|
|
15
15
|
# This file is a part of the vllm-ascend project.
|
|
16
16
|
#
|
|
17
17
|
ARG PY_VERSION=3.10
|
|
18
|
-
FROM quay.io/ascend/
|
|
18
|
+
FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
|
|
19
19
|
|
|
20
20
|
ARG COMPILE_CUSTOM_KERNELS=1
|
|
21
21
|
|
|
22
22
|
# Define environments
|
|
23
23
|
ENV DEBIAN_FRONTEND=noninteractive
|
|
24
24
|
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
|
|
25
|
-
RUN
|
|
26
|
-
|
|
27
|
-
rm -rf /var/cache/
|
|
28
|
-
rm -rf /var/lib/apt/lists/*
|
|
25
|
+
RUN yum update -y && \
|
|
26
|
+
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
|
|
27
|
+
rm -rf /var/cache/yum
|
|
29
28
|
|
|
30
29
|
WORKDIR /workspace
|
|
31
30
|
|
|
@@ -41,8 +40,6 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
|
41
40
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
|
|
42
41
|
cd vllm-ascend && \
|
|
43
42
|
python3 setup.py bdist_wheel && \
|
|
44
|
-
ls -l dist
|
|
45
|
-
for f in dist/*.whl; do mv "$f" "$(echo "$f" | sed -e 's/-linux_x86_64\.whl$/-manylinux1_x86_64.whl/' -e 's/-linux_aarch64\.whl$/-manylinux2014_aarch64.whl/')"; done && \
|
|
46
|
-
ls -l dist
|
|
43
|
+
ls -l dist
|
|
47
44
|
|
|
48
45
|
CMD ["/bin/bash"]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
name: 📚 User Story
|
|
2
|
-
description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.
|
|
2
|
+
description: Apply for an user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
|
|
3
3
|
title: "[User Story]: "
|
|
4
4
|
labels: ["user-story"]
|
|
5
5
|
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
name: Release Checklist
|
|
2
|
+
description: Generate a release checklist issue when prepare a new release.(Used for release team)
|
|
3
|
+
title: "[Release]: Release checklist for v"
|
|
4
|
+
|
|
5
|
+
body:
|
|
6
|
+
- type: textarea
|
|
7
|
+
attributes:
|
|
8
|
+
description: >
|
|
9
|
+
Brief info for the new release.
|
|
10
|
+
label: Release Checklist
|
|
11
|
+
value: >
|
|
12
|
+
**Release Version**:
|
|
13
|
+
|
|
14
|
+
**Release Branch**:
|
|
15
|
+
|
|
16
|
+
**Release Date**:
|
|
17
|
+
|
|
18
|
+
**Release Manager**:
|
|
19
|
+
- type: textarea
|
|
20
|
+
attributes:
|
|
21
|
+
description: >
|
|
22
|
+
Release notes.
|
|
23
|
+
label: Prepare Release Note
|
|
24
|
+
value: >
|
|
25
|
+
- [ ] Create a new issue for release feedback
|
|
26
|
+
|
|
27
|
+
- [ ] Write the release note PR.
|
|
28
|
+
|
|
29
|
+
- [ ] Update the feedback issue link in docs/source/faqs.md
|
|
30
|
+
|
|
31
|
+
- [ ] Add release note to docs/source/user_guide/release_notes.md
|
|
32
|
+
|
|
33
|
+
- [ ] Update version info in docs/source/community/versioning_policy.md
|
|
34
|
+
|
|
35
|
+
- [ ] Update contributor info in docs/source/community/contributors.md
|
|
36
|
+
|
|
37
|
+
- [ ] Update package version in docs/conf.py
|
|
38
|
+
- type: textarea
|
|
39
|
+
attributes:
|
|
40
|
+
description: >
|
|
41
|
+
Make sure the code is merged.
|
|
42
|
+
label: PR need Merge
|
|
43
|
+
value: >
|
|
44
|
+
- [ ] PR link1
|
|
45
|
+
|
|
46
|
+
- [ ] PR link2
|
|
47
|
+
|
|
48
|
+
- [ ] ...
|
|
49
|
+
- type: textarea
|
|
50
|
+
attributes:
|
|
51
|
+
description: >
|
|
52
|
+
Make sure the new Feature/Function is tested
|
|
53
|
+
label: Functional Test
|
|
54
|
+
value: >
|
|
55
|
+
- [ ] Feature1
|
|
56
|
+
|
|
57
|
+
- [ ] Bug1
|
|
58
|
+
|
|
59
|
+
- [ ] ...
|
|
60
|
+
- type: textarea
|
|
61
|
+
attributes:
|
|
62
|
+
description: >
|
|
63
|
+
Make sure the doc is updated.
|
|
64
|
+
label: Doc Test
|
|
65
|
+
value: >
|
|
66
|
+
- [ ] Tutorial is updated.
|
|
67
|
+
|
|
68
|
+
- [ ] User Guide is updated.
|
|
69
|
+
|
|
70
|
+
- [ ] Developer Guide is updated.
|
|
71
|
+
- type: textarea
|
|
72
|
+
attributes:
|
|
73
|
+
description: >
|
|
74
|
+
Make sure the artifacts is ready
|
|
75
|
+
label: Prepare Artifacts
|
|
76
|
+
value: >
|
|
77
|
+
- [ ] Docker image is ready.
|
|
78
|
+
|
|
79
|
+
- [ ] Wheel package is ready.
|
|
80
|
+
- type: textarea
|
|
81
|
+
attributes:
|
|
82
|
+
description: >
|
|
83
|
+
Start to release.
|
|
84
|
+
label: Release Step
|
|
85
|
+
value: >
|
|
86
|
+
- [ ] Release note PR is merged.
|
|
87
|
+
|
|
88
|
+
- [ ] Post the release on GitHub release page.
|
|
89
|
+
|
|
90
|
+
- [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
|
|
91
|
+
|
|
92
|
+
- [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
|
|
93
|
+
|
|
94
|
+
- [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
|
|
95
|
+
|
|
96
|
+
- [ ] Upload 310p wheel to Github release page
|
|
97
|
+
|
|
98
|
+
- [ ] Broadcast the release news (By message, blog , etc)
|
|
99
|
+
|
|
100
|
+
- [ ] Close this issue
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
# Adapted from vllm/.github/scripts/cleanup_pr_body.sh
|
|
17
|
+
|
|
18
|
+
#!/bin/bash
|
|
19
|
+
|
|
20
|
+
set -eux
|
|
21
|
+
|
|
22
|
+
# ensure exactly 3 arguments are passed
|
|
23
|
+
if [ "$#" -ne 3 ]; then
|
|
24
|
+
echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
|
|
25
|
+
exit 1
|
|
26
|
+
fi
|
|
27
|
+
|
|
28
|
+
PR_NUMBER=$1
|
|
29
|
+
VLLM_VERSION=$2
|
|
30
|
+
VLLM_COMMIT=$3
|
|
31
|
+
OLD=/tmp/orig_pr_body.txt
|
|
32
|
+
NEW=/tmp/new_pr_body.txt
|
|
33
|
+
|
|
34
|
+
gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
|
|
35
|
+
cp "${OLD}" "${NEW}"
|
|
36
|
+
|
|
37
|
+
# Remove notes in pr description and add vLLM version and commit
|
|
38
|
+
sed -i '/<!--/,/-->/d' "${NEW}"
|
|
39
|
+
sed -i '/- vLLM .*$/d' "${NEW}"
|
|
40
|
+
{
|
|
41
|
+
echo ""
|
|
42
|
+
echo "- vLLM version: $VLLM_VERSION"
|
|
43
|
+
echo "- vLLM main: $VLLM_COMMIT"
|
|
44
|
+
echo ""
|
|
45
|
+
} >> "${NEW}"
|
|
46
|
+
|
|
47
|
+
# Run this only if ${NEW} is different than ${OLD}
|
|
48
|
+
if ! cmp -s "${OLD}" "${NEW}"; then
|
|
49
|
+
echo
|
|
50
|
+
echo "Updating PR body:"
|
|
51
|
+
echo
|
|
52
|
+
cat "${NEW}"
|
|
53
|
+
gh pr edit --body-file "${NEW}" "${PR_NUMBER}"
|
|
54
|
+
else
|
|
55
|
+
echo "No changes needed"
|
|
56
|
+
fi
|
|
@@ -22,6 +22,9 @@
|
|
|
22
22
|
name: Benchmarks / accuracy
|
|
23
23
|
|
|
24
24
|
on:
|
|
25
|
+
schedule:
|
|
26
|
+
# Runs every 6 hours
|
|
27
|
+
- cron: '0 */6 * * *'
|
|
25
28
|
pull_request:
|
|
26
29
|
types: [ labeled ]
|
|
27
30
|
workflow_dispatch:
|
|
@@ -34,8 +37,8 @@ on:
|
|
|
34
37
|
# Current supported vLLM versions
|
|
35
38
|
options:
|
|
36
39
|
- main
|
|
37
|
-
- v0.9.
|
|
38
|
-
- v0.9.
|
|
40
|
+
- v0.9.2
|
|
41
|
+
- v0.9.1
|
|
39
42
|
- v0.7.3
|
|
40
43
|
vllm-ascend-version:
|
|
41
44
|
description: 'vllm-ascend version:'
|
|
@@ -43,6 +46,7 @@ on:
|
|
|
43
46
|
type: choice
|
|
44
47
|
options:
|
|
45
48
|
- main
|
|
49
|
+
- v0.9.1-dev
|
|
46
50
|
- v0.7.3-dev
|
|
47
51
|
models:
|
|
48
52
|
description: 'model:'
|
|
@@ -50,9 +54,9 @@ on:
|
|
|
50
54
|
type: choice
|
|
51
55
|
options:
|
|
52
56
|
- all
|
|
53
|
-
- Qwen/Qwen2.5-7B-Instruct
|
|
54
57
|
- Qwen/Qwen2.5-VL-7B-Instruct
|
|
55
58
|
- Qwen/Qwen3-8B-Base
|
|
59
|
+
- Qwen/Qwen3-30B-A3B
|
|
56
60
|
default: 'all'
|
|
57
61
|
|
|
58
62
|
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
@@ -74,56 +78,56 @@ jobs:
|
|
|
74
78
|
${{
|
|
75
79
|
(contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
|
|
76
80
|
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
|
|
81
|
+
contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
|
|
77
82
|
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
|
|
78
83
|
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
|
|
79
|
-
github.event_name == 'workflow_dispatch'
|
|
84
|
+
github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
|
80
85
|
}}
|
|
81
86
|
runs-on: >-
|
|
82
87
|
${{
|
|
83
|
-
(matrix.model_name == 'Qwen/
|
|
88
|
+
(matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
|
|
84
89
|
'linux-arm64-npu-2'
|
|
85
90
|
}}
|
|
86
91
|
strategy:
|
|
87
92
|
matrix:
|
|
88
|
-
vllm_use_version: [0, 1]
|
|
89
93
|
# the accuracy test will run:
|
|
90
94
|
# 1. workflow_dispatch with models input
|
|
91
|
-
# - all: Qwen/
|
|
92
|
-
# - specified but not all: Qwen/
|
|
95
|
+
# - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
|
|
96
|
+
# - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
|
|
93
97
|
# 2. PR labeled with "*-accuracy-test"
|
|
94
|
-
# - accuracy-test: Qwen/
|
|
95
|
-
# - dense-accuracy-test: Qwen/
|
|
98
|
+
# - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
|
|
99
|
+
# - dense-accuracy-test: Qwen/Qwen3-8B-Base
|
|
96
100
|
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
|
|
101
|
+
# - moe-accuracy-test: Qwen/Qwen3-30B-A3B
|
|
97
102
|
model_name: ${{ fromJSON(
|
|
103
|
+
(github.event_name == 'schedule' &&
|
|
104
|
+
'["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
|
|
98
105
|
(github.event.inputs.models == 'all' &&
|
|
99
|
-
'["Qwen/
|
|
100
|
-
(github.event.inputs.models == 'Qwen/
|
|
101
|
-
'["Qwen/
|
|
106
|
+
'["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
|
|
107
|
+
(github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
|
|
108
|
+
'["Qwen/Qwen3-30B-A3B"]') ||
|
|
102
109
|
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
|
|
103
110
|
'["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
|
|
104
111
|
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
|
|
105
112
|
'["Qwen/Qwen3-8B-Base"]') ||
|
|
106
113
|
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
|
|
107
|
-
'["Qwen/
|
|
114
|
+
'["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
|
|
108
115
|
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
|
|
109
|
-
'["Qwen/
|
|
116
|
+
'["Qwen/Qwen3-8B-Base"]' ||
|
|
110
117
|
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
|
|
111
|
-
'["Qwen/Qwen2.5-VL-7B-Instruct"]'
|
|
118
|
+
'["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
|
|
119
|
+
contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
|
|
120
|
+
'["Qwen/Qwen3-30B-A3B"]'
|
|
112
121
|
) }}
|
|
113
|
-
# Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
|
|
114
|
-
exclude:
|
|
115
|
-
- model_name: Qwen/Qwen2.5-VL-7B-Instruct
|
|
116
|
-
vllm_use_version: 1
|
|
117
122
|
|
|
118
123
|
fail-fast: false
|
|
119
|
-
name: ${{ matrix.model_name }} accuracy
|
|
124
|
+
name: ${{ matrix.model_name }} accuracy
|
|
120
125
|
container:
|
|
121
126
|
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
|
122
127
|
env:
|
|
123
|
-
HF_ENDPOINT: https://hf-mirror.com
|
|
124
|
-
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
|
125
128
|
DATASET_SOURCE: ModelScope
|
|
126
129
|
VLLM_USE_MODELSCOPE: True
|
|
130
|
+
USE_MODELSCOPE_HUB: 1
|
|
127
131
|
# 1. If version specified (workflow_dispatch), do specified branch accuracy test
|
|
128
132
|
# 2. If no version (labeled PR), do accuracy test by default ref:
|
|
129
133
|
# The branch, tag or SHA to checkout. When checking out the repository that
|
|
@@ -159,7 +163,7 @@ jobs:
|
|
|
159
163
|
repository: vllm-project/vllm
|
|
160
164
|
path: ./vllm-empty
|
|
161
165
|
# Please also update this when bumping the matched version
|
|
162
|
-
ref: ${{ github.event.inputs.vllm-version || 'v0.9.
|
|
166
|
+
ref: ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
|
|
163
167
|
|
|
164
168
|
- name: Install vllm-project/vllm from source
|
|
165
169
|
working-directory: ./vllm-empty
|
|
@@ -174,13 +178,32 @@ jobs:
|
|
|
174
178
|
|
|
175
179
|
- name: Install vllm-project/vllm-ascend
|
|
176
180
|
working-directory: ./vllm-ascend
|
|
181
|
+
env:
|
|
182
|
+
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
177
183
|
run: |
|
|
178
184
|
pip install -r requirements-dev.txt
|
|
179
|
-
pip install -e .
|
|
185
|
+
pip install -v -e .
|
|
186
|
+
|
|
187
|
+
- name: Get vLLM commit hash and URL
|
|
188
|
+
working-directory: ./vllm-empty
|
|
189
|
+
run: |
|
|
190
|
+
VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
|
|
191
|
+
echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
|
|
192
|
+
|
|
193
|
+
- name: Get vLLM-Ascend commit hash and URL
|
|
194
|
+
working-directory: ./vllm-ascend
|
|
195
|
+
run: |
|
|
196
|
+
VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
|
|
197
|
+
echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
|
|
198
|
+
|
|
199
|
+
- name: Print resolved hashes
|
|
200
|
+
run: |
|
|
201
|
+
echo "vLLM : ${{ env.VLLM_COMMIT }}"
|
|
202
|
+
echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
|
|
180
203
|
|
|
181
204
|
- name: Install lm-eval, ray, and datasets
|
|
182
205
|
run: |
|
|
183
|
-
pip install lm-eval
|
|
206
|
+
pip install lm-eval==0.4.8
|
|
184
207
|
|
|
185
208
|
- name: Collect version info
|
|
186
209
|
run: |
|
|
@@ -212,15 +235,14 @@ jobs:
|
|
|
212
235
|
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
|
|
213
236
|
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
|
|
214
237
|
|
|
215
|
-
- name: Run Accuracy Test
|
|
238
|
+
- name: Run Accuracy Test
|
|
216
239
|
id: report
|
|
217
240
|
working-directory: ./benchmarks
|
|
218
241
|
env:
|
|
219
242
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
220
|
-
VLLM_USE_V1: ${{ matrix.vllm_use_version }}
|
|
221
243
|
run: |
|
|
222
244
|
model_base_name=$(basename ${{ matrix.model_name }})
|
|
223
|
-
markdown_name="${model_base_name}
|
|
245
|
+
markdown_name="${model_base_name}"
|
|
224
246
|
echo "markdown_name=$markdown_name"
|
|
225
247
|
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
|
|
226
248
|
mkdir -p ./accuracy
|
|
@@ -232,7 +254,9 @@ jobs:
|
|
|
232
254
|
--cann_version "${{ env.GHA_CANN_VERSION }}" \
|
|
233
255
|
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
|
|
234
256
|
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
|
|
235
|
-
--vllm_version "${{ env.GHA_VLLM_VERSION }}"
|
|
257
|
+
--vllm_version "${{ env.GHA_VLLM_VERSION }}" \
|
|
258
|
+
--vllm_commit "${{ env.VLLM_COMMIT }}" \
|
|
259
|
+
--vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
|
|
236
260
|
|
|
237
261
|
- name: Generate step summary
|
|
238
262
|
if: ${{ always() }}
|
|
@@ -244,12 +268,122 @@ jobs:
|
|
|
244
268
|
SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
|
|
245
269
|
echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
|
|
246
270
|
|
|
247
|
-
- name:
|
|
248
|
-
|
|
271
|
+
- name: Check report first line for failure
|
|
272
|
+
id: check_report
|
|
273
|
+
run: |
|
|
274
|
+
REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
|
|
275
|
+
echo "Scanning $REPORT_PATH for ❌ …"
|
|
276
|
+
if grep -q '❌' "$REPORT_PATH"; then
|
|
277
|
+
echo "contains_fail=true" >> $GITHUB_OUTPUT
|
|
278
|
+
else
|
|
279
|
+
echo "contains_fail=false" >> $GITHUB_OUTPUT
|
|
280
|
+
fi
|
|
281
|
+
|
|
282
|
+
- name: Upload Report
|
|
283
|
+
if: ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
|
|
249
284
|
uses: actions/upload-artifact@v4
|
|
250
285
|
with:
|
|
251
|
-
name: "
|
|
286
|
+
name: "report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
|
|
252
287
|
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
|
|
253
288
|
if-no-files-found: warn
|
|
254
289
|
retention-days: 90
|
|
255
290
|
overwrite: true
|
|
291
|
+
|
|
292
|
+
create_pr:
|
|
293
|
+
runs-on: ubuntu-latest
|
|
294
|
+
needs: accuracy_tests
|
|
295
|
+
if: ${{ github.event_name == 'workflow_dispatch' }}
|
|
296
|
+
env:
|
|
297
|
+
UPSTREAM_REPO: vllm-project/vllm-ascend
|
|
298
|
+
steps:
|
|
299
|
+
- name: Checkout repository
|
|
300
|
+
uses: actions/checkout@v4
|
|
301
|
+
with:
|
|
302
|
+
repository: vllm-ascend-ci/vllm-ascend
|
|
303
|
+
token: ${{ secrets.PAT_TOKEN }}
|
|
304
|
+
ref: main
|
|
305
|
+
|
|
306
|
+
- name: Add upstream remote
|
|
307
|
+
run: |
|
|
308
|
+
git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
|
|
309
|
+
git fetch upstream
|
|
310
|
+
git remote -v
|
|
311
|
+
|
|
312
|
+
- name: Set Git user info dynamically
|
|
313
|
+
run: |
|
|
314
|
+
git config user.name "${{ github.actor }}"
|
|
315
|
+
git config user.email "${{ github.actor }}@users.noreply.github.com"
|
|
316
|
+
|
|
317
|
+
- name: Create or switch to branch
|
|
318
|
+
run: |
|
|
319
|
+
TIMESTAMP=$(date +%Y%m%d%H%M%S)
|
|
320
|
+
BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
|
|
321
|
+
echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
|
|
322
|
+
git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
|
|
323
|
+
|
|
324
|
+
- name: Download only current run reports
|
|
325
|
+
uses: actions/download-artifact@v4
|
|
326
|
+
with:
|
|
327
|
+
path: ./docs/source/developer_guide/evaluation/accuracy_report
|
|
328
|
+
pattern: report-*
|
|
329
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
330
|
+
run-id: ${{ github.run_id }}
|
|
331
|
+
|
|
332
|
+
- name: Delete old report
|
|
333
|
+
run: |
|
|
334
|
+
find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
|
|
335
|
+
find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
|
|
336
|
+
find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
|
|
337
|
+
|
|
338
|
+
- name: Update accuracy_report/index.md
|
|
339
|
+
run: |
|
|
340
|
+
REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
|
|
341
|
+
INDEX_MD="$REPORT_DIR/index.md"
|
|
342
|
+
{
|
|
343
|
+
echo "# Accuracy Report"
|
|
344
|
+
echo ""
|
|
345
|
+
echo ":::{toctree}"
|
|
346
|
+
echo ":caption: Accuracy Report"
|
|
347
|
+
echo ":maxdepth: 1"
|
|
348
|
+
|
|
349
|
+
for report in "$REPORT_DIR"/*.md; do
|
|
350
|
+
filename="$(basename "$report" .md)"
|
|
351
|
+
if [ "$filename" != "index" ]; then
|
|
352
|
+
echo "$filename"
|
|
353
|
+
fi
|
|
354
|
+
done
|
|
355
|
+
echo ":::"
|
|
356
|
+
} > "$INDEX_MD"
|
|
357
|
+
|
|
358
|
+
- name: push accuracy report
|
|
359
|
+
env:
|
|
360
|
+
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
|
|
361
|
+
run: |
|
|
362
|
+
git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
|
|
363
|
+
git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
|
|
364
|
+
git push -f origin "${{ env.BRANCH_NAME }}"
|
|
365
|
+
|
|
366
|
+
- name: Create PR in upstream via API
|
|
367
|
+
uses: actions/github-script@v7
|
|
368
|
+
with:
|
|
369
|
+
github-token: ${{ secrets.PAT_TOKEN }}
|
|
370
|
+
script: |
|
|
371
|
+
const pr = await github.rest.pulls.create({
|
|
372
|
+
owner: 'vllm-project',
|
|
373
|
+
repo: 'vllm-ascend',
|
|
374
|
+
head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
|
|
375
|
+
base: '${{ github.event.inputs.vllm-ascend-version }}',
|
|
376
|
+
title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
|
|
377
|
+
body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
|
|
378
|
+
${{
|
|
379
|
+
github.event.inputs.models == 'all'
|
|
380
|
+
&& 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
|
|
381
|
+
|| github.event.inputs.models
|
|
382
|
+
}}
|
|
383
|
+
|
|
384
|
+
- [Workflow run][1]
|
|
385
|
+
|
|
386
|
+
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
|
|
387
|
+
});
|
|
388
|
+
core.info(`Created PR #${pr.data.number}`);
|
|
389
|
+
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
name: format / pr body
|
|
19
|
+
|
|
20
|
+
on:
|
|
21
|
+
# Update the PR body when a PR is opened or new commits are pushed
|
|
22
|
+
pull_request_target:
|
|
23
|
+
types: [opened, synchronize]
|
|
24
|
+
branches:
|
|
25
|
+
- 'main'
|
|
26
|
+
|
|
27
|
+
permissions:
|
|
28
|
+
pull-requests: write
|
|
29
|
+
|
|
30
|
+
jobs:
|
|
31
|
+
update-description:
|
|
32
|
+
name: update vLLM version
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
|
|
35
|
+
steps:
|
|
36
|
+
- name: Checkout vllm-project/vllm repo
|
|
37
|
+
uses: actions/checkout@v4
|
|
38
|
+
with:
|
|
39
|
+
repository: vllm-project/vllm
|
|
40
|
+
path: ./vllm-empty
|
|
41
|
+
|
|
42
|
+
- name: Get vLLM version
|
|
43
|
+
working-directory: ./vllm-empty
|
|
44
|
+
run: |
|
|
45
|
+
VLLM_COMMIT=$(git rev-parse HEAD)
|
|
46
|
+
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
|
47
|
+
|
|
48
|
+
- name: Checkout repository
|
|
49
|
+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
50
|
+
|
|
51
|
+
- name: Set up Python
|
|
52
|
+
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
|
|
53
|
+
|
|
54
|
+
- name: Get vLLM release version
|
|
55
|
+
run: |
|
|
56
|
+
VLLM_VERSION=$(python3 docs/source/conf.py | jq .ci_vllm_version | tr -d '"')
|
|
57
|
+
echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
|
|
58
|
+
|
|
59
|
+
- name: Update PR description
|
|
60
|
+
env:
|
|
61
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
62
|
+
run: |
|
|
63
|
+
bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"
|