vllm-ascend 0.10.0rc1__tar.gz → 0.10.2rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm_ascend-0.10.2rc1/.gemini/config.yaml +6 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/750-RFC.yml +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/actionlint.yaml +7 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/accuracy_test.yaml +14 -13
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/format_pr_body.yaml +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_310p_openeuler.yml +12 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_310p_ubuntu.yml +13 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_a3_openeuler.yml +12 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_a3_ubuntu.yml +13 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_openeuler.yml +12 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/image_ubuntu.yml +13 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/nightly_benchmarks.yaml +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/pre-commit.yml +1 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/release_code.yml +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/release_whl.yml +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/reminder_comment.yml +1 -1
- vllm_ascend-0.10.0rc1/.github/workflows/vllm_ascend_test_long_term.yaml → vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_dist.yaml +25 -27
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test.yaml +17 -49
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_310p.yaml +4 -4
- vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_test_full.yaml +233 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_pd.yaml +1 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.310p +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.310p.openEuler +3 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.a3 +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.a3.openEuler +3 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/Dockerfile.openEuler +3 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/PKG-INFO +7 -6
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/README.md +6 -5
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/README.zh.md +6 -4
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/ops/ben_vocabparallelembedding.py +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/run-performance-benchmarks.sh +3 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/serving-tests.json +2 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/codecov.yml +2 -4
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/bgmv_expand.cpp +7 -7
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/bgmv_shrink.cpp +7 -7
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/pos_encoding_kernels.cpp +0 -5
- vllm_ascend-0.10.2rc1/csrc/kernels/sgmv_expand.cpp +389 -0
- vllm_ascend-0.10.2rc1/csrc/kernels/sgmv_shrink.cpp +275 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/ops.h +36 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/torch_binding.cpp +121 -16
- vllm_ascend-0.10.2rc1/csrc/torch_binding_meta.cpp +102 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/utils.h +0 -12
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/_templates/sections/header.html +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/contributors.md +15 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/versioning_policy.md +21 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/conf.py +5 -5
- vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +20 -0
- vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +19 -0
- vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +21 -0
- vllm_ascend-0.10.2rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +21 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/accuracy_report/index.md +4 -0
- vllm_ascend-0.10.2rc1/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +237 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/index.md +1 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/faqs.md +34 -8
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/installation.md +2 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/index.md +2 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_node.md +11 -8
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_node_kimi.md +1 -1
- vllm_ascend-0.10.2rc1/docs/source/tutorials/multi_node_pd_disaggregation.md +244 -0
- vllm_ascend-0.10.2rc1/docs/source/tutorials/multi_npu_qwen3_next.md +156 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_qwen3_quantization.md +4 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/additional_config.md +7 -0
- vllm_ascend-0.10.2rc1/docs/source/user_guide/feature_guide/lora.md +23 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/quantization.md +5 -5
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/release_notes.md +447 -280
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/README.md +12 -12
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/gen_ranktable.py +43 -29
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/gen_ranktable.sh +10 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +32 -3
- vllm_ascend-0.10.2rc1/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +165 -0
- vllm_ascend-0.10.2rc1/examples/external_online_dp/README.md +38 -0
- vllm_ascend-0.10.2rc1/examples/external_online_dp/launch_online_dp.py +97 -0
- vllm_ascend-0.10.2rc1/examples/external_online_dp/run_dp_template.sh +46 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_audio_language.py +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/pyproject.toml +0 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/requirements-dev.txt +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/requirements.txt +0 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/setup.py +1 -1
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e/310p}/test_offline_inference_310p.py +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/conftest.py +25 -113
- vllm_ascend-0.10.2rc1/tests/e2e/model_utils.py +74 -0
- vllm_ascend-0.10.2rc1/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +13 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/conftest.py +21 -22
- vllm_ascend-0.10.2rc1/tests/e2e/models/report_template.md +21 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/test_lm_eval_correctness.py +14 -9
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_data_parallel.py +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_expert_parallel.py +4 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_external_launcher.py +38 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_ilama_lora_tp2.py +3 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_offline_inference_distributed.py +57 -117
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_pipeline_parallel.py +0 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_prefix_caching.py +22 -20
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_qwen3_moe.py +35 -6
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/multicard/test_torchair_graph_mode.py +62 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_bgmv_expand.py +8 -3
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +7 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_fused_moe.py +181 -22
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_rotary_embedding.py +153 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +5 -1
- vllm_ascend-0.10.2rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +92 -0
- vllm_ascend-0.10.2rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +85 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +31 -32
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_aclgraph.py +14 -33
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_ascend_scheduler.py +29 -6
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_camem.py +25 -14
- vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_chunked.py +81 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_embedding.py +17 -36
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_guided_decoding.py +5 -4
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_ilama_lora.py +3 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/test_profile_execute_duration.py +9 -0
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/quant/test_w8a8.py → vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_quantization.py +5 -12
- vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_sampler.py +49 -0
- vllm_ascend-0.10.2rc1/tests/e2e/singlecard/test_vlm.py +94 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_attention_mask.py +49 -72
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/attention/test_attention_v1.py +107 -55
- vllm_ascend-0.10.2rc1/tests/ut/attention/test_mla_v1.py +633 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/core/test_schedule_config.py +67 -24
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/core/test_scheduler.py +120 -53
- vllm_ascend-0.10.2rc1/tests/ut/distributed/test_communicator.py +89 -0
- vllm_ascend-0.10.2rc1/tests/ut/distributed/test_parallel_state.py +48 -0
- vllm_ascend-0.10.2rc1/tests/ut/kv_connector/test_mooncake_connector.py +1038 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +8 -15
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +10 -16
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/utils.py +27 -31
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_deepseek_mtp.py +16 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_deepseek_v2.py +23 -48
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_5_vl.py +52 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_5_vl_without_padding.py +24 -0
- vllm_ascend-0.10.2rc1/tests/ut/models/test_qwen3_moe.py +98 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_activation.py +12 -1
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_ascend_forwad_context.py +22 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_comm_utils.py +98 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_common_fused_moe.py +105 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +218 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_fused_ops.py +842 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_layernorm.py +93 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_linear.py +105 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_moe_comm_method.py +212 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_rotary_embedding.py +378 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_token_dispatcher.py +619 -0
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_vocab_parallel_embedding.py +240 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_quant_config.py +12 -23
- vllm_ascend-0.10.2rc1/tests/ut/quantization/test_utils.py +62 -0
- vllm_ascend-0.10.2rc1/tests/ut/quantization/test_w4a8_dynamic.py +177 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/quantization/test_w8a8.py +68 -44
- vllm_ascend-0.10.2rc1/tests/ut/quantization/test_w8a8_dynamic.py +69 -0
- vllm_ascend-0.10.2rc1/tests/ut/sample/logits_processor/test_builtin.py +40 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/sample/test_rejection_sampler.py +4 -2
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_ascend_config.py +82 -3
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_envs.py +8 -7
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_platform.py +156 -67
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/test_utils.py +29 -79
- vllm_ascend-0.10.2rc1/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +195 -0
- vllm_ascend-0.10.2rc1/tests/ut/torchair/models/test_torchair_deepseek_v2.py +331 -0
- vllm_ascend-0.10.0rc1/tests/ut/ops/test_fused_ops.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/ops/test_torchair_fused_moe.py +404 -377
- vllm_ascend-0.10.0rc1/tests/ut/ops/test_rotary_embedding.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +331 -314
- vllm_ascend-0.10.2rc1/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +176 -0
- vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w8a8_dynamic.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +75 -75
- vllm_ascend-0.10.0rc1/tests/ut/attention/test_mla_v1.py → vllm_ascend-0.10.2rc1/tests/ut/torchair/test_torchair_mla.py +266 -129
- vllm_ascend-0.10.2rc1/tests/ut/torchair/test_utils.py +136 -0
- vllm_ascend-0.10.2rc1/tests/ut/worker/test_input_batch.py +372 -0
- vllm_ascend-0.10.2rc1/tests/ut/worker/test_model_runner_v1.py +94 -0
- vllm_ascend-0.10.2rc1/tests/ut/worker/test_worker_v1.py +1152 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/_version.py +34 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ascend_config.py +52 -2
- vllm_ascend-0.10.2rc1/vllm_ascend/ascend_forward_context.py +177 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/attention_mask.py +35 -46
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/attention/attention_v1.py +269 -132
- vllm_ascend-0.10.2rc1/vllm_ascend/attention/mla_v1.py +1052 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/attention/utils.py +102 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/compilation/acl_graph.py +186 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/schedule_config.py +14 -8
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/core/scheduler.py +98 -14
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/__init__.py +4 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/communicator.py +0 -21
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +32 -52
- vllm_ascend-0.10.2rc1/vllm_ascend/distributed/mooncake_connector.py +1072 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/distributed/parallel_state.py +144 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/envs.py +37 -32
- {vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/lora_ops.py +21 -20
- {vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/punica_npu.py +17 -14
- vllm_ascend-0.10.2rc1/vllm_ascend/lora/utils.py +77 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/meta_registration.py +105 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/__init__.py +10 -13
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_mtp.py +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_v2.py +85 -110
- vllm_ascend-0.10.2rc1/vllm_ascend/models/layers/mla.py +144 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_5_vl.py +63 -14
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_5_vl_without_padding.py +20 -24
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen3_moe.py +2 -1
- vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3_next.py +1361 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/ms_split.py +1 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/__init__.py +19 -10
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/activation.py +2 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/casual_conv1d.py +597 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/common_fused_moe.py +444 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/fla.py +381 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/fused_moe.py +585 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/layernorm.py +116 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/linear.py +626 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/ops → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/comm_utils.py +52 -1
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/experts_selector.py +283 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +240 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/moe_comm_method.py +298 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/moe_mlp.py +252 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/token_dispatcher.py +726 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/register_custom_ops.py +192 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/rotary_embedding.py +355 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/sigmoid_gating.py +403 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/vocab_parallel_embedding.py +255 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/__init__.py +16 -11
- {vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_common → vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform}/__init__.py +2 -2
- vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/__init__.py +20 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +2 -2
- vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +97 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_shared_fused_moe.py +21 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_0_10_0 → vllm_ascend-0.10.2rc1/vllm_ascend/patch/worker}/__init__.py +2 -1
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/__init__.py +1 -1
- vllm_ascend-0.10.2rc1/vllm_ascend/patch/worker/patch_common/patch_logits.py +26 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/platform.py +124 -35
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/quant_config.py +38 -34
- vllm_ascend-0.10.2rc1/vllm_ascend/quantization/utils.py +83 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/quantization/w4a8_dynamic.py +394 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/quantization/w8a8.py +2 -122
- vllm_ascend-0.10.2rc1/vllm_ascend/quantization/w8a8_dynamic.py +288 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/sample/logits_processor/__init__.py +50 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/sample/logits_processor/builtin.py +35 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/sample/rejection_sampler.py +99 -48
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/sample/sampler.py +11 -2
- vllm_ascend-0.10.0rc1/vllm_ascend/torchair/torchair_model_runner.py → vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/__init__.py +13 -9
- vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/eagle_proposer.py +644 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/interface.py +51 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/mtp_proposer.py +630 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/spec_decode/ngram_proposer.py +65 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/qwen2.py +364 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/qwen3_moe.py +538 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +218 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_deepseek_v2.py +1056 -0
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_pyhccl.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_deepseek_v3.py +11 -12
- vllm_ascend-0.10.0rc1/vllm_ascend/models/pangu_moe.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models/torchair_pangu_moe.py +1119 -1117
- vllm_ascend-0.10.0rc1/vllm_ascend/distributed/communication_op.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_activation.py +37 -25
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/fused_moe.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_fused_moe.py +1319 -1557
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/layernorm.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_layernorm.py +51 -86
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/rotary_embedding.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +365 -292
- vllm_ascend-0.10.0rc1/vllm_ascend/quantization/w4a8_dynamic.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +439 -396
- vllm_ascend-0.10.0rc1/vllm_ascend/quantization/w8a8_dynamic.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +1033 -1033
- vllm_ascend-0.10.0rc1/vllm_ascend/attention/attention_v1_torchair.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/torchair_attention.py +86 -122
- vllm_ascend-0.10.0rc1/vllm_ascend/attention/mla_v1.py → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/torchair_mla.py +224 -184
- vllm_ascend-0.10.2rc1/vllm_ascend/torchair/torchair_model_runner.py +475 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/torchair/torchair_worker.py +4 -2
- vllm_ascend-0.10.2rc1/vllm_ascend/torchair/utils.py +219 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/utils.py +160 -48
- vllm_ascend-0.10.2rc1/vllm_ascend/worker/__init__.py +0 -0
- vllm_ascend-0.10.2rc1/vllm_ascend/worker/block_table.py +313 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/model_runner_v1.py +1530 -1277
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/npu_input_batch.py +163 -87
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/worker/worker_v1.py +46 -30
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/PKG-INFO +7 -6
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/SOURCES.txt +101 -45
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/requires.txt +0 -1
- vllm_ascend-0.10.0rc1/docs/source/user_guide/feature_guide/lora.md +0 -8
- vllm_ascend-0.10.0rc1/tests/e2e/long_term/accuracy/accuracy_multicard.py +0 -167
- vllm_ascend-0.10.0rc1/tests/e2e/long_term/accuracy/accuracy_singlecard.py +0 -115
- vllm_ascend-0.10.0rc1/tests/e2e/model_utils.py +0 -274
- vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_dynamic_npugraph_batchsize.py +0 -59
- vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_pyhccl_distributed.py +0 -121
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/compile/test_simple.py +0 -118
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/models/report_template.md +0 -24
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/sample/test_rejection_sampler.py +0 -608
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -90
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_chunked.py +0 -67
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_offline_inference.py +0 -166
- vllm_ascend-0.10.0rc1/tests/e2e/singlecard/test_sampler.py +0 -109
- vllm_ascend-0.10.0rc1/tests/ut/distributed/test_distributed_tensor_parallel.py +0 -139
- vllm_ascend-0.10.0rc1/tests/ut/models/test_qwen3_moe.py +0 -46
- vllm_ascend-0.10.0rc1/tests/ut/ops/test_token_dispatcher.py +0 -65
- vllm_ascend-0.10.0rc1/tests/ut/ops/test_vocab_parallel_embedding.py +0 -299
- vllm_ascend-0.10.0rc1/tests/ut/patch/worker/patch_common/test_patch_linear.py +0 -167
- vllm_ascend-0.10.0rc1/tests/ut/quantization/test_func_wrapper.py +0 -134
- vllm_ascend-0.10.0rc1/tests/ut/quantization/test_quantizer.py +0 -145
- vllm_ascend-0.10.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +0 -109
- vllm_ascend-0.10.0rc1/tests/ut/torchair/test_utils.py +0 -28
- vllm_ascend-0.10.0rc1/tests/ut/worker/test_input_batch.py +0 -161
- vllm_ascend-0.10.0rc1/tests/ut/worker/test_worker_v1.py +0 -1
- vllm_ascend-0.10.0rc1/vllm_ascend/_version.py +0 -21
- vllm_ascend-0.10.0rc1/vllm_ascend/ascend_forward_context.py +0 -114
- vllm_ascend-0.10.0rc1/vllm_ascend/compilation/piecewise_backend.py +0 -225
- vllm_ascend-0.10.0rc1/vllm_ascend/distributed/parallel_state.py +0 -48
- vllm_ascend-0.10.0rc1/vllm_ascend/distributed/tensor_parallel.py +0 -248
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/cache.py +0 -35
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/common_fused_moe.py +0 -115
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/moe_dispatcher/token_dispatcher.py +0 -453
- vllm_ascend-0.10.0rc1/vllm_ascend/ops/vocab_parallel_embedding.py +0 -74
- vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/__init__.py +0 -25
- vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_main/__init__.py +0 -16
- vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/__init__.py +0 -26
- vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_0_10_0/patch_sampler_gather_logprobs.py +0 -87
- vllm_ascend-0.10.0rc1/vllm_ascend/patch/worker/patch_common/patch_linear.py +0 -145
- vllm_ascend-0.10.0rc1/vllm_ascend/quantization/func_wrapper.py +0 -184
- vllm_ascend-0.10.0rc1/vllm_ascend/quantization/quantizer.py +0 -311
- vllm_ascend-0.10.0rc1/vllm_ascend/torchair/utils.py +0 -98
- vllm_ascend-0.10.0rc1/vllm_ascend/worker/eagle_proposer_v1.py +0 -384
- vllm_ascend-0.10.0rc1/vllm_ascend/worker/mtp_proposer_v1.py +0 -400
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/Dockerfile.buildwheel +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/format_pr_body.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/labeler.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/label_merge_conflict.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/labeler.yml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/matchers/ruff.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_doctest.yaml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.gitignore +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.pre-commit-config.yaml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/CMakeLists.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/CODE_OF_CONDUCT.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/CONTRIBUTING.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/DCO +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/LICENSE +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/README.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/requirements-bench.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/scripts/perf_result_template.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/latency-tests.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/benchmarks/tests/throughput-tests.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/collect_env.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/csrc/kernels/utils.h +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/Makefile +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/README.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/requirements-docs.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/requirements-test.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/assets/multi_node_dp_kimi.png +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/governance.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/user_stories/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/community/user_stories/llamafactory.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/contribution/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/contribution/testing.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/feature_guide/patch.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/modeling/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/quick_start.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_moge.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_quantization.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_node_300i.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_audio.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_multimodal.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/env_vars.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/configuration/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/index.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/eplb/eplb_deepseek.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/eplb/eplb_strategy.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_data_parallel.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_disaggregated_prefill_npu.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_dualbatch_overlap_npu.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_embed.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_external_launcher.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_npu.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_npu_tp2.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/offline_inference_sleep_mode_npu.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/prompt_embedding_inference.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/examples/run_dp_server.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/format.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/mypy.ini +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/packages.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/requirements-lint.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/setup.cfg +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/__init__.py +0 -0
- /vllm_ascend-0.10.0rc1/tests/e2e/multicard/test_offline_inference_310p.py → /vllm_ascend-0.10.2rc1/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/common.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/Qwen2.5-VL-7B-Instruct.yaml +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/Qwen3-30B-A3B.yaml +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/Qwen3-8B-Base.yaml +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard → vllm_ascend-0.10.2rc1/tests/e2e}/models/configs/accuracy.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/prompts/example.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/run_disagg_pd.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/run_doctests.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard/compile → vllm_ascend-0.10.2rc1/tests/e2e/singlecard/ops}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/e2e/utils.py +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard/ops → vllm_ascend-0.10.2rc1/tests/ut}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/base.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/conftest.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/device_allocator/test_camem.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/fake_weight/config.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
- {vllm_ascend-0.10.0rc1/tests/e2e/singlecard/sample → vllm_ascend-0.10.2rc1/tests/ut/models}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/models/test_qwen2_vl.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_base.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_decorator.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_layers.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_metadata.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/multistream/test_ms_split.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/expert_map.json +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/ops/test_expert_load_balancer.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tests/ut/sample/test_sampler.py +0 -0
- {vllm_ascend-0.10.0rc1/tests/ut → vllm_ascend-0.10.2rc1/tests/ut/torchair}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/check_python_src_init.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/enforce_regex_import.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/mypy.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/shellcheck.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/typos.toml +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/tests/ut/models → vllm_ascend-0.10.2rc1/vllm_ascend/attention}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/tests/ut/torchair → vllm_ascend-0.10.2rc1/vllm_ascend/compilation}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/attention → vllm_ascend-0.10.2rc1/vllm_ascend/core}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/compilation → vllm_ascend-0.10.2rc1/vllm_ascend/device_allocator}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/device_allocator/camem.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/core → vllm_ascend-0.10.2rc1/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/device_allocator → vllm_ascend-0.10.2rc1/vllm_ascend/lora}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_dbo.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/deepseek_v3.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.10.2rc1/vllm_ascend/models/layers}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen2_vl.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/models/qwen3.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/lora → vllm_ascend-0.10.2rc1/vllm_ascend/multistream}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/base.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/context.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/decorator.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/layers.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/multistream/metadata.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/attention.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/expert_load_balancer.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/lora/punica_wrapper → vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/ops/sequence_parallel.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/patch/platform/patch_0_10_0 → vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_main}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/multistream → vllm_ascend-0.10.2rc1/vllm_ascend/quantization}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/ops/moe_dispatcher → vllm_ascend-0.10.2rc1/vllm_ascend/sample}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/quantization → vllm_ascend-0.10.2rc1/vllm_ascend/torchair}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/sample → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/torchair → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1/vllm_ascend/worker → vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization}/__init__.py +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
|
|
2
|
+
have_fun: false # Just review the code
|
|
3
|
+
code_review:
|
|
4
|
+
comment_severity_threshold: HIGH # Reduce quantity of comments
|
|
5
|
+
pull_request_opened:
|
|
6
|
+
summary: false # Don't summarize the PR in a separate comment
|
{vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/ISSUE_TEMPLATE/900-release-checklist.yml
RENAMED
|
@@ -30,6 +30,8 @@ body:
|
|
|
30
30
|
|
|
31
31
|
- [ ] Add release note to docs/source/user_guide/release_notes.md
|
|
32
32
|
|
|
33
|
+
- [ ] Update release version in README.md and README.zh.md
|
|
34
|
+
|
|
33
35
|
- [ ] Update version info in docs/source/community/versioning_policy.md
|
|
34
36
|
|
|
35
37
|
- [ ] Update contributor info in docs/source/community/contributors.md
|
|
@@ -11,3 +11,10 @@ self-hosted-runner:
|
|
|
11
11
|
- linux-aarch64-310p-2
|
|
12
12
|
- linux-aarch64-310p-4
|
|
13
13
|
- ubuntu-24.04-arm
|
|
14
|
+
- linux-aarch64-a3-1
|
|
15
|
+
- linux-aarch64-a3-2
|
|
16
|
+
- linux-aarch64-a3-4
|
|
17
|
+
- linux-aarch64-a3-8
|
|
18
|
+
- linux-amd64-cpu-0
|
|
19
|
+
- linux-amd64-cpu-8
|
|
20
|
+
- linux-amd64-cpu-16
|
|
@@ -70,6 +70,8 @@ jobs:
|
|
|
70
70
|
runner: linux-aarch64-a2-1
|
|
71
71
|
- model_name: Qwen3-30B-A3B
|
|
72
72
|
runner: linux-aarch64-a2-2
|
|
73
|
+
- model_name: DeepSeek-V2-Lite
|
|
74
|
+
runner: linux-aarch64-a2-2
|
|
73
75
|
fail-fast: false
|
|
74
76
|
|
|
75
77
|
name: ${{ matrix.model_name }} accuracy
|
|
@@ -110,7 +112,7 @@ jobs:
|
|
|
110
112
|
uses: actions/checkout@v4
|
|
111
113
|
with:
|
|
112
114
|
repository: vllm-project/vllm
|
|
113
|
-
ref: v0.10.
|
|
115
|
+
ref: v0.10.2
|
|
114
116
|
path: ./vllm-empty
|
|
115
117
|
|
|
116
118
|
- name: Install vllm-project/vllm from source
|
|
@@ -200,9 +202,8 @@ jobs:
|
|
|
200
202
|
markdown_name="${model_base_name}"
|
|
201
203
|
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
|
|
202
204
|
mkdir -p ./benchmarks/accuracy
|
|
203
|
-
pytest -sv ./tests/e2e/
|
|
204
|
-
--config ./tests/e2e/
|
|
205
|
-
--report_output ./benchmarks/accuracy/${model_base_name}.md
|
|
205
|
+
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
|
|
206
|
+
--config ./tests/e2e/models/configs/${{ matrix.model_name }}.yaml
|
|
206
207
|
|
|
207
208
|
- name: Generate step summary
|
|
208
209
|
if: ${{ always() }}
|
|
@@ -225,14 +226,14 @@ jobs:
|
|
|
225
226
|
|
|
226
227
|
outputs:
|
|
227
228
|
model_name: ${{ steps.set_output.outputs.model_name }}
|
|
228
|
-
|
|
229
|
+
vllm_ascend_version: ${{ env.GHA_VLLM_ASCEND_VERSION }}
|
|
230
|
+
|
|
229
231
|
create_pr:
|
|
230
232
|
runs-on: ubuntu-latest
|
|
231
233
|
needs: accuracy_tests
|
|
232
234
|
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
|
|
233
235
|
env:
|
|
234
236
|
UPSTREAM_REPO: vllm-project/vllm-ascend
|
|
235
|
-
|
|
236
237
|
steps:
|
|
237
238
|
- name: Checkout repository
|
|
238
239
|
uses: actions/checkout@v4
|
|
@@ -257,10 +258,10 @@ jobs:
|
|
|
257
258
|
TIMESTAMP=$(date +%Y%m%d%H%M%S)
|
|
258
259
|
BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
|
|
259
260
|
echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
|
|
260
|
-
git checkout -B "${BRANCH_NAME}" upstream
|
|
261
|
+
git checkout -B "${BRANCH_NAME}" upstream/main
|
|
261
262
|
|
|
262
263
|
- name: Download only current run reports
|
|
263
|
-
uses: actions/download-artifact@
|
|
264
|
+
uses: actions/download-artifact@v5
|
|
264
265
|
with:
|
|
265
266
|
path: ./docs/source/developer_guide/evaluation/accuracy_report
|
|
266
267
|
pattern: report-*
|
|
@@ -298,11 +299,11 @@ jobs:
|
|
|
298
299
|
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }}
|
|
299
300
|
run: |
|
|
300
301
|
git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
|
|
301
|
-
git commit -s -m "[Doc] Update accuracy reports for ${{
|
|
302
|
+
git commit -s -m "[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}"
|
|
302
303
|
git push -f origin "${{ env.BRANCH_NAME }}"
|
|
303
304
|
|
|
304
305
|
- name: Create PR in upstream via API
|
|
305
|
-
uses: actions/github-script@
|
|
306
|
+
uses: actions/github-script@v8
|
|
306
307
|
with:
|
|
307
308
|
github-token: ${{ secrets.PAT_TOKEN }}
|
|
308
309
|
script: |
|
|
@@ -310,9 +311,9 @@ jobs:
|
|
|
310
311
|
owner: 'vllm-project',
|
|
311
312
|
repo: 'vllm-ascend',
|
|
312
313
|
head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
|
|
313
|
-
base: '
|
|
314
|
-
title: `[Doc] Update accuracy reports for ${{
|
|
315
|
-
body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (
|
|
314
|
+
base: 'main',
|
|
315
|
+
title: `[Doc] Update accuracy reports for ${{ needs.accuracy_tests.outputs.vllm_ascend_version }}`,
|
|
316
|
+
body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base, DeepSeek-V2-Lite)
|
|
316
317
|
|
|
317
318
|
- [Workflow run][1]
|
|
318
319
|
|
|
@@ -46,7 +46,7 @@ jobs:
|
|
|
46
46
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
|
47
47
|
|
|
48
48
|
- name: Checkout repository
|
|
49
|
-
uses: actions/checkout@
|
|
49
|
+
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
|
|
50
50
|
|
|
51
51
|
- name: Set up Python
|
|
52
52
|
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
|
|
@@ -25,6 +25,7 @@ on:
|
|
|
25
25
|
- 'cmake/**'
|
|
26
26
|
- 'CMakeLists.txt'
|
|
27
27
|
- 'csrc/**'
|
|
28
|
+
types: [ labeled ]
|
|
28
29
|
push:
|
|
29
30
|
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
30
31
|
branches:
|
|
@@ -43,17 +44,28 @@ on:
|
|
|
43
44
|
- 'CMakeLists.txt'
|
|
44
45
|
- 'csrc/**'
|
|
45
46
|
|
|
47
|
+
# only cancel in-progress runs of the same workflow
|
|
48
|
+
concurrency:
|
|
49
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
50
|
+
cancel-in-progress: true
|
|
51
|
+
|
|
46
52
|
jobs:
|
|
47
53
|
build:
|
|
48
54
|
name: vllm-ascend image build
|
|
55
|
+
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
|
56
|
+
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
|
49
57
|
runs-on: >-
|
|
50
58
|
${{
|
|
51
59
|
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
|
52
60
|
'ubuntu-latest' ||
|
|
53
61
|
'ubuntu-24.04-arm'
|
|
54
62
|
}}
|
|
63
|
+
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
|
55
64
|
steps:
|
|
56
65
|
- uses: actions/checkout@v4
|
|
66
|
+
with:
|
|
67
|
+
fetch-depth: 0
|
|
68
|
+
persist-credentials: false
|
|
57
69
|
|
|
58
70
|
- name: Print
|
|
59
71
|
run: |
|
|
@@ -25,6 +25,7 @@ on:
|
|
|
25
25
|
- 'cmake/**'
|
|
26
26
|
- 'CMakeLists.txt'
|
|
27
27
|
- 'csrc/**'
|
|
28
|
+
types: [ labeled ]
|
|
28
29
|
push:
|
|
29
30
|
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
30
31
|
branches:
|
|
@@ -42,14 +43,25 @@ on:
|
|
|
42
43
|
- 'cmake/**'
|
|
43
44
|
- 'CMakeLists.txt'
|
|
44
45
|
- 'csrc/**'
|
|
46
|
+
|
|
47
|
+
# only cancel in-progress runs of the same workflow
|
|
48
|
+
concurrency:
|
|
49
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
50
|
+
cancel-in-progress: true
|
|
51
|
+
|
|
45
52
|
jobs:
|
|
46
53
|
|
|
47
54
|
build:
|
|
48
55
|
name: vllm-ascend image build
|
|
56
|
+
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
|
57
|
+
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
|
49
58
|
runs-on: ubuntu-latest
|
|
50
|
-
|
|
59
|
+
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
|
51
60
|
steps:
|
|
52
61
|
- uses: actions/checkout@v4
|
|
62
|
+
with:
|
|
63
|
+
fetch-depth: 0
|
|
64
|
+
persist-credentials: false
|
|
53
65
|
|
|
54
66
|
- name: Print
|
|
55
67
|
run: |
|
|
@@ -25,6 +25,7 @@ on:
|
|
|
25
25
|
- 'cmake/**'
|
|
26
26
|
- 'CMakeLists.txt'
|
|
27
27
|
- 'csrc/**'
|
|
28
|
+
types: [ labeled ]
|
|
28
29
|
push:
|
|
29
30
|
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
30
31
|
branches:
|
|
@@ -43,17 +44,28 @@ on:
|
|
|
43
44
|
- 'CMakeLists.txt'
|
|
44
45
|
- 'csrc/**'
|
|
45
46
|
|
|
47
|
+
# only cancel in-progress runs of the same workflow
|
|
48
|
+
concurrency:
|
|
49
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
50
|
+
cancel-in-progress: true
|
|
51
|
+
|
|
46
52
|
jobs:
|
|
47
53
|
build:
|
|
48
54
|
name: vllm-ascend image build
|
|
55
|
+
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
|
56
|
+
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
|
49
57
|
runs-on: >-
|
|
50
58
|
${{
|
|
51
59
|
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
|
52
60
|
'ubuntu-latest' ||
|
|
53
61
|
'ubuntu-24.04-arm'
|
|
54
62
|
}}
|
|
63
|
+
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
|
55
64
|
steps:
|
|
56
65
|
- uses: actions/checkout@v4
|
|
66
|
+
with:
|
|
67
|
+
fetch-depth: 0
|
|
68
|
+
persist-credentials: false
|
|
57
69
|
|
|
58
70
|
- name: Print
|
|
59
71
|
run: |
|
|
@@ -25,6 +25,7 @@ on:
|
|
|
25
25
|
- 'cmake/**'
|
|
26
26
|
- 'CMakeLists.txt'
|
|
27
27
|
- 'csrc/**'
|
|
28
|
+
types: [ labeled ]
|
|
28
29
|
push:
|
|
29
30
|
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
30
31
|
branches:
|
|
@@ -42,14 +43,25 @@ on:
|
|
|
42
43
|
- 'cmake/**'
|
|
43
44
|
- 'CMakeLists.txt'
|
|
44
45
|
- 'csrc/**'
|
|
46
|
+
|
|
47
|
+
# only cancel in-progress runs of the same workflow
|
|
48
|
+
concurrency:
|
|
49
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
50
|
+
cancel-in-progress: true
|
|
51
|
+
|
|
45
52
|
jobs:
|
|
46
53
|
|
|
47
54
|
build:
|
|
48
55
|
name: vllm-ascend image build
|
|
56
|
+
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
|
57
|
+
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
|
49
58
|
runs-on: ubuntu-latest
|
|
50
|
-
|
|
59
|
+
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
|
51
60
|
steps:
|
|
52
61
|
- uses: actions/checkout@v4
|
|
62
|
+
with:
|
|
63
|
+
fetch-depth: 0
|
|
64
|
+
persist-credentials: false
|
|
53
65
|
|
|
54
66
|
- name: Print
|
|
55
67
|
run: |
|
|
@@ -24,6 +24,7 @@ on:
|
|
|
24
24
|
- 'cmake/**'
|
|
25
25
|
- 'CMakeLists.txt'
|
|
26
26
|
- 'csrc/**'
|
|
27
|
+
types: [ labeled ]
|
|
27
28
|
push:
|
|
28
29
|
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
29
30
|
branches:
|
|
@@ -42,17 +43,28 @@ on:
|
|
|
42
43
|
- 'CMakeLists.txt'
|
|
43
44
|
- 'csrc/**'
|
|
44
45
|
|
|
46
|
+
# only cancel in-progress runs of the same workflow
|
|
47
|
+
concurrency:
|
|
48
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
49
|
+
cancel-in-progress: true
|
|
50
|
+
|
|
45
51
|
jobs:
|
|
46
52
|
build:
|
|
47
53
|
name: vllm-ascend image build
|
|
54
|
+
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
|
55
|
+
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
|
48
56
|
runs-on: >-
|
|
49
57
|
${{
|
|
50
58
|
github.event_name == 'push' && github.repository_owner == 'vllm-project' &&
|
|
51
59
|
'ubuntu-latest' ||
|
|
52
60
|
'ubuntu-24.04-arm'
|
|
53
61
|
}}
|
|
62
|
+
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
|
54
63
|
steps:
|
|
55
64
|
- uses: actions/checkout@v4
|
|
65
|
+
with:
|
|
66
|
+
fetch-depth: 0
|
|
67
|
+
persist-credentials: false
|
|
56
68
|
|
|
57
69
|
- name: Print
|
|
58
70
|
run: |
|
|
@@ -25,6 +25,7 @@ on:
|
|
|
25
25
|
- 'cmake/**'
|
|
26
26
|
- 'CMakeLists.txt'
|
|
27
27
|
- 'csrc/**'
|
|
28
|
+
types: [ labeled ]
|
|
28
29
|
push:
|
|
29
30
|
# Publish image when tagging, the Dockerfile in tag will be build as tag image
|
|
30
31
|
branches:
|
|
@@ -42,14 +43,25 @@ on:
|
|
|
42
43
|
- 'cmake/**'
|
|
43
44
|
- 'CMakeLists.txt'
|
|
44
45
|
- 'csrc/**'
|
|
46
|
+
|
|
47
|
+
# only cancel in-progress runs of the same workflow
|
|
48
|
+
concurrency:
|
|
49
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
50
|
+
cancel-in-progress: true
|
|
51
|
+
|
|
45
52
|
jobs:
|
|
46
53
|
|
|
47
54
|
build:
|
|
48
55
|
name: vllm-ascend image build
|
|
56
|
+
# Only arm64 build on openEuler arm64, only amd64 build on Ubuntu amd64
|
|
57
|
+
# Push event or PR with both 'ready' and 'ready-for-test' labels
|
|
49
58
|
runs-on: ubuntu-latest
|
|
50
|
-
|
|
59
|
+
if: ${{ github.event_name == 'push' || (contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test')) }}
|
|
51
60
|
steps:
|
|
52
61
|
- uses: actions/checkout@v4
|
|
62
|
+
with:
|
|
63
|
+
fetch-depth: 0
|
|
64
|
+
persist-credentials: false
|
|
53
65
|
|
|
54
66
|
- name: Print
|
|
55
67
|
run: |
|
|
@@ -9,7 +9,7 @@ jobs:
|
|
|
9
9
|
runs-on: ubuntu-latest
|
|
10
10
|
steps:
|
|
11
11
|
- name: Remind to run full CI on PR
|
|
12
|
-
uses: actions/github-script@
|
|
12
|
+
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
|
13
13
|
with:
|
|
14
14
|
script: |
|
|
15
15
|
github.rest.issues.createComment({
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
-
# This file is a part of the vllm-ascend project.
|
|
4
3
|
#
|
|
5
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
5
|
# you may not use this file except in compliance with the License.
|
|
@@ -13,13 +12,14 @@
|
|
|
13
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
13
|
# See the License for the specific language governing permissions and
|
|
15
14
|
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
16
|
#
|
|
17
|
-
|
|
17
|
+
|
|
18
|
+
name: 'e2e test / a3-test'
|
|
18
19
|
|
|
19
20
|
on:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
- cron: '0 23 * * *'
|
|
21
|
+
workflow_call:
|
|
22
|
+
|
|
23
23
|
pull_request:
|
|
24
24
|
types: [ labeled ]
|
|
25
25
|
|
|
@@ -31,26 +31,25 @@ defaults:
|
|
|
31
31
|
shell: bash -el {0}
|
|
32
32
|
|
|
33
33
|
# only cancel in-progress runs of the same workflow
|
|
34
|
+
# and ignore the lint / 8 cards test type
|
|
34
35
|
concurrency:
|
|
35
36
|
group: ${{ github.workflow }}-${{ github.ref }}
|
|
36
37
|
cancel-in-progress: true
|
|
37
38
|
|
|
38
39
|
jobs:
|
|
39
|
-
|
|
40
|
-
#
|
|
41
|
-
if: ${{ contains(github.event.pull_request.labels.*.name, '
|
|
40
|
+
e2e:
|
|
41
|
+
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
|
42
|
+
if: ${{ contains(github.event.pull_request.labels.*.name, 'dist-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'workflow_dispatch' }}
|
|
42
43
|
strategy:
|
|
43
|
-
max-parallel: 2
|
|
44
44
|
matrix:
|
|
45
|
-
os: [linux-aarch64-
|
|
46
|
-
vllm_version: [
|
|
47
|
-
name: vLLM Ascend
|
|
45
|
+
os: [linux-aarch64-a3-8]
|
|
46
|
+
vllm_version: [v0.10.2]
|
|
47
|
+
name: vLLM Ascend test
|
|
48
48
|
runs-on: ${{ matrix.os }}
|
|
49
49
|
container:
|
|
50
|
-
image:
|
|
50
|
+
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
|
51
51
|
env:
|
|
52
|
-
|
|
53
|
-
VLLM_USE_MODELSCOPE: True
|
|
52
|
+
DEBIAN_FRONTEND: noninteractive
|
|
54
53
|
steps:
|
|
55
54
|
- name: Check npu and CANN info
|
|
56
55
|
run: |
|
|
@@ -59,11 +58,11 @@ jobs:
|
|
|
59
58
|
|
|
60
59
|
- name: Config mirrors
|
|
61
60
|
run: |
|
|
62
|
-
sed -
|
|
63
|
-
pip config set global.index-url
|
|
64
|
-
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
61
|
+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
|
62
|
+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
|
65
63
|
apt-get update -y
|
|
66
64
|
apt install git -y
|
|
65
|
+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
|
|
67
66
|
|
|
68
67
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
69
68
|
uses: actions/checkout@v4
|
|
@@ -86,17 +85,16 @@ jobs:
|
|
|
86
85
|
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
87
86
|
|
|
88
87
|
- name: Install vllm-project/vllm-ascend
|
|
89
|
-
env:
|
|
90
|
-
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
91
88
|
run: |
|
|
89
|
+
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
90
|
+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
|
|
92
91
|
pip install -r requirements-dev.txt
|
|
93
92
|
pip install -v -e .
|
|
94
93
|
|
|
95
|
-
- name: Run vllm-project/vllm-ascend
|
|
94
|
+
- name: Run vllm-project/vllm-ascend test for V1 Engine
|
|
95
|
+
env:
|
|
96
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
97
|
+
VLLM_USE_MODELSCOPE: True
|
|
96
98
|
run: |
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
else
|
|
100
|
-
# accuracy test multi card
|
|
101
|
-
pytest -sv tests/e2e/long_term/accuracy/accuracy_multicard.py
|
|
102
|
-
fi
|
|
99
|
+
# TODO: enable more tests
|
|
100
|
+
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
|
|
@@ -25,7 +25,6 @@ on:
|
|
|
25
25
|
branches:
|
|
26
26
|
- 'main'
|
|
27
27
|
- '*-dev'
|
|
28
|
-
|
|
29
28
|
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
30
29
|
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
|
31
30
|
# It's used to activate ascend-toolkit environment variables.
|
|
@@ -44,6 +43,7 @@ jobs:
|
|
|
44
43
|
uses: ./.github/workflows/pre-commit.yml
|
|
45
44
|
|
|
46
45
|
changes:
|
|
46
|
+
if: github.event_name == 'pull_request'
|
|
47
47
|
runs-on: ubuntu-latest
|
|
48
48
|
outputs:
|
|
49
49
|
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
|
@@ -68,6 +68,7 @@ jobs:
|
|
|
68
68
|
- 'packages.txt'
|
|
69
69
|
ut_tracker:
|
|
70
70
|
- 'tests/ut/**'
|
|
71
|
+
|
|
71
72
|
ut:
|
|
72
73
|
needs: [lint, changes]
|
|
73
74
|
name: unit test
|
|
@@ -81,7 +82,7 @@ jobs:
|
|
|
81
82
|
VLLM_USE_MODELSCOPE: True
|
|
82
83
|
strategy:
|
|
83
84
|
matrix:
|
|
84
|
-
vllm_version: [
|
|
85
|
+
vllm_version: [v0.10.2]
|
|
85
86
|
steps:
|
|
86
87
|
- name: Install packages
|
|
87
88
|
run: |
|
|
@@ -117,7 +118,7 @@ jobs:
|
|
|
117
118
|
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
|
|
118
119
|
run: |
|
|
119
120
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
|
|
120
|
-
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut
|
|
121
|
+
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut --ignore=tests/ut/test_platform.py --ignore=tests/ut/ops/test_vocab_parallel_embedding.py
|
|
121
122
|
|
|
122
123
|
- name: Upload coverage to Codecov
|
|
123
124
|
if: ${{ matrix.vllm_version == 'main' }}
|
|
@@ -129,16 +130,16 @@ jobs:
|
|
|
129
130
|
name: vllm-ascend
|
|
130
131
|
verbose: true
|
|
131
132
|
|
|
132
|
-
e2e:
|
|
133
|
+
e2e-light:
|
|
133
134
|
needs: [lint, changes]
|
|
134
135
|
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
|
135
|
-
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
|
|
136
|
+
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
|
|
136
137
|
strategy:
|
|
137
138
|
max-parallel: 2
|
|
138
139
|
matrix:
|
|
139
140
|
os: [linux-aarch64-a2-1]
|
|
140
|
-
vllm_version: [
|
|
141
|
-
name: singlecard e2e test
|
|
141
|
+
vllm_version: [v0.10.2]
|
|
142
|
+
name: singlecard e2e test - light
|
|
142
143
|
runs-on: ${{ matrix.os }}
|
|
143
144
|
container:
|
|
144
145
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
|
@@ -191,37 +192,19 @@ jobs:
|
|
|
191
192
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
192
193
|
VLLM_USE_MODELSCOPE: True
|
|
193
194
|
run: |
|
|
194
|
-
pytest -sv tests/e2e/singlecard/
|
|
195
|
-
pytest -sv tests/e2e/singlecard/
|
|
196
|
-
pytest -sv tests/e2e/singlecard/
|
|
197
|
-
pytest -sv tests/e2e/singlecard/test_camem.py
|
|
198
|
-
pytest -sv tests/e2e/singlecard/test_embedding.py
|
|
199
|
-
|
|
200
|
-
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
|
201
|
-
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
|
202
|
-
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
|
|
203
|
-
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
|
195
|
+
pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
|
196
|
+
pytest -sv tests/e2e/singlecard/test_quantization.py
|
|
197
|
+
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
|
204
198
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
--ignore=tests/e2e/singlecard/test_ilama_lora.py \
|
|
209
|
-
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
|
|
210
|
-
--ignore=tests/e2e/singlecard/test_camem.py \
|
|
211
|
-
--ignore=tests/e2e/singlecard/test_embedding.py \
|
|
212
|
-
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
|
|
213
|
-
--ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py \
|
|
214
|
-
--ignore=tests/e2e/singlecard/test_offline_inference_310p.py \
|
|
215
|
-
--ignore=tests/e2e/singlecard/models/test_lm_eval_correctness.py
|
|
216
|
-
e2e-2-cards:
|
|
217
|
-
needs: [e2e]
|
|
218
|
-
if: ${{ needs.e2e.result == 'success' }}
|
|
199
|
+
e2e-2-cards-light:
|
|
200
|
+
needs: [e2e-light]
|
|
201
|
+
if: ${{ needs.e2e-light.result == 'success' }}
|
|
219
202
|
strategy:
|
|
220
203
|
max-parallel: 2
|
|
221
204
|
matrix:
|
|
222
205
|
os: [linux-aarch64-a2-2]
|
|
223
|
-
vllm_version: [
|
|
224
|
-
name: multicard e2e test
|
|
206
|
+
vllm_version: [v0.10.2]
|
|
207
|
+
name: multicard e2e test - light
|
|
225
208
|
runs-on: ${{ matrix.os }}
|
|
226
209
|
container:
|
|
227
210
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
|
@@ -274,19 +257,4 @@ jobs:
|
|
|
274
257
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
275
258
|
VLLM_USE_MODELSCOPE: True
|
|
276
259
|
run: |
|
|
277
|
-
pytest -sv tests/e2e/multicard/
|
|
278
|
-
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
|
|
279
|
-
# To avoid oom, we need to run the test in a single process.
|
|
280
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
|
|
281
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
|
282
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
|
|
283
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
|
|
284
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_alltoallv
|
|
285
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
|
|
286
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
|
|
287
|
-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
|
|
288
|
-
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
|
289
|
-
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
|
|
290
|
-
--ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
|
|
291
|
-
--ignore=tests/e2e/multicard/test_data_parallel.py \
|
|
292
|
-
--ignore=tests/e2e/multicard/test_offline_inference_310p.py
|
|
260
|
+
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
|
{vllm_ascend-0.10.0rc1 → vllm_ascend-0.10.2rc1}/.github/workflows/vllm_ascend_test_310p.yaml
RENAMED
|
@@ -53,7 +53,7 @@ jobs:
|
|
|
53
53
|
max-parallel: 2
|
|
54
54
|
matrix:
|
|
55
55
|
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
|
|
56
|
-
vllm_version: [
|
|
56
|
+
vllm_version: [v0.10.2]
|
|
57
57
|
name: 310p e2e test
|
|
58
58
|
runs-on: ${{ matrix.os }}
|
|
59
59
|
container:
|
|
@@ -111,7 +111,7 @@ jobs:
|
|
|
111
111
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
112
112
|
run: |
|
|
113
113
|
if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
|
|
114
|
-
pytest -sv tests/e2e/
|
|
114
|
+
pytest -sv tests/e2e/310p/test_offline_inference_310p.py
|
|
115
115
|
else
|
|
116
|
-
pytest -sv tests/e2e/
|
|
117
|
-
fi
|
|
116
|
+
pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
|
|
117
|
+
fi
|