vllm-ascend 0.10.2rc1__tar.gz → 0.11.0rc0__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +2 -0
- vllm_ascend-0.10.2rc1/.github/workflows/vllm_ascend_test_full.yaml → vllm_ascend-0.11.0rc0/.github/workflows/_e2e_test.yaml +54 -92
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/accuracy_test.yaml +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/format_pr_body.yaml +2 -8
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/label_merge_conflict.yml +0 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/labeler.yml +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/nightly_benchmarks.yaml +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/pre-commit.yml +6 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/release_code.yml +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/release_whl.yml +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_dist.yaml +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_doctest.yaml +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test.yaml +26 -128
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test_310p.yaml +1 -1
- vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full.yaml +79 -0
- vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml +51 -0
- vllm_ascend-0.11.0rc0/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +45 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.310p +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.310p.openEuler +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.a3 +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.a3.openEuler +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/Dockerfile.openEuler +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/PKG-INFO +3 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/README.md +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/README.zh.md +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/versioning_policy.md +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/conf.py +5 -5
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/adding_a_new_model.md +0 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/faqs.md +19 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +2 -6
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/index.md +2 -0
- vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_qwen3vl.md +156 -0
- vllm_ascend-0.11.0rc0/docs/source/tutorials/multi_node_ray.md +182 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_qwen3_next.md +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/additional_config.md +13 -3
- vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +94 -0
- vllm_ascend-0.11.0rc0/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/index.md +1 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/quantization.md +3 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/release_notes.md +24 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/README.md +2 -6
- vllm_ascend-0.11.0rc0/examples/disaggregated_prefill_v1/mooncake_connector_store_deployment_guide.md +272 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/run_dp_template.sh +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_disaggregated_prefill_npu.py +1 -1
- vllm_ascend-0.11.0rc0/examples/offline_weight_load.py +326 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/run_dp_server.sh +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/common.sh +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/conftest.py +8 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/doctests/001-quickstart-test.sh +3 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/doctests/002-pip-binary-installation-test.sh +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/model_utils.py +6 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml +6 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/Qwen3-8B-Base.yaml +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/configs/accuracy.txt +1 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/report_template.md +15 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/test_lm_eval_correctness.py +7 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_expert_parallel.py +16 -6
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_offline_inference_distributed.py +9 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_torchair_graph_mode.py +3 -0
- vllm_ascend-0.11.0rc0/tests/e2e/multicard/test_weight_loader.py +188 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/run_edge_case_test.sh +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/run_doctests.sh +0 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +2 -6
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +0 -4
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -4
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_guided_decoding.py +54 -23
- vllm_ascend-0.11.0rc0/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +103 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_vlm.py +0 -4
- vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/singlecard/test_sampler.py +36 -0
- vllm_ascend-0.11.0rc0/tests/e2e/vllm_interface/vllm_test.cfg +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_attention_v1.py +47 -10
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_mla_v1.py +35 -1
- vllm_ascend-0.11.0rc0/tests/ut/compilation/test_acl_graph.py +720 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/core/test_schedule_config.py +4 -16
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/core/test_scheduler.py +22 -1
- vllm_ascend-0.11.0rc0/tests/ut/eplb/adaptor/test_abstract_adaptor.py +73 -0
- vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_abstract.py +31 -0
- vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +98 -0
- vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +99 -0
- vllm_ascend-0.11.0rc0/tests/ut/eplb/core/policy/test_policy_factor.py +23 -0
- vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +122 -0
- vllm_ascend-0.11.0rc0/tests/ut/eplb/core/test_eplb_utils.py +79 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_mooncake_connector.py +28 -3
- vllm_ascend-0.11.0rc0/tests/ut/models/conftest.py +114 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_deepseek_mtp.py +7 -6
- vllm_ascend-0.11.0rc0/tests/ut/models/test_deepseek_v2.py +107 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_5_vl.py +5 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen3_moe.py +0 -30
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_common_fused_moe.py +2 -51
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_fused_moe_prepare_and_finalize.py +72 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_fused_ops.py +75 -129
- vllm_ascend-0.11.0rc0/tests/ut/ops/test_layernorm.py +161 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_linear.py +11 -20
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_moe_comm_method.py +24 -4
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_token_dispatcher.py +2 -99
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_vocab_parallel_embedding.py +4 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_quant_config.py +15 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w4a8_dynamic.py +95 -46
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_ascend_config.py +6 -29
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_platform.py +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_utils.py +2 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/models/test_torchair_deepseek_mtp.py +0 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/models/test_torchair_deepseek_v2.py +1 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/ops/test_torchair_fused_moe.py +15 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py +78 -45
- vllm_ascend-0.11.0rc0/tests/ut/torchair/test_torchair_attention.py +95 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/test_torchair_mla.py +17 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_model_runner_v1.py +39 -26
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_worker_v1.py +80 -7
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/__init__.py +2 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/_version.py +3 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ascend_config.py +18 -13
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ascend_forward_context.py +34 -24
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/attention_mask.py +33 -18
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/attention_v1.py +125 -73
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/mla_v1.py +78 -97
- vllm_ascend-0.11.0rc0/vllm_ascend/attention/sfa_v1.py +986 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/attention/utils.py +37 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/compilation/acl_graph.py +74 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/core/schedule_config.py +29 -1
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/core/scheduler.py +25 -9
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/__init__.py +5 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_connector.py +457 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +202 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager/metadata.py +269 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py +117 -4
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/config_data.py +447 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/kv_transfer.py +251 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_engine.py +489 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store.py +88 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +484 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/mooncake_connector.py +57 -17
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/envs.py +6 -1
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/abstract_adaptor.py +44 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor/vllm_adaptor.py +289 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +137 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_utils.py +135 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/eplb_worker.py +436 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_abstract.py +42 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +389 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +771 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_factory.py +33 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_flashlb.py +651 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy/policy_random.py +30 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/eplb_updator.py +205 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/eplb/utils.py +77 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/lora/punica_npu.py +3 -14
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/lora/utils.py +33 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/__init__.py +24 -22
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/deepseek_mtp.py +8 -23
- vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v2.py +666 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/layers/mla.py +49 -13
- vllm_ascend-0.11.0rc0/vllm_ascend/models/layers/sfa.py +233 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_5_vl.py +16 -17
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_5_vl_without_padding.py +277 -19
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen2_vl.py +17 -7
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/models/qwen3_moe.py +11 -130
- vllm_ascend-0.11.0rc0/vllm_ascend/models/qwen3_next.py +676 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/casual_conv1d.py +539 -597
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/common_fused_moe.py +111 -187
- vllm_ascend-0.11.0rc0/vllm_ascend/ops/fla.py +218 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/fused_moe.py +122 -240
- vllm_ascend-0.11.0rc0/vllm_ascend/ops/layernorm.py +159 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear.py +367 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/ops/linear_op.py +459 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/__init__.py +0 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +459 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/moe_comm_method.py +77 -102
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/moe_mlp.py +4 -4
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/token_dispatcher.py +55 -61
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/register_custom_ops.py +24 -15
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/sigmoid_gating.py +1 -20
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/vocab_parallel_embedding.py +13 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/__init__.py +12 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/__init__.py +5 -1
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_config.py +313 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/patch_mamba_config.py +4 -1
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_multimodal_merge.py +58 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/platform/patch_common/patch_transformers_utils.py +200 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/__init__.py +13 -1
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_layer.py +202 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attention_selector.py +181 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_attentionspec.py +110 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_triton.py +16 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/patch/worker/patch_common/patch_weight_loader.py +44 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/platform.py +70 -38
- vllm_ascend-0.11.0rc0/vllm_ascend/quantization/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/quant_config.py +79 -4
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w4a8_dynamic.py +69 -62
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w8a8_dynamic.py +8 -17
- vllm_ascend-0.11.0rc0/vllm_ascend/sample/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/sampler.py +20 -8
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/eagle_proposer.py +56 -26
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/mtp_proposer.py +39 -12
- vllm_ascend-0.11.0rc0/vllm_ascend/torchair/__init__.py +0 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/torchair/models/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/qwen2.py +3 -4
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/qwen3_moe.py +11 -5
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_mtp.py +1 -5
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_v2.py +266 -32
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_pangu_moe.py +9 -10
- vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/__init__.py +0 -0
- vllm_ascend-0.11.0rc0/vllm_ascend/torchair/ops/shared_weight_layer.py +245 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_fused_moe.py +122 -61
- vllm_ascend-0.11.0rc0/vllm_ascend/torchair/quantization/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py +58 -49
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +22 -9
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_attention.py +18 -15
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_mla.py +99 -113
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_model_runner.py +68 -40
- vllm_ascend-0.11.0rc0/vllm_ascend/torchair/torchair_sfa.py +1330 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/torchair_worker.py +22 -22
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/utils.py +5 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/utils.py +46 -13
- vllm_ascend-0.11.0rc0/vllm_ascend/worker/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/block_table.py +1 -2
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/model_runner_v1.py +918 -253
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/npu_input_batch.py +22 -7
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/worker/worker_v1.py +71 -24
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/PKG-INFO +3 -3
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/SOURCES.txt +65 -5
- vllm_ascend-0.10.2rc1/tests/ut/models/test_deepseek_v2.py +0 -294
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_ascend_forwad_context.py +0 -22
- vllm_ascend-0.10.2rc1/tests/ut/ops/test_layernorm.py +0 -93
- vllm_ascend-0.10.2rc1/vllm_ascend/models/deepseek_dbo.py +0 -1046
- vllm_ascend-0.10.2rc1/vllm_ascend/models/deepseek_v2.py +0 -965
- vllm_ascend-0.10.2rc1/vllm_ascend/models/deepseek_v3.py +0 -27
- vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3.py +0 -156
- vllm_ascend-0.10.2rc1/vllm_ascend/models/qwen3_next.py +0 -1361
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/fla.py +0 -381
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/layernorm.py +0 -116
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/linear.py +0 -626
- vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe/fused_moe_prepare_and_finalize.py +0 -240
- vllm_ascend-0.10.2rc1/vllm_ascend/patch/platform/patch_common/patch_shared_fused_moe.py +0 -21
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.gemini/config.yaml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/Dockerfile.buildwheel +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/actionlint.yaml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/format_pr_body.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/labeler.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_310p_openeuler.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_310p_ubuntu.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_a3_openeuler.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_a3_ubuntu.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_openeuler.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/image_ubuntu.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/matchers/ruff.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/reminder_comment.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.github/workflows/vllm_ascend_test_pd.yaml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.gitignore +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.pre-commit-config.yaml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/CMakeLists.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/CODE_OF_CONDUCT.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/CONTRIBUTING.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/DCO +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/LICENSE +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/README.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/requirements-bench.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/perf_result_template.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/latency-tests.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/serving-tests.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/benchmarks/tests/throughput-tests.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/codecov.yml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/collect_env.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/bgmv_expand.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/bgmv_shrink.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/sgmv_expand.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/sgmv_shrink.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/kernels/utils.h +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/ops.h +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/torch_binding.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/torch_binding_meta.cpp +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/csrc/utils.h +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/Makefile +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/README.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/requirements-docs.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/requirements-test.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/_templates/sections/header.html +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/assets/multi_node_dp_kimi.png +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/contributors.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/governance.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/user_stories/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/community/user_stories/llamafactory.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/contribution/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/contribution/testing.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_evalscope.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/feature_guide/patch.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/adding_a_new_multimodal_model.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/modeling/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/optimization_and_tuning.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/performance_benchmark.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/developer_guide/performance/profile_execute_duration.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/installation.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/performance_benchmark.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance/profile_execute_duration.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/quick_start.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node_kimi.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_node_pd_disaggregation.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_moge.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_quantization.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/multi_npu_qwen3_moe.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_node_300i.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_audio.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_multimodal.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_qwen3_embedding.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/tutorials/single_npu_qwen3_quantization.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/env_vars.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/configuration/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/graph_mode.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/lora.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/sleep_mode.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/index.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/supported_features.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/docs/source/user_guide/support_matrix/supported_models.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/gen_ranktable.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/gen_ranktable.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/disaggregated_prefill_v1/run_server.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/eplb/eplb_deepseek.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/eplb/eplb_strategy.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/README.md +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/external_online_dp/launch_online_dp.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_data_parallel.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_dualbatch_overlap_npu.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_embed.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_external_launcher.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_audio_language.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_npu.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_npu_tp2.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/offline_inference_sleep_mode_npu.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/examples/prompt_embedding_inference.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/format.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/mypy.ini +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/packages.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/pyproject.toml +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/requirements-dev.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/requirements-lint.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/requirements.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/setup.cfg +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/setup.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/310p/test_offline_inference_parallel_310p.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/models/conftest.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_data_parallel.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_external_launcher.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_pipeline_parallel.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_prefix_caching.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/multicard/test_qwen3_moe.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/setup_pd.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/test_edge_cases.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/pd_disaggreate/test_pd_e2e.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/prompts/example.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/run_disagg_pd.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_bgmv_expand.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_bgmv_shrink.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_fused_moe.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_gating_top_k_softmax.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_rotary_embedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_aclgraph.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_ascend_scheduler.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_camem.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_chunked.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_embedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_ilama_lora.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_quantization.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/singlecard/test_sampler.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/e2e/utils.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/attention/test_attention_mask.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/base.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/conftest.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/device_allocator/test_camem.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/test_communicator.py +0 -0
- /vllm_ascend-0.10.2rc1/tests/ut/models/__init__.py → /vllm_ascend-0.11.0rc0/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/distributed/test_parallel_state.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/fake_weight/config.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_llmdatadist_connector.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/kv_connector/utils.py +0 -0
- {vllm_ascend-0.10.2rc1/tests/ut/torchair → vllm_ascend-0.11.0rc0/tests/ut/models}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_5_vl_without_padding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/models/test_qwen2_vl.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_base.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_decorator.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_layers.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_metadata.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/multistream/test_ms_split.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/expert_map.json +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_activation.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_comm_utils.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_expert_load_balancer.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/ops/test_rotary_embedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_utils.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w8a8.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/quantization/test_w8a8_dynamic.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/logits_processor/test_builtin.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/test_rejection_sampler.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/sample/test_sampler.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/test_envs.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/attention → vllm_ascend-0.11.0rc0/tests/ut/torchair}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/ops/test_torchair_rotary_embedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/torchair/test_utils.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tests/ut/worker/test_input_batch.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/check_python_src_init.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/enforce_regex_import.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/mypy.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/shellcheck.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/typos.toml +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/compilation → vllm_ascend-0.11.0rc0/vllm_ascend/attention}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/core → vllm_ascend-0.11.0rc0/vllm_ascend/compilation}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/device_allocator → vllm_ascend-0.11.0rc0/vllm_ascend/core}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.11.0rc0/vllm_ascend/device_allocator}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/device_allocator/camem.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/lora → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/models/layers → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/multistream → vllm_ascend-0.11.0rc0/vllm_ascend/distributed/mooncake}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/distributed/parallel_state.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/ops/moe → vllm_ascend-0.11.0rc0/vllm_ascend/eplb}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/quantization → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/sample → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/torchair → vllm_ascend-0.11.0rc0/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/torchair/models → vllm_ascend-0.11.0rc0/vllm_ascend/lora}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/lora/lora_ops.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/meta_registration.py +0 -0
- /vllm_ascend-0.10.2rc1/vllm_ascend/torchair/ops/__init__.py → /vllm_ascend-0.11.0rc0/vllm_ascend/models/deepseek_v3.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/torchair/quantization → vllm_ascend-0.11.0rc0/vllm_ascend/models/layers}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend/worker → vllm_ascend-0.11.0rc0/vllm_ascend/multistream}/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/base.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/context.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/decorator.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/layers.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/metadata.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/multistream/ms_split.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/activation.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/attention.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/expert_load_balancer.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/comm_utils.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/moe/experts_selector.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/ops/rotary_embedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_common/patch_distributed.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/platform/patch_main/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_distributed.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_logits.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_common/patch_minicpm.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/patch/worker/patch_main/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/utils.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/quantization/w8a8.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/logits_processor/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/logits_processor/builtin.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/sample/rejection_sampler.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/__init__.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/interface.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/spec_decode/ngram_proposer.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/models/torchair_deepseek_v3.py +0 -0
- {vllm_ascend-0.10.2rc1/vllm_ascend → vllm_ascend-0.11.0rc0/vllm_ascend/torchair}/ops/sequence_parallel.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_activation.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_layernorm.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend/torchair/ops/torchair_rotary_embedding.py +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/requires.txt +0 -0
- {vllm_ascend-0.10.2rc1 → vllm_ascend-0.11.0rc0}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -1,82 +1,27 @@
|
|
|
1
|
-
|
|
2
|
-
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
-
#
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
8
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
# This file is a part of the vllm-ascend project.
|
|
16
|
-
#
|
|
17
|
-
name: 'test-full'
|
|
1
|
+
name: 'e2e test'
|
|
18
2
|
|
|
19
3
|
on:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# and ignore the lint / 1 card / 4 cards test type
|
|
35
|
-
concurrency:
|
|
36
|
-
group: ${{ github.workflow }}-${{ github.ref }}
|
|
37
|
-
cancel-in-progress: true
|
|
4
|
+
workflow_call:
|
|
5
|
+
inputs:
|
|
6
|
+
vllm:
|
|
7
|
+
required: true
|
|
8
|
+
type: string
|
|
9
|
+
runner:
|
|
10
|
+
required: true
|
|
11
|
+
type: string
|
|
12
|
+
image:
|
|
13
|
+
required: true
|
|
14
|
+
type: string
|
|
15
|
+
type:
|
|
16
|
+
required: true
|
|
17
|
+
type: string
|
|
38
18
|
|
|
39
19
|
jobs:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
outputs:
|
|
44
|
-
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
|
45
|
-
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
|
|
46
|
-
steps:
|
|
47
|
-
- uses: actions/checkout@v4
|
|
48
|
-
- uses: dorny/paths-filter@v3
|
|
49
|
-
id: filter
|
|
50
|
-
with:
|
|
51
|
-
filters: |
|
|
52
|
-
e2e_tracker:
|
|
53
|
-
- '.github/workflows/vllm_ascend_test.yaml'
|
|
54
|
-
- 'vllm_ascend/**'
|
|
55
|
-
- 'csrc/**'
|
|
56
|
-
- 'cmake/**'
|
|
57
|
-
- 'tests/e2e/**'
|
|
58
|
-
- 'CMakeLists.txt'
|
|
59
|
-
- 'setup.py'
|
|
60
|
-
- 'requirements.txt'
|
|
61
|
-
- 'requirements-dev.txt'
|
|
62
|
-
- 'requirements-lint.txt'
|
|
63
|
-
- 'packages.txt'
|
|
64
|
-
ut_tracker:
|
|
65
|
-
- 'tests/ut/**'
|
|
66
|
-
|
|
67
|
-
e2e-full:
|
|
68
|
-
# only trigger full test when pull request is approved
|
|
69
|
-
needs: [changes]
|
|
70
|
-
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
|
71
|
-
strategy:
|
|
72
|
-
max-parallel: 2
|
|
73
|
-
matrix:
|
|
74
|
-
os: [linux-aarch64-a2-1]
|
|
75
|
-
vllm_version: [v0.10.2]
|
|
76
|
-
name: singlecard e2e test - full
|
|
77
|
-
runs-on: ${{ matrix.os }}
|
|
20
|
+
e2e:
|
|
21
|
+
name: singlecard
|
|
22
|
+
runs-on: ${{ inputs.runner }}-1
|
|
78
23
|
container:
|
|
79
|
-
image:
|
|
24
|
+
image: ${{ inputs.image }}
|
|
80
25
|
env:
|
|
81
26
|
VLLM_LOGGING_LEVEL: ERROR
|
|
82
27
|
VLLM_USE_MODELSCOPE: True
|
|
@@ -106,8 +51,9 @@ jobs:
|
|
|
106
51
|
uses: actions/checkout@v4
|
|
107
52
|
with:
|
|
108
53
|
repository: vllm-project/vllm
|
|
109
|
-
ref: ${{
|
|
54
|
+
ref: ${{ inputs.vllm }}
|
|
110
55
|
path: ./vllm-empty
|
|
56
|
+
fetch-depth: 1
|
|
111
57
|
|
|
112
58
|
- name: Install vllm-project/vllm from source
|
|
113
59
|
working-directory: ./vllm-empty
|
|
@@ -121,10 +67,23 @@ jobs:
|
|
|
121
67
|
pip install -r requirements-dev.txt
|
|
122
68
|
pip install -v -e .
|
|
123
69
|
|
|
70
|
+
- name: Run vllm-project/vllm-ascend test
|
|
71
|
+
env:
|
|
72
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
73
|
+
VLLM_USE_MODELSCOPE: True
|
|
74
|
+
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
75
|
+
if: ${{ inputs.type == 'light' }}
|
|
76
|
+
run: |
|
|
77
|
+
pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
|
78
|
+
pytest -sv tests/e2e/singlecard/test_quantization.py
|
|
79
|
+
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
|
80
|
+
|
|
124
81
|
- name: Run e2e test
|
|
125
82
|
env:
|
|
126
83
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
127
84
|
VLLM_USE_MODELSCOPE: True
|
|
85
|
+
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
|
86
|
+
if: ${{ inputs.type == 'full' }}
|
|
128
87
|
run: |
|
|
129
88
|
# We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
|
|
130
89
|
# the test separately.
|
|
@@ -135,7 +94,7 @@ jobs:
|
|
|
135
94
|
pytest -sv tests/e2e/singlecard/test_chunked.py
|
|
136
95
|
pytest -sv tests/e2e/singlecard/test_embedding.py
|
|
137
96
|
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
|
|
138
|
-
|
|
97
|
+
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
|
|
139
98
|
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
|
|
140
99
|
pytest -sv tests/e2e/singlecard/test_quantization.py
|
|
141
100
|
pytest -sv tests/e2e/singlecard/test_sampler.py
|
|
@@ -144,23 +103,16 @@ jobs:
|
|
|
144
103
|
# ------------------------------------ v1 spec decode test ------------------------------------ #
|
|
145
104
|
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
|
|
146
105
|
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
|
|
147
|
-
|
|
106
|
+
# Fix me: OOM error
|
|
107
|
+
#pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
|
|
148
108
|
|
|
149
109
|
pytest -sv tests/e2e/singlecard/ops/
|
|
150
110
|
|
|
151
|
-
e2e-2-cards
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
|
155
|
-
strategy:
|
|
156
|
-
max-parallel: 2
|
|
157
|
-
matrix:
|
|
158
|
-
os: [linux-aarch64-a2-2]
|
|
159
|
-
vllm_version: [v0.10.2]
|
|
160
|
-
name: multicard e2e test - full
|
|
161
|
-
runs-on: ${{ matrix.os }}
|
|
111
|
+
e2e-2-cards:
|
|
112
|
+
name: multicard
|
|
113
|
+
runs-on: ${{ inputs.runner }}-2
|
|
162
114
|
container:
|
|
163
|
-
image:
|
|
115
|
+
image: ${{ inputs.image }}
|
|
164
116
|
env:
|
|
165
117
|
VLLM_LOGGING_LEVEL: ERROR
|
|
166
118
|
VLLM_USE_MODELSCOPE: True
|
|
@@ -190,8 +142,9 @@ jobs:
|
|
|
190
142
|
uses: actions/checkout@v4
|
|
191
143
|
with:
|
|
192
144
|
repository: vllm-project/vllm
|
|
193
|
-
ref: ${{
|
|
145
|
+
ref: ${{ inputs.vllm }}
|
|
194
146
|
path: ./vllm-empty
|
|
147
|
+
fetch-depth: 1
|
|
195
148
|
|
|
196
149
|
- name: Install vllm-project/vllm from source
|
|
197
150
|
working-directory: ./vllm-empty
|
|
@@ -205,17 +158,26 @@ jobs:
|
|
|
205
158
|
pip install -r requirements-dev.txt
|
|
206
159
|
pip install -v -e .
|
|
207
160
|
|
|
208
|
-
- name: Run vllm-project/vllm-ascend test
|
|
161
|
+
- name: Run vllm-project/vllm-ascend test (light)
|
|
162
|
+
env:
|
|
163
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
164
|
+
VLLM_USE_MODELSCOPE: True
|
|
165
|
+
if: ${{ inputs.type == 'light' }}
|
|
166
|
+
run: |
|
|
167
|
+
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
|
|
168
|
+
|
|
169
|
+
- name: Run vllm-project/vllm-ascend test (full)
|
|
209
170
|
env:
|
|
210
171
|
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
211
172
|
VLLM_USE_MODELSCOPE: True
|
|
173
|
+
if: ${{ inputs.type == 'full' }}
|
|
212
174
|
run: |
|
|
213
175
|
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
|
214
176
|
pytest -sv tests/e2e/multicard/test_expert_parallel.py
|
|
215
177
|
# external_launcher test is not stable enough. Fix it later
|
|
216
178
|
# pytest -sv tests/e2e/multicard/test_external_launcher.py
|
|
217
179
|
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
|
|
218
|
-
|
|
180
|
+
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
|
|
219
181
|
|
|
220
182
|
# To avoid oom, we need to run the test in a single process.
|
|
221
183
|
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
# 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
|
|
20
20
|
# 2. workflow_dispatch with models input
|
|
21
21
|
# See detail rule in strategy.matrix note
|
|
22
|
-
name:
|
|
22
|
+
name: ascend test / accuracy
|
|
23
23
|
|
|
24
24
|
on:
|
|
25
25
|
schedule:
|
|
@@ -112,7 +112,7 @@ jobs:
|
|
|
112
112
|
uses: actions/checkout@v4
|
|
113
113
|
with:
|
|
114
114
|
repository: vllm-project/vllm
|
|
115
|
-
ref: v0.
|
|
115
|
+
ref: v0.11.0rc3
|
|
116
116
|
path: ./vllm-empty
|
|
117
117
|
|
|
118
118
|
- name: Install vllm-project/vllm from source
|
|
@@ -33,23 +33,17 @@ jobs:
|
|
|
33
33
|
runs-on: ubuntu-latest
|
|
34
34
|
|
|
35
35
|
steps:
|
|
36
|
-
- name: Checkout vllm-project/vllm repo
|
|
37
|
-
uses: actions/checkout@v4
|
|
38
|
-
with:
|
|
39
|
-
repository: vllm-project/vllm
|
|
40
|
-
path: ./vllm-empty
|
|
41
36
|
|
|
42
37
|
- name: Get vLLM version
|
|
43
|
-
working-directory: ./vllm-empty
|
|
44
38
|
run: |
|
|
45
|
-
VLLM_COMMIT
|
|
39
|
+
VLLM_COMMIT=releases/v0.11.0
|
|
46
40
|
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
|
|
47
41
|
|
|
48
42
|
- name: Checkout repository
|
|
49
43
|
uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
|
|
50
44
|
|
|
51
45
|
- name: Set up Python
|
|
52
|
-
uses: actions/setup-python@
|
|
46
|
+
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
|
53
47
|
|
|
54
48
|
- name: Get vLLM release version
|
|
55
49
|
run: |
|
|
@@ -16,6 +16,5 @@ jobs:
|
|
|
16
16
|
uses: eps1lon/actions-label-merge-conflict@v3
|
|
17
17
|
with:
|
|
18
18
|
dirtyLabel: "merge-conflicts"
|
|
19
|
-
removeOnDirtyLabel: "ready"
|
|
20
19
|
repoToken: "${{ secrets.GITHUB_TOKEN }}"
|
|
21
20
|
commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# limitations under the License.
|
|
16
16
|
#
|
|
17
17
|
|
|
18
|
-
name: '
|
|
18
|
+
name: 'ascend test / performance'
|
|
19
19
|
# This workflow runs nightly benchmarks for vllm-ascend.
|
|
20
20
|
|
|
21
21
|
on:
|
|
@@ -51,7 +51,7 @@ jobs:
|
|
|
51
51
|
strategy:
|
|
52
52
|
matrix:
|
|
53
53
|
include:
|
|
54
|
-
- vllm_branch: v0.
|
|
54
|
+
- vllm_branch: v0.11.0rc3
|
|
55
55
|
vllm_ascend_branch: main
|
|
56
56
|
vllm_use_v1: 1
|
|
57
57
|
max-parallel: 1
|
|
@@ -2,6 +2,10 @@ name: pre-commit
|
|
|
2
2
|
|
|
3
3
|
on:
|
|
4
4
|
workflow_call:
|
|
5
|
+
inputs:
|
|
6
|
+
vllm:
|
|
7
|
+
required: true
|
|
8
|
+
type: string
|
|
5
9
|
|
|
6
10
|
permissions:
|
|
7
11
|
contents: read
|
|
@@ -12,7 +16,7 @@ jobs:
|
|
|
12
16
|
steps:
|
|
13
17
|
- name: Checkout vllm-project/vllm-ascend repo
|
|
14
18
|
uses: actions/checkout@v4
|
|
15
|
-
- uses: actions/setup-python@
|
|
19
|
+
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
|
16
20
|
with:
|
|
17
21
|
python-version: "3.11"
|
|
18
22
|
- run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
|
|
@@ -22,6 +26,7 @@ jobs:
|
|
|
22
26
|
with:
|
|
23
27
|
repository: vllm-project/vllm
|
|
24
28
|
path: ./vllm-empty
|
|
29
|
+
ref: ${{ inputs.vllm }}
|
|
25
30
|
- name: Install vllm
|
|
26
31
|
working-directory: vllm-empty
|
|
27
32
|
run: |
|
|
@@ -50,7 +50,7 @@ jobs:
|
|
|
50
50
|
lscpu
|
|
51
51
|
|
|
52
52
|
- name: Set up Python ${{ matrix.python-version }}
|
|
53
|
-
uses: actions/setup-python@
|
|
53
|
+
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
|
54
54
|
with:
|
|
55
55
|
python-version: ${{ matrix.python-version }}
|
|
56
56
|
|
|
@@ -73,7 +73,7 @@ jobs:
|
|
|
73
73
|
|
|
74
74
|
- name: Set up Python ${{ matrix.python-version }}
|
|
75
75
|
if: startsWith(github.ref, 'refs/tags/')
|
|
76
|
-
uses: actions/setup-python@
|
|
76
|
+
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
|
|
77
77
|
with:
|
|
78
78
|
python-version: ${{ matrix.python-version }}
|
|
79
79
|
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
# This file is a part of the vllm-ascend project.
|
|
16
16
|
#
|
|
17
17
|
|
|
18
|
-
name: 'test'
|
|
18
|
+
name: 'ascend test'
|
|
19
19
|
|
|
20
20
|
on:
|
|
21
21
|
push:
|
|
@@ -41,9 +41,10 @@ concurrency:
|
|
|
41
41
|
jobs:
|
|
42
42
|
lint:
|
|
43
43
|
uses: ./.github/workflows/pre-commit.yml
|
|
44
|
+
with:
|
|
45
|
+
vllm: releases/v0.11.0
|
|
44
46
|
|
|
45
47
|
changes:
|
|
46
|
-
if: github.event_name == 'pull_request'
|
|
47
48
|
runs-on: ubuntu-latest
|
|
48
49
|
outputs:
|
|
49
50
|
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
|
@@ -82,7 +83,7 @@ jobs:
|
|
|
82
83
|
VLLM_USE_MODELSCOPE: True
|
|
83
84
|
strategy:
|
|
84
85
|
matrix:
|
|
85
|
-
vllm_version: [v0.
|
|
86
|
+
vllm_version: [releases/v0.11.0, v0.11.0rc3]
|
|
86
87
|
steps:
|
|
87
88
|
- name: Install packages
|
|
88
89
|
run: |
|
|
@@ -118,10 +119,20 @@ jobs:
|
|
|
118
119
|
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
|
|
119
120
|
run: |
|
|
120
121
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
|
|
121
|
-
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut
|
|
122
|
+
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
|
|
123
|
+
--ignore=tests/ut/test_platform.py \
|
|
124
|
+
--ignore=tests/ut/patch/worker/patch_common/test_patch_minicpm.py \
|
|
125
|
+
--ignore=tests/ut/core/test_scheduler.py \
|
|
126
|
+
--ignore=tests/ut/kv_connector/test_llmdatadist_connector.py \
|
|
127
|
+
--ignore=tests/ut/kv_connector/test_mooncake_connector.py \
|
|
128
|
+
--ignore=tests/ut/kv_connector/test_remote_decode_lifecycle.py \
|
|
129
|
+
--ignore=tests/ut/kv_connector/test_remote_prefill_lifecycle.py \
|
|
130
|
+
--ignore=tests/ut/torchair/models/test_torchair_deepseek_v2.py \
|
|
131
|
+
--ignore=tests/ut/torchair/test_utils.py
|
|
122
132
|
|
|
123
133
|
- name: Upload coverage to Codecov
|
|
124
|
-
|
|
134
|
+
# only upload coverage when commits merged
|
|
135
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
125
136
|
uses: codecov/codecov-action@v5
|
|
126
137
|
env:
|
|
127
138
|
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
|
@@ -131,130 +142,17 @@ jobs:
|
|
|
131
142
|
verbose: true
|
|
132
143
|
|
|
133
144
|
e2e-light:
|
|
145
|
+
name: e2e-light
|
|
146
|
+
strategy:
|
|
147
|
+
matrix:
|
|
148
|
+
vllm_version: [releases/v0.11.0, v0.11.0rc3]
|
|
149
|
+
# Note (yikun): If CI resource are limited we can split job into two chain jobs
|
|
134
150
|
needs: [lint, changes]
|
|
135
151
|
# only trigger e2e test after lint passed and the change is e2e related with pull request.
|
|
136
152
|
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
matrix
|
|
140
|
-
|
|
141
|
-
vllm_version: [v0.10.2]
|
|
142
|
-
name: singlecard e2e test - light
|
|
143
|
-
runs-on: ${{ matrix.os }}
|
|
144
|
-
container:
|
|
145
|
-
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
|
146
|
-
env:
|
|
147
|
-
VLLM_LOGGING_LEVEL: ERROR
|
|
148
|
-
VLLM_USE_MODELSCOPE: True
|
|
149
|
-
steps:
|
|
150
|
-
- name: Check npu and CANN info
|
|
151
|
-
run: |
|
|
152
|
-
npu-smi info
|
|
153
|
-
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
154
|
-
|
|
155
|
-
- name: Config mirrors
|
|
156
|
-
run: |
|
|
157
|
-
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
158
|
-
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
159
|
-
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
160
|
-
apt-get update -y
|
|
161
|
-
apt install git -y
|
|
162
|
-
|
|
163
|
-
- name: Checkout vllm-project/vllm-ascend repo
|
|
164
|
-
uses: actions/checkout@v4
|
|
165
|
-
|
|
166
|
-
- name: Install system dependencies
|
|
167
|
-
run: |
|
|
168
|
-
apt-get -y install `cat packages.txt`
|
|
169
|
-
apt-get -y install gcc g++ cmake libnuma-dev
|
|
170
|
-
|
|
171
|
-
- name: Checkout vllm-project/vllm repo
|
|
172
|
-
uses: actions/checkout@v4
|
|
173
|
-
with:
|
|
174
|
-
repository: vllm-project/vllm
|
|
175
|
-
ref: ${{ matrix.vllm_version }}
|
|
176
|
-
path: ./vllm-empty
|
|
177
|
-
|
|
178
|
-
- name: Install vllm-project/vllm from source
|
|
179
|
-
working-directory: ./vllm-empty
|
|
180
|
-
run: |
|
|
181
|
-
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
182
|
-
|
|
183
|
-
- name: Install vllm-project/vllm-ascend
|
|
184
|
-
env:
|
|
185
|
-
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
186
|
-
run: |
|
|
187
|
-
pip install -r requirements-dev.txt
|
|
188
|
-
pip install -v -e .
|
|
189
|
-
|
|
190
|
-
- name: Run e2e test
|
|
191
|
-
env:
|
|
192
|
-
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
193
|
-
VLLM_USE_MODELSCOPE: True
|
|
194
|
-
run: |
|
|
195
|
-
pytest -sv tests/e2e/singlecard/test_aclgraph.py
|
|
196
|
-
pytest -sv tests/e2e/singlecard/test_quantization.py
|
|
197
|
-
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
|
|
198
|
-
|
|
199
|
-
e2e-2-cards-light:
|
|
200
|
-
needs: [e2e-light]
|
|
201
|
-
if: ${{ needs.e2e-light.result == 'success' }}
|
|
202
|
-
strategy:
|
|
203
|
-
max-parallel: 2
|
|
204
|
-
matrix:
|
|
205
|
-
os: [linux-aarch64-a2-2]
|
|
206
|
-
vllm_version: [v0.10.2]
|
|
207
|
-
name: multicard e2e test - light
|
|
208
|
-
runs-on: ${{ matrix.os }}
|
|
209
|
-
container:
|
|
153
|
+
uses: ./.github/workflows/_e2e_test.yaml
|
|
154
|
+
with:
|
|
155
|
+
vllm: ${{ matrix.vllm_version }}
|
|
156
|
+
runner: linux-aarch64-a2
|
|
210
157
|
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
|
211
|
-
|
|
212
|
-
VLLM_LOGGING_LEVEL: ERROR
|
|
213
|
-
VLLM_USE_MODELSCOPE: True
|
|
214
|
-
steps:
|
|
215
|
-
- name: Check npu and CANN info
|
|
216
|
-
run: |
|
|
217
|
-
npu-smi info
|
|
218
|
-
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
219
|
-
|
|
220
|
-
- name: Config mirrors
|
|
221
|
-
run: |
|
|
222
|
-
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
|
|
223
|
-
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
|
|
224
|
-
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
|
|
225
|
-
apt-get update -y
|
|
226
|
-
apt install git -y
|
|
227
|
-
|
|
228
|
-
- name: Checkout vllm-project/vllm-ascend repo
|
|
229
|
-
uses: actions/checkout@v4
|
|
230
|
-
|
|
231
|
-
- name: Install system dependencies
|
|
232
|
-
run: |
|
|
233
|
-
apt-get -y install `cat packages.txt`
|
|
234
|
-
apt-get -y install gcc g++ cmake libnuma-dev
|
|
235
|
-
|
|
236
|
-
- name: Checkout vllm-project/vllm repo
|
|
237
|
-
uses: actions/checkout@v4
|
|
238
|
-
with:
|
|
239
|
-
repository: vllm-project/vllm
|
|
240
|
-
ref: ${{ matrix.vllm_version }}
|
|
241
|
-
path: ./vllm-empty
|
|
242
|
-
|
|
243
|
-
- name: Install vllm-project/vllm from source
|
|
244
|
-
working-directory: ./vllm-empty
|
|
245
|
-
run: |
|
|
246
|
-
VLLM_TARGET_DEVICE=empty pip install -e .
|
|
247
|
-
|
|
248
|
-
- name: Install vllm-project/vllm-ascend
|
|
249
|
-
env:
|
|
250
|
-
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
|
|
251
|
-
run: |
|
|
252
|
-
pip install -r requirements-dev.txt
|
|
253
|
-
pip install -v -e .
|
|
254
|
-
|
|
255
|
-
- name: Run vllm-project/vllm-ascend test
|
|
256
|
-
env:
|
|
257
|
-
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
258
|
-
VLLM_USE_MODELSCOPE: True
|
|
259
|
-
run: |
|
|
260
|
-
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
|
|
158
|
+
type: light
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
name: 'ascend test / full'
|
|
18
|
+
|
|
19
|
+
on:
|
|
20
|
+
pull_request:
|
|
21
|
+
branches:
|
|
22
|
+
- 'main'
|
|
23
|
+
- '*-dev'
|
|
24
|
+
types: [ labeled, synchronize ]
|
|
25
|
+
|
|
26
|
+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
27
|
+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
|
28
|
+
# It's used to activate ascend-toolkit environment variables.
|
|
29
|
+
defaults:
|
|
30
|
+
run:
|
|
31
|
+
shell: bash -el {0}
|
|
32
|
+
|
|
33
|
+
# only cancel in-progress runs of the same workflow
|
|
34
|
+
# and ignore the lint / 1 card / 4 cards test type
|
|
35
|
+
concurrency:
|
|
36
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
37
|
+
cancel-in-progress: true
|
|
38
|
+
|
|
39
|
+
jobs:
|
|
40
|
+
changes:
|
|
41
|
+
runs-on: ubuntu-latest
|
|
42
|
+
if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
|
|
43
|
+
outputs:
|
|
44
|
+
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
|
|
45
|
+
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
|
|
46
|
+
steps:
|
|
47
|
+
- uses: actions/checkout@v4
|
|
48
|
+
- uses: dorny/paths-filter@v3
|
|
49
|
+
id: filter
|
|
50
|
+
with:
|
|
51
|
+
filters: |
|
|
52
|
+
e2e_tracker:
|
|
53
|
+
- '.github/workflows/vllm_ascend_test.yaml'
|
|
54
|
+
- 'vllm_ascend/**'
|
|
55
|
+
- 'csrc/**'
|
|
56
|
+
- 'cmake/**'
|
|
57
|
+
- 'tests/e2e/**'
|
|
58
|
+
- 'CMakeLists.txt'
|
|
59
|
+
- 'setup.py'
|
|
60
|
+
- 'requirements.txt'
|
|
61
|
+
- 'requirements-dev.txt'
|
|
62
|
+
- 'requirements-lint.txt'
|
|
63
|
+
- 'packages.txt'
|
|
64
|
+
ut_tracker:
|
|
65
|
+
- 'tests/ut/**'
|
|
66
|
+
|
|
67
|
+
e2e-test:
|
|
68
|
+
name: e2e-full
|
|
69
|
+
strategy:
|
|
70
|
+
matrix:
|
|
71
|
+
vllm_version: [releases/v0.11.0, v0.11.0rc3]
|
|
72
|
+
needs: [changes]
|
|
73
|
+
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
|
|
74
|
+
uses: ./.github/workflows/_e2e_test.yaml
|
|
75
|
+
with:
|
|
76
|
+
vllm: ${{ matrix.vllm_version }}
|
|
77
|
+
runner: linux-aarch64-a2
|
|
78
|
+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
|
79
|
+
type: full
|