PyPI - vllm-ascend - Versions diffs - 0.9.0rc2__tar.gz → 0.9.1rc2__tar.gz - Mend

vllm-ascend 0.9.0rc2__tar.gz → 0.9.1rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (389) hide show

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/Dockerfile.buildwheel RENAMED Viewed

@@ -14,18 +14,17 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #
-ARG PY_VERSION=3.10
-FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py${PY_VERSION}
+ARG PY_VERSION=3.11
+FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
 ARG COMPILE_CUSTOM_KERNELS=1
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
-RUN apt-get update -y && \
-    apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
-    rm -rf /var/cache/apt/* && \
-    rm -rf /var/lib/apt/lists/*
+RUN yum update -y && \
+    yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
+    rm -rf /var/cache/yum
 WORKDIR /workspace
@@ -41,8 +40,6 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
     cd vllm-ascend && \
     python3 setup.py bdist_wheel && \
-    ls -l dist && \
-    for f in dist/*.whl; do mv "$f" "$(echo "$f" | sed -e 's/-linux_x86_64\.whl$/-manylinux1_x86_64.whl/' -e 's/-linux_aarch64\.whl$/-manylinux2014_aarch64.whl/')"; done && \
     ls -l dist
 CMD ["/bin/bash"]

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/dependabot.yml RENAMED Viewed

@@ -2,9 +2,6 @@ version: 2
 updates:
   - package-ecosystem: "github-actions"
     directory: "/"
-    schedule:
-      # Check for updates to GitHub Actions every week
-      interval: "weekly"
     open-pull-requests-limit: 2
     reviewers:
       - "Yikun"

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/accuracy_test.yaml RENAMED Viewed

@@ -34,8 +34,7 @@ on:
         # Current supported vLLM versions
         options:
           - main
-          - v0.9.0.1
-          - v0.9.0
+          - v0.9.1
           - v0.7.3
       vllm-ascend-version:
         description: 'vllm-ascend version:'
@@ -118,7 +117,7 @@ jobs:
       fail-fast: false
     name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       env:
         HF_ENDPOINT: https://hf-mirror.com
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -159,7 +158,7 @@ jobs:
           repository: vllm-project/vllm
           path: ./vllm-empty
           # Please also update this when bump matched version
-          ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
+          ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}
       - name: Install vllm-project/vllm from source
         working-directory: ./vllm-empty

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/image_openeuler.yml RENAMED Viewed

@@ -19,6 +19,12 @@ on:
       - '.github/workflows/image_openeuler.yml'
       - 'Dockerfile.openEuler'
       - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
   push:
     # Publish image when tagging, the Dockerfile in tag will be build as tag image
     branches:

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/image_ubuntu.yml RENAMED Viewed

@@ -19,6 +19,12 @@ on:
       - '.github/workflows/image_ubuntu.yml'
       - 'Dockerfile'
       - 'vllm_ascend/**'
+      - 'setup.py'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - 'cmake/**'
+      - 'CMakeLists.txt'
+      - 'csrc/**'
   push:
     # Publish image when tagging, the Dockerfile in tag will be build as tag image
     branches:

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/nightly_benchmarks.yaml RENAMED Viewed

@@ -18,11 +18,7 @@
 name: 'Benchmarks / Performance'
 # This workflow runs nightly benchmarks for vllm-ascend.
-on:
-  schedule:
-    # Run at 02:00 everyday
-    - cron: '00 18 * * *'
+on:
   workflow_dispatch:
     # Allow manual triggering of the workflow
@@ -45,15 +41,20 @@ jobs:
   test:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
-    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
+    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
     runs-on: 'linux-arm64-npu-static-8'
     strategy:
       matrix:
         include:
+          - vllm_branch: v0.9.1
+            vllm_ascend_branch: main
+            vllm_use_v1: 0
           - vllm_branch: v0.9.0
             vllm_ascend_branch: main
+            vllm_use_v1: 1
+      max-parallel: 1
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       volumes:
         - /usr/local/dcmi:/usr/local/dcmi
         - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -71,6 +72,7 @@ jobs:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
         ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
+        VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
     steps:
       - name: Check npu and CANN info
         run: |
@@ -140,7 +142,7 @@ jobs:
       - name: Install elastic_tool
         if: github.event_name != 'pull_request'
         run: |
-          pip install escli-tool==0.2.1
+          pip install escli-tool==0.2.2
       - name: Collect pr info from vllm-project/vllm-ascend
         if: github.event_name != 'pull_request'
@@ -164,10 +166,10 @@ jobs:
           while IFS= read -r line || [[ -n "$line" ]]; do
             commit_id=${line%% *}
             commit_title=${line#* }
-            commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict)
-            commit_time_no_tz=${commit_time::19}
             git checkout $commit_id
+            commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict)
+            commit_time_no_tz=${commit_time::19}
             pip install -e .
             echo "------------------------"
@@ -177,17 +179,17 @@ jobs:
             echo "vllm branch: ${{ matrix.vllm_branch }}"
             echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
             echo "------------------------"
             cd /github/home
             bash benchmarks/scripts/run-performance-benchmarks.sh
             # send the result to es
-            if [[ "${{ github.event_name }}" != "pull request" ]]; then
-              escli add --vllm_branch ${{ matrix.vllm_branch }} \
-              --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
-              --commit_id $commit_id \
-              --commit_title "$commit_title" \
-              --created_at "$commit_time_no_tz" \
-              --res_dir ./benchmarks/results
-              rm -rf ./benchmarks/results
-            fi
+            escli add --vllm_branch ${{ matrix.vllm_branch }} \
+            --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
+            --commit_id $commit_id \
+            --commit_title "$commit_title" \
+            --created_at "$commit_time_no_tz" \
+            --res_dir ./benchmarks/results \
+            --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
+            rm -rf ./benchmarks/results
             cd -
           done < commit_log.txt

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/release_code.yml RENAMED Viewed

@@ -53,7 +53,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/release_whl.yml RENAMED Viewed

@@ -71,16 +71,11 @@ jobs:
         --build-arg PY_VERSION=${{ matrix.python-version }} \
         -t wheel:v1 .
         docker run --rm \
+        -u $(id -u):$(id -g) \
         -v $(pwd):/outpwd \
         wheel:v1 \
         bash -c "cp -r /workspace/vllm-ascend/dist /outpwd"
         ls dist
-    - name: Archive wheel
-      uses: actions/upload-artifact@v4
-      with:
-        name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
-        path: dist/*
     - name: Set up Python ${{ matrix.python-version }}
       if: startsWith(github.ref, 'refs/tags/')
@@ -88,6 +83,40 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
+    - name: Repair wheels with auditwheel
+      run: |
+        python3 -m pip install auditwheel
+        python3 -m pip install patchelf
+        mkdir -p dist/repaired
+        for whl in dist/*.whl; do
+          auditwheel repair "$whl" -w dist/repaired/ \
+          --exclude libplatform.so \
+          --exclude libregister.so \
+          --exclude libge_common_base.so \
+          --exclude libc10.so \
+          --exclude libc_sec.so \
+          --exclude "libascend*.so" \
+          --exclude "libtorch*.so"
+        done
+        rm -f dist/*.whl
+        mv dist/repaired/*.whl dist/
+        rmdir dist/repaired
+        ls dist
+    - name: Verify automatic platform tags
+      run: |
+        cd dist
+        for wheel in *.whl; do
+          echo "verification file: $wheel"
+          auditwheel show "$wheel"
+        done
+    - name: Archive wheel
+      uses: actions/upload-artifact@v4
+      with:
+        name: vllm-ascend-${{ matrix.os }}-py${{ matrix.python-version }}-wheel
+        path: dist/*
     - name: Release
       if: startsWith(github.ref, 'refs/tags/')
       run: |

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_doctest.yaml RENAMED Viewed

@@ -29,9 +29,6 @@ on:
       - 'tests/e2e/doctests/**'
       - 'tests/e2e/common.sh'
       - 'tests/e2e/run_doctests.sh'
-  schedule:
-    # Runs every 4 hours
-    - cron:  '0 */4 * * *'
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_test.yaml RENAMED Viewed

@@ -18,8 +18,6 @@
 name: 'test'
 on:
-  schedule:
-    - cron: '0 23 * * *'
   pull_request:
     branches:
       - 'main'
@@ -33,6 +31,9 @@ on:
       - '!benchmarks/**'
       - 'tools/mypy.sh'
       - 'mypy.ini'
+      - '.github/workflows/*.ya?ml'
+      - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -46,7 +47,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
+        vllm_version: [v0.9.1]
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Set up Python ${{ matrix.python-version }}
@@ -85,8 +87,16 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
           path: vllm-empty
+      - name: Actionlint Check
+        env:
+          SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/actionlint.json"
+          tools/actionlint.sh -color
       - name: Install vllm-project/vllm from source
         working-directory: vllm-empty
         run: |
@@ -105,7 +115,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.0]
+        vllm_version: [v0.9.1]
     concurrency:
       group: >
         ${{
@@ -118,8 +128,7 @@ jobs:
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
-      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       env:
         HF_ENDPOINT: https://hf-mirror.com
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -170,34 +179,43 @@ jobs:
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
             VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
-            pytest -sv tests/singlecard/test_scheduler.py
             # guided decoding doesn't work, fix it later
             # pytest -sv tests/singlecard/test_guided_decoding.py.py
             # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
             pytest -sv tests/singlecard/test_ascend_config.py
             pytest -sv tests/singlecard/test_camem.py
+            pytest -sv tests/singlecard/core/test_ascend_scheduler.py
+            pytest -sv tests/singlecard/core/test_ascend_scheduler_e2e.py
             pytest -sv tests/singlecard/ \
             --ignore=tests/singlecard/test_offline_inference.py \
-            --ignore=tests/singlecard/test_scheduler.py \
             --ignore=tests/singlecard/test_guided_decoding.py \
             --ignore=tests/singlecard/test_ascend_config.py \
-            --ignore=tests/singlecard/test_camem.py
+            --ignore=tests/singlecard/test_camem.py \
+            --ignore=tests/singlecard/core/test_ascend_scheduler.py \
+            --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
           else
             pytest -sv tests/multicard/test_ilama_lora_tp2.py
             # To avoid oom, we need to run the test in a single process.
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_w4a8_deepseek.py::test_deepseek_W4A8
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_w8a8_ep_dbo
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ_with_flashcomm_v1
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_with_flashcomm_v2
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py --ignore=tests/multicard/test_w4a8_deepseek.py
           fi
       - name: Run vllm-project/vllm-ascend test on V0 engine
+        if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
             VLLM_USE_MODELSCOPE=True  pytest -sv tests/singlecard/test_offline_inference.py
-            pytest -sv tests/singlecard/test_scheduler.py
             # guided decoding doesn't work, fix it later
             # pytest -sv tests/singlecard/test_guided_decoding.py.py
             pytest -sv tests/singlecard/test_camem.py
@@ -206,11 +224,12 @@ jobs:
             pytest -sv tests/singlecard/test_prompt_embedding.py
             pytest -sv tests/singlecard/ \
               --ignore=tests/singlecard/test_offline_inference.py \
-              --ignore=tests/singlecard/test_scheduler.py \
               --ignore=tests/singlecard/test_guided_decoding.py \
               --ignore=tests/singlecard/test_camem.py \
               --ignore=tests/singlecard/test_ascend_config.py \
-              --ignore=tests/singlecard/test_prompt_embedding.py
+              --ignore=tests/singlecard/test_prompt_embedding.py \
+              --ignore=tests/singlecard/core/test_ascend_scheduler.py \
+              --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
           else
             pytest -sv tests/multicard/test_ilama_lora_tp2.py
             # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
@@ -218,5 +237,6 @@ jobs:
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
             VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
           fi

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_test_long_term.yaml RENAMED Viewed

@@ -17,9 +17,6 @@
 name: 'e2e test / long-term-test'
 on:
-  schedule:
-    # Runs at 23:00 UTC (7:00 AM Beijing) every day
-    - cron: '0 23 * * *'
   pull_request:
     types: [ labeled ]
@@ -43,12 +40,12 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.9.0]
+        vllm_version: [v0.9.1]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       env:
         HF_ENDPOINT: https://hf-mirror.com
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -95,12 +92,17 @@ jobs:
       - name: Run vllm-project/vllm-ascend long term test
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            # spec decode test
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py  # it needs a clean process
-            pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            # v0 spec decode test
+            # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py  # it needs a clean process
+            # pytest -sv tests/long_term/spec_decode_v0 --ignore=tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py
+            # v1 spec decode test
+            # TODO: revert me when test_v1_mtp_correctness.py is fixed
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v1/test_v1_mtp_correctness.py
+            # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
+            # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v1/test_v1_spec_decode.py
+            # accuracy test single card
             pytest -sv tests/long_term/test_accuracy.py
           else
+            # accuracy test multi card
             VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
           fi

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/.github/workflows/vllm_ascend_test_pd.yaml RENAMED Viewed

@@ -17,9 +17,6 @@
 name: 'e2e test / pd-disaggregation'
 on:
-  schedule:
-    # Runs at 23:00 UTC (7:00 AM Beijing) every day
-    - cron: '0 23 * * *'
   pull_request:
     types: [ labeled ]
@@ -41,12 +38,12 @@ jobs:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
     strategy:
       matrix:
-        vllm_verison: [main, v0.9.0]
+        vllm_verison: [v0.9.1]
     name: vLLM Ascend prefilling decoding disaggregation test
     runs-on: linux-arm64-npu-static-8
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       volumes:
         - /usr/local/dcmi:/usr/local/dcmi
         - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -104,3 +101,7 @@ jobs:
       - name: Run vllm-project/vllm-ascend PD Disaggregation test
         run: |
           pytest -sv tests/e2e/pd_disaggreate/test_pd_e2e.py
+      - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
+        run: |
+          bash tests/e2e/pd_disaggreate/run_edge_case_test.sh

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/CMakeLists.txt RENAMED Viewed

@@ -96,5 +96,3 @@ target_link_libraries(
 target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")
 install(TARGETS vllm_ascend_C vllm_ascend_kernels DESTINATION ${VLLM_ASCEND_INSTALL_PATH})

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/Dockerfile RENAMED Viewed

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+FROM quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.0
+ARG VLLM_TAG=v0.9.1
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/Dockerfile.openEuler RENAMED Viewed

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
+FROM quay.io/ascend/cann:8.2.rc1-910b-openeuler22.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.9.0
+ARG VLLM_TAG=v0.9.1
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vllm_ascend
-Version: 0.9.0rc2
+Version: 0.9.1rc2
 Summary: vLLM Ascend backend plugin
 Home-page: https://github.com/vllm-project/vllm-ascend
 Author: vLLM-Ascend team
@@ -58,8 +58,8 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
 - OS: Linux
 - Software:
   * Python >= 3.9, < 3.12
-  * CANN >= 8.1.RC1
-  * PyTorch >= 2.5.1, torch-npu >= 2.5.1
+  * CANN >= 8.2.RC1
+  * PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1
   * vLLM (the same version as vllm-ascend)
 ## Getting Started

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/README.md RENAMED Viewed

@@ -37,8 +37,8 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
 - OS: Linux
 - Software:
   * Python >= 3.9, < 3.12
-  * CANN >= 8.1.RC1
-  * PyTorch >= 2.5.1, torch-npu >= 2.5.1
+  * CANN >= 8.2.RC1
+  * PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1
   * vLLM (the same version as vllm-ascend)
 ## Getting Started

{vllm_ascend-0.9.0rc2 → vllm_ascend-0.9.1rc2}/README.zh.md RENAMED Viewed

@@ -38,8 +38,8 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
 - 操作系统：Linux
 - 软件：
   * Python >= 3.9, < 3.12
-  * CANN >= 8.1.RC1
-  * PyTorch >= 2.5.1, torch-npu >= 2.5.1
+  * CANN >= 8.2.RC1
+  * PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1
   * vLLM (与vllm-ascend版本一致)
 ## 开始使用

vllm-ascend 0.9.0rc2__tar.gz → 0.9.1rc2__tar.gz

vllm-ascend 0.9.0rc2__tar.gz → 0.9.1rc2__tar.gz