PyPI - vllm-ascend - Versions diffs - 0.11.0rc1__tar.gz → 0.11.0rc3__tar.gz - Mend

vllm-ascend 0.11.0rc1__tar.gz → 0.11.0rc3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (631) hide show

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/_accuracy_test.yaml RENAMED Viewed

@@ -30,7 +30,7 @@ jobs:
     runs-on: ${{ inputs.runner }}
     name: ${{ inputs.model_name }} accuracy
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       env:
         VLLM_USE_MODELSCOPE: True
         # 1. If version specified (work_dispatch), do specified branch accuracy test

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/_e2e_test.yaml RENAMED Viewed

@@ -106,8 +106,8 @@ jobs:
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          # Fix me: OOM error
-          #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          # Fix me: test_eagle_correctness OOM error
+          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
           pytest -sv tests/e2e/singlecard/ops/

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/accuracy_test.yaml RENAMED Viewed

@@ -68,5 +68,5 @@ jobs:
     with:
       vllm: v0.11.0
       runner:  linux-aarch64-${{ matrix.runner }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       model_name: ${{ matrix.model_name }}

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/multi_node_test.yaml RENAMED Viewed

@@ -23,7 +23,7 @@ jobs:
     # This is a runner with no NPU for k8s controller
     runs-on: linux-aarch64-a3-0
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       env:
         KUBECONFIG: /tmp/kubeconfig
         KUBECTL: /root/.cache/.kube/kubectl

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/nightly_benchmarks.yaml RENAMED Viewed

@@ -56,7 +56,7 @@ jobs:
             vllm_use_v1: 1
       max-parallel: 1
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       volumes:
         - /usr/local/dcmi:/usr/local/dcmi
         - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/release_whl.yml RENAMED Viewed

@@ -57,7 +57,13 @@ jobs:
     - name: Print
       run: |
         lscpu
+    - name: Free up disk space
+      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+      with:
+        tool-cache: true
+        docker-images: false
     - name: Build wheel
       run: |
         ls

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_dist.yaml RENAMED Viewed

@@ -47,7 +47,7 @@ jobs:
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       env:
         DEBIAN_FRONTEND: noninteractive
     steps:
@@ -97,4 +97,4 @@ jobs:
           VLLM_USE_MODELSCOPE: True
         run: |
           # TODO: enable more tests
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test.yaml RENAMED Viewed

@@ -145,5 +145,5 @@ jobs:
     with:
       vllm: ${{ matrix.vllm_version }}
       runner: linux-aarch64-a2
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: light

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_310p.yaml RENAMED Viewed

@@ -58,7 +58,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-310p-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_full.yaml RENAMED Viewed

@@ -76,5 +76,5 @@ jobs:
     with:
       vllm: ${{ matrix.vllm_version }}
       runner: linux-aarch64-a2
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: full

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_full_vllm_main.yaml RENAMED Viewed

@@ -41,5 +41,5 @@ jobs:
     with:
       vllm: main
       runner: linux-aarch64-a2
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       type: full

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_models.yaml RENAMED Viewed

@@ -79,7 +79,7 @@ jobs:
     with:
       vllm: v0.11.0
       runner:  linux-aarch64-${{ matrix.runner }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       model_name: ${{ matrix.model_name }}
       upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/.github/workflows/vllm_ascend_test_pd.yaml RENAMED Viewed

@@ -49,7 +49,7 @@ jobs:
     runs-on: linux-arm64-npu-static-8
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       volumes:
         - /usr/local/dcmi:/usr/local/dcmi
         - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
@@ -109,4 +109,4 @@ jobs:
       - name: Run vllm-project/vllm-ascend PD Disaggregation edge test
         run: |
           git config --global --add safe.directory/__w/vllm-ascend/vllm-ascend
-          bash tests/e2e/pd_disaggreate/run_edge_case_test.sh
+          bash tests/e2e/pd_disaggreate/run_edge_case_test.sh

vllm_ascend-0.11.0rc1/Dockerfile.a3 → vllm_ascend-0.11.0rc3/Dockerfile RENAMED Viewed

@@ -15,24 +15,33 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
+FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
+ARG MOONCAKE_TAG="v0.3.7.post2"
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
-RUN apt-get update -y && \
-    apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
-    rm -rf /var/cache/apt/* && \
-    rm -rf /var/lib/apt/lists/*
 WORKDIR /workspace
 COPY . /vllm-workspace/vllm-ascend/
+# Install Mooncake dependencies
+RUN apt-get update -y && \
+    apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev && \
+    git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/lib64 && \
+    mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
+    make -j$(nproc) && make install && \
+    rm -fr /vllm-workspace/Mooncake/build && \
+    rm -rf /var/cache/apt/* && \
+    rm -rf /var/lib/apt/lists/*
 RUN pip config set global.index-url ${PIP_INDEX_URL}
 # Install vLLM
@@ -40,7 +49,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge
@@ -54,7 +63,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip cache purge
 # Install modelscope (for fast download) and ray (for multinode)
-RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
     python3 -m pip cache purge
-CMD ["/bin/bash"]
+CMD ["/bin/bash"]

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/Dockerfile.310p RENAMED Viewed

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11
+FROM quay.io/ascend/cann:8.3.rc2-310p-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -40,7 +40,7 @@ ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/Dockerfile.310p.openEuler RENAMED Viewed

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11
+FROM quay.io/ascend/cann:8.3.rc2-310p-openeuler24.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
@@ -38,7 +38,7 @@ ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge

vllm_ascend-0.11.0rc1/Dockerfile → vllm_ascend-0.11.0rc3/Dockerfile.a3 RENAMED Viewed

@@ -15,32 +15,40 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+FROM quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
+ARG MOONCAKE_TAG=v0.3.7.post2
+COPY . /vllm-workspace/vllm-ascend/
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
-RUN apt-get update -y && \
-    apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
-    rm -rf /var/cache/apt/* && \
-    rm -rf /var/lib/apt/lists/*
+RUN pip config set global.index-url ${PIP_INDEX_URL}
 WORKDIR /workspace
-COPY . /vllm-workspace/vllm-ascend/
-RUN pip config set global.index-url ${PIP_INDEX_URL}
+# Install Mooncake dependencies
+RUN apt-get update -y && \
+    apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev && \
+    git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/lib64 && \
+    mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
+    make -j$(nproc) && make install && \
+    rm -fr /vllm-workspace/Mooncake/build && \
+    rm -rf /var/cache/apt/* && \
+    rm -rf /var/lib/apt/lists/*
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge
@@ -54,7 +62,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip cache purge
 # Install modelscope (for fast download) and ray (for multinode)
-RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
     python3 -m pip cache purge
 CMD ["/bin/bash"]

vllm_ascend-0.11.0rc1/Dockerfile.openEuler → vllm_ascend-0.11.0rc3/Dockerfile.a3.openEuler RENAMED Viewed

@@ -15,30 +15,43 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11
+FROM quay.io/ascend/cann:8.3.rc2-a3-openeuler24.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
+ARG MOONCAKE_TAG="v0.3.7.post2"
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
-RUN yum update -y && \
-    yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
-    rm -rf /var/cache/yum
 RUN pip config set global.index-url ${PIP_INDEX_URL}
 WORKDIR /workspace
 COPY . /vllm-workspace/vllm-ascend/
+SHELL ["/bin/bash", "-c"]
+RUN yum update -y && \
+    yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
+    git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    ARCH=$(uname -m) && \
+    source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
+    export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
+    export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
+    cd /vllm-workspace/Mooncake && \
+    bash mooncake_installer.sh -y && \
+    mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
+    make -j$(nproc) && make install && \
+    rm -fr /vllm-workspace/Mooncake/build && \
+    rm -rf /var/cache/yum/*
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge
@@ -52,7 +65,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip cache purge
 # Install modelscope (for fast download) and ray (for multinode)
-RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
     python3 -m pip cache purge
 CMD ["/bin/bash"]

vllm_ascend-0.11.0rc1/Dockerfile.a3.openEuler → vllm_ascend-0.11.0rc3/Dockerfile.openEuler RENAMED Viewed

@@ -15,16 +15,14 @@
 # This file is a part of the vllm-ascend project.
 #
-FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11
+FROM quay.io/ascend/cann:8.3.rc2-910b-openeuler24.03-py3.11
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
+ARG MOONCAKE_TAG="v0.3.7.post2"
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
-RUN yum update -y && \
-    yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
-    rm -rf /var/cache/yum
 RUN pip config set global.index-url ${PIP_INDEX_URL}
@@ -32,13 +30,29 @@ WORKDIR /workspace
 COPY . /vllm-workspace/vllm-ascend/
+SHELL ["/bin/bash", "-c"]
+RUN yum update -y && \
+    yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
+    git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
+    cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
+    ARCH=$(uname -m) && \
+    source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
+    export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
+    export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
+    cd /vllm-workspace/Mooncake && \
+    bash mooncake_installer.sh -y && \
+    mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
+    make -j$(nproc) && make install && \
+    rm -fr /vllm-workspace/Mooncake/build && \
+    rm -rf /var/cache/yum/*
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
 ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
-RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
+RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[audio] --extra-index https://download.pytorch.org/whl/cpu/ && \
     python3 -m pip uninstall -y triton && \
     python3 -m pip cache purge
@@ -52,7 +66,7 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
     python3 -m pip cache purge
 # Install modelscope (for fast download) and ray (for multinode)
-RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \
+RUN python3 -m pip install modelscope 'ray>=2.47.1,<=2.48.0' 'protobuf>3.20.0' && \
     python3 -m pip cache purge
-CMD ["/bin/bash"]
+CMD ["/bin/bash"]

{vllm_ascend-0.11.0rc1/vllm_ascend.egg-info → vllm_ascend-0.11.0rc3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vllm_ascend
-Version: 0.11.0rc1
+Version: 0.11.0rc3
 Summary: vLLM Ascend backend plugin
 Home-page: https://github.com/vllm-project/vllm-ascend
 Author: vLLM-Ascend team
@@ -37,6 +37,7 @@ Requires-Dist: msgpack
 Requires-Dist: quart
 Requires-Dist: numba
 Requires-Dist: torch-npu==2.7.1
+Requires-Dist: transformers<=4.57.1
 Dynamic: author
 Dynamic: classifier
 Dynamic: description

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/conf.py RENAMED Viewed

@@ -75,7 +75,7 @@ myst_substitutions = {
     'pip_vllm_ascend_version': "0.11.0rc0",
     'pip_vllm_version': "0.11.0",
     # CANN image tag
-    'cann_image_tag': "8.3.rc1-910b-ubuntu22.04-py3.11",
+    'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11",
     # vllm version in ci
     'ci_vllm_version': 'v0.11.0rc3',
 }

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/installation.md RENAMED Viewed

@@ -79,19 +79,19 @@ source vllm-ascend-env/bin/activate
 pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions
 # Download and install the CANN package.
-wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run
-chmod +x ./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run
-./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run --full
-# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc1_linux-aarch64.run
+wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run
+./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run --full
+# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc2_linux-aarch64.run
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
-wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run
-chmod +x ./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run
-./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run --install
+wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run
+./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run --install
-wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run
-chmod +x ./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run
-./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run --install
+wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run
+./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run --install
 source /usr/local/Ascend/nnal/atb/set_env.sh
 ```

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/index.md RENAMED Viewed

@@ -8,6 +8,7 @@ single_npu_multimodal
 single_npu_audio
 single_npu_qwen3_embedding
 single_npu_qwen3_quantization
+single_node_pd_disaggregation_llmdatadist
 multi_npu_qwen3_next
 multi_npu
 multi_npu_moge

{vllm_ascend-0.11.0rc1 → vllm_ascend-0.11.0rc3}/docs/source/tutorials/multi_npu_qwen3_next.md RENAMED Viewed

@@ -51,7 +51,7 @@ Install the Ascend BiSheng toolkit:
 wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run
 chmod a+x Ascend-BiSheng-toolkit_aarch64.run
 ./Ascend-BiSheng-toolkit_aarch64.run --install
-source /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
+source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh
 ```
 Install Triton Ascend:
@@ -75,7 +75,7 @@ Coming soon ...
 Please make sure you have already executed the command:
 ```bash
-source /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
+source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh
 ```
 :::::{tab-set}

vllm-ascend 0.11.0rc1__tar.gz → 0.11.0rc3__tar.gz

vllm-ascend 0.11.0rc1__tar.gz → 0.11.0rc3__tar.gz