PyPI - vllm-ascend - Versions diffs - 0.9.0rc2__tar.gz → 0.11.0rc1__tar.gz - Mend

vllm-ascend 0.9.0rc2__tar.gz → 0.11.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (859) hide show

vllm_ascend-0.11.0rc1/.gemini/config.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+# https://developers.google.com/gemini-code-assist/docs/customize-gemini-behavior-github
+have_fun: false  # Just review the code
+code_review:
+  comment_severity_threshold: HIGH  # Reduce quantity of comments
+  pull_request_opened:
+    summary: false  # Don't summarize the PR in a separate comment

vllm_ascend-0.11.0rc1/.github/Dockerfile.buildwheel ADDED Viewed

@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+# Builds the vllm-ascend wheel inside the Ascend manylinux image.
+# PY_VERSION selects the Python flavour of the base image tag.
+ARG PY_VERSION=3.11
+FROM quay.io/ascend/manylinux:8.2.rc1-910b-manylinux_2_28-py${PY_VERSION}
+# The build step below uses the bash builtin `source`; run RUN instructions under bash
+# explicitly instead of relying on /bin/sh happening to be bash.
+SHELL ["/bin/bash", "-c"]
+ARG COMPILE_CUSTOM_KERNELS=1
+# Define environments
+# NOTE(review): DEBIAN_FRONTEND is a Debian/apt setting and this image installs packages
+# with yum, so it is probably a leftover; kept so the image environment does not change.
+ENV DEBIAN_FRONTEND=noninteractive
+# Exposed to the wheel build below (presumably read by setup.py - confirm).
+ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
+# Install build tooling and drop package-manager caches in the same layer.
+# `yum clean all` plus both cache directories covers yum- and dnf-backed bases.
+RUN yum update -y && \
+    yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
+    yum clean all && \
+    rm -rf /var/cache/yum /var/cache/dnf
+WORKDIR /workspace
+COPY . /workspace/vllm-ascend/
+# Install req
+# Spell out --extra-index-url (the original relied on option-prefix abbreviation) and
+# skip pip's download cache so it is not baked into the layer.
+RUN python3 -m pip install --no-cache-dir -r vllm-ascend/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/ && \
+    python3 -m pip install --no-cache-dir twine
+# Install vllm-ascend
+# Load the Ascend toolkit / ATB environments, add the arch-specific devlib directory to
+# the library path, then build the wheel and list the produced artifacts.
+RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
+    source /usr/local/Ascend/nnal/atb/set_env.sh && \
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/$(uname -i)-linux/devlib && \
+    cd vllm-ascend && \
+    python3 setup.py bdist_wheel && \
+    ls -l dist
+CMD ["/bin/bash"]

vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/110-user-story.yml ADDED Viewed

@@ -0,0 +1,37 @@
+name: 📚 User Story
+description: Apply for a user story to be displayed on https://vllm-ascend.readthedocs.io/en/latest/community/user_stories/index.html
+title: "[User Story]: "
+labels: ["user-story"]
+body:
+- type: textarea
+  attributes:
+    label: 📚 Title
+    description: >
+      A clear title about what your user story is about.
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: About / Introduction
+    description: >
+      A brief introduction about the background of your use case, like your scenario, hardware size etc.
+- type: textarea
+  attributes:
+    label: Business Challenges
+    description: >
+      Tell us what kind of challenges you faced in this user story.
+- type: textarea
+  attributes:
+    label: Solving challenges with vLLM Ascend and benefits
+    description: >
+      Tell us how vLLM Ascend helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc. And what kind of benefit do you get from using vLLM Ascend
+- type: textarea
+  attributes:
+    label: Extra Info
+    description: >
+      Any extra information you want to include in this story
+- type: markdown
+  attributes:
+    value: >
+      Thanks for contributing 🎉!

vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/750-RFC.yml ADDED Viewed

@@ -0,0 +1,49 @@
+name: 💬 Request for comments (RFC).
+description: Ask for feedback on major architectural changes or design choices.
+title: "[RFC]: "
+labels: ["RFC"]
+body:
+- type: markdown
+  attributes:
+    value: >
+      #### Please take a look at previous [RFCs](https://github.com/vllm-project/vllm-ascend/issues?q=label%3ARFC+sort%3Aupdated-desc) for reference.
+- type: textarea
+  attributes:
+    label: Motivation.
+    description: >
+      The motivation of the RFC.
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: Proposed Change.
+    description: >
+      The proposed change of the RFC.
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: Feedback Period.
+    description: >
+      The feedback period of the RFC. Usually at least one week.
+  validations:
+    required: false
+- type: textarea
+  attributes:
+    label: CC List.
+    description: >
+      The list of people you want to CC.
+  validations:
+    required: false
+- type: textarea
+  attributes:
+    label: Any Other Things.
+    description: >
+      Any other things you would like to mention, such as feature branch request.
+  validations:
+    required: false
+- type: markdown
+  attributes:
+    value: >
+      Thanks for contributing 🎉!

vllm_ascend-0.11.0rc1/.github/ISSUE_TEMPLATE/900-release-checklist.yml ADDED Viewed

@@ -0,0 +1,104 @@
+name: Release Checklist
+description: Generate a release checklist issue when preparing a new release. (Used by the release team)
+title: "[Release]: Release checklist for v"
+body:
+- type: textarea
+  attributes:
+    description: >
+      Brief info for the new release.
+    label: Release Checklist
+    value: >
+      **Release Version**:
+      **Release Branch**:
+      **Release Date**:
+      **Release Manager**:
+- type: textarea
+  attributes:
+    description: >
+      Release notes.
+    label: Prepare Release Note
+    value: >
+      - [ ] Create a new issue for release feedback
+      - [ ] Upgrade vllm version to the new version for CI and Dockerfile
+      - [ ] Write the release note PR.
+        - [ ] Update the feedback issue link in docs/source/faqs.md
+        - [ ] Add release note to docs/source/user_guide/release_notes.md
+        - [ ] Update release version in README.md and README.zh.md
+        - [ ] Update version info in docs/source/community/versioning_policy.md
+        - [ ] Update contributor info in docs/source/community/contributors.md
+        - [ ] Update package version in docs/conf.py
+- type: textarea
+  attributes:
+    description: >
+      Make sure the code is merged.
+    label: PR need Merge
+    value: >
+      - [ ] PR link1
+      - [ ] PR link2
+      - [ ] ...
+- type: textarea
+  attributes:
+    description: >
+      Make sure the new Feature/Function is tested
+    label: Functional Test
+    value: >
+      - [ ] Feature1
+      - [ ] Bug1
+      - [ ] ...
+- type: textarea
+  attributes:
+    description: >
+      Make sure the doc is updated.
+    label: Doc Test
+    value: >
+      - [ ] Tutorial is updated.
+      - [ ] User Guide is updated.
+      - [ ] Developer Guide is updated.
+- type: textarea
+  attributes:
+    description: >
+      Make sure the artifacts are ready
+    label: Prepare Artifacts
+    value: >
+      - [ ] Docker image is ready.
+      - [ ] Wheel package is ready.
+- type: textarea
+  attributes:
+    description: >
+      Start to release.
+    label: Release Step
+    value: >
+      - [ ] Release note PR is merged.
+      - [ ] Post the release on GitHub release page.
+      - [ ] Generate official doc page on https://app.readthedocs.org/dashboard/
+      - [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend
+      - [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend
+      - [ ] Upload 310p wheel to Github release page
+      - [ ] Broadcast the release news (By message, blog , etc)
+      - [ ] Close this issue

vllm_ascend-0.11.0rc1/.github/PULL_REQUEST_TEMPLATE.md ADDED Viewed

@@ -0,0 +1,27 @@
+<!--  Thanks for sending a pull request!
+BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html
+-->
+### What this PR does / why we need it?
+<!--
+- Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue.
+If possible, please consider writing useful notes for better and faster reviews in your PR.
+- Please clarify why the changes are needed. For instance, the use case and bug description.
+- Fixes #
+-->
+### Does this PR introduce _any_ user-facing change?
+<!--
+Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes.
+Documentation-only updates are not considered user-facing changes.
+-->
+### How was this patch tested?
+<!--
+CI passed with new added/existing test.
+If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
+If tests were not added, please describe why they were not added and/or why it was difficult to add.
+-->

vllm_ascend-0.11.0rc1/.github/actionlint.yaml ADDED Viewed

@@ -0,0 +1,21 @@
+self-hosted-runner:
+  # Labels of self-hosted runner in array of strings.
+  labels:
+    - linux-aarch64-a2-0
+    - linux-aarch64-a2-1
+    - linux-aarch64-a2-2
+    - linux-aarch64-a2-4
+    - linux-aarch64-a2-8
+    - linux-arm64-npu-static-8
+    - linux-aarch64-310p-1
+    - linux-aarch64-310p-2
+    - linux-aarch64-310p-4
+    - ubuntu-24.04-arm
+    - linux-aarch64-a3-1
+    - linux-aarch64-a3-2
+    - linux-aarch64-a3-4
+    - linux-aarch64-a3-8
+    - linux-amd64-cpu-0
+    - linux-amd64-cpu-8
+    - linux-amd64-cpu-16
+    - linux-aarch64-a3-0

vllm_ascend-0.11.0rc1/.github/format_pr_body.sh ADDED Viewed

@@ -0,0 +1,59 @@
+#!/bin/bash
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+# Adapted from vllm/.github/scripts/cleanup_pr_body.sh
+#
+# Usage: format_pr_body.sh <pr_number> <vllm_version> <vllm_commit>
+#
+# Rewrites a pull request description via the `gh` CLI: strips HTML comment
+# blocks and previously appended "- vLLM ..." lines, appends the current vLLM
+# version/commit, squeezes repeated blank lines, and updates the PR only when
+# the resulting body differs from the current one.
+set -eux
+# Ensure exactly 3 arguments are passed
+if [ "$#" -ne 3 ]; then
+    echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
+    exit 1
+fi
+PR_NUMBER=$1
+VLLM_VERSION=$2
+VLLM_COMMIT=$3
+# Scratch files: current body, edited body, and the de-duplicated body that is uploaded
+OLD=/tmp/orig_pr_body.txt
+NEW=/tmp/new_pr_body.txt
+FINAL=/tmp/final_pr_body.txt
+gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
+cp "${OLD}" "${NEW}"
+# Remove notes in pr description and add vLLM version and commit
+sed -i '/<!--/,/-->/d' "${NEW}"
+sed -i '/- vLLM .*$/d' "${NEW}"
+{
+    echo ""
+    echo "- vLLM version: $VLLM_VERSION"
+    echo "- vLLM main: $VLLM_COMMIT"
+} >> "${NEW}"
+# Remove redundant empty lines.
+# `cat -s` squeezes only runs of blank lines; `uniq` would also drop legitimate
+# adjacent duplicate non-empty lines (e.g. inside a pasted code snippet).
+cat -s "${NEW}" > "${FINAL}"
+# Run this only if ${FINAL} is different than ${OLD}
+if ! cmp -s "${OLD}" "${FINAL}"; then
+    echo
+    echo "Updating PR body:"
+    echo
+    # Show exactly what is submitted below
+    cat "${FINAL}"
+    gh pr edit --body-file "${FINAL}" "${PR_NUMBER}"
+else
+    echo "No changes needed"
+fi

vllm_ascend-0.11.0rc1/.github/workflows/_accuracy_test.yaml ADDED Viewed

@@ -0,0 +1,175 @@
+# Reusable workflow: runs the lm-eval based accuracy test for one model and
+# optionally uploads the generated markdown report as an artifact.
+name: 'accuracy test'
+on:
+  workflow_call:
+    inputs:
+      # Git ref of vllm-project/vllm to check out and install.
+      vllm:
+        required: true
+        type: string
+      # Git ref of vllm-project/vllm-ascend to test; the value "latest" is
+      # resolved to the newest "v*" tag by the "Resolve vllm-ascend version" step.
+      vllm-ascend:
+        required: false
+        type: string
+        default: main
+      # Runner label the job runs on.
+      runner:
+        required: true
+        type: string
+      # NOTE(review): this input is required but is not referenced anywhere in
+      # this workflow; the job's container image is hard-coded. Confirm whether
+      # the job should use it before relying on it.
+      image:
+        required: true
+        type: string
+      # Model identifier; selects ./tests/e2e/models/configs/<model_name>.yaml
+      # and names the generated report.
+      model_name:
+        required: true
+        type: string
+      # When true, the markdown report is uploaded as a workflow artifact.
+      upload:
+        required: false
+        type: boolean
+        default: false
+jobs:
+  accuracy_tests:
+    runs-on: ${{ inputs.runner }}
+    name: ${{ inputs.model_name }} accuracy
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      env:
+        VLLM_USE_MODELSCOPE: True
+        # 1. If version specified (work_dispatch), do specified branch accuracy test
+        # 2. If no version (labeled PR), do accuracy test by default ref:
+        # The branch, tag or SHA to checkout. When checking out the repository that
+        # triggered a workflow, this defaults to the reference or SHA for that event.
+        # Otherwise, uses the default branch.
+        GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      # Publishes the model name as a step output. The input is passed through an
+      # environment variable instead of being spliced into the script text, so
+      # shell metacharacters in the value cannot alter the command.
+      - name: Set model name as output
+        id: set_output
+        env:
+          GHA_MODEL_NAME: ${{ inputs.model_name }}
+        run: |
+          echo "model_name=${GHA_MODEL_NAME}" >> "$GITHUB_OUTPUT"
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+      # Resolves the vllm-ascend ref to test and exports it as
+      # GHA_VLLM_ASCEND_VERSION for later steps. "latest" maps to the first tag
+      # returned by a version-sorted `git ls-remote` of "v*" tags, falling back to
+      # "main" when no tag is found; any other value is used as-is.
+      # The input is read from the environment rather than interpolated into the
+      # script so its content cannot change the shell code.
+      - name: Resolve vllm-ascend version
+        env:
+          VERSION_INPUT: ${{ inputs.vllm-ascend }}
+        run: |
+          if [[ "$VERSION_INPUT" == "latest" ]]; then
+            TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
+            LATEST_TAG=$(echo "$TAGS" | head -n1)
+            if [[ -z "$LATEST_TAG" ]]; then
+              RESOLVED_VERSION="main"
+            else
+              RESOLVED_VERSION="$LATEST_TAG"
+            fi
+          else
+            RESOLVED_VERSION="$VERSION_INPUT"
+          fi
+          echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> "$GITHUB_ENV"
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm-ascend
+          path: ./vllm-ascend
+          ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
+      - name: Install vllm-project/vllm-ascend
+        working-directory: ./vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+      - name: Get vLLM commit hash and URL
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
+          echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
+      - name: Get vLLM-Ascend commit hash and URL
+        working-directory: ./vllm-ascend
+        run: |
+          VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
+          echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
+      # Exports GHA_CANN_VERSION, GHA_TORCH_VERSION, GHA_TORCH_NPU_VERSION and
+      # GHA_VLLM_VERSION to $GITHUB_ENV for the accuracy-test step.
+      - name: Collect version info
+        run: |
+          # Pick the first entry under ascend-toolkit that is not the "latest" entry.
+          # NOTE(review): if nothing matches, TOOLKIT_DIR stays unset and the grep below fails.
+          for dir in /usr/local/Ascend/ascend-toolkit/*; do
+            dname=$(basename "$dir")
+            if [ "$dname" != "latest" ]; then
+              TOOLKIT_DIR="$dname"
+              break
+            fi
+          done
+          INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
+          # Take the value of the first "version=" line and strip double quotes.
+          GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
+                           | head -n1 \
+                           | cut -d'=' -f2 \
+                           | tr -d '"')
+          # Each line emitted in this group becomes a NAME=value entry in $GITHUB_ENV;
+          # the vllm version has any "+local" suffix removed by the trailing sed.
+          {
+            echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
+            pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
+            pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
+            pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
+          } >> "$GITHUB_ENV"
+      # Runs the lm-eval correctness test for the requested model and publishes
+      # `markdown_name` (basename of the model name) as a step output for the
+      # summary/upload steps. The model name is passed through the environment and
+      # quoted, so whitespace or shell metacharacters in it cannot split or alter
+      # the commands.
+      - name: Run accuracy test
+        id: report
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
+          VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
+          VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
+          VLLM_ASCEND_COMMIT: ${{ env.VLLM_ASCEND_COMMIT }}
+          CANN_VERSION: ${{ env.GHA_CANN_VERSION }}
+          TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
+          TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
+          GHA_MODEL_NAME: ${{ inputs.model_name }}
+        run: |
+          model_base_name=$(basename "$GHA_MODEL_NAME")
+          markdown_name="${model_base_name}"
+          echo "markdown_name=$markdown_name" >> "$GITHUB_OUTPUT"
+          mkdir -p ./benchmarks/accuracy
+          pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
+          --config "./tests/e2e/models/configs/${GHA_MODEL_NAME}.yaml"
+      # Appends the generated report to the job summary. This step runs even when
+      # earlier steps failed, in which case the report (or the report step's
+      # output) may not exist; emit a warning instead of failing on `cat`.
+      - name: Generate step summary
+        if: ${{ always() }}
+        env:
+          REPORT_FILE: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
+        run: |
+          if [ -f "$REPORT_FILE" ]; then
+            cat "$REPORT_FILE" >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "::warning::Accuracy report not found: $REPORT_FILE"
+          fi
+      - name: Upload Report
+        if: ${{ inputs.upload == true }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
+          path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
+          if-no-files-found: warn
+          retention-days: 90
+          overwrite: true

vllm_ascend-0.11.0rc1/.github/workflows/_e2e_nightly.yaml ADDED Viewed

@@ -0,0 +1,115 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+name: 'e2e nightly test'
+on:
+  workflow_call:
+    inputs:
+      vllm:
+        required: true
+        type: string
+      runner:
+        required: true
+        type: string
+      image:
+        required: false
+        type: string
+        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
+      tests:
+        required: true
+        type: string
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+# Only cancel in-progress runs of the same workflow, ref, runner and test selection.
+# In a reusable workflow the `github` context comes from the caller, so a group built
+# only from workflow + ref is shared by every call made from one caller run and, with
+# cancel-in-progress, those calls could cancel each other. Including the inputs keeps
+# distinct calls in distinct groups.
+# NOTE(review): callers are not visible here - confirm this matches how it is invoked.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.runner }}-${{ inputs.tests }}
+  cancel-in-progress: true
+jobs:
+  e2e-nightly:
+    name: e2e-nightly
+    runs-on: ${{ inputs.runner }}
+    container:
+      image: ${{ inputs.image }}
+      env:
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          apt-get update -y
+          apt install git -y
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+      - name: Checkout aisbench repo and Install aisbench
+        run: |
+          git clone https://gitee.com/aisbench/benchmark.git
+          cd benchmark
+          git checkout v3.0-20250930-master
+          pip3 install -e ./
+          pip3 install -r requirements/api.txt
+          pip3 install -r requirements/extra.txt
+      - name: Run vllm-project/vllm-ascend test
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          VLLM_CI_RUNNER: ${{ inputs.runner }}
+        run: |
+          # TODO: enable more tests
+          pytest -sv ${{ inputs.tests }}

vllm-ascend 0.9.0rc2__tar.gz → 0.11.0rc1__tar.gz

vllm-ascend 0.9.0rc2__tar.gz → 0.11.0rc1__tar.gz