vec-inf 0.7.1.tar.gz → 0.7.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/code_checks.yml +4 -2
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/docker.yml +7 -2
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/docs.yml +7 -7
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/publish.yml +1 -1
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/unit_tests.yml +5 -5
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.pre-commit-config.yaml +1 -1
- {vec_inf-0.7.1 → vec_inf-0.7.3}/Dockerfile +12 -8
- {vec_inf-0.7.1 → vec_inf-0.7.3}/MODEL_TRACKING.md +8 -2
- {vec_inf-0.7.1 → vec_inf-0.7.3}/PKG-INFO +7 -6
- {vec_inf-0.7.1 → vec_inf-0.7.3}/README.md +3 -3
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/index.md +1 -1
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/user_guide.md +35 -18
- {vec_inf-0.7.1 → vec_inf-0.7.3}/pyproject.toml +5 -4
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/cli/test_cli.py +107 -1
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/cli/test_helper.py +251 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_api.py +186 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_slurm_script_generator.py +13 -13
- vec_inf-0.7.3/uv.lock +6357 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/README.md +2 -1
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/cli/_cli.py +39 -10
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/cli/_helper.py +100 -19
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_helper.py +80 -31
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_slurm_script_generator.py +58 -30
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_slurm_templates.py +27 -12
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_utils.py +58 -6
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/api.py +55 -2
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/models.py +6 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/config/models.yaml +47 -99
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/find_port.sh +10 -1
- vec_inf-0.7.1/uv.lock +0 -5260
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/dependabot.yml +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.github/pull_request_template.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.gitignore +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/.python-version +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/LICENSE +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/codecov.yml +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/Makefile +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/api.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/assets/favicon-48x48.svg +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/assets/favicon.ico +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/assets/vector-logo.svg +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/contributing.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/make.bat +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/overrides/partials/copyright.html +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/overrides/partials/logo.html +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/docs/stylesheets/extra.css +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/README.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/api/basic_usage.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/inference/llm/chat_completions.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/inference/llm/completions.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/inference/llm/completions.sh +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/inference/text_embedding/embeddings.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/inference/vlm/vision_completions.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/logits/logits.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/slurm_dependency/README.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/slurm_dependency/downstream_job.sbatch +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/slurm_dependency/run_downstream.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/examples/slurm_dependency/run_workflow.sh +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/mkdocs.yml +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/profile/avg_throughput.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/profile/gen.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/test_imports.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/cli/test_utils.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_examples.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_helper.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_models.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_utils.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/client/test_vars.env +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/cli/_utils.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/cli/_vars.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_client_vars.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_exceptions.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/_slurm_vars.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/client/config.py +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/config/README.md +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/vec_inf/config/environment.yaml +0 -0
- {vec_inf-0.7.1 → vec_inf-0.7.3}/venv.sh +0 -0
{vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/code_checks.yml

@@ -30,7 +30,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5.0.0
       - name: Install uv
-        uses: astral-sh/setup-uv@
+        uses: astral-sh/setup-uv@v7
         with:
           # Install a specific version of uv.
           version: "0.5.21"
@@ -40,7 +40,7 @@ jobs:
         with:
          python-version-file: ".python-version"
      - name: Install the project
-        run: uv sync --dev
+        run: uv sync --dev --prerelease=allow
      - name: Install dependencies and check code
        run: |
          source .venv/bin/activate
@@ -49,3 +49,5 @@ jobs:
        uses: pypa/gh-action-pip-audit@v1.1.0
        with:
          virtual-environment: .venv/
+          # Temporary: ignore pip advisory until fixed in pip>=25.3
+          ignore-vulns: GHSA-4xh5-x5gv-qwph
{vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/docker.yml

@@ -21,7 +21,9 @@ on:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    runs-on:
+    runs-on:
+      - self-hosted
+      - docker
     steps:
       - name: Checkout repository
         uses: actions/checkout@v5.0.0
@@ -32,6 +34,9 @@ jobs:
          VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
          echo "version=$VERSION" >> $GITHUB_OUTPUT
 
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
      - name: Log in to Docker Hub
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
        with:
@@ -40,7 +45,7 @@ jobs:
 
      - name: Extract metadata (tags, labels) for Docker
        id: meta
-        uses: docker/metadata-action@
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
        with:
          images: vectorinstitute/vector-inference
 
{vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/docs.yml

@@ -56,7 +56,7 @@ jobs:
          fetch-depth: 0 # Fetch all history for proper versioning
 
      - name: Install uv
-        uses: astral-sh/setup-uv@
+        uses: astral-sh/setup-uv@v7
        with:
          version: "0.5.21"
          enable-cache: true
@@ -67,16 +67,16 @@ jobs:
          python-version-file: ".python-version"
 
      - name: Install the project
-        run: uv sync --all-extras --group docs
+        run: uv sync --all-extras --group docs --prerelease=allow
 
      - name: Build docs
-        run: uv run mkdocs build
+        run: uv run --frozen mkdocs build
 
      - name: Create .nojekyll file
        run: touch site/.nojekyll
 
      - name: Upload artifact
-        uses: actions/upload-artifact@
+        uses: actions/upload-artifact@v5
        with:
          name: docs-site
          path: site/
@@ -93,7 +93,7 @@ jobs:
          fetch-depth: 0 # Fetch all history for proper versioning
 
      - name: Install uv
-        uses: astral-sh/setup-uv@
+        uses: astral-sh/setup-uv@v7
        with:
          version: "0.5.21"
          enable-cache: true
@@ -104,7 +104,7 @@ jobs:
          python-version-file: ".python-version"
 
      - name: Install the project
-        run: uv sync --all-extras --group docs
+        run: uv sync --all-extras --group docs --frozen
 
      - name: Configure Git Credentials
        run: |
@@ -112,7 +112,7 @@ jobs:
          git config user.email 41898282+github-actions[bot]@users.noreply.github.com
 
      - name: Download artifact
-        uses: actions/download-artifact@
+        uses: actions/download-artifact@v6
        with:
          name: docs-site
          path: site
{vec_inf-0.7.1 → vec_inf-0.7.3}/.github/workflows/unit_tests.yml

@@ -46,7 +46,7 @@ jobs:
      - uses: actions/checkout@v5.0.0
 
      - name: Install uv
-        uses: astral-sh/setup-uv@
+        uses: astral-sh/setup-uv@v7
        with:
          # Install a specific version of uv.
          version: "0.5.21"
@@ -58,18 +58,18 @@ jobs:
          python-version: ${{ matrix.python-version }}
 
      - name: Install the project
-        run: uv sync --dev
+        run: uv sync --dev --prerelease=allow
 
      - name: Install dependencies and check code
        run: |
-          uv run pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
+          uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests
 
      - name: Install the core package only
        run: uv sync --no-dev
 
      - name: Run package import tests
        run: |
-          uv run pytest tests/test_imports.py
+          uv run --frozen pytest tests/test_imports.py
 
      - name: Import Codecov GPG public key
        run: |
@@ -79,7 +79,7 @@ jobs:
        uses: codecov/codecov-action@v5.5.1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
-
+          files: ./coverage.xml
          name: codecov-umbrella
          fail_ci_if_error: true
          verbose: true
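The updated unit-test workflow pins dependency resolution with `--prerelease=allow` and `--frozen`. As a rough local reproduction of those CI steps (a sketch that assumes `uv` is installed and that you run it from the repository root):

```bash
# Sketch: reproduce the CI unit-test steps locally (assumes uv is installed)
uv sync --dev --prerelease=allow
uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests

# Core-package import check, as in the workflow
uv sync --no-dev
uv run --frozen pytest tests/test_imports.py
```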
{vec_inf-0.7.1 → vec_inf-0.7.3}/Dockerfile

@@ -35,29 +35,33 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \
    rm get-pip.py && \
    python3.10 -m pip install --upgrade pip setuptools wheel uv
 
-# Install
+# Install RDMA support
 RUN apt-get update && apt-get install -y \
    libibverbs1 libibverbs-dev ibverbs-utils \
    librdmacm1 librdmacm-dev rdmacm-utils \
+    rdma-core ibverbs-providers infiniband-diags perftest \
    && rm -rf /var/lib/apt/lists/*
 
 # Set up RDMA environment (these will persist in the final container)
 ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
-ENV UCX_NET_DEVICES=all
 ENV NCCL_IB_DISABLE=0
+ENV NCCL_SOCKET_IFNAME="^lo,docker0"
+ENV NCCL_NET_GDR_LEVEL=PHB
+ENV NCCL_IB_TIMEOUT=22
+ENV NCCL_IB_RETRY_CNT=7
+ENV NCCL_DEBUG=INFO
 
 # Set up project
 WORKDIR /vec-inf
 COPY . /vec-inf
 
 # Install project dependencies with build requirements
-RUN
+RUN uv pip install --system -e .[dev] --prerelease=allow
 
-#
-RUN
-
-
-ENV NCCL_DEBUG=INFO
+# Install a single, system NCCL (from NVIDIA CUDA repo in base image)
+RUN apt-get update && apt-get install -y --allow-change-held-packages\
+    libnccl2 libnccl-dev \
+    && rm -rf /var/lib/apt/lists/*
 
 # Set the default command to start an interactive shell
 CMD ["bash"]
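For context, this is the image that the `docker.yml` workflow above pushes to Docker Hub as `vectorinstitute/vector-inference`. A minimal sketch of building and entering the container locally (the tag name is illustrative, and `--gpus` assumes Docker with the NVIDIA container toolkit):

```bash
# Sketch: build the image from the repository root and open a shell in it
# (tag name is illustrative; --gpus all assumes the NVIDIA container toolkit)
docker build -t vector-inference:local .
docker run --rm -it --gpus all vector-inference:local
```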
{vec_inf-0.7.1 → vec_inf-0.7.3}/MODEL_TRACKING.md

@@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directo
 | `gemma-2b-it` | ❌ |
 | `gemma-7b` | ❌ |
 | `gemma-7b-it` | ❌ |
+| `gemma-2-2b-it` | ✅ |
 | `gemma-2-9b` | ✅ |
 | `gemma-2-9b-it` | ✅ |
 | `gemma-2-27b` | ✅ |
@@ -165,8 +166,8 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `Qwen3-14B` | ✅ |
-| `Qwen3-8B` |
-| `Qwen3-32B` |
+| `Qwen3-8B` | ✅ |
+| `Qwen3-32B` | ✅ |
 | `Qwen3-235B-A22B` | ❌ |
 | `Qwen3-Embedding-8B` | ❌ |
 
@@ -186,6 +187,11 @@ This document tracks all model weights available in the `/model-weights` directo
 | `DeepSeek-Coder-V2-Lite-Instruct` | ❌ |
 | `deepseek-math-7b-instruct` | ❌ |
 
+### OpenAI: GPT-OSS
+| Model | Configuration |
+|:------|:-------------|
+| `gpt-oss-120b` | ✅ |
+
 ### Other LLM Models
 | Model | Configuration |
 |:------|:-------------|
{vec_inf-0.7.1 → vec_inf-0.7.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.7.1
+Version: 0.7.3
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -13,9 +13,10 @@ Requires-Dist: requests>=2.31.0
 Requires-Dist: rich>=13.7.0
 Provides-Extra: dev
 Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
-Requires-Dist:
+Requires-Dist: flashinfer-python>=0.4.0; extra == 'dev'
+Requires-Dist: ray[default]>=2.50.0; extra == 'dev'
+Requires-Dist: sglang>=0.5.0; extra == 'dev'
 Requires-Dist: torch>=2.7.0; extra == 'dev'
-Requires-Dist: vllm-nccl-cu12<2.19,>=2.18; extra == 'dev'
 Requires-Dist: vllm>=0.10.0; extra == 'dev'
 Requires-Dist: xgrammar>=0.1.11; extra == 'dev'
 Description-Content-Type: text/markdown
@@ -29,7 +30,7 @@ Description-Content-Type: text/markdown
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[](https://docs.vllm.ai/en/v0.11.0/)
 
 
 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -42,7 +43,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
 
 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -75,7 +76,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands
 
 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.
{vec_inf-0.7.1 → vec_inf-0.7.3}/README.md

@@ -7,7 +7,7 @@
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[](https://docs.vllm.ai/en/v0.11.0/)
 
 
 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -20,7 +20,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
 
 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -53,7 +53,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands
 
 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.
{vec_inf-0.7.1 → vec_inf-0.7.3}/docs/index.md

@@ -12,7 +12,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 pip install vec-inf
 ```
 
-Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
+Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
 
 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config), then install from source by running `pip install .`.
{vec_inf-0.7.1 → vec_inf-0.7.3}/docs/user_guide.md

@@ -149,35 +149,52 @@ Since batch launches use heterogeneous jobs, users can request different partiti
 
 ### `status` command
 
-You can check the
+You can check the status of all inference servers launched through `vec-inf` by running the `status` command:
+```bash
+vec-inf status
+```
+
+And you should see an output like this:
+```
+┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Job ID    ┃ Model Name ┃ Status  ┃ Base URL              ┃
+┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
+│ 1434429   │ Qwen3-8B   │ READY   │ http://gpu113:8080/v1 │
+│ 1434584   │ Qwen3-14B  │ READY   │ http://gpu053:8080/v1 │
+│ 1435035+0 │ Qwen3-32B  │ PENDING │ UNAVAILABLE           │
+│ 1435035+1 │ Qwen3-14B  │ PENDING │ UNAVAILABLE           │
+└───────────┴────────────┴─────────┴───────────────────────┘
+```
+
+If you want to check why a specific job is pending or failing, append the job ID to the status command:
 
 ```bash
-vec-inf status
+vec-inf status 1435035+1
 ```
 
 If the server is pending for resources, you should see an output like this:
 
 ```
-
-┃ Job Status ┃ Value
-
-│ Model Name │
-│ Model Status │ PENDING
-│ Pending Reason │ Resources
-│ Base URL │ UNAVAILABLE
-
+┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Job Status     ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ Model Name     │ Qwen3-14B   │
+│ Model Status   │ PENDING     │
+│ Pending Reason │ Resources   │
+│ Base URL       │ UNAVAILABLE │
+└────────────────┴─────────────┘
 ```
 
 When the server is ready, you should see an output like this:
 
 ```
-
-┃ Job Status ┃ Value
-
-│ Model Name │
-│ Model Status │ READY
-│ Base URL │ http://
-
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Job Status   ┃ Value                 ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
+│ Model Name   │ Qwen3-14B             │
+│ Model Status │ READY                 │
+│ Base URL     │ http://gpu105:8080/v1 │
+└──────────────┴───────────────────────┘
 ```
 
 There are 5 possible states:
@@ -190,7 +207,7 @@ There are 5 possible states:
 
 **Note**
 * The base URL is only available when model is in `READY` state.
-* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g.
+* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 1435035+0, 1435035+1).
 
 ### `metrics` command
 
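The new CLI tests further below also exercise a `--json-mode` flag for `status`. A small usage sketch combining the commands shown in this guide (the job ID is the illustrative one from the output above):

```bash
# Check all running vec-inf jobs (plain table, or JSON with --json-mode as in the new tests)
vec-inf status
vec-inf status --json-mode

# Inspect a specific job (ID is the illustrative one from the guide)
vec-inf status 1435035+1
```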
{vec_inf-0.7.1 → vec_inf-0.7.3}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "vec-inf"
-version = "0.7.1"
+version = "0.7.3"
 description = "Efficient LLM inference on Slurm clusters using vLLM."
 readme = "README.md"
 authors = [{name = "Marshall Wang", email = "marshall.wang@vectorinstitute.ai"}]
@@ -42,9 +42,10 @@ dev = [
    "xgrammar>=0.1.11",
    "torch>=2.7.0",
    "vllm>=0.10.0",
-    "
-    "
-    "
+    "ray[default]>=2.50.0",
+    "cupy-cuda12x==12.1.0",
+    "flashinfer-python>=0.4.0",
+    "sglang>=0.5.0",
 ]
 
 [project.scripts]
{vec_inf-0.7.1 → vec_inf-0.7.3}/tests/vec_inf/cli/test_cli.py

@@ -39,6 +39,7 @@ def test_launch_command_success(runner):
        "mem_per_node": "32G",
        "model_weights_parent_dir": "/model-weights",
        "vocab_size": "128000",
+        "venv": "/path/to/venv",
        "vllm_args": {"max_model_len": 8192},
        "env": {"CACHE": "/cache"},
    }
@@ -134,7 +135,7 @@ def test_list_single_model(runner):
 
 
 def test_status_command(runner):
-    """Test status command."""
+    """Test status command with job ID argument."""
    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
        mock_client = MagicMock()
        mock_client_class.return_value = mock_client
@@ -153,6 +154,111 @@ def test_status_command(runner):
        assert "Meta-Llama-3.1-8B" in result.output
 
 
+def test_status_command_no_job_id_no_running_jobs(runner):
+    """Test status command with no argument when no jobs are running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = []
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "No running jobs found." in result.output
+
+
+def test_status_command_no_job_id_single_running_job(runner):
+    """Test status command with no argument when one job is running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345"]
+
+        mock_status = MagicMock()
+        mock_status.model_name = "test-model-1"
+        mock_status.server_status = "READY"
+        mock_status.base_url = "http://localhost:8000"
+        mock_status.pending_reason = None
+        mock_status.failed_reason = None
+        mock_client.get_status.return_value = mock_status
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "test-model-1" in result.output
+        mock_client.fetch_running_jobs.assert_called_once()
+        mock_client.get_status.assert_called_once_with("12345")
+
+
+def test_status_command_no_job_id_multiple_running_jobs(runner):
+    """Test status command with no argument when multiple jobs are running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
+
+        mock_status_1 = MagicMock()
+        mock_status_1.model_name = "test-model-1"
+        mock_status_1.server_status = "READY"
+        mock_status_1.base_url = "http://localhost:8000"
+        mock_status_1.pending_reason = None
+        mock_status_1.failed_reason = None
+
+        mock_status_2 = MagicMock()
+        mock_status_2.model_name = "test-model-2"
+        mock_status_2.server_status = "PENDING"
+        mock_status_2.base_url = None
+        mock_status_2.pending_reason = "Waiting for resources"
+        mock_status_2.failed_reason = None
+
+        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "test-model-1" in result.output
+        assert "test-model-2" in result.output
+        assert "12345" in result.output
+        assert "67890" in result.output
+        mock_client.fetch_running_jobs.assert_called_once()
+        assert mock_client.get_status.call_count == 2
+
+
+def test_status_command_no_job_id_multiple_jobs_json_mode(runner):
+    """Test status command with no argument and JSON mode for multiple jobs."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
+
+        mock_status_1 = MagicMock()
+        mock_status_1.model_name = "test-model-1"
+        mock_status_1.server_status = "READY"
+        mock_status_1.base_url = "http://localhost:8000"
+        mock_status_1.pending_reason = None
+        mock_status_1.failed_reason = None
+
+        mock_status_2 = MagicMock()
+        mock_status_2.model_name = "test-model-2"
+        mock_status_2.server_status = "FAILED"
+        mock_status_2.base_url = None
+        mock_status_2.pending_reason = None
+        mock_status_2.failed_reason = "Out of memory"
+
+        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
+
+        result = runner.invoke(cli, ["status", "--json-mode"])
+
+        assert result.exit_code == 0
+        output = json.loads(result.output)
+        assert isinstance(output, list)
+        assert len(output) == 2
+        assert output[0]["model_name"] == "test-model-1"
+        assert output[0]["model_status"] == "READY"
+        assert output[1]["model_name"] == "test-model-2"
+        assert output[1]["model_status"] == "FAILED"
+
+
 def test_shutdown_command(runner):
    """Test shutdown command."""
    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class: