vec-inf 0.7.2.tar.gz → 0.7.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/docker.yml +7 -2
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.pre-commit-config.yaml +1 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/PKG-INFO +4 -4
- {vec_inf-0.7.2 → vec_inf-0.7.3}/README.md +3 -3
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/index.md +1 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/user_guide.md +35 -18
- {vec_inf-0.7.2 → vec_inf-0.7.3}/pyproject.toml +1 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_cli.py +106 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_helper.py +249 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_api.py +186 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_slurm_script_generator.py +6 -5
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/README.md +2 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_cli.py +24 -9
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_helper.py +56 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_helper.py +14 -5
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_slurm_script_generator.py +24 -13
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_slurm_templates.py +10 -12
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_utils.py +4 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/api.py +47 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/config/models.yaml +4 -5
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/find_port.sh +10 -1
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/dependabot.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/pull_request_template.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/code_checks.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/docs.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/publish.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/unit_tests.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.gitignore +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/.python-version +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/Dockerfile +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/LICENSE +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/MODEL_TRACKING.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/codecov.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/Makefile +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/api.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/assets/favicon-48x48.svg +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/assets/favicon.ico +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/assets/vector-logo.svg +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/contributing.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/make.bat +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/overrides/partials/copyright.html +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/overrides/partials/logo.html +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/stylesheets/extra.css +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/README.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/api/basic_usage.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/llm/chat_completions.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/llm/completions.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/llm/completions.sh +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/text_embedding/embeddings.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/vlm/vision_completions.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/logits/logits.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/README.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/downstream_job.sbatch +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/run_downstream.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/run_workflow.sh +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/mkdocs.yml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/profile/avg_throughput.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/profile/gen.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/test_imports.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_utils.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_examples.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_helper.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_models.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_utils.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_vars.env +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/uv.lock +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_utils.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_vars.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_client_vars.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_exceptions.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_slurm_vars.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/config.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/models.py +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/config/README.md +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/config/environment.yaml +0 -0
- {vec_inf-0.7.2 → vec_inf-0.7.3}/venv.sh +0 -0

{vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/docker.yml

@@ -21,7 +21,9 @@ on:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    runs-on:
+    runs-on:
+      - self-hosted
+      - docker
     steps:
       - name: Checkout repository
         uses: actions/checkout@v5.0.0
@@ -32,6 +34,9 @@ jobs:
           VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
           echo "version=$VERSION" >> $GITHUB_OUTPUT

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - name: Log in to Docker Hub
         uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
         with:
@@ -40,7 +45,7 @@ jobs:

       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
         with:
           images: vectorinstitute/vector-inference

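The `VERSION` step above derives the image tag from the vLLM pin in `uv.lock`: it grabs the line after `name = "vllm"` and cuts out the quoted version. A rough Python equivalent of that shell pipeline, for illustration only (`read_vllm_version` is a hypothetical helper, not part of vec-inf):

```python
from pathlib import Path


def read_vllm_version(lockfile: str = "uv.lock") -> str | None:
    """Mirror `grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2`."""
    lines = Path(lockfile).read_text().splitlines()
    for i, line in enumerate(lines):
        # The lockfile lists `name = "vllm"` followed by a `version = "..."` line.
        if line.strip() == 'name = "vllm"' and i + 1 < len(lines):
            nxt = lines[i + 1].strip()
            if nxt.startswith("version"):
                return nxt.split('"')[1]
    return None


print(read_vllm_version())
```
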
{vec_inf-0.7.2 → vec_inf-0.7.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.7.2
+Version: 0.7.3
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -30,7 +30,7 @@ Description-Content-Type: text/markdown
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[](https://docs.vllm.ai/en/
+[](https://docs.vllm.ai/en/v0.11.0/)
 

 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -43,7 +43,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -76,7 +76,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands

 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.

{vec_inf-0.7.2 → vec_inf-0.7.3}/README.md

@@ -7,7 +7,7 @@
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[](https://docs.vllm.ai/en/
+[](https://docs.vllm.ai/en/v0.11.0/)
 

 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -20,7 +20,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -53,7 +53,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands

 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.

{vec_inf-0.7.2 → vec_inf-0.7.3}/docs/index.md

@@ -12,7 +12,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 pip install vec-inf
 ```

-Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
+Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config), then install from source by running `pip install .`.

{vec_inf-0.7.2 → vec_inf-0.7.3}/docs/user_guide.md

@@ -149,35 +149,52 @@ Since batch launches use heterogeneous jobs, users can request different partiti

 ### `status` command

-You can check the
+You can check the status of all inference servers launched through `vec-inf` by running the `status` command:
+```bash
+vec-inf status
+```
+
+And you should see an output like this:
+```
+┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Job ID    ┃ Model Name ┃ Status  ┃ Base URL              ┃
+┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
+│ 1434429   │ Qwen3-8B   │ READY   │ http://gpu113:8080/v1 │
+│ 1434584   │ Qwen3-14B  │ READY   │ http://gpu053:8080/v1 │
+│ 1435035+0 │ Qwen3-32B  │ PENDING │ UNAVAILABLE           │
+│ 1435035+1 │ Qwen3-14B  │ PENDING │ UNAVAILABLE           │
+└───────────┴────────────┴─────────┴───────────────────────┘
+```
+
+If you want to check why a specific job is pending or failing, append the job ID to the status command:

 ```bash
-vec-inf status
+vec-inf status 1435035+1
 ```

 If the server is pending for resources, you should see an output like this:

 ```
-
-┃ Job Status ┃ Value
-
-│ Model Name │
-│ Model Status │ PENDING
-│ Pending Reason │ Resources
-│ Base URL │ UNAVAILABLE
-
+┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Job Status     ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ Model Name     │ Qwen3-14B   │
+│ Model Status   │ PENDING     │
+│ Pending Reason │ Resources   │
+│ Base URL       │ UNAVAILABLE │
+└────────────────┴─────────────┘
 ```

 When the server is ready, you should see an output like this:

 ```
-
-┃ Job Status ┃ Value
-
-│ Model Name │
-│ Model Status │ READY
-│ Base URL │ http://
-
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Job Status   ┃ Value                 ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
+│ Model Name   │ Qwen3-14B             │
+│ Model Status │ READY                 │
+│ Base URL     │ http://gpu105:8080/v1 │
+└──────────────┴───────────────────────┘
 ```

 There are 5 possible states:
@@ -190,7 +207,7 @@ There are 5 possible states:

 **Note**
 * The base URL is only available when model is in `READY` state.
-* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g.
+* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 1435035+0, 1435035+1).

 ### `metrics` command

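The aggregate view shown above is also reachable programmatically. A minimal sketch, assuming `VecInfClient` is importable from `vec_inf.client.api` (per the package layout) and exposes the `fetch_running_jobs`/`get_status` methods that the new CLI tests exercise:

```python
from vec_inf.client.api import VecInfClient  # assumed import path

client = VecInfClient()

# One row per running job, mirroring what `vec-inf status` prints.
for job_id in client.fetch_running_jobs():
    status = client.get_status(job_id)
    print(job_id, status.model_name, status.server_status, status.base_url)
```
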
{vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_cli.py

@@ -135,7 +135,7 @@ def test_list_single_model(runner):


 def test_status_command(runner):
-    """Test status command."""
+    """Test status command with job ID argument."""
     with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
@@ -154,6 +154,111 @@ def test_status_command(runner):
         assert "Meta-Llama-3.1-8B" in result.output


+def test_status_command_no_job_id_no_running_jobs(runner):
+    """Test status command with no argument when no jobs are running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = []
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "No running jobs found." in result.output
+
+
+def test_status_command_no_job_id_single_running_job(runner):
+    """Test status command with no argument when one job is running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345"]
+
+        mock_status = MagicMock()
+        mock_status.model_name = "test-model-1"
+        mock_status.server_status = "READY"
+        mock_status.base_url = "http://localhost:8000"
+        mock_status.pending_reason = None
+        mock_status.failed_reason = None
+        mock_client.get_status.return_value = mock_status
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "test-model-1" in result.output
+        mock_client.fetch_running_jobs.assert_called_once()
+        mock_client.get_status.assert_called_once_with("12345")
+
+
+def test_status_command_no_job_id_multiple_running_jobs(runner):
+    """Test status command with no argument when multiple jobs are running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
+
+        mock_status_1 = MagicMock()
+        mock_status_1.model_name = "test-model-1"
+        mock_status_1.server_status = "READY"
+        mock_status_1.base_url = "http://localhost:8000"
+        mock_status_1.pending_reason = None
+        mock_status_1.failed_reason = None
+
+        mock_status_2 = MagicMock()
+        mock_status_2.model_name = "test-model-2"
+        mock_status_2.server_status = "PENDING"
+        mock_status_2.base_url = None
+        mock_status_2.pending_reason = "Waiting for resources"
+        mock_status_2.failed_reason = None
+
+        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "test-model-1" in result.output
+        assert "test-model-2" in result.output
+        assert "12345" in result.output
+        assert "67890" in result.output
+        mock_client.fetch_running_jobs.assert_called_once()
+        assert mock_client.get_status.call_count == 2
+
+
+def test_status_command_no_job_id_multiple_jobs_json_mode(runner):
+    """Test status command with no argument and JSON mode for multiple jobs."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
+
+        mock_status_1 = MagicMock()
+        mock_status_1.model_name = "test-model-1"
+        mock_status_1.server_status = "READY"
+        mock_status_1.base_url = "http://localhost:8000"
+        mock_status_1.pending_reason = None
+        mock_status_1.failed_reason = None
+
+        mock_status_2 = MagicMock()
+        mock_status_2.model_name = "test-model-2"
+        mock_status_2.server_status = "FAILED"
+        mock_status_2.base_url = None
+        mock_status_2.pending_reason = None
+        mock_status_2.failed_reason = "Out of memory"
+
+        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
+
+        result = runner.invoke(cli, ["status", "--json-mode"])
+
+        assert result.exit_code == 0
+        output = json.loads(result.output)
+        assert isinstance(output, list)
+        assert len(output) == 2
+        assert output[0]["model_name"] == "test-model-1"
+        assert output[0]["model_status"] == "READY"
+        assert output[1]["model_name"] == "test-model-2"
+        assert output[1]["model_status"] == "FAILED"
+
+
 def test_shutdown_command(runner):
     """Test shutdown command."""
     with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:

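For shell scripts and pipelines, the `--json-mode` flag tested above is the stable surface to consume. A small sketch of parsing it (assumes `vec-inf` is on `PATH`; the keys follow the test assertions):

```python
import json
import subprocess

# Run the aggregate status command in JSON mode and parse the list it emits.
raw = subprocess.run(
    ["vec-inf", "status", "--json-mode"],
    capture_output=True,
    text=True,
    check=True,
).stdout

for entry in json.loads(raw):
    # Keys per the test assertions: model_name, model_status, base_url.
    print(entry["model_name"], entry["model_status"], entry.get("base_url"))
```
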
{vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_helper.py

@@ -10,6 +10,7 @@ from vec_inf.cli._helper import (
     BatchLaunchResponseFormatter,
     LaunchResponseFormatter,
     ListCmdDisplay,
+    ListStatusDisplay,
     MetricsResponseFormatter,
     StatusResponseFormatter,
 )
@@ -521,3 +522,251 @@ class TestListCmdDisplay:
         with patch.object(console, "print") as mock_print:
             display.display_all_models_output(model_infos)
             mock_print.assert_called_once()
+
+
+class TestListStatusDisplay:
+    """Test cases for ListStatusDisplay."""
+
+    def test_init(self):
+        """Test ListStatusDisplay initialization."""
+        job_ids = ["12345", "67890"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model-1",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            ),
+            StatusResponse(
+                model_name="test-model-2",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Waiting for resources",
+                failed_reason=None,
+            ),
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        assert display.job_ids == job_ids
+        assert display.statuses == statuses
+        assert display.json_mode is False
+        assert isinstance(display.table, Table)
+
+    def test_init_json_mode(self):
+        """Test ListStatusDisplay initialization with JSON mode."""
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        assert display.json_mode is True
+
+    def test_display_multiple_status_output_table_mode(self):
+        """Test displaying multiple statuses in table mode."""
+        console = Console()
+        job_ids = ["12345", "67890"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model-1",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            ),
+            StatusResponse(
+                model_name="test-model-2",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Waiting for resources",
+                failed_reason=None,
+            ),
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+            # Verify the table was printed
+            assert mock_print.call_args[0][0] == display.table
+
+    def test_display_multiple_status_output_json_mode(self):
+        """Test displaying multiple statuses in JSON mode."""
+        console = Console()
+        job_ids = ["12345", "67890"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model-1",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            ),
+            StatusResponse(
+                model_name="test-model-2",
+                log_dir="/tmp/logs",
+                server_status="FAILED",
+                job_state="FAILED",
+                raw_output="JobState=FAILED",
+                base_url=None,
+                pending_reason=None,
+                failed_reason="Out of memory",
+            ),
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        with patch("click.echo") as mock_echo:
+            display.display_multiple_status_output(console)
+            mock_echo.assert_called_once()
+
+            # Verify JSON output
+            output = mock_echo.call_args[0][0]
+            json_data = json.loads(output)
+            assert isinstance(json_data, list)
+            assert len(json_data) == 2
+            assert json_data[0]["model_name"] == "test-model-1"
+            assert json_data[0]["model_status"] == "READY"
+            assert json_data[0]["base_url"] == "http://localhost:8000"
+            assert json_data[1]["model_name"] == "test-model-2"
+            assert json_data[1]["model_status"] == "FAILED"
+            assert json_data[1]["base_url"] is None
+
+    def test_display_multiple_status_output_empty_list(self):
+        """Test displaying empty status list."""
+        console = Console()
+        job_ids = []
+        statuses = []
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+
+    def test_display_multiple_status_output_empty_list_json(self):
+        """Test displaying empty status list in JSON mode."""
+        console = Console()
+        job_ids = []
+        statuses = []
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        with patch("click.echo") as mock_echo:
+            display.display_multiple_status_output(console)
+            mock_echo.assert_called_once()
+
+            output = mock_echo.call_args[0][0]
+            json_data = json.loads(output)
+            assert isinstance(json_data, list)
+            assert len(json_data) == 0
+
+    def test_display_multiple_status_output_single_status(self):
+        """Test displaying single status."""
+        console = Console()
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="single-model",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+            # Verify table has one row
+            assert len(display.table.rows) == 1
+
+    def test_display_multiple_status_output_with_none_base_url(self):
+        """Test displaying statuses with None base_url."""
+        console = Console()
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="pending-model",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Resource allocation",
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+            # Verify the row was added (None base_url should be handled gracefully)
+            assert len(display.table.rows) == 1
+            # Verify table has correct number of columns
+            assert (
+                len(display.table.columns) == 4
+            )  # Job ID, Model Name, Status, Base URL
+
+    def test_display_multiple_status_output_json_with_none_values(self):
+        """Test JSON output with None values."""
+        console = Console()
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="pending-model",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Waiting",
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        with patch("click.echo") as mock_echo:
+            display.display_multiple_status_output(console)
+            mock_echo.assert_called_once()
+
+            output = mock_echo.call_args[0][0]
+            json_data = json.loads(output)
+            assert json_data[0]["base_url"] is None
+            assert json_data[0]["model_status"] == "PENDING"