vec-inf 0.7.2__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/docker.yml +7 -2
  2. {vec_inf-0.7.2 → vec_inf-0.7.3}/.pre-commit-config.yaml +1 -1
  3. {vec_inf-0.7.2 → vec_inf-0.7.3}/PKG-INFO +4 -4
  4. {vec_inf-0.7.2 → vec_inf-0.7.3}/README.md +3 -3
  5. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/index.md +1 -1
  6. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/user_guide.md +35 -18
  7. {vec_inf-0.7.2 → vec_inf-0.7.3}/pyproject.toml +1 -1
  8. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_cli.py +106 -1
  9. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_helper.py +249 -0
  10. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_api.py +186 -0
  11. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_slurm_script_generator.py +6 -5
  12. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/README.md +2 -1
  13. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_cli.py +24 -9
  14. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_helper.py +56 -0
  15. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_helper.py +14 -5
  16. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_slurm_script_generator.py +24 -13
  17. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_slurm_templates.py +10 -12
  18. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_utils.py +4 -1
  19. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/api.py +47 -0
  20. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/config/models.yaml +4 -5
  21. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/find_port.sh +10 -1
  22. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  23. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  24. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  25. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
  26. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/dependabot.yml +0 -0
  27. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/pull_request_template.md +0 -0
  28. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/code_checks.yml +0 -0
  29. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/docs.yml +0 -0
  30. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/publish.yml +0 -0
  31. {vec_inf-0.7.2 → vec_inf-0.7.3}/.github/workflows/unit_tests.yml +0 -0
  32. {vec_inf-0.7.2 → vec_inf-0.7.3}/.gitignore +0 -0
  33. {vec_inf-0.7.2 → vec_inf-0.7.3}/.python-version +0 -0
  34. {vec_inf-0.7.2 → vec_inf-0.7.3}/Dockerfile +0 -0
  35. {vec_inf-0.7.2 → vec_inf-0.7.3}/LICENSE +0 -0
  36. {vec_inf-0.7.2 → vec_inf-0.7.3}/MODEL_TRACKING.md +0 -0
  37. {vec_inf-0.7.2 → vec_inf-0.7.3}/codecov.yml +0 -0
  38. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/Makefile +0 -0
  39. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/api.md +0 -0
  40. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/assets/favicon-48x48.svg +0 -0
  41. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/assets/favicon.ico +0 -0
  42. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/assets/vector-logo.svg +0 -0
  43. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/contributing.md +0 -0
  44. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/make.bat +0 -0
  45. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/overrides/partials/copyright.html +0 -0
  46. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/overrides/partials/logo.html +0 -0
  47. {vec_inf-0.7.2 → vec_inf-0.7.3}/docs/stylesheets/extra.css +0 -0
  48. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/README.md +0 -0
  49. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/api/basic_usage.py +0 -0
  50. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/llm/chat_completions.py +0 -0
  51. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/llm/completions.py +0 -0
  52. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/llm/completions.sh +0 -0
  53. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/text_embedding/embeddings.py +0 -0
  54. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/inference/vlm/vision_completions.py +0 -0
  55. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/logits/logits.py +0 -0
  56. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/README.md +0 -0
  57. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/downstream_job.sbatch +0 -0
  58. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/run_downstream.py +0 -0
  59. {vec_inf-0.7.2 → vec_inf-0.7.3}/examples/slurm_dependency/run_workflow.sh +0 -0
  60. {vec_inf-0.7.2 → vec_inf-0.7.3}/mkdocs.yml +0 -0
  61. {vec_inf-0.7.2 → vec_inf-0.7.3}/profile/avg_throughput.py +0 -0
  62. {vec_inf-0.7.2 → vec_inf-0.7.3}/profile/gen.py +0 -0
  63. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/__init__.py +0 -0
  64. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/test_imports.py +0 -0
  65. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/__init__.py +0 -0
  66. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/__init__.py +0 -0
  67. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/cli/test_utils.py +0 -0
  68. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/__init__.py +0 -0
  69. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_examples.py +0 -0
  70. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_helper.py +0 -0
  71. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_models.py +0 -0
  72. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_utils.py +0 -0
  73. {vec_inf-0.7.2 → vec_inf-0.7.3}/tests/vec_inf/client/test_vars.env +0 -0
  74. {vec_inf-0.7.2 → vec_inf-0.7.3}/uv.lock +0 -0
  75. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/__init__.py +0 -0
  76. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/__init__.py +0 -0
  77. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_utils.py +0 -0
  78. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/cli/_vars.py +0 -0
  79. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/__init__.py +0 -0
  80. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_client_vars.py +0 -0
  81. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_exceptions.py +0 -0
  82. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/_slurm_vars.py +0 -0
  83. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/config.py +0 -0
  84. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/client/models.py +0 -0
  85. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/config/README.md +0 -0
  86. {vec_inf-0.7.2 → vec_inf-0.7.3}/vec_inf/config/environment.yaml +0 -0
  87. {vec_inf-0.7.2 → vec_inf-0.7.3}/venv.sh +0 -0

.github/workflows/docker.yml
@@ -21,7 +21,9 @@ on:
 jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
-    runs-on: ubuntu-latest
+    runs-on:
+      - self-hosted
+      - docker
     steps:
       - name: Checkout repository
        uses: actions/checkout@v5.0.0
@@ -32,6 +34,9 @@ jobs:
          VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
          echo "version=$VERSION" >> $GITHUB_OUTPUT

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - name: Log in to Docker Hub
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
        with:
@@ -40,7 +45,7 @@ jobs:

       - name: Extract metadata (tags, labels) for Docker
        id: meta
-       uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f
+       uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893
        with:
          images: vectorinstitute/vector-inference


.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.14.3'
+    rev: 'v0.14.5'
     hooks:
       - id: ruff
        args: [--fix, --exit-non-zero-on-fix]

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.7.2
+Version: 0.7.3
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -30,7 +30,7 @@ Description-Content-Type: text/markdown
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.10.1.1-blue)](https://docs.vllm.ai/en/v0.10.1.1/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -43,7 +43,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.10.1.1`.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -76,7 +76,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands

 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the model status by providing its Slurm job ID.
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.

README.md
@@ -7,7 +7,7 @@
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.10.1.1-blue)](https://docs.vllm.ai/en/v0.10.1.1/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

 This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -20,7 +20,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.10.1.1`.
+Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -53,7 +53,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
 #### Other commands

 * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
-* `status`: Check the model status by providing its Slurm job ID.
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.

docs/index.md
@@ -12,7 +12,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
 pip install vec-inf
 ```

-Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.10.1.1`.
+Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config), then install from source by running `pip install .`.

docs/user_guide.md
@@ -149,35 +149,52 @@ Since batch launches use heterogeneous jobs, users can request different partiti

 ### `status` command

-You can check the inference server status by providing the Slurm job ID to the `status` command:
+You can check the status of all inference servers launched through `vec-inf` by running the `status` command:
+```bash
+vec-inf status
+```
+
+And you should see an output like this:
+```
+┏━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Job ID    ┃ Model Name ┃ Status  ┃ Base URL              ┃
+┡━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
+│ 1434429   │ Qwen3-8B   │ READY   │ http://gpu113:8080/v1 │
+│ 1434584   │ Qwen3-14B  │ READY   │ http://gpu053:8080/v1 │
+│ 1435035+0 │ Qwen3-32B  │ PENDING │ UNAVAILABLE           │
+│ 1435035+1 │ Qwen3-14B  │ PENDING │ UNAVAILABLE           │
+└───────────┴────────────┴─────────┴───────────────────────┘
+```
+
+If you want to check why a specific job is pending or failing, append the job ID to the status command:

 ```bash
-vec-inf status 15373800
+vec-inf status 1435035+1
 ```

 If the server is pending for resources, you should see an output like this:

 ```
-┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ Job Status     ┃ Value                      ┃
-┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ Model Name     │ Meta-Llama-3.1-8B-Instruct │
-│ Model Status   │ PENDING                    │
-│ Pending Reason │ Resources                  │
-│ Base URL       │ UNAVAILABLE                │
-└────────────────┴────────────────────────────┘
+┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ Job Status     ┃ Value       ┃
+┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ Model Name     │ Qwen3-14B   │
+│ Model Status   │ PENDING     │
+│ Pending Reason │ Resources   │
+│ Base URL       │ UNAVAILABLE │
+└────────────────┴─────────────┘
 ```

 When the server is ready, you should see an output like this:

 ```
-┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ Job Status   ┃ Value                      ┃
-┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ Model Name   │ Meta-Llama-3.1-8B-Instruct │
-│ Model Status │ READY                      │
-│ Base URL     │ http://gpu042:8080/v1      │
-└──────────────┴────────────────────────────┘
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ Job Status   ┃ Value                 ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
+│ Model Name   │ Qwen3-14B             │
+│ Model Status │ READY                 │
+│ Base URL     │ http://gpu105:8080/v1 │
+└──────────────┴───────────────────────┘
 ```

 There are 5 possible states:
@@ -190,7 +207,7 @@ There are 5 possible states:

 **Note**
 * The base URL is only available when model is in `READY` state.
-* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 17480109+0, 17480109+1).
+* For servers launched with `batch-launch`, the job ID should follow the format of "MAIN_JOB_ID+OFFSET" (e.g. 1435035+0, 1435035+1).

 ### `metrics` command

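
The updated guide documents the new no-argument `status` behaviour only through the CLI, but the same information appears reachable from Python: this release also extends the client API (`vec_inf/client/api.py`, +47 lines), and the new CLI tests exercise `fetch_running_jobs()` and `get_status()` on a `VecInfClient` instance. The following is a minimal sketch of reproducing the `vec-inf status` listing programmatically; the `from vec_inf.client import VecInfClient` import path and the exact printing are assumptions, and only the two method calls are taken from the tests in this diff.

```python
# Minimal sketch based on the new tests in this release -- not verbatim library code.
# Assumption: VecInfClient is importable from vec_inf.client.
from vec_inf.client import VecInfClient

client = VecInfClient()

# fetch_running_jobs() (new in 0.7.3) returns the Slurm job IDs of the user's
# vec-inf jobs; get_status() is the existing per-job status call.
for job_id in client.fetch_running_jobs():
    status = client.get_status(job_id)
    print(f"{job_id}\t{status.model_name}\t{status.server_status}\t{status.base_url or 'UNAVAILABLE'}")
```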

pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "vec-inf"
-version = "0.7.2"
+version = "0.7.3"
 description = "Efficient LLM inference on Slurm clusters using vLLM."
 readme = "README.md"
 authors = [{name = "Marshall Wang", email = "marshall.wang@vectorinstitute.ai"}]

tests/vec_inf/cli/test_cli.py
@@ -135,7 +135,7 @@ def test_list_single_model(runner):


 def test_status_command(runner):
-    """Test status command."""
+    """Test status command with job ID argument."""
     with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
@@ -154,6 +154,111 @@ def test_status_command(runner):
         assert "Meta-Llama-3.1-8B" in result.output


+def test_status_command_no_job_id_no_running_jobs(runner):
+    """Test status command with no argument when no jobs are running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = []
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "No running jobs found." in result.output
+
+
+def test_status_command_no_job_id_single_running_job(runner):
+    """Test status command with no argument when one job is running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345"]
+
+        mock_status = MagicMock()
+        mock_status.model_name = "test-model-1"
+        mock_status.server_status = "READY"
+        mock_status.base_url = "http://localhost:8000"
+        mock_status.pending_reason = None
+        mock_status.failed_reason = None
+        mock_client.get_status.return_value = mock_status
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "test-model-1" in result.output
+        mock_client.fetch_running_jobs.assert_called_once()
+        mock_client.get_status.assert_called_once_with("12345")
+
+
+def test_status_command_no_job_id_multiple_running_jobs(runner):
+    """Test status command with no argument when multiple jobs are running."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
+
+        mock_status_1 = MagicMock()
+        mock_status_1.model_name = "test-model-1"
+        mock_status_1.server_status = "READY"
+        mock_status_1.base_url = "http://localhost:8000"
+        mock_status_1.pending_reason = None
+        mock_status_1.failed_reason = None
+
+        mock_status_2 = MagicMock()
+        mock_status_2.model_name = "test-model-2"
+        mock_status_2.server_status = "PENDING"
+        mock_status_2.base_url = None
+        mock_status_2.pending_reason = "Waiting for resources"
+        mock_status_2.failed_reason = None
+
+        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
+
+        result = runner.invoke(cli, ["status"])
+
+        assert result.exit_code == 0
+        assert "test-model-1" in result.output
+        assert "test-model-2" in result.output
+        assert "12345" in result.output
+        assert "67890" in result.output
+        mock_client.fetch_running_jobs.assert_called_once()
+        assert mock_client.get_status.call_count == 2
+
+
+def test_status_command_no_job_id_multiple_jobs_json_mode(runner):
+    """Test status command with no argument and JSON mode for multiple jobs."""
+    with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.fetch_running_jobs.return_value = ["12345", "67890"]
+
+        mock_status_1 = MagicMock()
+        mock_status_1.model_name = "test-model-1"
+        mock_status_1.server_status = "READY"
+        mock_status_1.base_url = "http://localhost:8000"
+        mock_status_1.pending_reason = None
+        mock_status_1.failed_reason = None
+
+        mock_status_2 = MagicMock()
+        mock_status_2.model_name = "test-model-2"
+        mock_status_2.server_status = "FAILED"
+        mock_status_2.base_url = None
+        mock_status_2.pending_reason = None
+        mock_status_2.failed_reason = "Out of memory"
+
+        mock_client.get_status.side_effect = [mock_status_1, mock_status_2]
+
+        result = runner.invoke(cli, ["status", "--json-mode"])
+
+        assert result.exit_code == 0
+        output = json.loads(result.output)
+        assert isinstance(output, list)
+        assert len(output) == 2
+        assert output[0]["model_name"] == "test-model-1"
+        assert output[0]["model_status"] == "READY"
+        assert output[1]["model_name"] == "test-model-2"
+        assert output[1]["model_status"] == "FAILED"
+
+
 def test_shutdown_command(runner):
     """Test shutdown command."""
     with patch("vec_inf.cli._cli.VecInfClient") as mock_client_class:

tests/vec_inf/cli/test_helper.py
@@ -10,6 +10,7 @@ from vec_inf.cli._helper import (
     BatchLaunchResponseFormatter,
     LaunchResponseFormatter,
     ListCmdDisplay,
+    ListStatusDisplay,
     MetricsResponseFormatter,
     StatusResponseFormatter,
 )
@@ -521,3 +522,251 @@ class TestListCmdDisplay:
         with patch.object(console, "print") as mock_print:
             display.display_all_models_output(model_infos)
             mock_print.assert_called_once()
+
+
+class TestListStatusDisplay:
+    """Test cases for ListStatusDisplay."""
+
+    def test_init(self):
+        """Test ListStatusDisplay initialization."""
+        job_ids = ["12345", "67890"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model-1",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            ),
+            StatusResponse(
+                model_name="test-model-2",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Waiting for resources",
+                failed_reason=None,
+            ),
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        assert display.job_ids == job_ids
+        assert display.statuses == statuses
+        assert display.json_mode is False
+        assert isinstance(display.table, Table)
+
+    def test_init_json_mode(self):
+        """Test ListStatusDisplay initialization with JSON mode."""
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        assert display.json_mode is True
+
+    def test_display_multiple_status_output_table_mode(self):
+        """Test displaying multiple statuses in table mode."""
+        console = Console()
+        job_ids = ["12345", "67890"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model-1",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            ),
+            StatusResponse(
+                model_name="test-model-2",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Waiting for resources",
+                failed_reason=None,
+            ),
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+            # Verify the table was printed
+            assert mock_print.call_args[0][0] == display.table
+
+    def test_display_multiple_status_output_json_mode(self):
+        """Test displaying multiple statuses in JSON mode."""
+        console = Console()
+        job_ids = ["12345", "67890"]
+        statuses = [
+            StatusResponse(
+                model_name="test-model-1",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            ),
+            StatusResponse(
+                model_name="test-model-2",
+                log_dir="/tmp/logs",
+                server_status="FAILED",
+                job_state="FAILED",
+                raw_output="JobState=FAILED",
+                base_url=None,
+                pending_reason=None,
+                failed_reason="Out of memory",
+            ),
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        with patch("click.echo") as mock_echo:
+            display.display_multiple_status_output(console)
+            mock_echo.assert_called_once()
+
+            # Verify JSON output
+            output = mock_echo.call_args[0][0]
+            json_data = json.loads(output)
+            assert isinstance(json_data, list)
+            assert len(json_data) == 2
+            assert json_data[0]["model_name"] == "test-model-1"
+            assert json_data[0]["model_status"] == "READY"
+            assert json_data[0]["base_url"] == "http://localhost:8000"
+            assert json_data[1]["model_name"] == "test-model-2"
+            assert json_data[1]["model_status"] == "FAILED"
+            assert json_data[1]["base_url"] is None
+
+    def test_display_multiple_status_output_empty_list(self):
+        """Test displaying empty status list."""
+        console = Console()
+        job_ids = []
+        statuses = []
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+
+    def test_display_multiple_status_output_empty_list_json(self):
+        """Test displaying empty status list in JSON mode."""
+        console = Console()
+        job_ids = []
+        statuses = []
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        with patch("click.echo") as mock_echo:
+            display.display_multiple_status_output(console)
+            mock_echo.assert_called_once()
+
+            output = mock_echo.call_args[0][0]
+            json_data = json.loads(output)
+            assert isinstance(json_data, list)
+            assert len(json_data) == 0
+
+    def test_display_multiple_status_output_single_status(self):
+        """Test displaying single status."""
+        console = Console()
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="single-model",
+                log_dir="/tmp/logs",
+                server_status="READY",
+                job_state="RUNNING",
+                raw_output="JobState=RUNNING",
+                base_url="http://localhost:8000",
+                pending_reason=None,
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+            # Verify table has one row
+            assert len(display.table.rows) == 1
+
+    def test_display_multiple_status_output_with_none_base_url(self):
+        """Test displaying statuses with None base_url."""
+        console = Console()
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="pending-model",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Resource allocation",
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=False)
+
+        with patch.object(console, "print") as mock_print:
+            display.display_multiple_status_output(console)
+            mock_print.assert_called_once()
+            # Verify the row was added (None base_url should be handled gracefully)
+            assert len(display.table.rows) == 1
+            # Verify table has correct number of columns
+            assert (
+                len(display.table.columns) == 4
+            )  # Job ID, Model Name, Status, Base URL
+
+    def test_display_multiple_status_output_json_with_none_values(self):
+        """Test JSON output with None values."""
+        console = Console()
+        job_ids = ["12345"]
+        statuses = [
+            StatusResponse(
+                model_name="pending-model",
+                log_dir="/tmp/logs",
+                server_status="PENDING",
+                job_state="PENDING",
+                raw_output="JobState=PENDING",
+                base_url=None,
+                pending_reason="Waiting",
+                failed_reason=None,
+            )
+        ]
+
+        display = ListStatusDisplay(job_ids, statuses, json_mode=True)
+
+        with patch("click.echo") as mock_echo:
+            display.display_multiple_status_output(console)
+            mock_echo.assert_called_once()
+
+            output = mock_echo.call_args[0][0]
+            json_data = json.loads(output)
+            assert json_data[0]["base_url"] is None
+            assert json_data[0]["model_status"] == "PENDING"
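
The tests above pin down the public surface of the new `ListStatusDisplay` helper: it takes parallel lists of job IDs and `StatusResponse` objects plus a `json_mode` flag, and `display_multiple_status_output()` either prints a four-column Rich table (Job ID, Model Name, Status, Base URL) or emits a JSON list via `click.echo`. Below is a condensed usage sketch with field values copied from the tests; the `StatusResponse` import path is an assumption, since the hunk does not show it.

```python
# Usage sketch condensed from TestListStatusDisplay above; not library source.
from rich.console import Console

from vec_inf.cli._helper import ListStatusDisplay
from vec_inf.client.models import StatusResponse  # assumed import path

statuses = [
    StatusResponse(
        model_name="Qwen3-14B",
        log_dir="/tmp/logs",
        server_status="READY",
        job_state="RUNNING",
        raw_output="JobState=RUNNING",
        base_url="http://gpu105:8080/v1",
        pending_reason=None,
        failed_reason=None,
    )
]

# json_mode=False renders the Rich table; json_mode=True would print a JSON list instead.
display = ListStatusDisplay(["1434584"], statuses, json_mode=False)
display.display_multiple_status_output(Console())
```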