vec-inf 0.4.1 → 0.6.0 (py3-none-any.whl)
- vec_inf/README.md +3 -3
- vec_inf/cli/_cli.py +227 -325
- vec_inf/cli/_helper.py +400 -0
- vec_inf/cli/_utils.py +26 -135
- vec_inf/cli/_vars.py +32 -0
- vec_inf/client/__init__.py +31 -0
- vec_inf/client/_client_vars.py +213 -0
- vec_inf/client/_exceptions.py +37 -0
- vec_inf/client/_helper.py +674 -0
- vec_inf/client/_slurm_script_generator.py +179 -0
- vec_inf/client/_utils.py +287 -0
- vec_inf/client/api.py +302 -0
- vec_inf/client/config.py +128 -0
- vec_inf/client/models.py +225 -0
- vec_inf/client/slurm_vars.py +49 -0
- vec_inf/{models → config}/README.md +30 -12
- vec_inf/config/models.yaml +1300 -0
- vec_inf-0.6.0.dist-info/METADATA +193 -0
- vec_inf-0.6.0.dist-info/RECORD +25 -0
- vec_inf/launch_server.sh +0 -145
- vec_inf/models/models.csv +0 -85
- vec_inf/multinode_vllm.slurm +0 -124
- vec_inf/vllm.slurm +0 -59
- vec_inf-0.4.1.dist-info/METADATA +0 -121
- vec_inf-0.4.1.dist-info/RECORD +0 -16
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/README.md CHANGED

```diff
@@ -1,9 +1,9 @@
 # `vec-inf` Commands
 
-* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server, `--json-mode` supported.
-* `list`: List all available model names, or append a supported model name to view the default configuration, `--json-mode` supported.
-* `metrics`: Streams performance metrics to the console.
+* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server, `--json-mode` supported.
 * `status`: Check the model status by providing its Slurm job ID, `--json-mode` supported.
+* `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
+* `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported.
 
 Use `--help` to see all available options
```
vec_inf/cli/_cli.py CHANGED

```diff
@@ -1,17 +1,37 @@
-"""Command line interface for Vector Inference.
+"""Command line interface for Vector Inference.
+
+This module provides the command-line interface for interacting with Vector
+Inference services, including model launching, status checking, metrics
+monitoring, and shutdown operations.
+
+Commands
+--------
+launch
+    Launch a model on the cluster
+status
+    Check the status of a running model
+shutdown
+    Stop a running model
+list
+    List available models or get specific model configuration
+metrics
+    Stream real-time performance metrics
+"""
 
-import os
 import time
-from typing import
+from typing import Optional, Union
 
 import click
-import polars as pl
-from rich.columns import Columns
 from rich.console import Console
 from rich.live import Live
-from rich.panel import Panel
 
-
+from vec_inf.cli._helper import (
+    LaunchResponseFormatter,
+    ListCmdDisplay,
+    MetricsResponseFormatter,
+    StatusResponseFormatter,
+)
+from vec_inf.client import LaunchOptions, VecInfClient
 
 
 CONSOLE = Console()
```
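The import hunk above captures the shape of the 0.6.0 refactor: the CLI drops its own `polars` table handling and Rich panel rendering and delegates all cluster interaction to the new public `vec_inf.client` package, keeping only response formatters in `vec_inf.cli._helper`. Below is a minimal sketch of the programmatic surface the CLI now wraps, using only calls that appear in the hunks that follow; the model name and job ID are hypothetical.

```python
# Hedged sketch of the vec_inf.client API as exercised by this CLI diff.
# Method names are taken from the hunks below; argument values are made up.
from vec_inf.client import LaunchOptions, VecInfClient

client = VecInfClient()

# CLI equivalent: vec-inf launch <model-name> --num-nodes 1 --gpus-per-node 1
options = LaunchOptions(num_nodes=1, gpus_per_node=1)  # fields assumed to mirror CLI flags
response = client.launch_model("some-model", options)  # hypothetical model name
print(response.config)  # the launch command prints this in --json-mode

# CLI equivalent: vec-inf status <slurm-job-id>
status_response = client.get_status(12345678)  # hypothetical Slurm job ID

# CLI equivalent: vec-inf shutdown <slurm-job-id>
client.shutdown_model(12345678)
```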
```diff
@@ -27,21 +47,10 @@ def cli() -> None:
 @click.argument("model-name", type=str, nargs=1)
 @click.option("--model-family", type=str, help="The model family")
 @click.option("--model-variant", type=str, help="The model variant")
-@click.option(
-    "--max-model-len",
-    type=int,
-    help="Model context length. Default value set based on suggested resource allocation.",
-)
-@click.option(
-    "--max-num-seqs",
-    type=int,
-    help="Maximum number of sequences to process in a single request",
-)
 @click.option(
     "--partition",
     type=str,
-
-    help="Type of compute partition, default to a40",
+    help="Type of compute partition",
 )
 @click.option(
     "--num-nodes",
@@ -49,10 +58,15 @@
     help="Number of nodes to use, default to suggested resource allocation for model",
 )
 @click.option(
-    "--
+    "--gpus-per-node",
     type=int,
     help="Number of GPUs/node to use, default to suggested resource allocation for model",
 )
+@click.option(
+    "--account",
+    type=str,
+    help="Charge resources used by this job to specified account.",
+)
 @click.option(
     "--qos",
     type=str,
@@ -63,41 +77,25 @@
     type=str,
     help="Time limit for job, this should comply with QoS limits",
 )
-@click.option(
-    "--vocab-size",
-    type=int,
-    help="Vocabulary size, this option is intended for custom models",
-)
-@click.option(
-    "--data-type", type=str, default="auto", help="Model data type, default to auto"
-)
 @click.option(
     "--venv",
     type=str,
-
-    help="Path to virtual environment, default to preconfigured singularity container",
+    help="Path to virtual environment",
 )
 @click.option(
     "--log-dir",
     type=str,
-
-    help="Path to slurm log directory, default to .vec-inf-logs in user home directory",
+    help="Path to slurm log directory",
 )
 @click.option(
     "--model-weights-parent-dir",
     type=str,
-
-    help="Path to parent directory containing model weights, default to '/model-weights' for supported models",
+    help="Path to parent directory containing model weights",
 )
 @click.option(
-    "--
+    "--vllm-args",
     type=str,
-    help="
-)
-@click.option(
-    "--enforce-eager",
-    type=str,
-    help="Always use eager-mode PyTorch, accepts 'True' or 'False', default to 'False' for custom models if not set",
+    help="vLLM engine arguments to be set, use the format as specified in vLLM documentation and separate arguments with commas, e.g. --vllm-args '--max-model-len=8192,--max-num-seqs=256,--enable-prefix-caching'",
 )
 @click.option(
     "--json-mode",
```
```diff
@@ -106,77 +104,69 @@
 )
 def launch(
     model_name: str,
-
-    model_variant: Optional[str] = None,
-    max_model_len: Optional[int] = None,
-    max_num_seqs: Optional[int] = None,
-    partition: Optional[str] = None,
-    num_nodes: Optional[int] = None,
-    num_gpus: Optional[int] = None,
-    qos: Optional[str] = None,
-    time: Optional[str] = None,
-    vocab_size: Optional[int] = None,
-    data_type: Optional[str] = None,
-    venv: Optional[str] = None,
-    log_dir: Optional[str] = None,
-    model_weights_parent_dir: Optional[str] = None,
-    pipeline_parallelism: Optional[str] = None,
-    enforce_eager: Optional[str] = None,
-    json_mode: bool = False,
+    **cli_kwargs: Optional[Union[str, int, float, bool]],
 ) -> None:
-    """Launch a model on the cluster.
-    [52 lines not captured in this diff view]
+    """Launch a model on the cluster.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the model to launch
+    **cli_kwargs : dict
+        Additional launch options including:
+        - model_family : str, optional
+            Family/architecture of the model
+        - model_variant : str, optional
+            Specific variant of the model
+        - partition : str, optional
+            Type of compute partition
+        - num_nodes : int, optional
+            Number of nodes to use
+        - gpus_per_node : int, optional
+            Number of GPUs per node
+        - qos : str, optional
+            Quality of service tier
+        - time : str, optional
+            Time limit for job
+        - venv : str, optional
+            Path to virtual environment
+        - log_dir : str, optional
+            Path to SLURM log directory
+        - model_weights_parent_dir : str, optional
+            Path to model weights directory
+        - vllm_args : str, optional
+            vLLM engine arguments
+        - json_mode : bool, optional
+            Output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If launch fails for any reason
+    """
+    try:
+        # Convert cli_kwargs to LaunchOptions
+        json_mode = cli_kwargs["json_mode"]
+        del cli_kwargs["json_mode"]
+
+        launch_options = LaunchOptions(**cli_kwargs)  # type: ignore
+
+        # Start the client and launch model inference server
+        client = VecInfClient()
+        launch_response = client.launch_model(model_name, launch_options)
+
+        # Display launch information
+        launch_formatter = LaunchResponseFormatter(model_name, launch_response.config)
+        if json_mode:
+            click.echo(launch_response.config)
+        else:
+            launch_info_table = launch_formatter.format_table_output()
+            CONSOLE.print(launch_info_table)
+
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Launch failed: {str(e)}") from e
 
 
 @cli.command("status")
```
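The rewritten `launch` replaces the old explicit parameter list with `**cli_kwargs`: click turns each `--flag-name` into a `flag_name` key, `json_mode` is popped for output handling, and the rest is forwarded verbatim to `LaunchOptions(**cli_kwargs)`. A hedged sketch of what a single invocation reduces to (values are illustrative; `LaunchOptions` field names are assumed to mirror the flags, which the direct unpacking implies):

```python
# What `vec-inf launch some-model --num-nodes 2 --gpus-per-node 4 \
#     --vllm-args '--max-model-len=8192,--max-num-seqs=256'`
# boils down to internally, per the hunk above. Values are illustrative.
from vec_inf.client import LaunchOptions, VecInfClient

cli_kwargs = {
    "num_nodes": 2,        # from --num-nodes
    "gpus_per_node": 4,    # from --gpus-per-node
    # comma-separated vLLM engine args, in the format the --vllm-args help text specifies
    "vllm_args": "--max-model-len=8192,--max-num-seqs=256",
}
launch_options = LaunchOptions(**cli_kwargs)
launch_response = VecInfClient().launch_model("some-model", launch_options)
```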
```diff
@@ -194,133 +184,61 @@ def launch(
 def status(
     slurm_job_id: int, log_dir: Optional[str] = None, json_mode: bool = False
 ) -> None:
-    """Get the status of a running model on the cluster.
-    [15 lines not captured in this diff view]
-        job_state = "UNAVAILABLE"
-
-    return {
-        "model_name": job_name,
-        "status": "SHUTDOWN",
-        "base_url": "UNAVAILABLE",
-        "state": job_state,
-        "pending_reason": None,
-        "failed_reason": None,
-    }
-
-
-def _process_job_state(
-    output: str, status_info: Dict[str, Any], slurm_job_id: int, log_dir: Optional[str]
-) -> Dict[str, Any]:
-    """Process different job states and update status information."""
-    if status_info["state"] == "PENDING":
-        _process_pending_state(output, status_info)
-    elif status_info["state"] == "RUNNING":
-        _handle_running_state(status_info, slurm_job_id, log_dir)
-    return status_info
-
-
-def _process_pending_state(output: str, status_info: Dict[str, Any]) -> None:
-    """Handle PENDING job state."""
+    """Get the status of a running model on the cluster.
+
+    Parameters
+    ----------
+    slurm_job_id : int
+        ID of the SLURM job to check
+    log_dir : str, optional
+        Path to SLURM log directory
+    json_mode : bool, default=False
+        Whether to output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If status check fails
+    """
     try:
-        [10 lines not captured in this diff view]
-        server_status = utils.is_server_running(
-            status_info["model_name"], slurm_job_id, log_dir
-        )
-
-        if isinstance(server_status, tuple):
-            status_info["status"], status_info["failed_reason"] = server_status
-            return
-
-        if server_status == "RUNNING":
-            _check_model_health(status_info, slurm_job_id, log_dir)
-        else:
-            status_info["status"] = server_status
-
+        # Start the client and get model inference server status
+        client = VecInfClient()
+        status_response = client.get_status(slurm_job_id, log_dir)
+        # Display status information
+        status_formatter = StatusResponseFormatter(status_response)
+        if json_mode:
+            status_formatter.output_json()
+        else:
+            status_info_table = status_formatter.output_table()
+            CONSOLE.print(status_info_table)
 
-    [4 lines not captured in this diff view]
-    model_status = utils.model_health_check(
-        status_info["model_name"], slurm_job_id, log_dir
-    )
-    status, failed_reason = model_status
-    if status == "READY":
-        status_info["base_url"] = utils.get_base_url(
-            status_info["model_name"], slurm_job_id, log_dir
-        )
-        status_info["status"] = status
-    else:
-        status_info["status"], status_info["failed_reason"] = status, failed_reason
-
-
-def _display_status(status_info: Dict[str, Any], json_mode: bool) -> None:
-    """Display the status information in appropriate format."""
-    if json_mode:
-        _output_json(status_info)
-    else:
-        _output_table(status_info)
-
-
-def _output_json(status_info: Dict[str, Any]) -> None:
-    """Format and output JSON data."""
-    json_data = {
-        "model_name": status_info["model_name"],
-        "model_status": status_info["status"],
-        "base_url": status_info["base_url"],
-    }
-    if status_info["pending_reason"]:
-        json_data["pending_reason"] = status_info["pending_reason"]
-    if status_info["failed_reason"]:
-        json_data["failed_reason"] = status_info["failed_reason"]
-    click.echo(json_data)
-
-
-def _output_table(status_info: Dict[str, Any]) -> None:
-    """Create and display rich table."""
-    table = utils.create_table(key_title="Job Status", value_title="Value")
-    table.add_row("Model Name", status_info["model_name"])
-    table.add_row("Model Status", status_info["status"], style="blue")
-
-    if status_info["pending_reason"]:
-        table.add_row("Pending Reason", status_info["pending_reason"])
-    if status_info["failed_reason"]:
-        table.add_row("Failed Reason", status_info["failed_reason"])
-
-    table.add_row("Base URL", status_info["base_url"])
-    CONSOLE.print(table)
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Status check failed: {str(e)}") from e
 
 
 @cli.command("shutdown")
 @click.argument("slurm_job_id", type=int, nargs=1)
 def shutdown(slurm_job_id: int) -> None:
-    """Shutdown a running model on the cluster.
-    [3 lines not captured in this diff view]
+    """Shutdown a running model on the cluster.
+
+    Parameters
+    ----------
+    slurm_job_id : int
+        ID of the SLURM job to shut down
+
+    Raises
+    ------
+    click.ClickException
+        If shutdown operation fails
+    """
+    try:
+        client = VecInfClient()
+        client.shutdown_model(slurm_job_id)
+        click.echo(f"Shutting down model with Slurm Job ID: {slurm_job_id}")
+    except Exception as e:
+        raise click.ClickException(f"Shutdown failed: {str(e)}") from e
 
 
 @cli.command("list")
```
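`status` and `shutdown` follow the same pattern: the old in-module helpers (`_process_job_state`, `_output_json`, `_output_table`, and friends) are gone, and the CLI only formats what `VecInfClient` returns. A hedged sketch of the same round trip done programmatically, using only the client calls confirmed by the hunk above:

```python
# Hedged sketch: poll a job's status, then shut it down. The Slurm job ID
# is made up, and the response's field names are not visible in this diff.
from vec_inf.client import VecInfClient

client = VecInfClient()
job_id = 12345678

status_response = client.get_status(job_id)  # log_dir is optional, as in the CLI
print(status_response)  # fields aren't shown in this diff, so just print it

client.shutdown_model(job_id)
```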
```diff
@@ -331,107 +249,91 @@ def shutdown(slurm_job_id: int) -> None:
     help="Output in JSON string",
 )
 def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:
-    """List all available models, or get default setup of a specific model.
-    [27 lines not captured in this diff view]
-        "Text Embedding": "purple",
-        "Reward Modeling": "bright_magenta",
-    }
-
-    models_df = models_df.with_columns(
-        pl.when(pl.col("model_type") == "LLM")
-        .then(0)
-        .when(pl.col("model_type") == "VLM")
-        .then(1)
-        .when(pl.col("model_type") == "Text Embedding")
-        .then(2)
-        .when(pl.col("model_type") == "Reward Modeling")
-        .then(3)
-        .otherwise(-1)
-        .alias("model_type_order")
-    )
-
-    models_df = models_df.sort("model_type_order")
-    models_df = models_df.drop("model_type_order")
-
-    for row in models_df.to_dicts():
-        panel_color = model_type_colors.get(row["model_type"], "white")
-        if row["model_variant"] == "None":
-            styled_text = f"[magenta]{row['model_family']}[/magenta]"
-        else:
-            styled_text = (
-                f"[magenta]{row['model_family']}[/magenta]-{row['model_variant']}"
-            )
-        panels.append(Panel(styled_text, expand=True, border_style=panel_color))
-    CONSOLE.print(Columns(panels, equal=True))
-
-    models_df = utils.load_models_df()
-
-    if model_name:
-        list_model(model_name, models_df, json_mode)
-    else:
-        list_all(models_df, json_mode)
+    """List all available models, or get default setup of a specific model.
+
+    Parameters
+    ----------
+    model_name : str, optional
+        Name of specific model to get information for
+    json_mode : bool, default=False
+        Whether to output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If list operation fails
+    """
+    try:
+        # Start the client
+        client = VecInfClient()
+        list_display = ListCmdDisplay(CONSOLE, json_mode)
+        if model_name:
+            model_config = client.get_model_config(model_name)
+            list_display.display_single_model_output(model_config)
+        else:
+            model_infos = client.list_models()
+            list_display.display_all_models_output(model_infos)
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"List models failed: {str(e)}") from e
 
 
 @cli.command("metrics")
 @click.argument("slurm_job_id", type=int, nargs=1)
 @click.option(
-    "--log-dir",
-    type=str,
-    help="Path to slurm log directory. This is required if --log-dir was set in model launch",
+    "--log-dir", type=str, help="Path to slurm log directory (if used during launch)"
 )
 def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
-    """Stream performance metrics
-    [25 lines not captured in this diff view]
+    """Stream real-time performance metrics from the model endpoint.
+
+    Parameters
+    ----------
+    slurm_job_id : int
+        ID of the SLURM job to monitor
+    log_dir : str, optional
+        Path to SLURM log directory
+
+    Raises
+    ------
+    click.ClickException
+        If metrics collection fails
+
+    Notes
+    -----
+    This command continuously streams metrics with a 2-second refresh interval
+    until interrupted. If metrics are not available, it will display status
+    information instead.
+    """
+    try:
+        # Start the client and get inference server metrics
+        client = VecInfClient()
+        metrics_response = client.get_metrics(slurm_job_id, log_dir)
+        metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
+
+        # Check if metrics response is ready
+        if isinstance(metrics_response.metrics, str):
+            metrics_formatter.format_failed_metrics(metrics_response.metrics)
+            CONSOLE.print(metrics_formatter.table)
+            return
+
+        with Live(refresh_per_second=1, console=CONSOLE) as live:
+            while True:
+                metrics_response = client.get_metrics(slurm_job_id, log_dir)
+                metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
+
+                if isinstance(metrics_response.metrics, str):
+                    # Show status information if metrics aren't available
+                    metrics_formatter.format_failed_metrics(metrics_response.metrics)
+                else:
+                    metrics_formatter.format_metrics()
+
+                live.update(metrics_formatter.table)
+                time.sleep(2)
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Metrics check failed: {str(e)}") from e
 
 
 if __name__ == "__main__":
```
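One contract worth noting in the new `metrics` path: `metrics_response.metrics` doubles as the error channel. When it is a `str`, the CLI treats it as a failure message and renders it once instead of entering the `Live` loop; otherwise it refreshes every 2 seconds. A hedged polling sketch against that contract (the job ID is illustrative, and the shape of the non-string metrics value is not shown in this diff):

```python
# Hedged sketch of polling metrics with the str-means-unavailable contract
# used by the metrics command above. The Slurm job ID is made up.
import time

from vec_inf.client import VecInfClient

client = VecInfClient()
job_id = 12345678

for _ in range(5):  # a few polls instead of the CLI's endless Live loop
    metrics = client.get_metrics(job_id).metrics
    if isinstance(metrics, str):
        print(f"Metrics not available yet: {metrics}")
    else:
        print(metrics)  # actual metric fields aren't shown in this diff
    time.sleep(2)  # same refresh interval as the CLI
```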