vec-inf 0.4.1__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +3 -3
- vec_inf/cli/_cli.py +227 -325
- vec_inf/cli/_helper.py +400 -0
- vec_inf/cli/_utils.py +26 -135
- vec_inf/cli/_vars.py +32 -0
- vec_inf/client/__init__.py +31 -0
- vec_inf/client/_client_vars.py +213 -0
- vec_inf/client/_exceptions.py +37 -0
- vec_inf/client/_helper.py +674 -0
- vec_inf/client/_slurm_script_generator.py +179 -0
- vec_inf/client/_utils.py +287 -0
- vec_inf/client/api.py +302 -0
- vec_inf/client/config.py +128 -0
- vec_inf/client/models.py +225 -0
- vec_inf/client/slurm_vars.py +49 -0
- vec_inf/{models → config}/README.md +30 -12
- vec_inf/config/models.yaml +1300 -0
- vec_inf-0.6.0.dist-info/METADATA +193 -0
- vec_inf-0.6.0.dist-info/RECORD +25 -0
- vec_inf/launch_server.sh +0 -145
- vec_inf/models/models.csv +0 -85
- vec_inf/multinode_vllm.slurm +0 -124
- vec_inf/vllm.slurm +0 -59
- vec_inf-0.4.1.dist-info/METADATA +0 -121
- vec_inf-0.4.1.dist-info/RECORD +0 -16
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/cli/_helper.py
ADDED
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"""Helper classes for the CLI.
|
|
2
|
+
|
|
3
|
+
This module provides formatting and display classes for the command-line interface,
|
|
4
|
+
handling the presentation of model information, status updates, and metrics.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Union
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
from rich.columns import Columns
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.panel import Panel
|
|
14
|
+
from rich.table import Table
|
|
15
|
+
|
|
16
|
+
from vec_inf.cli._utils import create_table
|
|
17
|
+
from vec_inf.cli._vars import MODEL_TYPE_COLORS, MODEL_TYPE_PRIORITY
|
|
18
|
+
from vec_inf.client import ModelConfig, ModelInfo, StatusResponse
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LaunchResponseFormatter:
    """CLI Helper class for formatting LaunchResponse.

    A formatter class that handles the presentation of model launch information
    in both table and JSON formats.

    Parameters
    ----------
    model_name : str
        Name of the launched model
    params : dict[str, Any]
        Launch parameters and configuration
    """

    def __init__(self, model_name: str, params: dict[str, Any]):
        self.model_name = model_name
        self.params = params

    def format_table_output(self) -> Table:
        """Format output as rich Table.

        Returns
        -------
        Table
            Rich table containing formatted launch information including:
            - Job configuration
            - Model details
            - Resource allocation
            - vLLM configuration
        """
        table = create_table(key_title="Job Config", value_title="Value")

        # Every param value is passed through str() because Rich's add_row
        # only accepts strings/renderables — integer params (e.g. node or
        # GPU counts) would otherwise raise NotRenderableError.

        # Add key information with consistent styling
        table.add_row("Slurm Job ID", str(self.params["slurm_job_id"]), style="blue")
        table.add_row("Job Name", self.model_name)

        # Add model details
        table.add_row("Model Type", str(self.params["model_type"]))
        table.add_row("Vocabulary Size", str(self.params["vocab_size"]))

        # Add resource allocation details
        table.add_row("Partition", str(self.params["partition"]))
        table.add_row("QoS", str(self.params["qos"]))
        table.add_row("Time Limit", str(self.params["time"]))
        table.add_row("Num Nodes", str(self.params["num_nodes"]))
        table.add_row("GPUs/Node", str(self.params["gpus_per_node"]))
        table.add_row("CPUs/Task", str(self.params["cpus_per_task"]))
        table.add_row("Memory/Node", str(self.params["mem_per_node"]))

        # Add job config details
        table.add_row(
            "Model Weights Directory",
            str(Path(self.params["model_weights_parent_dir"], self.model_name)),
        )
        table.add_row("Log Directory", str(self.params["log_dir"]))

        # Add vLLM configuration details: section header, then one indented
        # row per argument.
        table.add_row("vLLM Arguments:", style="magenta")
        for arg, value in self.params["vllm_args"].items():
            table.add_row(f" {arg}:", str(value))

        return table
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class StatusResponseFormatter:
    """CLI Helper class for formatting StatusResponse.

    A formatter class that handles the presentation of model status information
    in both table and JSON formats.

    Parameters
    ----------
    status_info : StatusResponse
        Status information to format
    """

    def __init__(self, status_info: StatusResponse):
        self.status_info = status_info

    def output_json(self) -> None:
        """Format and output JSON data.

        Outputs an object containing:
        - model_name
        - model_status
        - base_url
        - pending_reason (only when set)
        - failed_reason (only when set)
        """
        info = self.status_info
        payload = {
            "model_name": info.model_name,
            "model_status": info.server_status,
            "base_url": info.base_url,
        }
        # Attach the reason fields only when they carry a value.
        for key, reason in (
            ("pending_reason", info.pending_reason),
            ("failed_reason", info.failed_reason),
        ):
            if reason:
                payload[key] = reason
        # NOTE(review): this echoes the Python dict repr, not strict JSON —
        # confirm whether downstream consumers expect valid JSON here.
        click.echo(payload)

    def output_table(self) -> Table:
        """Create and display rich table.

        Returns
        -------
        Table
            Rich table containing formatted status information including:
            - Model name
            - Status
            - Base URL
            - Error information (if applicable)
        """
        info = self.status_info
        table = create_table(key_title="Job Status", value_title="Value")
        table.add_row("Model Name", info.model_name)
        table.add_row("Model Status", info.server_status, style="blue")

        # Reason rows are optional and appear between the status and base URL.
        if info.pending_reason:
            table.add_row("Pending Reason", info.pending_reason)
        if info.failed_reason:
            table.add_row("Failed Reason", info.failed_reason)

        table.add_row("Base URL", info.base_url)
        return table
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class MetricsResponseFormatter:
    """CLI Helper class for formatting MetricsResponse.

    A formatter class that handles the presentation of model metrics
    in a table format.

    Parameters
    ----------
    metrics : Union[dict[str, float], str]
        Dictionary of metrics or error message
    """

    def __init__(self, metrics: Union[dict[str, float], str]):
        self.metrics = self._set_metrics(metrics)
        self.table = create_table("Metric", "Value")
        self.enabled_prefix_caching = self._check_prefix_caching()

    def _set_metrics(self, metrics: Union[dict[str, float], str]) -> dict[str, float]:
        """Set the metrics attribute.

        Parameters
        ----------
        metrics : Union[dict[str, float], str]
            Raw metrics data

        Returns
        -------
        dict[str, float]
            Processed metrics dictionary

        Notes
        -----
        A string input (an error message) is replaced with an empty dict;
        callers are expected to surface the message themselves via
        ``format_failed_metrics``.
        """
        return metrics if isinstance(metrics, dict) else {}

    def _check_prefix_caching(self) -> bool:
        """Check if prefix caching is enabled.

        Returns
        -------
        bool
            True if prefix caching metrics are present
        """
        return self.metrics.get("gpu_prefix_cache_hit_rate") is not None

    def format_failed_metrics(self, message: str) -> None:
        """Format error message for failed metrics collection.

        Parameters
        ----------
        message : str
            Error message to display
        """
        self.table.add_row("ERROR", message)

    def format_metrics(self) -> None:
        """Format and display all available metrics.

        Delegates to one private helper per metric category:
        - Throughput metrics
        - Request queue metrics
        - Cache usage metrics (incl. prefix cache hit rates, if enabled)
        - Latency metrics (if available)
        - Token counts
        """
        self._add_throughput_rows()
        self._add_request_queue_rows()
        self._add_cache_usage_rows()
        self._add_latency_rows()
        self._add_token_count_rows()

    def _add_throughput_rows(self) -> None:
        """Add prompt and generation throughput rows (tokens per second)."""
        self.table.add_row(
            "Prompt Throughput",
            f"{self.metrics.get('prompt_tokens_per_sec', 0):.1f} tokens/s",
        )
        self.table.add_row(
            "Generation Throughput",
            f"{self.metrics.get('generation_tokens_per_sec', 0):.1f} tokens/s",
        )

    def _add_request_queue_rows(self) -> None:
        """Add running/waiting/swapped request-count rows."""
        self.table.add_row(
            "Requests Running",
            f"{self.metrics.get('requests_running', 0):.0f} reqs",
        )
        self.table.add_row(
            "Requests Waiting",
            f"{self.metrics.get('requests_waiting', 0):.0f} reqs",
        )
        self.table.add_row(
            "Requests Swapped",
            f"{self.metrics.get('requests_swapped', 0):.0f} reqs",
        )

    def _add_cache_usage_rows(self) -> None:
        """Add GPU/CPU cache usage rows, plus prefix-cache hit rates if enabled."""
        # Usage values are fractions in [0, 1]; rendered as percentages.
        self.table.add_row(
            "GPU Cache Usage",
            f"{self.metrics.get('gpu_cache_usage', 0) * 100:.1f}%",
        )
        self.table.add_row(
            "CPU Cache Usage",
            f"{self.metrics.get('cpu_cache_usage', 0) * 100:.1f}%",
        )

        if self.enabled_prefix_caching:
            self.table.add_row(
                "GPU Prefix Cache Hit Rate",
                f"{self.metrics.get('gpu_prefix_cache_hit_rate', 0) * 100:.1f}%",
            )
            self.table.add_row(
                "CPU Prefix Cache Hit Rate",
                f"{self.metrics.get('cpu_prefix_cache_hit_rate', 0) * 100:.1f}%",
            )

    def _add_latency_rows(self) -> None:
        """Add the average request latency row, only when the metric exists."""
        if "avg_request_latency" in self.metrics:
            self.table.add_row(
                "Avg Request Latency",
                f"{self.metrics['avg_request_latency']:.1f} s",
            )

    def _add_token_count_rows(self) -> None:
        """Add cumulative token counts and successful-request total rows."""
        self.table.add_row(
            "Total Prompt Tokens",
            f"{self.metrics.get('total_prompt_tokens', 0):.0f} tokens",
        )
        self.table.add_row(
            "Total Generation Tokens",
            f"{self.metrics.get('total_generation_tokens', 0):.0f} tokens",
        )
        self.table.add_row(
            "Successful Requests",
            f"{self.metrics.get('successful_requests_total', 0):.0f} reqs",
        )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class ListCmdDisplay:
    """CLI Helper class for displaying model listing functionality.

    A display class that handles the presentation of model listings
    in both table and JSON formats.

    Parameters
    ----------
    console : Console
        Rich console instance for output
    json_mode : bool, default=False
        Whether to output in JSON format
    """

    def __init__(self, console: Console, json_mode: bool = False):
        self.console = console
        self.json_mode = json_mode
        # NOTE(review): the two attributes below are never read in this class;
        # kept for backward compatibility in case external code inspects them.
        self.model_config = None
        self.model_names: list[str] = []

    def _format_single_model_output(
        self, config: ModelConfig
    ) -> Union[dict[str, Any], Table]:
        """Format output for a single model.

        Parameters
        ----------
        config : ModelConfig
            Model configuration to format

        Returns
        -------
        Union[dict[str, Any], Table]
            A plain dict in JSON mode, otherwise a Rich table
        """
        if self.json_mode:
            # Exclude non-essential fields from JSON output
            excluded = {"venv", "log_dir"}
            config_dict = config.model_dump(exclude=excluded)
            # Convert Path objects to strings so the dict is printable
            config_dict["model_weights_parent_dir"] = str(
                config_dict["model_weights_parent_dir"]
            )
            return config_dict

        table = create_table(key_title="Model Config", value_title="Value")
        for field, value in config.model_dump().items():
            if field not in {"venv", "log_dir", "vllm_args"}:
                table.add_row(field, str(value))
            if field == "vllm_args":
                # vLLM arguments get a highlighted section header plus one
                # indented row per argument.
                table.add_row("vLLM Arguments:", style="magenta")
                for vllm_arg, vllm_value in value.items():
                    table.add_row(f" {vllm_arg}:", str(vllm_value))
        return table

    def _format_all_models_output(self, model_infos: list[ModelInfo]) -> list[Panel]:
        """Format color-coded panels for all models.

        Parameters
        ----------
        model_infos : list[ModelInfo]
            List of model information to format

        Returns
        -------
        list[Panel]
            One Rich panel per model (this helper always builds panels;
            the JSON name-list path is handled by the caller).

        Notes
        -----
        Models are sorted by type priority and color-coded based on their type.
        """
        # Sort by model type priority; unknown types sort last (priority 4)
        sorted_model_infos = sorted(
            model_infos,
            key=lambda x: MODEL_TYPE_PRIORITY.get(x.model_type, 4),
        )

        # Create panels with color coding; unknown types fall back to white
        panels = []
        for model_info in sorted_model_infos:
            color = MODEL_TYPE_COLORS.get(model_info.model_type, "white")
            variant = model_info.variant or ""
            display_text = f"[magenta]{model_info.family}[/magenta]"
            if variant:
                display_text += f"-{variant}"
            panels.append(Panel(display_text, expand=True, border_style=color))

        return panels

    def display_single_model_output(self, config: ModelConfig) -> None:
        """Display the output for a single model.

        Parameters
        ----------
        config : ModelConfig
            Model configuration to display
        """
        output = self._format_single_model_output(config)
        if self.json_mode:
            click.echo(output)
        else:
            self.console.print(output)

    def display_all_models_output(self, model_infos: list[ModelInfo]) -> None:
        """Display the output for all models.

        Parameters
        ----------
        model_infos : list[ModelInfo]
            List of model information to display

        Notes
        -----
        Output format depends on json_mode:
        - JSON: List of model names
        - Table: Color-coded panels with model information
        """
        if self.json_mode:
            model_names = [info.name for info in model_infos]
            click.echo(model_names)
        else:
            panels = self._format_all_models_output(model_infos)
            self.console.print(Columns(panels, equal=True))
|
vec_inf/cli/_utils.py
CHANGED
|
@@ -1,147 +1,38 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Helper functions for the CLI.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
This module provides utility functions for creating consistent table displays
|
|
4
|
+
in the command-line interface.
|
|
5
|
+
"""
|
|
6
6
|
|
|
7
|
-
import polars as pl
|
|
8
|
-
import requests
|
|
9
7
|
from rich.table import Table
|
|
10
8
|
|
|
11
9
|
|
|
12
|
-
MODEL_READY_SIGNATURE = "INFO: Application startup complete."
|
|
13
|
-
SERVER_ADDRESS_SIGNATURE = "Server address: "
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def run_bash_command(command: str) -> str:
|
|
17
|
-
"""Run a bash command and return the output."""
|
|
18
|
-
process = subprocess.Popen(
|
|
19
|
-
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
|
|
20
|
-
)
|
|
21
|
-
stdout, _ = process.communicate()
|
|
22
|
-
return stdout
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def read_slurm_log(
|
|
26
|
-
slurm_job_name: str, slurm_job_id: int, slurm_log_type: str, log_dir: Optional[str]
|
|
27
|
-
) -> Union[list[str], str]:
|
|
28
|
-
"""Read the slurm log file."""
|
|
29
|
-
if not log_dir:
|
|
30
|
-
models_dir = os.path.join(os.path.expanduser("~"), ".vec-inf-logs")
|
|
31
|
-
|
|
32
|
-
for directory in sorted(os.listdir(models_dir), key=len, reverse=True):
|
|
33
|
-
if directory in slurm_job_name:
|
|
34
|
-
log_dir = os.path.join(models_dir, directory)
|
|
35
|
-
break
|
|
36
|
-
|
|
37
|
-
log_dir = cast(str, log_dir)
|
|
38
|
-
|
|
39
|
-
try:
|
|
40
|
-
file_path = os.path.join(
|
|
41
|
-
log_dir,
|
|
42
|
-
f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}",
|
|
43
|
-
)
|
|
44
|
-
with open(file_path, "r") as file:
|
|
45
|
-
lines = file.readlines()
|
|
46
|
-
except FileNotFoundError:
|
|
47
|
-
print(f"Could not find file: {file_path}")
|
|
48
|
-
return "LOG_FILE_NOT_FOUND"
|
|
49
|
-
return lines
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def is_server_running(
|
|
53
|
-
slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
|
|
54
|
-
) -> Union[str, tuple[str, str]]:
|
|
55
|
-
"""Check if a model is ready to serve requests."""
|
|
56
|
-
log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
|
|
57
|
-
if isinstance(log_content, str):
|
|
58
|
-
return log_content
|
|
59
|
-
|
|
60
|
-
status: Union[str, tuple[str, str]] = "LAUNCHING"
|
|
61
|
-
|
|
62
|
-
for line in log_content:
|
|
63
|
-
if "error" in line.lower():
|
|
64
|
-
status = ("FAILED", line.strip("\n"))
|
|
65
|
-
if MODEL_READY_SIGNATURE in line:
|
|
66
|
-
status = "RUNNING"
|
|
67
|
-
|
|
68
|
-
return status
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
|
|
72
|
-
"""Get the base URL of a model."""
|
|
73
|
-
log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
|
|
74
|
-
if isinstance(log_content, str):
|
|
75
|
-
return log_content
|
|
76
|
-
|
|
77
|
-
for line in log_content:
|
|
78
|
-
if SERVER_ADDRESS_SIGNATURE in line:
|
|
79
|
-
return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
|
|
80
|
-
return "URL_NOT_FOUND"
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def model_health_check(
|
|
84
|
-
slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
|
|
85
|
-
) -> Tuple[str, Union[str, int]]:
|
|
86
|
-
"""Check the health of a running model on the cluster."""
|
|
87
|
-
base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
|
|
88
|
-
if not base_url.startswith("http"):
|
|
89
|
-
return ("FAILED", base_url)
|
|
90
|
-
health_check_url = base_url.replace("v1", "health")
|
|
91
|
-
|
|
92
|
-
try:
|
|
93
|
-
response = requests.get(health_check_url)
|
|
94
|
-
# Check if the request was successful
|
|
95
|
-
if response.status_code == 200:
|
|
96
|
-
return ("READY", response.status_code)
|
|
97
|
-
return ("FAILED", response.status_code)
|
|
98
|
-
except requests.exceptions.RequestException as e:
|
|
99
|
-
return ("FAILED", str(e))
|
|
100
|
-
|
|
101
|
-
|
|
102
10
|
def create_table(
    key_title: str = "", value_title: str = "", show_header: bool = True
) -> Table:
    """Build a two-column Rich table for key/value display.

    Produces a consistently styled table used across the CLI for
    rendering key-value pairs.

    Parameters
    ----------
    key_title : str, default=""
        Title for the key column
    value_title : str, default=""
        Title for the value column
    show_header : bool, default=True
        Whether to display column headers

    Returns
    -------
    Table
        Rich Table instance with configured styling:
        - Headers in bold magenta
        - Key column in dim style
        - Value column in default style
    """
    table = Table(show_header=show_header, header_style="bold magenta")
    # Keys are rendered dimmed; values keep the default style (None).
    for title, column_style in ((key_title, "dim"), (value_title, None)):
        table.add_column(title, style=column_style)
    return table
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def load_models_df() -> pl.DataFrame:
|
|
113
|
-
"""Load the models dataframe."""
|
|
114
|
-
return pl.read_csv(
|
|
115
|
-
os.path.join(
|
|
116
|
-
os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
|
|
117
|
-
"models/models.csv",
|
|
118
|
-
)
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def load_default_args(models_df: pl.DataFrame, model_name: str) -> Dict[str, str]:
|
|
123
|
-
"""Load the default arguments for a model."""
|
|
124
|
-
row_data = models_df.filter(models_df["model_name"] == model_name)
|
|
125
|
-
default_args = row_data.to_dicts()[0]
|
|
126
|
-
default_args.pop("model_name", None)
|
|
127
|
-
return default_args
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def get_latest_metric(log_lines: List[str]) -> Union[str, Dict[str, str]]:
|
|
131
|
-
"""Read the latest metric entry from the log file."""
|
|
132
|
-
latest_metric = {}
|
|
133
|
-
|
|
134
|
-
try:
|
|
135
|
-
for line in reversed(log_lines):
|
|
136
|
-
if "Avg prompt throughput" in line:
|
|
137
|
-
# Parse the metric values from the line
|
|
138
|
-
metrics_str = line.split("] ")[1].strip().strip(".")
|
|
139
|
-
metrics_list = metrics_str.split(", ")
|
|
140
|
-
for metric in metrics_list:
|
|
141
|
-
key, value = metric.split(": ")
|
|
142
|
-
latest_metric[key] = value
|
|
143
|
-
break
|
|
144
|
-
except Exception as e:
|
|
145
|
-
return f"[red]Error reading log file: {e}[/red]"
|
|
146
|
-
|
|
147
|
-
return latest_metric
|
vec_inf/cli/_vars.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Constants for CLI rendering.
|
|
2
|
+
|
|
3
|
+
This module defines constant mappings for model type priorities and colors
|
|
4
|
+
used in the CLI display formatting.
|
|
5
|
+
|
|
6
|
+
Constants
|
|
7
|
+
---------
|
|
8
|
+
MODEL_TYPE_PRIORITY : dict
|
|
9
|
+
Mapping of model types to their display priority (lower numbers shown first)
|
|
10
|
+
|
|
11
|
+
MODEL_TYPE_COLORS : dict
|
|
12
|
+
Mapping of model types to their display colors in Rich
|
|
13
|
+
|
|
14
|
+
Notes
|
|
15
|
+
-----
|
|
16
|
+
These constants are used primarily by the ListCmdDisplay class to ensure
|
|
17
|
+
consistent sorting and color coding of different model types in the CLI output.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
# Display priority per model type: lower numbers are listed first.
MODEL_TYPE_PRIORITY = dict(
    LLM=0,
    VLM=1,
    Text_Embedding=2,
    Reward_Modeling=3,
)

# Rich color name used when rendering each model type in the CLI.
MODEL_TYPE_COLORS = dict(
    LLM="cyan",
    VLM="bright_blue",
    Text_Embedding="purple",
    Reward_Modeling="bright_magenta",
)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Programmatic API for Vector Inference.
|
|
2
|
+
|
|
3
|
+
This module provides a Python API for launching and managing inference servers
|
|
4
|
+
using `vec_inf`. It is an alternative to the command-line interface, and allows
|
|
5
|
+
users direct control over the lifecycle of inference servers via python scripts.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from vec_inf.client.api import VecInfClient
|
|
9
|
+
from vec_inf.client.config import ModelConfig
|
|
10
|
+
from vec_inf.client.models import (
|
|
11
|
+
LaunchOptions,
|
|
12
|
+
LaunchResponse,
|
|
13
|
+
MetricsResponse,
|
|
14
|
+
ModelInfo,
|
|
15
|
+
ModelStatus,
|
|
16
|
+
ModelType,
|
|
17
|
+
StatusResponse,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Public API surface of the vec_inf.client package.
__all__ = [
    # Client entry point
    "VecInfClient",
    # Response payloads
    "LaunchResponse",
    "StatusResponse",
    "ModelInfo",
    "MetricsResponse",
    # Enumerations
    "ModelStatus",
    "ModelType",
    # Option / configuration objects
    "LaunchOptions",
    "ModelConfig",
]
|