vec-inf 0.4.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/cli/_helper.py ADDED
@@ -0,0 +1,400 @@
1
+ """Helper classes for the CLI.
2
+
3
+ This module provides formatting and display classes for the command-line interface,
4
+ handling the presentation of model information, status updates, and metrics.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Any, Union
9
+
10
+ import click
11
+ from rich.columns import Columns
12
+ from rich.console import Console
13
+ from rich.panel import Panel
14
+ from rich.table import Table
15
+
16
+ from vec_inf.cli._utils import create_table
17
+ from vec_inf.cli._vars import MODEL_TYPE_COLORS, MODEL_TYPE_PRIORITY
18
+ from vec_inf.client import ModelConfig, ModelInfo, StatusResponse
19
+
20
+
21
class LaunchResponseFormatter:
    """CLI helper that renders model launch details.

    Turns the parameters of a freshly launched model into a Rich table
    for terminal display.

    Parameters
    ----------
    model_name : str
        Name of the launched model
    params : dict[str, Any]
        Launch parameters and configuration
    """

    def __init__(self, model_name: str, params: dict[str, Any]):
        self.model_name = model_name
        self.params = params

    def format_table_output(self) -> Table:
        """Build a Rich table summarizing the launch.

        Returns
        -------
        Table
            Two-column table covering the job configuration, model
            details, resource allocation, and vLLM arguments.
        """
        table = create_table(key_title="Job Config", value_title="Value")

        # Key identifiers first, with the job id highlighted.
        table.add_row("Slurm Job ID", self.params["slurm_job_id"], style="blue")
        table.add_row("Job Name", self.model_name)

        # Model details and resource allocation, emitted from a single
        # label -> param-key sequence to keep the ordering explicit.
        plain_rows = (
            ("Model Type", "model_type"),
            ("Vocabulary Size", "vocab_size"),
            ("Partition", "partition"),
            ("QoS", "qos"),
            ("Time Limit", "time"),
            ("Num Nodes", "num_nodes"),
            ("GPUs/Node", "gpus_per_node"),
            ("CPUs/Task", "cpus_per_task"),
            ("Memory/Node", "mem_per_node"),
        )
        for label, key in plain_rows:
            table.add_row(label, self.params[key])

        # Filesystem locations for the weights and the job logs.
        weights_dir = Path(self.params["model_weights_parent_dir"], self.model_name)
        table.add_row("Model Weights Directory", str(weights_dir))
        table.add_row("Log Directory", self.params["log_dir"])

        # vLLM configuration, one indented row per argument.
        table.add_row("vLLM Arguments:", style="magenta")
        for name, setting in self.params["vllm_args"].items():
            table.add_row(f"  {name}:", str(setting))

        return table
83
+
84
+
85
class StatusResponseFormatter:
    """CLI helper that renders model status information.

    Presents a ``StatusResponse`` either as key/value data for JSON mode
    or as a Rich table for interactive display.

    Parameters
    ----------
    status_info : StatusResponse
        Status information to format
    """

    def __init__(self, status_info: StatusResponse):
        self.status_info = status_info

    def output_json(self) -> None:
        """Echo the status as a dictionary.

        The emitted mapping always carries ``model_name``,
        ``model_status``, and ``base_url``; ``pending_reason`` and
        ``failed_reason`` are added only when set.

        NOTE(review): ``click.echo`` on a dict prints the Python repr
        (single quotes), not strict JSON — confirm downstream consumers
        expect this format.
        """
        info = self.status_info
        payload = {
            "model_name": info.model_name,
            "model_status": info.server_status,
            "base_url": info.base_url,
        }
        # Optional fields are included only when truthy.
        for optional in ("pending_reason", "failed_reason"):
            reason = getattr(info, optional)
            if reason:
                payload[optional] = reason
        click.echo(payload)

    def output_table(self) -> Table:
        """Build a Rich table describing the job status.

        Returns
        -------
        Table
            Table listing the model name, its status (highlighted),
            any pending/failure reasons, and the server base URL.
        """
        table = create_table(key_title="Job Status", value_title="Value")
        table.add_row("Model Name", self.status_info.model_name)
        table.add_row("Model Status", self.status_info.server_status, style="blue")

        # Reasons appear only when present.
        for label, reason in (
            ("Pending Reason", self.status_info.pending_reason),
            ("Failed Reason", self.status_info.failed_reason),
        ):
            if reason:
                table.add_row(label, reason)

        table.add_row("Base URL", self.status_info.base_url)
        return table
144
+
145
+
146
class MetricsResponseFormatter:
    """CLI helper that renders performance metrics.

    Builds a Rich table from a metrics dictionary; a string input
    (an error message from collection) normalizes to an empty dict.

    Parameters
    ----------
    metrics : Union[dict[str, float], str]
        Dictionary of metrics or error message
    """

    def __init__(self, metrics: Union[dict[str, float], str]):
        self.metrics = self._set_metrics(metrics)
        self.table = create_table("Metric", "Value")
        self.enabled_prefix_caching = self._check_prefix_caching()

    def _set_metrics(self, metrics: Union[dict[str, float], str]) -> dict[str, float]:
        """Normalize raw metrics data.

        Parameters
        ----------
        metrics : Union[dict[str, float], str]
            Raw metrics data

        Returns
        -------
        dict[str, float]
            The input dict unchanged, or ``{}`` when given a string.
        """
        if isinstance(metrics, dict):
            return metrics
        return {}

    def _check_prefix_caching(self) -> bool:
        """Return True when prefix-cache hit-rate metrics are present."""
        return self.metrics.get("gpu_prefix_cache_hit_rate") is not None

    def format_failed_metrics(self, message: str) -> None:
        """Add an ERROR row carrying *message* to the table.

        Parameters
        ----------
        message : str
            Error message to display
        """
        self.table.add_row("ERROR", message)

    def format_metrics(self) -> None:
        """Populate the table with every available metric.

        Adds, in order: throughput, request-queue, cache-usage,
        prefix-cache (if enabled), latency (if reported), and cumulative
        token/request counts. Missing metrics default to 0.
        """
        read = self.metrics.get  # hoist the lookup used on every row

        # Throughput.
        self.table.add_row(
            "Prompt Throughput",
            f"{read('prompt_tokens_per_sec', 0):.1f} tokens/s",
        )
        self.table.add_row(
            "Generation Throughput",
            f"{read('generation_tokens_per_sec', 0):.1f} tokens/s",
        )

        # Request queue.
        for label, key in (
            ("Requests Running", "requests_running"),
            ("Requests Waiting", "requests_waiting"),
            ("Requests Swapped", "requests_swapped"),
        ):
            self.table.add_row(label, f"{read(key, 0):.0f} reqs")

        # Cache usage, reported as percentages.
        self.table.add_row(
            "GPU Cache Usage",
            f"{read('gpu_cache_usage', 0) * 100:.1f}%",
        )
        self.table.add_row(
            "CPU Cache Usage",
            f"{read('cpu_cache_usage', 0) * 100:.1f}%",
        )

        # Prefix-cache hit rates only when the server reports them.
        if self.enabled_prefix_caching:
            self.table.add_row(
                "GPU Prefix Cache Hit Rate",
                f"{read('gpu_prefix_cache_hit_rate', 0) * 100:.1f}%",
            )
            self.table.add_row(
                "CPU Prefix Cache Hit Rate",
                f"{read('cpu_prefix_cache_hit_rate', 0) * 100:.1f}%",
            )

        # Average latency only when it has been measured.
        if "avg_request_latency" in self.metrics:
            self.table.add_row(
                "Avg Request Latency",
                f"{self.metrics['avg_request_latency']:.1f} s",
            )

        # Cumulative token and request counts.
        self.table.add_row(
            "Total Prompt Tokens",
            f"{read('total_prompt_tokens', 0):.0f} tokens",
        )
        self.table.add_row(
            "Total Generation Tokens",
            f"{read('total_generation_tokens', 0):.0f} tokens",
        )
        self.table.add_row(
            "Successful Requests",
            f"{read('successful_requests_total', 0):.0f} reqs",
        )
273
+
274
+
275
class ListCmdDisplay:
    """CLI Helper class for displaying model listing functionality.

    A display class that handles the presentation of model listings
    in both table and JSON formats.

    Parameters
    ----------
    console : Console
        Rich console instance for output
    json_mode : bool, default=False
        Whether to output in JSON format
    """

    def __init__(self, console: Console, json_mode: bool = False):
        self.console = console
        self.json_mode = json_mode
        # NOTE(review): the two attributes below are never read or written
        # by this class; kept for backward compatibility in case external
        # code references them — confirm and remove if truly unused.
        self.model_config = None
        self.model_names: list[str] = []

    def _format_single_model_output(
        self, config: ModelConfig
    ) -> Union[dict[str, Any], Table]:
        """Format output for a single model.

        Parameters
        ----------
        config : ModelConfig
            Model configuration to format

        Returns
        -------
        Union[dict[str, Any], Table]
            A dictionary in JSON mode, otherwise a Rich table
        """
        if self.json_mode:
            # Exclude non-essential fields from JSON output
            excluded = {"venv", "log_dir"}
            config_dict = config.model_dump(exclude=excluded)
            # Convert Path objects to strings so the output is printable
            config_dict["model_weights_parent_dir"] = str(
                config_dict["model_weights_parent_dir"]
            )
            return config_dict

        table = create_table(key_title="Model Config", value_title="Value")
        for field, value in config.model_dump().items():
            if field not in {"venv", "log_dir", "vllm_args"}:
                table.add_row(field, str(value))
            if field == "vllm_args":
                # vLLM arguments get a section header plus one indented
                # row per argument.
                table.add_row("vLLM Arguments:", style="magenta")
                for vllm_arg, vllm_value in value.items():
                    table.add_row(f"  {vllm_arg}:", str(vllm_value))
        return table

    def _format_all_models_output(self, model_infos: list[ModelInfo]) -> list[Panel]:
        """Format display panels for all models.

        Parameters
        ----------
        model_infos : list[ModelInfo]
            List of model information to format

        Returns
        -------
        list[Panel]
            Color-coded panels, one per model. (The JSON branch is
            handled by ``display_all_models_output``, so this method
            always returns panels — the previous
            ``Union[list[str], list[Panel]]`` annotation was inaccurate.)

        Notes
        -----
        Models are sorted by type priority and color-coded based on their type.
        """
        # Sort by model type priority; unknown types sort last (4)
        sorted_model_infos = sorted(
            model_infos,
            key=lambda x: MODEL_TYPE_PRIORITY.get(x.model_type, 4),
        )

        # Create panels with color coding; unknown types render white
        panels = []
        for model_info in sorted_model_infos:
            color = MODEL_TYPE_COLORS.get(model_info.model_type, "white")
            variant = model_info.variant or ""
            display_text = f"[magenta]{model_info.family}[/magenta]"
            if variant:
                display_text += f"-{variant}"
            panels.append(Panel(display_text, expand=True, border_style=color))

        return panels

    def display_single_model_output(self, config: ModelConfig) -> None:
        """Display the output for a single model.

        Parameters
        ----------
        config : ModelConfig
            Model configuration to display
        """
        output = self._format_single_model_output(config)
        if self.json_mode:
            click.echo(output)
        else:
            self.console.print(output)

    def display_all_models_output(self, model_infos: list[ModelInfo]) -> None:
        """Display the output for all models.

        Parameters
        ----------
        model_infos : list[ModelInfo]
            List of model information to display

        Notes
        -----
        Output format depends on json_mode:
        - JSON: List of model names (echoed as a Python list repr)
        - Table: Color-coded panels laid out in equal-width columns
        """
        if self.json_mode:
            model_names = [info.name for info in model_infos]
            click.echo(model_names)
        else:
            panels = self._format_all_models_output(model_infos)
            self.console.print(Columns(panels, equal=True))
vec_inf/cli/_utils.py CHANGED
@@ -1,147 +1,38 @@
1
- """Utility functions for the CLI."""
1
+ """Helper functions for the CLI.
2
2
 
3
- import os
4
- import subprocess
5
- from typing import Dict, List, Optional, Tuple, Union, cast
3
+ This module provides utility functions for creating consistent table displays
4
+ in the command-line interface.
5
+ """
6
6
 
7
- import polars as pl
8
- import requests
9
7
  from rich.table import Table
10
8
 
11
9
 
12
- MODEL_READY_SIGNATURE = "INFO: Application startup complete."
13
- SERVER_ADDRESS_SIGNATURE = "Server address: "
14
-
15
-
16
- def run_bash_command(command: str) -> str:
17
- """Run a bash command and return the output."""
18
- process = subprocess.Popen(
19
- command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
20
- )
21
- stdout, _ = process.communicate()
22
- return stdout
23
-
24
-
25
- def read_slurm_log(
26
- slurm_job_name: str, slurm_job_id: int, slurm_log_type: str, log_dir: Optional[str]
27
- ) -> Union[list[str], str]:
28
- """Read the slurm log file."""
29
- if not log_dir:
30
- models_dir = os.path.join(os.path.expanduser("~"), ".vec-inf-logs")
31
-
32
- for directory in sorted(os.listdir(models_dir), key=len, reverse=True):
33
- if directory in slurm_job_name:
34
- log_dir = os.path.join(models_dir, directory)
35
- break
36
-
37
- log_dir = cast(str, log_dir)
38
-
39
- try:
40
- file_path = os.path.join(
41
- log_dir,
42
- f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}",
43
- )
44
- with open(file_path, "r") as file:
45
- lines = file.readlines()
46
- except FileNotFoundError:
47
- print(f"Could not find file: {file_path}")
48
- return "LOG_FILE_NOT_FOUND"
49
- return lines
50
-
51
-
52
- def is_server_running(
53
- slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
54
- ) -> Union[str, tuple[str, str]]:
55
- """Check if a model is ready to serve requests."""
56
- log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
57
- if isinstance(log_content, str):
58
- return log_content
59
-
60
- status: Union[str, tuple[str, str]] = "LAUNCHING"
61
-
62
- for line in log_content:
63
- if "error" in line.lower():
64
- status = ("FAILED", line.strip("\n"))
65
- if MODEL_READY_SIGNATURE in line:
66
- status = "RUNNING"
67
-
68
- return status
69
-
70
-
71
- def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
72
- """Get the base URL of a model."""
73
- log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
74
- if isinstance(log_content, str):
75
- return log_content
76
-
77
- for line in log_content:
78
- if SERVER_ADDRESS_SIGNATURE in line:
79
- return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
80
- return "URL_NOT_FOUND"
81
-
82
-
83
- def model_health_check(
84
- slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
85
- ) -> Tuple[str, Union[str, int]]:
86
- """Check the health of a running model on the cluster."""
87
- base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
88
- if not base_url.startswith("http"):
89
- return ("FAILED", base_url)
90
- health_check_url = base_url.replace("v1", "health")
91
-
92
- try:
93
- response = requests.get(health_check_url)
94
- # Check if the request was successful
95
- if response.status_code == 200:
96
- return ("READY", response.status_code)
97
- return ("FAILED", response.status_code)
98
- except requests.exceptions.RequestException as e:
99
- return ("FAILED", str(e))
100
-
101
-
102
10
def create_table(
    key_title: str = "", value_title: str = "", show_header: bool = True
) -> Table:
    """Build a two-column Rich table for key/value display.

    Provides the consistent styling used across all CLI output tables.

    Parameters
    ----------
    key_title : str, default=""
        Title for the key column
    value_title : str, default=""
        Title for the value column
    show_header : bool, default=True
        Whether to display column headers

    Returns
    -------
    Table
        Table styled with bold magenta headers, a dim key column, and a
        default-style value column.
    """
    result = Table(show_header=show_header, header_style="bold magenta")
    result.add_column(key_title, style="dim")
    result.add_column(value_title)
    return result
110
-
111
-
112
- def load_models_df() -> pl.DataFrame:
113
- """Load the models dataframe."""
114
- return pl.read_csv(
115
- os.path.join(
116
- os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
117
- "models/models.csv",
118
- )
119
- )
120
-
121
-
122
- def load_default_args(models_df: pl.DataFrame, model_name: str) -> Dict[str, str]:
123
- """Load the default arguments for a model."""
124
- row_data = models_df.filter(models_df["model_name"] == model_name)
125
- default_args = row_data.to_dicts()[0]
126
- default_args.pop("model_name", None)
127
- return default_args
128
-
129
-
130
- def get_latest_metric(log_lines: List[str]) -> Union[str, Dict[str, str]]:
131
- """Read the latest metric entry from the log file."""
132
- latest_metric = {}
133
-
134
- try:
135
- for line in reversed(log_lines):
136
- if "Avg prompt throughput" in line:
137
- # Parse the metric values from the line
138
- metrics_str = line.split("] ")[1].strip().strip(".")
139
- metrics_list = metrics_str.split(", ")
140
- for metric in metrics_list:
141
- key, value = metric.split(": ")
142
- latest_metric[key] = value
143
- break
144
- except Exception as e:
145
- return f"[red]Error reading log file: {e}[/red]"
146
-
147
- return latest_metric
vec_inf/cli/_vars.py ADDED
@@ -0,0 +1,32 @@
1
+ """Constants for CLI rendering.
2
+
3
+ This module defines constant mappings for model type priorities and colors
4
+ used in the CLI display formatting.
5
+
6
+ Constants
7
+ ---------
8
+ MODEL_TYPE_PRIORITY : dict
9
+ Mapping of model types to their display priority (lower numbers shown first)
10
+
11
+ MODEL_TYPE_COLORS : dict
12
+ Mapping of model types to their display colors in Rich
13
+
14
+ Notes
15
+ -----
16
+ These constants are used primarily by the ListCmdDisplay class to ensure
17
+ consistent sorting and color coding of different model types in the CLI output.
18
+ """
19
+
20
# Display priority for each model type; lower values are listed first.
# Lookup sites use .get(..., 4) so unknown types sort after these four.
MODEL_TYPE_PRIORITY = {
    "LLM": 0,
    "VLM": 1,
    "Text_Embedding": 2,
    "Reward_Modeling": 3,
}

# Rich color name used for each model type's panel border.
# Lookup sites use .get(..., "white") so unknown types render white.
MODEL_TYPE_COLORS = {
    "LLM": "cyan",
    "VLM": "bright_blue",
    "Text_Embedding": "purple",
    "Reward_Modeling": "bright_magenta",
}
@@ -0,0 +1,31 @@
1
+ """Programmatic API for Vector Inference.
2
+
3
+ This module provides a Python API for launching and managing inference servers
4
+ using `vec_inf`. It is an alternative to the command-line interface, and allows
5
+ users direct control over the lifecycle of inference servers via python scripts.
6
+ """
7
+
8
+ from vec_inf.client.api import VecInfClient
9
+ from vec_inf.client.config import ModelConfig
10
+ from vec_inf.client.models import (
11
+ LaunchOptions,
12
+ LaunchResponse,
13
+ MetricsResponse,
14
+ ModelInfo,
15
+ ModelStatus,
16
+ ModelType,
17
+ StatusResponse,
18
+ )
19
+
20
+
21
# Public API surface re-exported at the package root; controls what
# `from vec_inf.client import *` exposes.
__all__ = [
    "VecInfClient",
    "LaunchResponse",
    "StatusResponse",
    "ModelInfo",
    "MetricsResponse",
    "ModelStatus",
    "ModelType",
    "LaunchOptions",
    "ModelConfig",
]
+ ]