vec-inf 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/README.md CHANGED
@@ -1,9 +1,23 @@
1
- # `vec-inf` Commands
1
+ ## `vec-inf` CLI Commands
2
2
 
3
- * `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server, `--json-mode` supported.
4
- * `status`: Check the model status by providing its Slurm job ID, `--json-mode` supported.
3
+ * `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server.
4
+ * `batch-launch`: Specify a list of models to launch multiple OpenAI compatible inference servers at the same time.
5
+ * `status`: Check the model status by providing its Slurm job ID.
5
6
  * `metrics`: Streams performance metrics to the console.
6
7
  * `shutdown`: Shutdown a model by providing its Slurm job ID.
7
- * `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported.
8
+ * `list`: List all available model names, or view the default/cached configuration of a specific model.
9
+ * `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, `--job-id`, and/or `--before-job-id`. Use `--dry-run` to preview what would be deleted.
8
10
 
9
11
  Use `--help` to see all available options
12
+
13
+ ## `VecInfClient` API
14
+
15
+ * `launch_model`: Launch an OpenAI compatible inference server.
16
+ * `batch_launch_models`: Launch multiple OpenAI compatible inference servers.
17
+ * `get_status`: Get the status of a running model.
18
+ * `get_metrics`: Get the performance metrics of a running model.
19
+ * `shutdown_model`: Shutdown a running model.
20
+ * `list_models`: List all available models.
21
+ * `get_model_config`: Get the configuration for a specific model.
22
+ * `wait_until_ready`: Wait until a model is ready or fails.
23
+ * `cleanup_logs`: Remove logs from the log directory.
vec_inf/cli/_cli.py CHANGED
@@ -18,6 +18,7 @@ metrics
18
18
  Stream real-time performance metrics
19
19
  """
20
20
 
21
+ import json
21
22
  import time
22
23
  from typing import Optional, Union
23
24
 
@@ -26,6 +27,7 @@ from rich.console import Console
26
27
  from rich.live import Live
27
28
 
28
29
  from vec_inf.cli._helper import (
30
+ BatchLaunchResponseFormatter,
29
31
  LaunchResponseFormatter,
30
32
  ListCmdDisplay,
31
33
  MetricsResponseFormatter,
@@ -43,14 +45,19 @@ def cli() -> None:
43
45
  pass
44
46
 
45
47
 
46
- @cli.command("launch")
48
+ @cli.command("launch", help="Launch a model on the cluster.")
47
49
  @click.argument("model-name", type=str, nargs=1)
48
50
  @click.option("--model-family", type=str, help="The model family")
49
51
  @click.option("--model-variant", type=str, help="The model variant")
50
52
  @click.option(
51
53
  "--partition",
52
54
  type=str,
53
- help="Type of compute partition",
55
+ help="Type of Slurm partition",
56
+ )
57
+ @click.option(
58
+ "--resource-type",
59
+ type=str,
60
+ help="Type of resource to request for the job",
54
61
  )
55
62
  @click.option(
56
63
  "--num-nodes",
@@ -64,14 +71,36 @@ def cli() -> None:
64
71
  )
65
72
  @click.option(
66
73
  "--account",
74
+ "-A",
67
75
  type=str,
68
76
  help="Charge resources used by this job to specified account.",
69
77
  )
78
+ @click.option(
79
+ "--work-dir",
80
+ "-D",
81
+ type=str,
82
+ help="Set working directory for the batch job",
83
+ )
70
84
  @click.option(
71
85
  "--qos",
72
86
  type=str,
73
87
  help="Quality of service",
74
88
  )
89
+ @click.option(
90
+ "--exclude",
91
+ type=str,
92
+ help="Exclude certain nodes from the resources granted to the job",
93
+ )
94
+ @click.option(
95
+ "--nodelist",
96
+ type=str,
97
+ help="Request a specific list of nodes for deployment",
98
+ )
99
+ @click.option(
100
+ "--bind",
101
+ type=str,
102
+ help="Additional binds for the container as a comma separated list of bind paths",
103
+ )
75
104
  @click.option(
76
105
  "--time",
77
106
  type=str,
@@ -102,6 +131,16 @@ def cli() -> None:
102
131
  is_flag=True,
103
132
  help="Output in JSON string",
104
133
  )
134
+ @click.option(
135
+ "--env",
136
+ type=str,
137
+ help="Environment variables to be set. Separate variables with commas. Can also include path to a file containing environment variables separated by newlines. e.g. --env 'TRITON_CACHE_DIR=/scratch/.cache/triton,my_custom_vars_file.env'",
138
+ )
139
+ @click.option(
140
+ "--config",
141
+ type=str,
142
+ help="Path to a model config yaml file to use in place of the default",
143
+ )
105
144
  def launch(
106
145
  model_name: str,
107
146
  **cli_kwargs: Optional[Union[str, int, float, bool]],
@@ -119,13 +158,25 @@ def launch(
119
158
  - model_variant : str, optional
120
159
  Specific variant of the model
121
160
  - partition : str, optional
122
- Type of compute partition
161
+ Type of Slurm partition
162
+ - resource_type : str, optional
163
+ Type of resource to request for the job
123
164
  - num_nodes : int, optional
124
165
  Number of nodes to use
125
166
  - gpus_per_node : int, optional
126
167
  Number of GPUs per node
168
+ - account : str, optional
169
+ Charge resources used by this job to specified account
170
+ - work_dir : str, optional
171
+ Set working directory for the batch job
127
172
  - qos : str, optional
128
173
  Quality of service tier
174
+ - exclude : str, optional
175
+ Exclude certain nodes from the resources granted to the job
176
+ - nodelist : str, optional
177
+ Request a specific list of nodes for deployment
178
+ - bind : str, optional
179
+ Additional binds for the container as a comma separated list of bind paths
129
180
  - time : str, optional
130
181
  Time limit for job
131
182
  - venv : str, optional
@@ -136,6 +187,10 @@ def launch(
136
187
  Path to model weights directory
137
188
  - vllm_args : str, optional
138
189
  vLLM engine arguments
190
+ - env : str, optional
191
+ Environment variables
192
+ - config : str, optional
193
+ Path to custom model config yaml file
139
194
  - json_mode : bool, optional
140
195
  Output in JSON format
141
196
 
@@ -156,10 +211,12 @@ def launch(
156
211
  launch_response = client.launch_model(model_name, launch_options)
157
212
 
158
213
  # Display launch information
159
- launch_formatter = LaunchResponseFormatter(model_name, launch_response.config)
160
214
  if json_mode:
161
- click.echo(launch_response.config)
215
+ click.echo(json.dumps(launch_response.config))
162
216
  else:
217
+ launch_formatter = LaunchResponseFormatter(
218
+ model_name, launch_response.config
219
+ )
163
220
  launch_info_table = launch_formatter.format_table_output()
164
221
  CONSOLE.print(launch_info_table)
165
222
 
@@ -169,29 +226,93 @@ def launch(
169
226
  raise click.ClickException(f"Launch failed: {str(e)}") from e
170
227
 
171
228
 
172
- @cli.command("status")
173
- @click.argument("slurm_job_id", type=int, nargs=1)
229
+ @cli.command(
230
+ "batch-launch",
231
+ help="Launch multiple models in a batch, separate model names with spaces.",
232
+ )
233
+ @click.argument("model-names", type=str, nargs=-1)
174
234
  @click.option(
175
- "--log-dir",
235
+ "--batch-config",
236
+ type=str,
237
+ help="Model configuration for batch launch",
238
+ )
239
+ @click.option(
240
+ "--account",
241
+ "-A",
176
242
  type=str,
177
- help="Path to slurm log directory. This is required if --log-dir was set in model launch",
243
+ help="Charge resources used by this job to specified account.",
244
+ )
245
+ @click.option(
246
+ "--work-dir",
247
+ "-D",
248
+ type=str,
249
+ help="Set working directory for the batch job",
178
250
  )
179
251
  @click.option(
180
252
  "--json-mode",
181
253
  is_flag=True,
182
254
  help="Output in JSON string",
183
255
  )
184
- def status(
185
- slurm_job_id: int, log_dir: Optional[str] = None, json_mode: bool = False
256
+ def batch_launch(
257
+ model_names: tuple[str, ...],
258
+ batch_config: Optional[str] = None,
259
+ account: Optional[str] = None,
260
+ work_dir: Optional[str] = None,
261
+ json_mode: Optional[bool] = False,
186
262
  ) -> None:
263
+ """Launch multiple models in a batch.
264
+
265
+ Parameters
266
+ ----------
267
+ model_names : tuple[str, ...]
268
+ Names of the models to launch
269
+ batch_config : str
270
+ Model configuration for batch launch
271
+ json_mode : bool, default=False
272
+ Whether to output in JSON format
273
+
274
+ Raises
275
+ ------
276
+ click.ClickException
277
+ If batch launch fails
278
+ """
279
+ try:
280
+ # Start the client and launch models in batch mode
281
+ client = VecInfClient()
282
+ batch_launch_response = client.batch_launch_models(
283
+ list(model_names), batch_config, account, work_dir
284
+ )
285
+
286
+ # Display batch launch information
287
+ if json_mode:
288
+ click.echo(json.dumps(batch_launch_response.config, indent=4))
289
+ else:
290
+ batch_launch_formatter = BatchLaunchResponseFormatter(
291
+ batch_launch_response.config
292
+ )
293
+ batch_launch_info_table = batch_launch_formatter.format_table_output()
294
+ CONSOLE.print(batch_launch_info_table)
295
+
296
+ except click.ClickException as e:
297
+ raise e
298
+ except Exception as e:
299
+ raise click.ClickException(f"Batch launch failed: {str(e)}") from e
300
+
301
+
302
+ @cli.command("status", help="Check the status of a running model on the cluster.")
303
+ @click.argument("slurm_job_id", type=str, nargs=1)
304
+ @click.option(
305
+ "--json-mode",
306
+ is_flag=True,
307
+ help="Output in JSON string",
308
+ )
309
+ def status(slurm_job_id: str, json_mode: bool = False) -> None:
187
310
  """Get the status of a running model on the cluster.
188
311
 
189
312
  Parameters
190
313
  ----------
191
- slurm_job_id : int
314
+ slurm_job_id : str
192
315
  ID of the SLURM job to check
193
- log_dir : str, optional
194
- Path to SLURM log directory
195
316
  json_mode : bool, default=False
196
317
  Whether to output in JSON format
197
318
 
@@ -203,7 +324,7 @@ def status(
203
324
  try:
204
325
  # Start the client and get model inference server status
205
326
  client = VecInfClient()
206
- status_response = client.get_status(slurm_job_id, log_dir)
327
+ status_response = client.get_status(slurm_job_id)
207
328
  # Display status information
208
329
  status_formatter = StatusResponseFormatter(status_response)
209
330
  if json_mode:
@@ -218,14 +339,14 @@ def status(
218
339
  raise click.ClickException(f"Status check failed: {str(e)}") from e
219
340
 
220
341
 
221
- @cli.command("shutdown")
222
- @click.argument("slurm_job_id", type=int, nargs=1)
223
- def shutdown(slurm_job_id: int) -> None:
342
+ @cli.command("shutdown", help="Shutdown a running model on the cluster.")
343
+ @click.argument("slurm_job_id", type=str, nargs=1)
344
+ def shutdown(slurm_job_id: str) -> None:
224
345
  """Shutdown a running model on the cluster.
225
346
 
226
347
  Parameters
227
348
  ----------
228
- slurm_job_id : int
349
+ slurm_job_id : str
229
350
  ID of the SLURM job to shut down
230
351
 
231
352
  Raises
@@ -241,7 +362,7 @@ def shutdown(slurm_job_id: int) -> None:
241
362
  raise click.ClickException(f"Shutdown failed: {str(e)}") from e
242
363
 
243
364
 
244
- @cli.command("list")
365
+ @cli.command("list", help="List available models or get specific model configuration.")
245
366
  @click.argument("model-name", required=False)
246
367
  @click.option(
247
368
  "--json-mode",
@@ -279,20 +400,17 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> No
279
400
  raise click.ClickException(f"List models failed: {str(e)}") from e
280
401
 
281
402
 
282
- @cli.command("metrics")
283
- @click.argument("slurm_job_id", type=int, nargs=1)
284
- @click.option(
285
- "--log-dir", type=str, help="Path to slurm log directory (if used during launch)"
403
+ @cli.command(
404
+ "metrics", help="Stream real-time performance metrics from the model endpoint."
286
405
  )
287
- def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
406
+ @click.argument("slurm_job_id", type=str, nargs=1)
407
+ def metrics(slurm_job_id: str) -> None:
288
408
  """Stream real-time performance metrics from the model endpoint.
289
409
 
290
410
  Parameters
291
411
  ----------
292
- slurm_job_id : int
412
+ slurm_job_id : str
293
413
  ID of the SLURM job to monitor
294
- log_dir : str, optional
295
- Path to SLURM log directory
296
414
 
297
415
  Raises
298
416
  ------
@@ -308,7 +426,7 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
308
426
  try:
309
427
  # Start the client and get inference server metrics
310
428
  client = VecInfClient()
311
- metrics_response = client.get_metrics(slurm_job_id, log_dir)
429
+ metrics_response = client.get_metrics(slurm_job_id)
312
430
  metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
313
431
 
314
432
  # Check if metrics response is ready
@@ -319,7 +437,7 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
319
437
 
320
438
  with Live(refresh_per_second=1, console=CONSOLE) as live:
321
439
  while True:
322
- metrics_response = client.get_metrics(slurm_job_id, log_dir)
440
+ metrics_response = client.get_metrics(slurm_job_id)
323
441
  metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
324
442
 
325
443
  if isinstance(metrics_response.metrics, str):
@@ -336,5 +454,69 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
336
454
  raise click.ClickException(f"Metrics check failed: {str(e)}") from e
337
455
 
338
456
 
457
+ @cli.command("cleanup", help="Clean up log files based on optional filters.")
458
+ @click.option("--log-dir", type=str, help="Path to SLURM log directory")
459
+ @click.option("--model-family", type=str, help="Filter by model family")
460
+ @click.option("--model-name", type=str, help="Filter by model name")
461
+ @click.option(
462
+ "--job-id", type=int, help="Only remove logs with this exact SLURM job ID"
463
+ )
464
+ @click.option(
465
+ "--before-job-id",
466
+ type=int,
467
+ help="Remove logs with job ID less than this value",
468
+ )
469
+ @click.option("--dry-run", is_flag=True, help="List matching logs without deleting")
470
+ def cleanup_logs_cli(
471
+ log_dir: Optional[str],
472
+ model_family: Optional[str],
473
+ model_name: Optional[str],
474
+ job_id: Optional[int],
475
+ before_job_id: Optional[int],
476
+ dry_run: bool,
477
+ ) -> None:
478
+ """Clean up log files based on optional filters.
479
+
480
+ Parameters
481
+ ----------
482
+ log_dir : str or Path, optional
483
+ Root directory containing log files. Defaults to ~/.vec-inf-logs.
484
+ model_family : str, optional
485
+ Only delete logs for this model family.
486
+ model_name : str, optional
487
+ Only delete logs for this model name.
488
+ job_id : int, optional
489
+ If provided, only match directories with this exact SLURM job ID.
490
+ before_job_id : int, optional
491
+ If provided, only delete logs with job ID less than this value.
492
+ dry_run : bool
493
+ If True, return matching files without deleting them.
494
+ """
495
+ try:
496
+ client = VecInfClient()
497
+ matched = client.cleanup_logs(
498
+ log_dir=log_dir,
499
+ model_family=model_family,
500
+ model_name=model_name,
501
+ job_id=job_id,
502
+ before_job_id=before_job_id,
503
+ dry_run=dry_run,
504
+ )
505
+
506
+ if not matched:
507
+ if dry_run:
508
+ click.echo("Dry run: no matching log directories found.")
509
+ else:
510
+ click.echo("No matching log directories were deleted.")
511
+ elif dry_run:
512
+ click.echo(f"Dry run: {len(matched)} directories would be deleted:")
513
+ for f in matched:
514
+ click.echo(f" - {f}")
515
+ else:
516
+ click.echo(f"Deleted {len(matched)} log directory(ies).")
517
+ except Exception as e:
518
+ raise click.ClickException(f"Cleanup failed: {str(e)}") from e
519
+
520
+
339
521
  if __name__ == "__main__":
340
522
  cli()
vec_inf/cli/_helper.py CHANGED
@@ -4,6 +4,7 @@ This module provides formatting and display classes for the command-line interfa
4
4
  handling the presentation of model information, status updates, and metrics.
5
5
  """
6
6
 
7
+ import json
7
8
  from pathlib import Path
8
9
  from typing import Any, Union
9
10
 
@@ -27,9 +28,8 @@ class LaunchResponseFormatter:
27
28
  Parameters
28
29
  ----------
29
30
  model_name : str
30
- Name of the launched model
31
- params : dict[str, Any]
32
- Launch parameters and configuration
31
+ Name of the launched model params : dict[str, Any] Launch parameters and
32
+ configuration
33
33
  """
34
34
 
35
35
  def __init__(self, model_name: str, params: dict[str, Any]):
@@ -59,8 +59,16 @@ class LaunchResponseFormatter:
59
59
  table.add_row("Vocabulary Size", self.params["vocab_size"])
60
60
 
61
61
  # Add resource allocation details
62
- table.add_row("Partition", self.params["partition"])
63
- table.add_row("QoS", self.params["qos"])
62
+ if self.params.get("account"):
63
+ table.add_row("Account", self.params["account"])
64
+ if self.params.get("work_dir"):
65
+ table.add_row("Working Directory", self.params["work_dir"])
66
+ if self.params.get("resource_type"):
67
+ table.add_row("Resource Type", self.params["resource_type"])
68
+ if self.params.get("partition"):
69
+ table.add_row("Partition", self.params["partition"])
70
+ if self.params.get("qos"):
71
+ table.add_row("QoS", self.params["qos"])
64
72
  table.add_row("Time Limit", self.params["time"])
65
73
  table.add_row("Num Nodes", self.params["num_nodes"])
66
74
  table.add_row("GPUs/Node", self.params["gpus_per_node"])
@@ -79,6 +87,80 @@ class LaunchResponseFormatter:
79
87
  for arg, value in self.params["vllm_args"].items():
80
88
  table.add_row(f" {arg}:", str(value))
81
89
 
90
+ # Add Environment Variable Configuration Details
91
+ table.add_row("Environment Variables", style="magenta")
92
+ for arg, value in self.params["env"].items():
93
+ table.add_row(f" {arg}:", str(value))
94
+
95
+ return table
96
+
97
+
98
+ class BatchLaunchResponseFormatter:
99
+ """CLI Helper class for formatting BatchLaunchResponse.
100
+
101
+ A formatter class that handles the presentation of batch launch information
102
+ in both table and JSON formats.
103
+
104
+ Parameters
105
+ ----------
106
+ params : dict[str, Any]
107
+ Configuration for the batch launch
108
+ """
109
+
110
+ def __init__(self, params: dict[str, Any]):
111
+ self.params = params
112
+
113
+ def format_table_output(self) -> Table:
114
+ """Format output as rich Table.
115
+
116
+ Returns
117
+ -------
118
+ Table
119
+ Rich table containing formatted batch launch information including:
120
+ - Job configuration
121
+ - Model details
122
+ - Resource allocation
123
+ - vLLM configuration
124
+ """
125
+ table = create_table(key_title="Job Config", value_title="Value")
126
+ # Add key information with consistent styling
127
+ table.add_row("Slurm Job ID", self.params["slurm_job_id"], style="blue")
128
+ table.add_row("Slurm Job Name", self.params["slurm_job_name"], style="blue")
129
+ if self.params.get("account"):
130
+ table.add_row("Account", self.params["account"], style="blue")
131
+ if self.params.get("work_dir"):
132
+ table.add_row("Working Directory", self.params["work_dir"], style="blue")
133
+ table.add_row("Log Directory", self.params["log_dir"], style="blue")
134
+ for model_name in self.params["models"]:
135
+ table.add_row("Model Name", model_name, style="magenta")
136
+ # Add resource allocation details
137
+ if self.params["models"][model_name].get("resource_type"):
138
+ table.add_row(
139
+ "Resource Type",
140
+ f" {self.params['models'][model_name]['resource_type']}",
141
+ )
142
+ if self.params["models"][model_name].get("partition"):
143
+ table.add_row(
144
+ "Partition", f" {self.params['models'][model_name]['partition']}"
145
+ )
146
+ if self.params["models"][model_name].get("qos"):
147
+ table.add_row("QoS", f" {self.params['models'][model_name]['qos']}")
148
+ table.add_row(
149
+ "Time Limit", f" {self.params['models'][model_name]['time']}"
150
+ )
151
+ table.add_row(
152
+ "Num Nodes", f" {self.params['models'][model_name]['num_nodes']}"
153
+ )
154
+ table.add_row(
155
+ "GPUs/Node", f" {self.params['models'][model_name]['gpus_per_node']}"
156
+ )
157
+ table.add_row(
158
+ "CPUs/Task", f" {self.params['models'][model_name]['cpus_per_task']}"
159
+ )
160
+ table.add_row(
161
+ "Memory/Node", f" {self.params['models'][model_name]['mem_per_node']}"
162
+ )
163
+
82
164
  return table
83
165
 
84
166
 
@@ -116,7 +198,8 @@ class StatusResponseFormatter:
116
198
  json_data["pending_reason"] = self.status_info.pending_reason
117
199
  if self.status_info.failed_reason:
118
200
  json_data["failed_reason"] = self.status_info.failed_reason
119
- click.echo(json_data)
201
+
202
+ click.echo(json.dumps(json_data, indent=4))
120
203
 
121
204
  def output_table(self) -> Table:
122
205
  """Create and display rich table.
@@ -292,9 +375,7 @@ class ListCmdDisplay:
292
375
  self.model_config = None
293
376
  self.model_names: list[str] = []
294
377
 
295
- def _format_single_model_output(
296
- self, config: ModelConfig
297
- ) -> Union[dict[str, Any], Table]:
378
+ def _format_single_model_output(self, config: ModelConfig) -> Union[str, Table]:
298
379
  """Format output table for a single model.
299
380
 
300
381
  Parameters
@@ -304,8 +385,8 @@ class ListCmdDisplay:
304
385
 
305
386
  Returns
306
387
  -------
307
- Union[dict[str, Any], Table]
308
- Either a dictionary for JSON output or a Rich table
388
+ Union[str, Table]
389
+ Either a JSON string for JSON output or a Rich table
309
390
  """
310
391
  if self.json_mode:
311
392
  # Exclude non-essential fields from JSON output
@@ -315,11 +396,11 @@ class ListCmdDisplay:
315
396
  config_dict["model_weights_parent_dir"] = str(
316
397
  config_dict["model_weights_parent_dir"]
317
398
  )
318
- return config_dict
399
+ return json.dumps(config_dict, indent=4)
319
400
 
320
401
  table = create_table(key_title="Model Config", value_title="Value")
321
402
  for field, value in config.model_dump().items():
322
- if field not in {"venv", "log_dir", "vllm_args"}:
403
+ if field not in {"venv", "log_dir", "vllm_args"} and value:
323
404
  table.add_row(field, str(value))
324
405
  if field == "vllm_args":
325
406
  table.add_row("vLLM Arguments:", style="magenta")
@@ -394,7 +475,7 @@ class ListCmdDisplay:
394
475
  """
395
476
  if self.json_mode:
396
477
  model_names = [info.name for info in model_infos]
397
- click.echo(model_names)
478
+ click.echo(json.dumps(model_names, indent=4))
398
479
  else:
399
480
  panels = self._format_all_models_output(model_infos)
400
481
  self.console.print(Columns(panels, equal=True))