vec-inf 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/README.md CHANGED
@@ -1,9 +1,23 @@
1
- # `vec-inf` Commands
1
+ ## `vec-inf` CLI Commands
2
2
 
3
- * `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server, `--json-mode` supported.
4
- * `status`: Check the model status by providing its Slurm job ID, `--json-mode` supported.
3
+ * `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server.
4
+ * `batch-launch`: Specify a list of models to launch multiple OpenAI compatible inference servers at the same time.
5
+ * `status`: Check the model status by providing its Slurm job ID.
5
6
  * `metrics`: Streams performance metrics to the console.
6
7
  * `shutdown`: Shutdown a model by providing its Slurm job ID.
7
- * `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported.
8
+ * `list`: List all available model names, or view the default/cached configuration of a specific model.
9
+ * `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, `--job-id`, and/or `--before-job-id`. Use `--dry-run` to preview what would be deleted.
8
10
 
9
11
  Use `--help` to see all available options
12
+
13
+ ## `VecInfClient` API
14
+
15
+ * `launch_model`: Launch an OpenAI compatible inference server.
16
+ * `batch_launch_models`: Launch multiple OpenAI compatible inference servers.
17
+ * `get_status`: Get the status of a running model.
18
+ * `get_metrics`: Get the performance metrics of a running model.
19
+ * `shutdown_model`: Shutdown a running model.
20
+ * `list_models`: List all available models.
21
+ * `get_model_config`: Get the configuration for a specific model.
22
+ * `wait_until_ready`: Wait until a model is ready or fails.
23
+ * `cleanup_logs`: Remove logs from the log directory.
vec_inf/cli/_cli.py CHANGED
@@ -27,6 +27,7 @@ from rich.console import Console
27
27
  from rich.live import Live
28
28
 
29
29
  from vec_inf.cli._helper import (
30
+ BatchLaunchResponseFormatter,
30
31
  LaunchResponseFormatter,
31
32
  ListCmdDisplay,
32
33
  MetricsResponseFormatter,
@@ -44,14 +45,19 @@ def cli() -> None:
44
45
  pass
45
46
 
46
47
 
47
- @cli.command("launch")
48
+ @cli.command("launch", help="Launch a model on the cluster.")
48
49
  @click.argument("model-name", type=str, nargs=1)
49
50
  @click.option("--model-family", type=str, help="The model family")
50
51
  @click.option("--model-variant", type=str, help="The model variant")
51
52
  @click.option(
52
53
  "--partition",
53
54
  type=str,
54
- help="Type of compute partition",
55
+ help="Type of Slurm partition",
56
+ )
57
+ @click.option(
58
+ "--resource-type",
59
+ type=str,
60
+ help="Type of resource to request for the job",
55
61
  )
56
62
  @click.option(
57
63
  "--num-nodes",
@@ -65,9 +71,16 @@ def cli() -> None:
65
71
  )
66
72
  @click.option(
67
73
  "--account",
74
+ "-A",
68
75
  type=str,
69
76
  help="Charge resources used by this job to specified account.",
70
77
  )
78
+ @click.option(
79
+ "--work-dir",
80
+ "-D",
81
+ type=str,
82
+ help="Set working directory for the batch job",
83
+ )
71
84
  @click.option(
72
85
  "--qos",
73
86
  type=str,
@@ -79,14 +92,14 @@ def cli() -> None:
79
92
  help="Exclude certain nodes from the resources granted to the job",
80
93
  )
81
94
  @click.option(
82
- "--node-list",
95
+ "--nodelist",
83
96
  type=str,
84
97
  help="Request a specific list of nodes for deployment",
85
98
  )
86
99
  @click.option(
87
100
  "--bind",
88
101
  type=str,
89
- help="Additional binds for the singularity container as a comma separated list of bind paths",
102
+ help="Additional binds for the container as a comma separated list of bind paths",
90
103
  )
91
104
  @click.option(
92
105
  "--time",
@@ -118,6 +131,16 @@ def cli() -> None:
118
131
  is_flag=True,
119
132
  help="Output in JSON string",
120
133
  )
134
+ @click.option(
135
+ "--env",
136
+ type=str,
137
+ help="Environment variables to be set. Seperate variables with commas. Can also include path to a file containing environment variables seperated by newlines. e.g. --env 'TRITON_CACHE_DIR=/scratch/.cache/triton,my_custom_vars_file.env'",
138
+ )
139
+ @click.option(
140
+ "--config",
141
+ type=str,
142
+ help="Path to a model config yaml file to use in place of the default",
143
+ )
121
144
  def launch(
122
145
  model_name: str,
123
146
  **cli_kwargs: Optional[Union[str, int, float, bool]],
@@ -135,21 +158,25 @@ def launch(
135
158
  - model_variant : str, optional
136
159
  Specific variant of the model
137
160
  - partition : str, optional
138
- Type of compute partition
161
+ Type of Slurm partition
162
+ - resource_type : str, optional
163
+ Type of resource to request for the job
139
164
  - num_nodes : int, optional
140
165
  Number of nodes to use
141
166
  - gpus_per_node : int, optional
142
167
  Number of GPUs per node
143
168
  - account : str, optional
144
169
  Charge resources used by this job to specified account
170
+ - work_dir : str, optional
171
+ Set working directory for the batch job
145
172
  - qos : str, optional
146
173
  Quality of service tier
147
174
  - exclude : str, optional
148
175
  Exclude certain nodes from the resources granted to the job
149
- - node_list : str, optional
176
+ - nodelist : str, optional
150
177
  Request a specific list of nodes for deployment
151
178
  - bind : str, optional
152
- Additional binds for the singularity container
179
+ Additional binds for the container as a comma separated list of bind paths
153
180
  - time : str, optional
154
181
  Time limit for job
155
182
  - venv : str, optional
@@ -160,6 +187,10 @@ def launch(
160
187
  Path to model weights directory
161
188
  - vllm_args : str, optional
162
189
  vLLM engine arguments
190
+ - env : str, optional
191
+ Environment variables
192
+ - config : str, optional
193
+ Path to custom model config yaml file
163
194
  - json_mode : bool, optional
164
195
  Output in JSON format
165
196
 
@@ -180,11 +211,12 @@ def launch(
180
211
  launch_response = client.launch_model(model_name, launch_options)
181
212
 
182
213
  # Display launch information
183
- launch_formatter = LaunchResponseFormatter(model_name, launch_response.config)
184
-
185
214
  if json_mode:
186
215
  click.echo(json.dumps(launch_response.config))
187
216
  else:
217
+ launch_formatter = LaunchResponseFormatter(
218
+ model_name, launch_response.config
219
+ )
188
220
  launch_info_table = launch_formatter.format_table_output()
189
221
  CONSOLE.print(launch_info_table)
190
222
 
@@ -194,29 +226,93 @@ def launch(
194
226
  raise click.ClickException(f"Launch failed: {str(e)}") from e
195
227
 
196
228
 
197
- @cli.command("status")
198
- @click.argument("slurm_job_id", type=int, nargs=1)
229
+ @cli.command(
230
+ "batch-launch",
231
+ help="Launch multiple models in a batch, separate model names with spaces.",
232
+ )
233
+ @click.argument("model-names", type=str, nargs=-1)
199
234
  @click.option(
200
- "--log-dir",
235
+ "--batch-config",
236
+ type=str,
237
+ help="Model configuration for batch launch",
238
+ )
239
+ @click.option(
240
+ "--account",
241
+ "-A",
201
242
  type=str,
202
- help="Path to slurm log directory. This is required if --log-dir was set in model launch",
243
+ help="Charge resources used by this job to specified account.",
244
+ )
245
+ @click.option(
246
+ "--work-dir",
247
+ "-D",
248
+ type=str,
249
+ help="Set working directory for the batch job",
203
250
  )
204
251
  @click.option(
205
252
  "--json-mode",
206
253
  is_flag=True,
207
254
  help="Output in JSON string",
208
255
  )
209
- def status(
210
- slurm_job_id: int, log_dir: Optional[str] = None, json_mode: bool = False
256
+ def batch_launch(
257
+ model_names: tuple[str, ...],
258
+ batch_config: Optional[str] = None,
259
+ account: Optional[str] = None,
260
+ work_dir: Optional[str] = None,
261
+ json_mode: Optional[bool] = False,
211
262
  ) -> None:
263
+ """Launch multiple models in a batch.
264
+
265
+ Parameters
266
+ ----------
267
+ model_names : tuple[str, ...]
268
+ Names of the models to launch
269
+ batch_config : str
270
+ Model configuration for batch launch
271
+ json_mode : bool, default=False
272
+ Whether to output in JSON format
273
+
274
+ Raises
275
+ ------
276
+ click.ClickException
277
+ If batch launch fails
278
+ """
279
+ try:
280
+ # Start the client and launch models in batch mode
281
+ client = VecInfClient()
282
+ batch_launch_response = client.batch_launch_models(
283
+ list(model_names), batch_config, account, work_dir
284
+ )
285
+
286
+ # Display batch launch information
287
+ if json_mode:
288
+ click.echo(json.dumps(batch_launch_response.config, indent=4))
289
+ else:
290
+ batch_launch_formatter = BatchLaunchResponseFormatter(
291
+ batch_launch_response.config
292
+ )
293
+ batch_launch_info_table = batch_launch_formatter.format_table_output()
294
+ CONSOLE.print(batch_launch_info_table)
295
+
296
+ except click.ClickException as e:
297
+ raise e
298
+ except Exception as e:
299
+ raise click.ClickException(f"Batch launch failed: {str(e)}") from e
300
+
301
+
302
+ @cli.command("status", help="Check the status of a running model on the cluster.")
303
+ @click.argument("slurm_job_id", type=str, nargs=1)
304
+ @click.option(
305
+ "--json-mode",
306
+ is_flag=True,
307
+ help="Output in JSON string",
308
+ )
309
+ def status(slurm_job_id: str, json_mode: bool = False) -> None:
212
310
  """Get the status of a running model on the cluster.
213
311
 
214
312
  Parameters
215
313
  ----------
216
- slurm_job_id : int
314
+ slurm_job_id : str
217
315
  ID of the SLURM job to check
218
- log_dir : str, optional
219
- Path to SLURM log directory
220
316
  json_mode : bool, default=False
221
317
  Whether to output in JSON format
222
318
 
@@ -228,7 +324,7 @@ def status(
228
324
  try:
229
325
  # Start the client and get model inference server status
230
326
  client = VecInfClient()
231
- status_response = client.get_status(slurm_job_id, log_dir)
327
+ status_response = client.get_status(slurm_job_id)
232
328
  # Display status information
233
329
  status_formatter = StatusResponseFormatter(status_response)
234
330
  if json_mode:
@@ -243,14 +339,14 @@ def status(
243
339
  raise click.ClickException(f"Status check failed: {str(e)}") from e
244
340
 
245
341
 
246
- @cli.command("shutdown")
247
- @click.argument("slurm_job_id", type=int, nargs=1)
248
- def shutdown(slurm_job_id: int) -> None:
342
+ @cli.command("shutdown", help="Shutdown a running model on the cluster.")
343
+ @click.argument("slurm_job_id", type=str, nargs=1)
344
+ def shutdown(slurm_job_id: str) -> None:
249
345
  """Shutdown a running model on the cluster.
250
346
 
251
347
  Parameters
252
348
  ----------
253
- slurm_job_id : int
349
+ slurm_job_id : str
254
350
  ID of the SLURM job to shut down
255
351
 
256
352
  Raises
@@ -266,7 +362,7 @@ def shutdown(slurm_job_id: int) -> None:
266
362
  raise click.ClickException(f"Shutdown failed: {str(e)}") from e
267
363
 
268
364
 
269
- @cli.command("list")
365
+ @cli.command("list", help="List available models or get specific model configuration.")
270
366
  @click.argument("model-name", required=False)
271
367
  @click.option(
272
368
  "--json-mode",
@@ -304,20 +400,17 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> No
304
400
  raise click.ClickException(f"List models failed: {str(e)}") from e
305
401
 
306
402
 
307
- @cli.command("metrics")
308
- @click.argument("slurm_job_id", type=int, nargs=1)
309
- @click.option(
310
- "--log-dir", type=str, help="Path to slurm log directory (if used during launch)"
403
+ @cli.command(
404
+ "metrics", help="Stream real-time performance metrics from the model endpoint."
311
405
  )
312
- def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
406
+ @click.argument("slurm_job_id", type=str, nargs=1)
407
+ def metrics(slurm_job_id: str) -> None:
313
408
  """Stream real-time performance metrics from the model endpoint.
314
409
 
315
410
  Parameters
316
411
  ----------
317
- slurm_job_id : int
412
+ slurm_job_id : str
318
413
  ID of the SLURM job to monitor
319
- log_dir : str, optional
320
- Path to SLURM log directory
321
414
 
322
415
  Raises
323
416
  ------
@@ -333,7 +426,7 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
333
426
  try:
334
427
  # Start the client and get inference server metrics
335
428
  client = VecInfClient()
336
- metrics_response = client.get_metrics(slurm_job_id, log_dir)
429
+ metrics_response = client.get_metrics(slurm_job_id)
337
430
  metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
338
431
 
339
432
  # Check if metrics response is ready
@@ -344,7 +437,7 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
344
437
 
345
438
  with Live(refresh_per_second=1, console=CONSOLE) as live:
346
439
  while True:
347
- metrics_response = client.get_metrics(slurm_job_id, log_dir)
440
+ metrics_response = client.get_metrics(slurm_job_id)
348
441
  metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
349
442
 
350
443
  if isinstance(metrics_response.metrics, str):
@@ -361,5 +454,69 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
361
454
  raise click.ClickException(f"Metrics check failed: {str(e)}") from e
362
455
 
363
456
 
457
+ @cli.command("cleanup", help="Clean up log files based on optional filters.")
458
+ @click.option("--log-dir", type=str, help="Path to SLURM log directory")
459
+ @click.option("--model-family", type=str, help="Filter by model family")
460
+ @click.option("--model-name", type=str, help="Filter by model name")
461
+ @click.option(
462
+ "--job-id", type=int, help="Only remove logs with this exact SLURM job ID"
463
+ )
464
+ @click.option(
465
+ "--before-job-id",
466
+ type=int,
467
+ help="Remove logs with job ID less than this value",
468
+ )
469
+ @click.option("--dry-run", is_flag=True, help="List matching logs without deleting")
470
+ def cleanup_logs_cli(
471
+ log_dir: Optional[str],
472
+ model_family: Optional[str],
473
+ model_name: Optional[str],
474
+ job_id: Optional[int],
475
+ before_job_id: Optional[int],
476
+ dry_run: bool,
477
+ ) -> None:
478
+ """Clean up log files based on optional filters.
479
+
480
+ Parameters
481
+ ----------
482
+ log_dir : str or Path, optional
483
+ Root directory containing log files. Defaults to ~/.vec-inf-logs.
484
+ model_family : str, optional
485
+ Only delete logs for this model family.
486
+ model_name : str, optional
487
+ Only delete logs for this model name.
488
+ job_id : int, optional
489
+ If provided, only match directories with this exact SLURM job ID.
490
+ before_job_id : int, optional
491
+ If provided, only delete logs with job ID less than this value.
492
+ dry_run : bool
493
+ If True, return matching files without deleting them.
494
+ """
495
+ try:
496
+ client = VecInfClient()
497
+ matched = client.cleanup_logs(
498
+ log_dir=log_dir,
499
+ model_family=model_family,
500
+ model_name=model_name,
501
+ job_id=job_id,
502
+ before_job_id=before_job_id,
503
+ dry_run=dry_run,
504
+ )
505
+
506
+ if not matched:
507
+ if dry_run:
508
+ click.echo("Dry run: no matching log directories found.")
509
+ else:
510
+ click.echo("No matching log directories were deleted.")
511
+ elif dry_run:
512
+ click.echo(f"Dry run: {len(matched)} directories would be deleted:")
513
+ for f in matched:
514
+ click.echo(f" - {f}")
515
+ else:
516
+ click.echo(f"Deleted {len(matched)} log directory(ies).")
517
+ except Exception as e:
518
+ raise click.ClickException(f"Cleanup failed: {str(e)}") from e
519
+
520
+
364
521
  if __name__ == "__main__":
365
522
  cli()
vec_inf/cli/_helper.py CHANGED
@@ -4,6 +4,7 @@ This module provides formatting and display classes for the command-line interfa
4
4
  handling the presentation of model information, status updates, and metrics.
5
5
  """
6
6
 
7
+ import json
7
8
  from pathlib import Path
8
9
  from typing import Any, Union
9
10
 
@@ -27,9 +28,8 @@ class LaunchResponseFormatter:
27
28
  Parameters
28
29
  ----------
29
30
  model_name : str
30
- Name of the launched model
31
- params : dict[str, Any]
32
- Launch parameters and configuration
31
+ Name of the launched model params : dict[str, Any] Launch parameters and
32
+ configuration
33
33
  """
34
34
 
35
35
  def __init__(self, model_name: str, params: dict[str, Any]):
@@ -59,8 +59,16 @@ class LaunchResponseFormatter:
59
59
  table.add_row("Vocabulary Size", self.params["vocab_size"])
60
60
 
61
61
  # Add resource allocation details
62
- table.add_row("Partition", self.params["partition"])
63
- table.add_row("QoS", self.params["qos"])
62
+ if self.params.get("account"):
63
+ table.add_row("Account", self.params["account"])
64
+ if self.params.get("work_dir"):
65
+ table.add_row("Working Directory", self.params["work_dir"])
66
+ if self.params.get("resource_type"):
67
+ table.add_row("Resource Type", self.params["resource_type"])
68
+ if self.params.get("partition"):
69
+ table.add_row("Partition", self.params["partition"])
70
+ if self.params.get("qos"):
71
+ table.add_row("QoS", self.params["qos"])
64
72
  table.add_row("Time Limit", self.params["time"])
65
73
  table.add_row("Num Nodes", self.params["num_nodes"])
66
74
  table.add_row("GPUs/Node", self.params["gpus_per_node"])
@@ -79,6 +87,80 @@ class LaunchResponseFormatter:
79
87
  for arg, value in self.params["vllm_args"].items():
80
88
  table.add_row(f" {arg}:", str(value))
81
89
 
90
+ # Add Environment Variable Configuration Details
91
+ table.add_row("Environment Variables", style="magenta")
92
+ for arg, value in self.params["env"].items():
93
+ table.add_row(f" {arg}:", str(value))
94
+
95
+ return table
96
+
97
+
98
+ class BatchLaunchResponseFormatter:
99
+ """CLI Helper class for formatting BatchLaunchResponse.
100
+
101
+ A formatter class that handles the presentation of batch launch information
102
+ in both table and JSON formats.
103
+
104
+ Parameters
105
+ ----------
106
+ params : dict[str, Any]
107
+ Configuration for the batch launch
108
+ """
109
+
110
+ def __init__(self, params: dict[str, Any]):
111
+ self.params = params
112
+
113
+ def format_table_output(self) -> Table:
114
+ """Format output as rich Table.
115
+
116
+ Returns
117
+ -------
118
+ Table
119
+ Rich table containing formatted batch launch information including:
120
+ - Job configuration
121
+ - Model details
122
+ - Resource allocation
123
+ - vLLM configuration
124
+ """
125
+ table = create_table(key_title="Job Config", value_title="Value")
126
+ # Add key information with consistent styling
127
+ table.add_row("Slurm Job ID", self.params["slurm_job_id"], style="blue")
128
+ table.add_row("Slurm Job Name", self.params["slurm_job_name"], style="blue")
129
+ if self.params.get("account"):
130
+ table.add_row("Account", self.params["account"], style="blue")
131
+ if self.params.get("work_dir"):
132
+ table.add_row("Working Directory", self.params["work_dir"], style="blue")
133
+ table.add_row("Log Directory", self.params["log_dir"], style="blue")
134
+ for model_name in self.params["models"]:
135
+ table.add_row("Model Name", model_name, style="magenta")
136
+ # Add resource allocation details
137
+ if self.params["models"][model_name].get("resource_type"):
138
+ table.add_row(
139
+ "Resource Type",
140
+ f" {self.params['models'][model_name]['resource_type']}",
141
+ )
142
+ if self.params["models"][model_name].get("partition"):
143
+ table.add_row(
144
+ "Partition", f" {self.params['models'][model_name]['partition']}"
145
+ )
146
+ if self.params["models"][model_name].get("qos"):
147
+ table.add_row("QoS", f" {self.params['models'][model_name]['qos']}")
148
+ table.add_row(
149
+ "Time Limit", f" {self.params['models'][model_name]['time']}"
150
+ )
151
+ table.add_row(
152
+ "Num Nodes", f" {self.params['models'][model_name]['num_nodes']}"
153
+ )
154
+ table.add_row(
155
+ "GPUs/Node", f" {self.params['models'][model_name]['gpus_per_node']}"
156
+ )
157
+ table.add_row(
158
+ "CPUs/Task", f" {self.params['models'][model_name]['cpus_per_task']}"
159
+ )
160
+ table.add_row(
161
+ "Memory/Node", f" {self.params['models'][model_name]['mem_per_node']}"
162
+ )
163
+
82
164
  return table
83
165
 
84
166
 
@@ -116,7 +198,8 @@ class StatusResponseFormatter:
116
198
  json_data["pending_reason"] = self.status_info.pending_reason
117
199
  if self.status_info.failed_reason:
118
200
  json_data["failed_reason"] = self.status_info.failed_reason
119
- click.echo(json_data)
201
+
202
+ click.echo(json.dumps(json_data, indent=4))
120
203
 
121
204
  def output_table(self) -> Table:
122
205
  """Create and display rich table.
@@ -292,9 +375,7 @@ class ListCmdDisplay:
292
375
  self.model_config = None
293
376
  self.model_names: list[str] = []
294
377
 
295
- def _format_single_model_output(
296
- self, config: ModelConfig
297
- ) -> Union[dict[str, Any], Table]:
378
+ def _format_single_model_output(self, config: ModelConfig) -> Union[str, Table]:
298
379
  """Format output table for a single model.
299
380
 
300
381
  Parameters
@@ -304,8 +385,8 @@ class ListCmdDisplay:
304
385
 
305
386
  Returns
306
387
  -------
307
- Union[dict[str, Any], Table]
308
- Either a dictionary for JSON output or a Rich table
388
+ Union[str, Table]
389
+ Either a JSON string for JSON output or a Rich table
309
390
  """
310
391
  if self.json_mode:
311
392
  # Exclude non-essential fields from JSON output
@@ -315,11 +396,11 @@ class ListCmdDisplay:
315
396
  config_dict["model_weights_parent_dir"] = str(
316
397
  config_dict["model_weights_parent_dir"]
317
398
  )
318
- return config_dict
399
+ return json.dumps(config_dict, indent=4)
319
400
 
320
401
  table = create_table(key_title="Model Config", value_title="Value")
321
402
  for field, value in config.model_dump().items():
322
- if field not in {"venv", "log_dir", "vllm_args"}:
403
+ if field not in {"venv", "log_dir", "vllm_args"} and value:
323
404
  table.add_row(field, str(value))
324
405
  if field == "vllm_args":
325
406
  table.add_row("vLLM Arguments:", style="magenta")
@@ -394,7 +475,7 @@ class ListCmdDisplay:
394
475
  """
395
476
  if self.json_mode:
396
477
  model_names = [info.name for info in model_infos]
397
- click.echo(model_names)
478
+ click.echo(json.dumps(model_names, indent=4))
398
479
  else:
399
480
  panels = self._format_all_models_output(model_infos)
400
481
  self.console.print(Columns(panels, equal=True))