vec-inf 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  * `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server.
4
4
  * `batch-launch`: Specify a list of models to launch multiple OpenAI compatible inference servers at the same time.
5
- * `status`: Check the model status by providing its Slurm job ID.
5
+ * `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
6
6
  * `metrics`: Streams performance metrics to the console.
7
7
  * `shutdown`: Shutdown a model by providing its Slurm job ID.
8
8
  * `list`: List all available model names, or view the default/cached configuration of a specific model.
@@ -14,6 +14,7 @@ Use `--help` to see all available options
14
14
 
15
15
  * `launch_model`: Launch an OpenAI compatible inference server.
16
16
  * `batch_launch_models`: Launch multiple OpenAI compatible inference servers.
17
+ * `fetch_running_jobs`: Get the running `vec-inf` job IDs.
17
18
  * `get_status`: Get the status of a running model.
18
19
  * `get_metrics`: Get the performance metrics of a running model.
19
20
  * `shutdown_model`: Shutdown a running model.
vec_inf/cli/_cli.py CHANGED
@@ -30,6 +30,7 @@ from vec_inf.cli._helper import (
30
30
  BatchLaunchResponseFormatter,
31
31
  LaunchResponseFormatter,
32
32
  ListCmdDisplay,
33
+ ListStatusDisplay,
33
34
  MetricsResponseFormatter,
34
35
  StatusResponseFormatter,
35
36
  )
@@ -313,14 +314,14 @@ def batch_launch(
313
314
  raise click.ClickException(f"Batch launch failed: {str(e)}") from e
314
315
 
315
316
 
316
- @cli.command("status", help="Check the status of a running model on the cluster.")
317
- @click.argument("slurm_job_id", type=str, nargs=1)
317
+ @cli.command("status", help="Check the status of running vec-inf jobs on the cluster.")
318
+ @click.argument("slurm_job_id", required=False)
318
319
  @click.option(
319
320
  "--json-mode",
320
321
  is_flag=True,
321
322
  help="Output in JSON string",
322
323
  )
323
- def status(slurm_job_id: str, json_mode: bool = False) -> None:
324
+ def status(slurm_job_id: Optional[str] = None, json_mode: bool = False) -> None:
324
325
  """Get the status of a running model on the cluster.
325
326
 
326
327
  Parameters
@@ -338,14 +339,28 @@ def status(slurm_job_id: str, json_mode: bool = False) -> None:
338
339
  try:
339
340
  # Start the client and get model inference server status
340
341
  client = VecInfClient()
341
- status_response = client.get_status(slurm_job_id)
342
+ if not slurm_job_id:
343
+ slurm_job_ids = client.fetch_running_jobs()
344
+ if not slurm_job_ids:
345
+ click.echo("No running jobs found.")
346
+ return
347
+ else:
348
+ slurm_job_ids = [slurm_job_id]
349
+ responses = []
350
+ for job_id in slurm_job_ids:
351
+ responses.append(client.get_status(job_id))
352
+
342
353
  # Display status information
343
- status_formatter = StatusResponseFormatter(status_response)
344
- if json_mode:
345
- status_formatter.output_json()
354
+ if slurm_job_id:
355
+ status_formatter = StatusResponseFormatter(responses[0])
356
+ if json_mode:
357
+ status_formatter.output_json()
358
+ else:
359
+ status_info_table = status_formatter.output_table()
360
+ CONSOLE.print(status_info_table)
346
361
  else:
347
- status_info_table = status_formatter.output_table()
348
- CONSOLE.print(status_info_table)
362
+ list_status_display = ListStatusDisplay(slurm_job_ids, responses, json_mode)
363
+ list_status_display.display_multiple_status_output(CONSOLE)
349
364
 
350
365
  except click.ClickException as e:
351
366
  raise e
vec_inf/cli/_helper.py CHANGED
@@ -251,6 +251,62 @@ class StatusResponseFormatter:
251
251
  return table
252
252
 
253
253
 
254
+ class ListStatusDisplay:
255
+ """CLI Helper class for formatting a list of StatusResponse.
256
+
257
+ A formatter class that handles the presentation of multiple job statuses
258
+ in a table format.
259
+
260
+ Parameters
261
+ ----------
262
+ job_ids : list[str]
263
+ List of Slurm job IDs, one per status entry
+ statuses : list[StatusResponse]
+ List of model status information
+ json_mode : bool, optional
+ Whether to output a JSON string instead of a table, default False
264
+ """
265
+
266
+ def __init__(
267
+ self,
268
+ job_ids: list[str],
269
+ statuses: list[StatusResponse],
270
+ json_mode: bool = False,
271
+ ):
272
+ self.job_ids = job_ids
273
+ self.statuses = statuses
274
+ self.json_mode = json_mode
275
+
276
+ self.table = Table(show_header=True, header_style="bold magenta")
277
+ self.table.add_column("Job ID")
278
+ self.table.add_column("Model Name")
279
+ self.table.add_column("Status", style="blue")
280
+ self.table.add_column("Base URL")
281
+
282
+ def display_multiple_status_output(self, console: Console) -> None:
283
+ """Format and display all model statuses.
284
+
285
+ Formats each model's status and adds it to the table.
286
+ """
287
+ if self.json_mode:
288
+ json_data = [
289
+ {
290
+ "job_id": status.model_name,
291
+ "model_name": status.model_name,
292
+ "model_status": status.server_status,
293
+ "base_url": status.base_url,
294
+ }
295
+ for status in self.statuses
296
+ ]
297
+ click.echo(json.dumps(json_data, indent=4))
298
+ return
299
+
300
+ for i, status in enumerate(self.statuses):
301
+ self.table.add_row(
302
+ self.job_ids[i],
303
+ status.model_name,
304
+ status.server_status,
305
+ status.base_url,
306
+ )
307
+ console.print(self.table)
308
+
309
+
254
310
  class MetricsResponseFormatter:
255
311
  """CLI Helper class for formatting MetricsResponse.
256
312
 
vec_inf/client/_helper.py CHANGED
@@ -469,16 +469,15 @@ class BatchModelLauncher:
469
469
  If required fields are missing or tensor parallel size is not specified
470
470
  when using multiple GPUs
471
471
  """
472
- params: dict[str, Any] = {
473
- "models": {},
472
+ common_params: dict[str, Any] = {
474
473
  "slurm_job_name": self.slurm_job_name,
475
474
  "src_dir": str(SRC_DIR),
476
475
  "account": account,
477
476
  "work_dir": work_dir,
478
477
  }
479
478
 
480
- # Check for required fields without default vals, will raise an error if missing
481
- utils.check_required_fields(params)
479
+ params: dict[str, Any] = common_params.copy()
480
+ params["models"] = {}
482
481
 
483
482
  for i, (model_name, config) in enumerate(self.model_configs.items()):
484
483
  params["models"][model_name] = config.model_dump(exclude_none=True)
@@ -555,6 +554,16 @@ class BatchModelLauncher:
555
554
  raise ValueError(
556
555
  f"Mismatch found for {arg}: {params[arg]} != {params['models'][model_name][arg]}, check your configuration"
557
556
  )
557
+ # Check for required fields and return environment variable overrides
558
+ env_overrides = utils.check_required_fields(
559
+ {**params["models"][model_name], **common_params}
560
+ )
561
+
562
+ for arg, value in env_overrides.items():
563
+ if arg in common_params:
564
+ params[arg] = value
565
+ else:
566
+ params["models"][model_name][arg] = value
558
567
 
559
568
  return params
560
569
 
@@ -718,7 +727,7 @@ class ModelStatusMonitor:
718
727
  Basic status information for the job
719
728
  """
720
729
  try:
721
- job_name = self.job_status["JobName"]
730
+ job_name = self.job_status["JobName"].removesuffix("-vec-inf")
722
731
  job_state = self.job_status["JobState"]
723
732
  except KeyError:
724
733
  job_name = "UNAVAILABLE"
@@ -34,9 +34,9 @@ class SlurmScriptGenerator:
34
34
  self.params = params
35
35
  self.is_multinode = int(self.params["num_nodes"]) > 1
36
36
  self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
37
- self.additional_binds = self.params.get("bind", "")
38
- if self.additional_binds:
39
- self.additional_binds = f" --bind {self.additional_binds}"
37
+ self.additional_binds = (
38
+ f",{self.params['bind']}" if self.params.get("bind") else ""
39
+ )
40
40
  self.model_weights_path = str(
41
41
  Path(self.params["model_weights_parent_dir"], self.params["model_name"])
42
42
  )
@@ -89,6 +89,8 @@ class SlurmScriptGenerator:
89
89
  for arg, value in SLURM_JOB_CONFIG_ARGS.items():
90
90
  if self.params.get(value):
91
91
  shebang.append(f"#SBATCH --{arg}={self.params[value]}")
92
+ if value == "model_name":
93
+ shebang[-1] += "-vec-inf"
92
94
  if self.is_multinode:
93
95
  shebang += SLURM_SCRIPT_TEMPLATE["shebang"]["multinode"]
94
96
  return "\n".join(shebang)
@@ -107,7 +109,12 @@ class SlurmScriptGenerator:
107
109
  server_script = ["\n"]
108
110
  if self.use_container:
109
111
  server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
110
- server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_env_vars"]))
112
+ server_script.append(
113
+ SLURM_SCRIPT_TEMPLATE["bind_path"].format(
114
+ model_weights_path=self.model_weights_path,
115
+ additional_binds=self.additional_binds,
116
+ )
117
+ )
111
118
  else:
112
119
  server_script.append(
113
120
  SLURM_SCRIPT_TEMPLATE["activate_venv"].format(venv=self.params["venv"])
@@ -125,7 +132,6 @@ class SlurmScriptGenerator:
125
132
  "CONTAINER_PLACEHOLDER",
126
133
  SLURM_SCRIPT_TEMPLATE["container_command"].format(
127
134
  model_weights_path=self.model_weights_path,
128
- additional_binds=self.additional_binds,
129
135
  env_str=self.env_str,
130
136
  ),
131
137
  )
@@ -163,7 +169,6 @@ class SlurmScriptGenerator:
163
169
  launcher_script.append(
164
170
  SLURM_SCRIPT_TEMPLATE["container_command"].format(
165
171
  model_weights_path=self.model_weights_path,
166
- additional_binds=self.additional_binds,
167
172
  env_str=self.env_str,
168
173
  )
169
174
  )
@@ -215,11 +220,11 @@ class BatchSlurmScriptGenerator:
215
220
  self.script_paths: list[Path] = []
216
221
  self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
217
222
  for model_name in self.params["models"]:
218
- self.params["models"][model_name]["additional_binds"] = ""
219
- if self.params["models"][model_name].get("bind"):
220
- self.params["models"][model_name]["additional_binds"] = (
221
- f" --bind {self.params['models'][model_name]['bind']}"
222
- )
223
+ self.params["models"][model_name]["additional_binds"] = (
224
+ f",{self.params['models'][model_name]['bind']}"
225
+ if self.params["models"][model_name].get("bind")
226
+ else ""
227
+ )
223
228
  self.params["models"][model_name]["model_weights_path"] = str(
224
229
  Path(
225
230
  self.params["models"][model_name]["model_weights_parent_dir"],
@@ -259,7 +264,12 @@ class BatchSlurmScriptGenerator:
259
264
  script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["shebang"])
260
265
  if self.use_container:
261
266
  script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
262
- script_content.append("\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["env_vars"]))
267
+ script_content.append(
268
+ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
269
+ model_weights_path=model_params["model_weights_path"],
270
+ additional_binds=model_params["additional_binds"],
271
+ )
272
+ )
263
273
  script_content.append(
264
274
  "\n".join(
265
275
  BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["server_address_setup"]
@@ -277,7 +287,6 @@ class BatchSlurmScriptGenerator:
277
287
  script_content.append(
278
288
  BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format(
279
289
  model_weights_path=model_params["model_weights_path"],
280
- additional_binds=model_params["additional_binds"],
281
290
  )
282
291
  )
283
292
  script_content.append(
@@ -321,6 +330,8 @@ class BatchSlurmScriptGenerator:
321
330
  model_params = self.params["models"][model_name]
322
331
  if model_params.get(value) and value not in ["out_file", "err_file"]:
323
332
  shebang.append(f"#SBATCH --{arg}={model_params[value]}")
333
+ if value == "model_name":
334
+ shebang[-1] += "-vec-inf"
324
335
  shebang[-1] += "\n"
325
336
  shebang.append(BATCH_SLURM_SCRIPT_TEMPLATE["hetjob"])
326
337
  # Remove the last hetjob line
@@ -57,6 +57,8 @@ class SlurmScriptTemplate(TypedDict):
57
57
  Commands for container setup
58
58
  imports : str
59
59
  Import statements and source commands
60
+ bind_path : str
61
+ Bind path environment variable for the container
60
62
  container_command : str
61
63
  Template for container execution command
62
64
  activate_venv : str
@@ -74,7 +76,7 @@ class SlurmScriptTemplate(TypedDict):
74
76
  shebang: ShebangConfig
75
77
  container_setup: list[str]
76
78
  imports: str
77
- container_env_vars: list[str]
79
+ bind_path: str
78
80
  container_command: str
79
81
  activate_venv: str
80
82
  server_setup: ServerSetupConfig
@@ -96,10 +98,8 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
96
98
  f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
97
99
  ],
98
100
  "imports": "source {src_dir}/find_port.sh",
99
- "container_env_vars": [
100
- f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp"
101
- ],
102
- "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
101
+ "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
102
+ "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\",
103
103
  "activate_venv": "source {venv}/bin/activate",
104
104
  "server_setup": {
105
105
  "single_node": [
@@ -215,8 +215,8 @@ class BatchModelLaunchScriptTemplate(TypedDict):
215
215
  Shebang line for the script
216
216
  container_setup : list[str]
217
217
  Commands for container setup
218
- env_vars : list[str]
219
- Environment variables to set
218
+ bind_path : str
219
+ Bind path environment variable for the container
220
220
  server_address_setup : list[str]
221
221
  Commands to setup the server address
222
222
  launch_cmd : list[str]
@@ -227,7 +227,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
227
227
 
228
228
  shebang: str
229
229
  container_setup: str
230
- env_vars: list[str]
230
+ bind_path: str
231
231
  server_address_setup: list[str]
232
232
  write_to_json: list[str]
233
233
  launch_cmd: list[str]
@@ -237,9 +237,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
237
237
  BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
238
238
  "shebang": "#!/bin/bash\n",
239
239
  "container_setup": f"{CONTAINER_LOAD_CMD}\n",
240
- "env_vars": [
241
- f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
242
- ],
240
+ "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
243
241
  "server_address_setup": [
244
242
  "source {src_dir}/find_port.sh",
245
243
  "head_node_ip=${{SLURMD_NODENAME}}",
@@ -255,7 +253,7 @@ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
255
253
  ' "$json_path" > temp_{model_name}.json \\',
256
254
  ' && mv temp_{model_name}.json "$json_path"\n',
257
255
  ],
258
- "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
256
+ "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {IMAGE_PATH} \\",
259
257
  "launch_cmd": [
260
258
  "vllm serve {model_weights_path} \\",
261
259
  " --served-model-name {model_name} \\",
vec_inf/client/_utils.py CHANGED
@@ -436,7 +436,7 @@ def find_matching_dirs(
436
436
  return matched
437
437
 
438
438
 
439
- def check_required_fields(params: dict[str, Any]) -> None:
439
+ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
440
440
  """Check for required fields without default vals and their corresponding env vars.
441
441
 
442
442
  Parameters
@@ -444,12 +444,15 @@ def check_required_fields(params: dict[str, Any]) -> None:
444
444
  params : dict[str, Any]
445
445
  Dictionary of parameters to check.
446
446
  """
447
+ env_overrides = {}
447
448
  for arg in REQUIRED_ARGS:
448
449
  if not params.get(arg):
449
450
  default_value = os.getenv(REQUIRED_ARGS[arg])
450
451
  if default_value:
451
452
  params[arg] = default_value
453
+ env_overrides[arg] = default_value
452
454
  else:
453
455
  raise MissingRequiredFieldsError(
454
456
  f"{arg} is required, please set it in the command arguments or environment variables"
455
457
  )
458
+ return env_overrides
vec_inf/client/api.py CHANGED
@@ -10,7 +10,9 @@ vec_inf.client._helper : Helper classes for model inference server management
10
10
  vec_inf.client.models : Data models for API responses
11
11
  """
12
12
 
13
+ import re
13
14
  import shutil
15
+ import subprocess
14
16
  import time
15
17
  import warnings
16
18
  from pathlib import Path
@@ -181,6 +183,51 @@ class VecInfClient:
181
183
  )
182
184
  return model_launcher.launch()
183
185
 
186
+ def fetch_running_jobs(self) -> list[str]:
187
+ """
188
+ Fetch the list of running vec-inf job IDs for the current user.
189
+
190
+ Returns
191
+ -------
192
+ list[str]
193
+ List of running vec-inf Slurm job IDs; empty list if none are found.
194
+ """
195
+ try:
196
+ res = subprocess.run(
197
+ ["squeue", "--me", "--noheader"],
198
+ capture_output=True,
199
+ text=True,
200
+ check=True,
201
+ )
202
+ job_ids = [
203
+ ln.strip().split()[0] for ln in res.stdout.splitlines() if ln.strip()
204
+ ]
205
+
206
+ if not job_ids:
207
+ return []
208
+
209
+ # For each job, fetch the full JobName and filter by suffix
210
+ matching_ids = []
211
+ for jid in job_ids:
212
+ try:
213
+ sctl = subprocess.run(
214
+ ["scontrol", "show", "job", "-o", jid],
215
+ capture_output=True,
216
+ text=True,
217
+ check=True,
218
+ )
219
+ m = re.search(r"\bJobName=([^\s]+)", sctl.stdout)
220
+ if m and m.group(1).endswith("-vec-inf"):
221
+ matching_ids.append(jid)
222
+ except subprocess.CalledProcessError:
223
+ # Job might have finished between squeue and scontrol; skip
224
+ continue
225
+
226
+ return matching_ids
227
+
228
+ except subprocess.CalledProcessError as e:
229
+ raise SlurmJobError(f"Error running slurm command: {e}") from e
230
+
184
231
  def get_status(self, slurm_job_id: str) -> StatusResponse:
185
232
  """Get the status of a running model.
186
233
 
@@ -1059,12 +1059,11 @@ models:
1059
1059
  model_family: gpt-oss
1060
1060
  model_variant: 120b
1061
1061
  model_type: LLM
1062
- gpus_per_node: 4
1063
- num_nodes: 2
1062
+ gpus_per_node: 2
1063
+ num_nodes: 1
1064
1064
  vocab_size: 201088
1065
1065
  time: 08:00:00
1066
1066
  resource_type: l40s
1067
1067
  vllm_args:
1068
- --tensor-parallel-size: 4
1069
- --pipeline-parallel-size: 2
1070
- --max-model-len: 40960
1068
+ --tensor-parallel-size: 2
1069
+ --max-model-len: 32768
vec_inf/find_port.sh CHANGED
@@ -28,7 +28,16 @@ find_available_port() {
28
28
  local base_port=$2
29
29
  local max_port=$3
30
30
 
31
- for ((port=base_port; port<=max_port; port++)); do
31
+ # Generate shuffled list of ports; fallback to sequential if shuf not present
32
+ if command -v shuf >/dev/null 2>&1; then
33
+ local port_list
34
+ port_list=$(shuf -i "${base_port}-${max_port}")
35
+ else
36
+ local port_list
37
+ port_list=$(seq $base_port $max_port)
38
+ fi
39
+
40
+ for port in $port_list; do
32
41
  if is_port_available $ip $port; then
33
42
  echo $port
34
43
  return
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vec-inf
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: Efficient LLM inference on Slurm clusters using vLLM.
5
5
  Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
6
6
  License-Expression: MIT
@@ -30,7 +30,7 @@ Description-Content-Type: text/markdown
30
30
  [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
31
31
  [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
32
32
  [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
33
- [![vLLM](https://img.shields.io/badge/vLLM-0.10.1.1-blue)](https://docs.vllm.ai/en/v0.10.1.1/)
33
+ [![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
34
34
  ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)
35
35
 
36
36
  This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
@@ -43,7 +43,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
43
43
  ```bash
44
44
  pip install vec-inf
45
45
  ```
46
- Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.10.1.1`.
46
+ Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
47
47
 
48
48
  If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
49
49
  * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
@@ -76,7 +76,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
76
76
  #### Other commands
77
77
 
78
78
  * `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
79
- * `status`: Check the model status by providing its Slurm job ID.
79
+ * `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
80
80
  * `metrics`: Streams performance metrics to the console.
81
81
  * `shutdown`: Shutdown a model by providing its Slurm job ID.
82
82
  * `list`: List all available model names, or view the default/cached configuration of a specific model.
@@ -0,0 +1,27 @@
1
+ vec_inf/README.md,sha256=GpKnty9u1b06cPT2Ce_5v0LBucmXOQt6Nl4OJKvjf68,1410
2
+ vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
3
+ vec_inf/find_port.sh,sha256=HHx1kg-TIoPZu0u55S4T5jl8MDV4_mnqh4Y7r_quyWw,1358
4
+ vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
5
+ vec_inf/cli/_cli.py,sha256=9EzRpOFJVd1_g0G-em7DlNJFZoKb-FvBboqSOUanoxU,16787
6
+ vec_inf/cli/_helper.py,sha256=q8ysD0g_hgKg_6emZZNNAhYR90SqvJJMYkPAB0Kj6gc,19177
7
+ vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
8
+ vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
9
+ vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
10
+ vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
11
+ vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
12
+ vec_inf/client/_helper.py,sha256=R5fOfmRK1I9H6ta6-5hhWwr12dhjZD8O0FvJOgfTkaA,37565
13
+ vec_inf/client/_slurm_script_generator.py,sha256=P60W36xvVNi33iK-GqK_StZ6zpJnCr8gLnY6AEn1HKE,14182
14
+ vec_inf/client/_slurm_templates.py,sha256=Zjl47mNxhOTxFDAa61n9o0NAZ_TwO2KF_LvYN3JG7Mk,9349
15
+ vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
16
+ vec_inf/client/_utils.py,sha256=bxn5acjDEtojEuLf8vaBv85kc5TwtYw_gIMhNKcD0M4,14405
17
+ vec_inf/client/api.py,sha256=-vazAWvZp0vsn4jB6R-WdUo5eZ5bR-XJqU6r6qOL16A,13596
18
+ vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
19
+ vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
20
+ vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
21
+ vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
22
+ vec_inf/config/models.yaml,sha256=1zPnW_1I_ltLk8wAoVNLvywQ1htvn0yzdqfHEBFDthg,24730
23
+ vec_inf-0.7.3.dist-info/METADATA,sha256=b-qth5Y_KY6FOj5ghuRwImLz6RBu2x3mXUrfmyqXpJ8,10122
24
+ vec_inf-0.7.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
25
+ vec_inf-0.7.3.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
26
+ vec_inf-0.7.3.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
27
+ vec_inf-0.7.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,27 +0,0 @@
1
- vec_inf/README.md,sha256=WyvjbSs5Eh5fp8u66bgOaO3FQKP2U7m_HbLgqTHs_ng,1322
2
- vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
3
- vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
4
- vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
5
- vec_inf/cli/_cli.py,sha256=0YfxtPT_Nq5gvIol9eWmw5yW9AT1ghf_E49R9pD7UG4,16213
6
- vec_inf/cli/_helper.py,sha256=0_onclvxxpDTp33ODYc19RbZ2aIhXuMTC9v19q8ZhIo,17473
7
- vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
8
- vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
9
- vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
10
- vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
11
- vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
12
- vec_inf/client/_helper.py,sha256=hb6m5TLwcGE0grCu5-UCUkWbByV-G5h8gA87Yzct6rk,37170
13
- vec_inf/client/_slurm_script_generator.py,sha256=L6tqn71kNJ2I0xYipFh_ZxIAG8znpXhTpUxTU8LJIa4,13988
14
- vec_inf/client/_slurm_templates.py,sha256=GxVNClkgggoJN2pT1AjK7CQCAErfKRMIs97Vlhxs9u8,9349
15
- vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
16
- vec_inf/client/_utils.py,sha256=_ZBmic0XvJ4vpdIuXDi6KO5iL2rbhIpFQT01EWGItN4,14296
17
- vec_inf/client/api.py,sha256=lkVWCme-HmMJMqp8JbtjkBVL_MSPsCC_IBL9FBw3Um8,12011
18
- vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
19
- vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
20
- vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
21
- vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
22
- vec_inf/config/models.yaml,sha256=PSDR29zI8xld32Vm6dhgCIRHPEkBhwQx7-d_uFlEAM8,24764
23
- vec_inf-0.7.2.dist-info/METADATA,sha256=ljs9hao8q4igLERrjGL5u1vZ_n7DMrr8XnBHzybPE2Y,10099
24
- vec_inf-0.7.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- vec_inf-0.7.2.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
26
- vec_inf-0.7.2.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
27
- vec_inf-0.7.2.dist-info/RECORD,,