vec-inf 0.7.2__py3-none-any.whl → 0.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +2 -1
- vec_inf/cli/_cli.py +24 -9
- vec_inf/cli/_helper.py +56 -0
- vec_inf/client/_helper.py +14 -5
- vec_inf/client/_slurm_script_generator.py +24 -13
- vec_inf/client/_slurm_templates.py +10 -12
- vec_inf/client/_utils.py +4 -1
- vec_inf/client/api.py +47 -0
- vec_inf/config/models.yaml +4 -5
- vec_inf/find_port.sh +10 -1
- {vec_inf-0.7.2.dist-info → vec_inf-0.7.3.dist-info}/METADATA +4 -4
- vec_inf-0.7.3.dist-info/RECORD +27 -0
- {vec_inf-0.7.2.dist-info → vec_inf-0.7.3.dist-info}/WHEEL +1 -1
- vec_inf-0.7.2.dist-info/RECORD +0 -27
- {vec_inf-0.7.2.dist-info → vec_inf-0.7.3.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.2.dist-info → vec_inf-0.7.3.dist-info}/licenses/LICENSE +0 -0
vec_inf/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server.
|
|
4
4
|
* `batch-launch`: Specify a list of models to launch multiple OpenAI compatible inference servers at the same time.
|
|
5
|
-
* `status`: Check the
|
|
5
|
+
* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
|
|
6
6
|
* `metrics`: Streams performance metrics to the console.
|
|
7
7
|
* `shutdown`: Shutdown a model by providing its Slurm job ID.
|
|
8
8
|
* `list`: List all available model names, or view the default/cached configuration of a specific model.
|
|
@@ -14,6 +14,7 @@ Use `--help` to see all available options
|
|
|
14
14
|
|
|
15
15
|
* `launch_model`: Launch an OpenAI compatible inference server.
|
|
16
16
|
* `batch_launch_models`: Launch multiple OpenAI compatible inference servers.
|
|
17
|
+
* `fetch_running_jobs`: Get the running `vec-inf` job IDs.
|
|
17
18
|
* `get_status`: Get the status of a running model.
|
|
18
19
|
* `get_metrics`: Get the performance metrics of a running model.
|
|
19
20
|
* `shutdown_model`: Shutdown a running model.
|
vec_inf/cli/_cli.py
CHANGED
|
@@ -30,6 +30,7 @@ from vec_inf.cli._helper import (
|
|
|
30
30
|
BatchLaunchResponseFormatter,
|
|
31
31
|
LaunchResponseFormatter,
|
|
32
32
|
ListCmdDisplay,
|
|
33
|
+
ListStatusDisplay,
|
|
33
34
|
MetricsResponseFormatter,
|
|
34
35
|
StatusResponseFormatter,
|
|
35
36
|
)
|
|
@@ -313,14 +314,14 @@ def batch_launch(
|
|
|
313
314
|
raise click.ClickException(f"Batch launch failed: {str(e)}") from e
|
|
314
315
|
|
|
315
316
|
|
|
316
|
-
@cli.command("status", help="Check the status of
|
|
317
|
-
@click.argument("slurm_job_id",
|
|
317
|
+
@cli.command("status", help="Check the status of running vec-inf jobs on the cluster.")
|
|
318
|
+
@click.argument("slurm_job_id", required=False)
|
|
318
319
|
@click.option(
|
|
319
320
|
"--json-mode",
|
|
320
321
|
is_flag=True,
|
|
321
322
|
help="Output in JSON string",
|
|
322
323
|
)
|
|
323
|
-
def status(slurm_job_id: str, json_mode: bool = False) -> None:
|
|
324
|
+
def status(slurm_job_id: Optional[str] = None, json_mode: bool = False) -> None:
|
|
324
325
|
"""Get the status of a running model on the cluster.
|
|
325
326
|
|
|
326
327
|
Parameters
|
|
@@ -338,14 +339,28 @@ def status(slurm_job_id: str, json_mode: bool = False) -> None:
|
|
|
338
339
|
try:
|
|
339
340
|
# Start the client and get model inference server status
|
|
340
341
|
client = VecInfClient()
|
|
341
|
-
|
|
342
|
+
if not slurm_job_id:
|
|
343
|
+
slurm_job_ids = client.fetch_running_jobs()
|
|
344
|
+
if not slurm_job_ids:
|
|
345
|
+
click.echo("No running jobs found.")
|
|
346
|
+
return
|
|
347
|
+
else:
|
|
348
|
+
slurm_job_ids = [slurm_job_id]
|
|
349
|
+
responses = []
|
|
350
|
+
for job_id in slurm_job_ids:
|
|
351
|
+
responses.append(client.get_status(job_id))
|
|
352
|
+
|
|
342
353
|
# Display status information
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
354
|
+
if slurm_job_id:
|
|
355
|
+
status_formatter = StatusResponseFormatter(responses[0])
|
|
356
|
+
if json_mode:
|
|
357
|
+
status_formatter.output_json()
|
|
358
|
+
else:
|
|
359
|
+
status_info_table = status_formatter.output_table()
|
|
360
|
+
CONSOLE.print(status_info_table)
|
|
346
361
|
else:
|
|
347
|
-
|
|
348
|
-
|
|
362
|
+
list_status_display = ListStatusDisplay(slurm_job_ids, responses, json_mode)
|
|
363
|
+
list_status_display.display_multiple_status_output(CONSOLE)
|
|
349
364
|
|
|
350
365
|
except click.ClickException as e:
|
|
351
366
|
raise e
|
vec_inf/cli/_helper.py
CHANGED
|
@@ -251,6 +251,62 @@ class StatusResponseFormatter:
|
|
|
251
251
|
return table
|
|
252
252
|
|
|
253
253
|
|
|
254
|
+
class ListStatusDisplay:
    """CLI helper class for displaying a list of StatusResponse.

    Presents the status of several jobs at once, either as a table with
    one row per job or as a JSON array with one object per job.

    Parameters
    ----------
    job_ids : list[str]
        Slurm job IDs, given in the same order as ``statuses``
    statuses : list[StatusResponse]
        List of model status information
    json_mode : bool, default=False
        Whether to emit a JSON string instead of a table
    """

    def __init__(
        self,
        job_ids: list[str],
        statuses: list[StatusResponse],
        json_mode: bool = False,
    ):
        self.job_ids = job_ids
        self.statuses = statuses
        self.json_mode = json_mode

        # The table is built up front; rows are only added when it is displayed.
        self.table = Table(show_header=True, header_style="bold magenta")
        self.table.add_column("Job ID")
        self.table.add_column("Model Name")
        self.table.add_column("Status", style="blue")
        self.table.add_column("Base URL")

    def display_multiple_status_output(self, console: Console) -> None:
        """Format and display all model statuses.

        In JSON mode, echoes a JSON array whose objects carry the keys
        ``job_id``, ``model_name``, ``model_status`` and ``base_url``.
        Otherwise adds one row per job to the table and prints it.

        Parameters
        ----------
        console : Console
            Console used to print the table (unused in JSON mode)
        """
        # Pair every status with the job ID it belongs to, so that both
        # output modes report the real job ID.
        entries = list(zip(self.job_ids, self.statuses))

        if self.json_mode:
            json_data = [
                {
                    # Was previously filled with the model name by mistake.
                    "job_id": job_id,
                    "model_name": status.model_name,
                    "model_status": status.server_status,
                    "base_url": status.base_url,
                }
                for job_id, status in entries
            ]
            click.echo(json.dumps(json_data, indent=4))
            return

        for job_id, status in entries:
            self.table.add_row(
                job_id,
                status.model_name,
                status.server_status,
                status.base_url,
            )
        console.print(self.table)
|
|
308
|
+
|
|
309
|
+
|
|
254
310
|
class MetricsResponseFormatter:
|
|
255
311
|
"""CLI Helper class for formatting MetricsResponse.
|
|
256
312
|
|
vec_inf/client/_helper.py
CHANGED
|
@@ -469,16 +469,15 @@ class BatchModelLauncher:
|
|
|
469
469
|
If required fields are missing or tensor parallel size is not specified
|
|
470
470
|
when using multiple GPUs
|
|
471
471
|
"""
|
|
472
|
-
|
|
473
|
-
"models": {},
|
|
472
|
+
common_params: dict[str, Any] = {
|
|
474
473
|
"slurm_job_name": self.slurm_job_name,
|
|
475
474
|
"src_dir": str(SRC_DIR),
|
|
476
475
|
"account": account,
|
|
477
476
|
"work_dir": work_dir,
|
|
478
477
|
}
|
|
479
478
|
|
|
480
|
-
|
|
481
|
-
|
|
479
|
+
params: dict[str, Any] = common_params.copy()
|
|
480
|
+
params["models"] = {}
|
|
482
481
|
|
|
483
482
|
for i, (model_name, config) in enumerate(self.model_configs.items()):
|
|
484
483
|
params["models"][model_name] = config.model_dump(exclude_none=True)
|
|
@@ -555,6 +554,16 @@ class BatchModelLauncher:
|
|
|
555
554
|
raise ValueError(
|
|
556
555
|
f"Mismatch found for {arg}: {params[arg]} != {params['models'][model_name][arg]}, check your configuration"
|
|
557
556
|
)
|
|
557
|
+
# Check for required fields and return environment variable overrides
|
|
558
|
+
env_overrides = utils.check_required_fields(
|
|
559
|
+
{**params["models"][model_name], **common_params}
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
for arg, value in env_overrides.items():
|
|
563
|
+
if arg in common_params:
|
|
564
|
+
params[arg] = value
|
|
565
|
+
else:
|
|
566
|
+
params["models"][model_name][arg] = value
|
|
558
567
|
|
|
559
568
|
return params
|
|
560
569
|
|
|
@@ -718,7 +727,7 @@ class ModelStatusMonitor:
|
|
|
718
727
|
Basic status information for the job
|
|
719
728
|
"""
|
|
720
729
|
try:
|
|
721
|
-
job_name = self.job_status["JobName"]
|
|
730
|
+
job_name = self.job_status["JobName"].removesuffix("-vec-inf")
|
|
722
731
|
job_state = self.job_status["JobState"]
|
|
723
732
|
except KeyError:
|
|
724
733
|
job_name = "UNAVAILABLE"
|
|
@@ -34,9 +34,9 @@ class SlurmScriptGenerator:
|
|
|
34
34
|
self.params = params
|
|
35
35
|
self.is_multinode = int(self.params["num_nodes"]) > 1
|
|
36
36
|
self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
|
|
37
|
-
self.additional_binds =
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
self.additional_binds = (
|
|
38
|
+
f",{self.params['bind']}" if self.params.get("bind") else ""
|
|
39
|
+
)
|
|
40
40
|
self.model_weights_path = str(
|
|
41
41
|
Path(self.params["model_weights_parent_dir"], self.params["model_name"])
|
|
42
42
|
)
|
|
@@ -89,6 +89,8 @@ class SlurmScriptGenerator:
|
|
|
89
89
|
for arg, value in SLURM_JOB_CONFIG_ARGS.items():
|
|
90
90
|
if self.params.get(value):
|
|
91
91
|
shebang.append(f"#SBATCH --{arg}={self.params[value]}")
|
|
92
|
+
if value == "model_name":
|
|
93
|
+
shebang[-1] += "-vec-inf"
|
|
92
94
|
if self.is_multinode:
|
|
93
95
|
shebang += SLURM_SCRIPT_TEMPLATE["shebang"]["multinode"]
|
|
94
96
|
return "\n".join(shebang)
|
|
@@ -107,7 +109,12 @@ class SlurmScriptGenerator:
|
|
|
107
109
|
server_script = ["\n"]
|
|
108
110
|
if self.use_container:
|
|
109
111
|
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
|
|
110
|
-
server_script.append(
|
|
112
|
+
server_script.append(
|
|
113
|
+
SLURM_SCRIPT_TEMPLATE["bind_path"].format(
|
|
114
|
+
model_weights_path=self.model_weights_path,
|
|
115
|
+
additional_binds=self.additional_binds,
|
|
116
|
+
)
|
|
117
|
+
)
|
|
111
118
|
else:
|
|
112
119
|
server_script.append(
|
|
113
120
|
SLURM_SCRIPT_TEMPLATE["activate_venv"].format(venv=self.params["venv"])
|
|
@@ -125,7 +132,6 @@ class SlurmScriptGenerator:
|
|
|
125
132
|
"CONTAINER_PLACEHOLDER",
|
|
126
133
|
SLURM_SCRIPT_TEMPLATE["container_command"].format(
|
|
127
134
|
model_weights_path=self.model_weights_path,
|
|
128
|
-
additional_binds=self.additional_binds,
|
|
129
135
|
env_str=self.env_str,
|
|
130
136
|
),
|
|
131
137
|
)
|
|
@@ -163,7 +169,6 @@ class SlurmScriptGenerator:
|
|
|
163
169
|
launcher_script.append(
|
|
164
170
|
SLURM_SCRIPT_TEMPLATE["container_command"].format(
|
|
165
171
|
model_weights_path=self.model_weights_path,
|
|
166
|
-
additional_binds=self.additional_binds,
|
|
167
172
|
env_str=self.env_str,
|
|
168
173
|
)
|
|
169
174
|
)
|
|
@@ -215,11 +220,11 @@ class BatchSlurmScriptGenerator:
|
|
|
215
220
|
self.script_paths: list[Path] = []
|
|
216
221
|
self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
|
|
217
222
|
for model_name in self.params["models"]:
|
|
218
|
-
self.params["models"][model_name]["additional_binds"] =
|
|
219
|
-
|
|
220
|
-
self.params["models"][model_name]
|
|
221
|
-
|
|
222
|
-
|
|
223
|
+
self.params["models"][model_name]["additional_binds"] = (
|
|
224
|
+
f",{self.params['models'][model_name]['bind']}"
|
|
225
|
+
if self.params["models"][model_name].get("bind")
|
|
226
|
+
else ""
|
|
227
|
+
)
|
|
223
228
|
self.params["models"][model_name]["model_weights_path"] = str(
|
|
224
229
|
Path(
|
|
225
230
|
self.params["models"][model_name]["model_weights_parent_dir"],
|
|
@@ -259,7 +264,12 @@ class BatchSlurmScriptGenerator:
|
|
|
259
264
|
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["shebang"])
|
|
260
265
|
if self.use_container:
|
|
261
266
|
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
|
|
262
|
-
script_content.append(
|
|
267
|
+
script_content.append(
|
|
268
|
+
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
|
|
269
|
+
model_weights_path=model_params["model_weights_path"],
|
|
270
|
+
additional_binds=model_params["additional_binds"],
|
|
271
|
+
)
|
|
272
|
+
)
|
|
263
273
|
script_content.append(
|
|
264
274
|
"\n".join(
|
|
265
275
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["server_address_setup"]
|
|
@@ -277,7 +287,6 @@ class BatchSlurmScriptGenerator:
|
|
|
277
287
|
script_content.append(
|
|
278
288
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format(
|
|
279
289
|
model_weights_path=model_params["model_weights_path"],
|
|
280
|
-
additional_binds=model_params["additional_binds"],
|
|
281
290
|
)
|
|
282
291
|
)
|
|
283
292
|
script_content.append(
|
|
@@ -321,6 +330,8 @@ class BatchSlurmScriptGenerator:
|
|
|
321
330
|
model_params = self.params["models"][model_name]
|
|
322
331
|
if model_params.get(value) and value not in ["out_file", "err_file"]:
|
|
323
332
|
shebang.append(f"#SBATCH --{arg}={model_params[value]}")
|
|
333
|
+
if value == "model_name":
|
|
334
|
+
shebang[-1] += "-vec-inf"
|
|
324
335
|
shebang[-1] += "\n"
|
|
325
336
|
shebang.append(BATCH_SLURM_SCRIPT_TEMPLATE["hetjob"])
|
|
326
337
|
# Remove the last hetjob line
|
|
@@ -57,6 +57,8 @@ class SlurmScriptTemplate(TypedDict):
|
|
|
57
57
|
Commands for container setup
|
|
58
58
|
imports : str
|
|
59
59
|
Import statements and source commands
|
|
60
|
+
bind_path : str
|
|
61
|
+
Bind path environment variable for the container
|
|
60
62
|
container_command : str
|
|
61
63
|
Template for container execution command
|
|
62
64
|
activate_venv : str
|
|
@@ -74,7 +76,7 @@ class SlurmScriptTemplate(TypedDict):
|
|
|
74
76
|
shebang: ShebangConfig
|
|
75
77
|
container_setup: list[str]
|
|
76
78
|
imports: str
|
|
77
|
-
|
|
79
|
+
bind_path: str
|
|
78
80
|
container_command: str
|
|
79
81
|
activate_venv: str
|
|
80
82
|
server_setup: ServerSetupConfig
|
|
@@ -96,10 +98,8 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
96
98
|
f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
|
|
97
99
|
],
|
|
98
100
|
"imports": "source {src_dir}/find_port.sh",
|
|
99
|
-
"
|
|
100
|
-
|
|
101
|
-
],
|
|
102
|
-
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
|
|
101
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
|
|
102
|
+
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\",
|
|
103
103
|
"activate_venv": "source {venv}/bin/activate",
|
|
104
104
|
"server_setup": {
|
|
105
105
|
"single_node": [
|
|
@@ -215,8 +215,8 @@ class BatchModelLaunchScriptTemplate(TypedDict):
|
|
|
215
215
|
Shebang line for the script
|
|
216
216
|
container_setup : list[str]
|
|
217
217
|
Commands for container setup
|
|
218
|
-
|
|
219
|
-
|
|
218
|
+
bind_path : str
|
|
219
|
+
Bind path environment variable for the container
|
|
220
220
|
server_address_setup : list[str]
|
|
221
221
|
Commands to setup the server address
|
|
222
222
|
launch_cmd : list[str]
|
|
@@ -227,7 +227,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
|
|
|
227
227
|
|
|
228
228
|
shebang: str
|
|
229
229
|
container_setup: str
|
|
230
|
-
|
|
230
|
+
bind_path: str
|
|
231
231
|
server_address_setup: list[str]
|
|
232
232
|
write_to_json: list[str]
|
|
233
233
|
launch_cmd: list[str]
|
|
@@ -237,9 +237,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
|
|
|
237
237
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
|
|
238
238
|
"shebang": "#!/bin/bash\n",
|
|
239
239
|
"container_setup": f"{CONTAINER_LOAD_CMD}\n",
|
|
240
|
-
"
|
|
241
|
-
f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
|
|
242
|
-
],
|
|
240
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
|
|
243
241
|
"server_address_setup": [
|
|
244
242
|
"source {src_dir}/find_port.sh",
|
|
245
243
|
"head_node_ip=${{SLURMD_NODENAME}}",
|
|
@@ -255,7 +253,7 @@ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
|
|
|
255
253
|
' "$json_path" > temp_{model_name}.json \\',
|
|
256
254
|
' && mv temp_{model_name}.json "$json_path"\n',
|
|
257
255
|
],
|
|
258
|
-
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --
|
|
256
|
+
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {IMAGE_PATH} \\",
|
|
259
257
|
"launch_cmd": [
|
|
260
258
|
"vllm serve {model_weights_path} \\",
|
|
261
259
|
" --served-model-name {model_name} \\",
|
vec_inf/client/_utils.py
CHANGED
|
@@ -436,7 +436,7 @@ def find_matching_dirs(
|
|
|
436
436
|
return matched
|
|
437
437
|
|
|
438
438
|
|
|
439
|
-
def check_required_fields(params: dict[str, Any]) ->
|
|
439
|
+
def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
|
|
440
440
|
"""Check for required fields without default vals and their corresponding env vars.
|
|
441
441
|
|
|
442
442
|
Parameters
|
|
@@ -444,12 +444,15 @@ def check_required_fields(params: dict[str, Any]) -> None:
|
|
|
444
444
|
params : dict[str, Any]
|
|
445
445
|
Dictionary of parameters to check.
|
|
446
446
|
"""
|
|
447
|
+
env_overrides = {}
|
|
447
448
|
for arg in REQUIRED_ARGS:
|
|
448
449
|
if not params.get(arg):
|
|
449
450
|
default_value = os.getenv(REQUIRED_ARGS[arg])
|
|
450
451
|
if default_value:
|
|
451
452
|
params[arg] = default_value
|
|
453
|
+
env_overrides[arg] = default_value
|
|
452
454
|
else:
|
|
453
455
|
raise MissingRequiredFieldsError(
|
|
454
456
|
f"{arg} is required, please set it in the command arguments or environment variables"
|
|
455
457
|
)
|
|
458
|
+
return env_overrides
|
vec_inf/client/api.py
CHANGED
|
@@ -10,7 +10,9 @@ vec_inf.client._helper : Helper classes for model inference server management
|
|
|
10
10
|
vec_inf.client.models : Data models for API responses
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
+
import re
|
|
13
14
|
import shutil
|
|
15
|
+
import subprocess
|
|
14
16
|
import time
|
|
15
17
|
import warnings
|
|
16
18
|
from pathlib import Path
|
|
@@ -181,6 +183,51 @@ class VecInfClient:
|
|
|
181
183
|
)
|
|
182
184
|
return model_launcher.launch()
|
|
183
185
|
|
|
186
|
+
def fetch_running_jobs(self) -> list[str]:
    """
    Fetch the list of running vec-inf job IDs for the current user.

    Lists the user's jobs with ``squeue --me --noheader``, then queries
    each one with ``scontrol show job`` and keeps the jobs whose
    ``JobName`` ends with ``-vec-inf``.

    Returns
    -------
    list[str]
        List of matching job IDs; empty list if squeue is unavailable or
        no job matches.

    Raises
    ------
    SlurmJobError
        If the ``squeue`` command exits with a non-zero status.
    """
    try:
        res = subprocess.run(
            ["squeue", "--me", "--noheader"],
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError:
        # squeue is not installed on this machine: report no jobs, as the
        # documented contract promises, instead of leaking an OSError.
        return []
    except subprocess.CalledProcessError as e:
        raise SlurmJobError(f"Error running slurm command: {e}") from e

    # The first whitespace-separated field of every non-empty line is the job ID.
    job_ids = [
        ln.strip().split()[0] for ln in res.stdout.splitlines() if ln.strip()
    ]

    if not job_ids:
        return []

    # For each job, fetch the full JobName and filter by suffix
    matching_ids = []
    for jid in job_ids:
        try:
            sctl = subprocess.run(
                ["scontrol", "show", "job", "-o", jid],
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError:
            # Job might have finished between squeue and scontrol; skip
            continue
        m = re.search(r"\bJobName=([^\s]+)", sctl.stdout)
        if m and m.group(1).endswith("-vec-inf"):
            matching_ids.append(jid)

    return matching_ids
|
|
230
|
+
|
|
184
231
|
def get_status(self, slurm_job_id: str) -> StatusResponse:
|
|
185
232
|
"""Get the status of a running model.
|
|
186
233
|
|
vec_inf/config/models.yaml
CHANGED
|
@@ -1059,12 +1059,11 @@ models:
|
|
|
1059
1059
|
model_family: gpt-oss
|
|
1060
1060
|
model_variant: 120b
|
|
1061
1061
|
model_type: LLM
|
|
1062
|
-
gpus_per_node:
|
|
1063
|
-
num_nodes:
|
|
1062
|
+
gpus_per_node: 2
|
|
1063
|
+
num_nodes: 1
|
|
1064
1064
|
vocab_size: 201088
|
|
1065
1065
|
time: 08:00:00
|
|
1066
1066
|
resource_type: l40s
|
|
1067
1067
|
vllm_args:
|
|
1068
|
-
--tensor-parallel-size:
|
|
1069
|
-
--
|
|
1070
|
-
--max-model-len: 40960
|
|
1068
|
+
--tensor-parallel-size: 2
|
|
1069
|
+
--max-model-len: 32768
|
vec_inf/find_port.sh
CHANGED
|
@@ -28,7 +28,16 @@ find_available_port() {
|
|
|
28
28
|
local base_port=$2
|
|
29
29
|
local max_port=$3
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
# Generate shuffled list of ports; fallback to sequential if shuf not present
|
|
32
|
+
if command -v shuf >/dev/null 2>&1; then
|
|
33
|
+
local port_list
|
|
34
|
+
port_list=$(shuf -i "${base_port}-${max_port}")
|
|
35
|
+
else
|
|
36
|
+
local port_list
|
|
37
|
+
port_list=$(seq $base_port $max_port)
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
for port in $port_list; do
|
|
32
41
|
if is_port_available $ip $port; then
|
|
33
42
|
echo $port
|
|
34
43
|
return
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.3
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
5
|
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -30,7 +30,7 @@ Description-Content-Type: text/markdown
|
|
|
30
30
|
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
|
|
31
31
|
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
|
|
32
32
|
[](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
|
|
33
|
-
[](https://docs.vllm.ai/en/v0.11.0/)
|
|
34
34
|

|
|
35
35
|
|
|
36
36
|
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
|
|
@@ -43,7 +43,7 @@ If you are using the Vector cluster environment, and you don't need any customiz
|
|
|
43
43
|
```bash
|
|
44
44
|
pip install vec-inf
|
|
45
45
|
```
|
|
46
|
-
Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.
|
|
46
|
+
Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
|
|
47
47
|
|
|
48
48
|
If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
|
|
49
49
|
* Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
|
|
@@ -76,7 +76,7 @@ Models that are already supported by `vec-inf` would be launched using the cache
|
|
|
76
76
|
#### Other commands
|
|
77
77
|
|
|
78
78
|
* `batch-launch`: Launch multiple model inference servers at once, currently ONLY single node models supported,
|
|
79
|
-
* `status`: Check the
|
|
79
|
+
* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
|
|
80
80
|
* `metrics`: Streams performance metrics to the console.
|
|
81
81
|
* `shutdown`: Shutdown a model by providing its Slurm job ID.
|
|
82
82
|
* `list`: List all available model names, or view the default/cached configuration of a specific model.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
vec_inf/README.md,sha256=GpKnty9u1b06cPT2Ce_5v0LBucmXOQt6Nl4OJKvjf68,1410
|
|
2
|
+
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
|
+
vec_inf/find_port.sh,sha256=HHx1kg-TIoPZu0u55S4T5jl8MDV4_mnqh4Y7r_quyWw,1358
|
|
4
|
+
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
+
vec_inf/cli/_cli.py,sha256=9EzRpOFJVd1_g0G-em7DlNJFZoKb-FvBboqSOUanoxU,16787
|
|
6
|
+
vec_inf/cli/_helper.py,sha256=q8ysD0g_hgKg_6emZZNNAhYR90SqvJJMYkPAB0Kj6gc,19177
|
|
7
|
+
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
|
+
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
|
+
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
|
+
vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
|
|
11
|
+
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
+
vec_inf/client/_helper.py,sha256=R5fOfmRK1I9H6ta6-5hhWwr12dhjZD8O0FvJOgfTkaA,37565
|
|
13
|
+
vec_inf/client/_slurm_script_generator.py,sha256=P60W36xvVNi33iK-GqK_StZ6zpJnCr8gLnY6AEn1HKE,14182
|
|
14
|
+
vec_inf/client/_slurm_templates.py,sha256=Zjl47mNxhOTxFDAa61n9o0NAZ_TwO2KF_LvYN3JG7Mk,9349
|
|
15
|
+
vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
|
|
16
|
+
vec_inf/client/_utils.py,sha256=bxn5acjDEtojEuLf8vaBv85kc5TwtYw_gIMhNKcD0M4,14405
|
|
17
|
+
vec_inf/client/api.py,sha256=-vazAWvZp0vsn4jB6R-WdUo5eZ5bR-XJqU6r6qOL16A,13596
|
|
18
|
+
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
+
vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
|
|
20
|
+
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
|
+
vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
|
|
22
|
+
vec_inf/config/models.yaml,sha256=1zPnW_1I_ltLk8wAoVNLvywQ1htvn0yzdqfHEBFDthg,24730
|
|
23
|
+
vec_inf-0.7.3.dist-info/METADATA,sha256=b-qth5Y_KY6FOj5ghuRwImLz6RBu2x3mXUrfmyqXpJ8,10122
|
|
24
|
+
vec_inf-0.7.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
25
|
+
vec_inf-0.7.3.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
+
vec_inf-0.7.3.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
+
vec_inf-0.7.3.dist-info/RECORD,,
|
vec_inf-0.7.2.dist-info/RECORD
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
vec_inf/README.md,sha256=WyvjbSs5Eh5fp8u66bgOaO3FQKP2U7m_HbLgqTHs_ng,1322
|
|
2
|
-
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
|
-
vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
|
|
4
|
-
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
-
vec_inf/cli/_cli.py,sha256=0YfxtPT_Nq5gvIol9eWmw5yW9AT1ghf_E49R9pD7UG4,16213
|
|
6
|
-
vec_inf/cli/_helper.py,sha256=0_onclvxxpDTp33ODYc19RbZ2aIhXuMTC9v19q8ZhIo,17473
|
|
7
|
-
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
|
-
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
|
-
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
|
-
vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
|
|
11
|
-
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
-
vec_inf/client/_helper.py,sha256=hb6m5TLwcGE0grCu5-UCUkWbByV-G5h8gA87Yzct6rk,37170
|
|
13
|
-
vec_inf/client/_slurm_script_generator.py,sha256=L6tqn71kNJ2I0xYipFh_ZxIAG8znpXhTpUxTU8LJIa4,13988
|
|
14
|
-
vec_inf/client/_slurm_templates.py,sha256=GxVNClkgggoJN2pT1AjK7CQCAErfKRMIs97Vlhxs9u8,9349
|
|
15
|
-
vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
|
|
16
|
-
vec_inf/client/_utils.py,sha256=_ZBmic0XvJ4vpdIuXDi6KO5iL2rbhIpFQT01EWGItN4,14296
|
|
17
|
-
vec_inf/client/api.py,sha256=lkVWCme-HmMJMqp8JbtjkBVL_MSPsCC_IBL9FBw3Um8,12011
|
|
18
|
-
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
-
vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
|
|
20
|
-
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
|
-
vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
|
|
22
|
-
vec_inf/config/models.yaml,sha256=PSDR29zI8xld32Vm6dhgCIRHPEkBhwQx7-d_uFlEAM8,24764
|
|
23
|
-
vec_inf-0.7.2.dist-info/METADATA,sha256=ljs9hao8q4igLERrjGL5u1vZ_n7DMrr8XnBHzybPE2Y,10099
|
|
24
|
-
vec_inf-0.7.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
-
vec_inf-0.7.2.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
-
vec_inf-0.7.2.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
-
vec_inf-0.7.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|