vec-inf 0.4.0.post1__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/__init__.py +1 -0
- vec_inf/cli/__init__.py +1 -0
- vec_inf/cli/_cli.py +134 -81
- vec_inf/cli/_utils.py +21 -37
- vec_inf/launch_server.sh +20 -1
- vec_inf/models/README.md +24 -0
- vec_inf/models/models.csv +12 -0
- vec_inf/multinode_vllm.slurm +3 -1
- vec_inf/vllm.slurm +3 -1
- {vec_inf-0.4.0.post1.dist-info → vec_inf-0.4.1.dist-info}/METADATA +24 -23
- vec_inf-0.4.1.dist-info/RECORD +16 -0
- {vec_inf-0.4.0.post1.dist-info → vec_inf-0.4.1.dist-info}/WHEEL +1 -1
- vec_inf-0.4.1.dist-info/entry_points.txt +2 -0
- vec_inf-0.4.0.post1.dist-info/RECORD +0 -16
- vec_inf-0.4.0.post1.dist-info/entry_points.txt +0 -3
- {vec_inf-0.4.0.post1.dist-info → vec_inf-0.4.1.dist-info/licenses}/LICENSE +0 -0
vec_inf/__init__.py
CHANGED
@@ -0,0 +1 @@
+"""vec_inf package."""

vec_inf/cli/__init__.py
CHANGED
@@ -0,0 +1 @@
+"""vec_inf cli package."""

vec_inf/cli/_cli.py
CHANGED
@@ -1,9 +1,10 @@
+"""Command line interface for Vector Inference."""
+
 import os
 import time
-from typing import
+from typing import Any, Dict, Optional

 import click
-
 import polars as pl
 from rich.columns import Columns
 from rich.console import Console

@@ -12,12 +13,13 @@ from rich.panel import Panel

 import vec_inf.cli._utils as utils

+
 CONSOLE = Console()


 @click.group()
-def cli():
-    """Vector Inference CLI"""
+def cli() -> None:
+    """Vector Inference CLI."""
     pass


@@ -122,10 +124,7 @@ def launch(
     enforce_eager: Optional[str] = None,
     json_mode: bool = False,
 ) -> None:
-    """
-    Launch a model on the cluster
-    """
-
+    """Launch a model on the cluster."""
     if isinstance(pipeline_parallelism, str):
         pipeline_parallelism = (
             "True" if pipeline_parallelism.lower() == "true" else "False"

@@ -138,6 +137,13 @@ def launch(

     models_df = utils.load_models_df()

+    models_df = models_df.with_columns(
+        pl.col("model_type").replace("Reward Modeling", "Reward_Modeling")
+    )
+    models_df = models_df.with_columns(
+        pl.col("model_type").replace("Text Embedding", "Text_Embedding")
+    )
+
     if model_name in models_df["model_name"].to_list():
         default_args = utils.load_default_args(models_df, model_name)
         for arg in default_args:

@@ -148,7 +154,6 @@ def launch(
     else:
         model_args = models_df.columns
         model_args.remove("model_name")
-        model_args.remove("model_type")
         for arg in model_args:
             if locals()[arg] is not None:
                 renamed_arg = arg.replace("_", "-")

@@ -189,79 +194,130 @@ def launch(
 def status(
     slurm_job_id: int, log_dir: Optional[str] = None, json_mode: bool = False
 ) -> None:
-    """
-    Get the status of a running model on the cluster
-    """
+    """Get the status of a running model on the cluster."""
     status_cmd = f"scontrol show job {slurm_job_id} --oneliner"
     output = utils.run_bash_command(status_cmd)

- …
+    base_data = _get_base_status_data(output)
+    status_info = _process_job_state(output, base_data, slurm_job_id, log_dir)
+    _display_status(status_info, json_mode)

+
+def _get_base_status_data(output: str) -> Dict[str, Any]:
+    """Extract basic job status information from scontrol output."""
     try:
- …
+        job_name = output.split(" ")[1].split("=")[1]
+        job_state = output.split(" ")[9].split("=")[1]
     except IndexError:
- …
+        job_name = "UNAVAILABLE"
+        job_state = "UNAVAILABLE"
+
+    return {
+        "model_name": job_name,
+        "status": "SHUTDOWN",
+        "base_url": "UNAVAILABLE",
+        "state": job_state,
+        "pending_reason": None,
+        "failed_reason": None,
+    }
+
+
+def _process_job_state(
+    output: str, status_info: Dict[str, Any], slurm_job_id: int, log_dir: Optional[str]
+) -> Dict[str, Any]:
+    """Process different job states and update status information."""
+    if status_info["state"] == "PENDING":
+        _process_pending_state(output, status_info)
+    elif status_info["state"] == "RUNNING":
+        _handle_running_state(status_info, slurm_job_id, log_dir)
+    return status_info
+
+
+def _process_pending_state(output: str, status_info: Dict[str, Any]) -> None:
+    """Handle PENDING job state."""
+    try:
+        status_info["pending_reason"] = output.split(" ")[10].split("=")[1]
+        status_info["status"] = "PENDING"
+    except IndexError:
+        status_info["pending_reason"] = "Unknown pending reason"
+
+
+def _handle_running_state(
+    status_info: Dict[str, Any], slurm_job_id: int, log_dir: Optional[str]
+) -> None:
+    """Handle RUNNING job state and check server status."""
+    server_status = utils.is_server_running(
+        status_info["model_name"], slurm_job_id, log_dir
+    )
+
+    if isinstance(server_status, tuple):
+        status_info["status"], status_info["failed_reason"] = server_status
+        return
+
+    if server_status == "RUNNING":
+        _check_model_health(status_info, slurm_job_id, log_dir)
+    else:
+        status_info["status"] = server_status
+

+def _check_model_health(
+    status_info: Dict[str, Any], slurm_job_id: int, log_dir: Optional[str]
+) -> None:
+    """Check model health and update status accordingly."""
+    model_status = utils.model_health_check(
+        status_info["model_name"], slurm_job_id, log_dir
+    )
+    status, failed_reason = model_status
+    if status == "READY":
+        status_info["base_url"] = utils.get_base_url(
+            status_info["model_name"], slurm_job_id, log_dir
+        )
+        status_info["status"] = status
+    else:
+        status_info["status"], status_info["failed_reason"] = status, failed_reason
+
+
+def _display_status(status_info: Dict[str, Any], json_mode: bool) -> None:
+    """Display the status information in appropriate format."""
     if json_mode:
- …
-            "model_name": slurm_job_name,
-            "model_status": status,
-            "base_url": base_url,
-        }
-        if "slurm_job_pending_reason" in locals():
-            status_dict["pending_reason"] = slurm_job_pending_reason
-        if "slurm_job_failed_reason" in locals():
-            status_dict["failed_reason"] = slurm_job_failed_reason
-        click.echo(f"{status_dict}")
+        _output_json(status_info)
     else:
- …
+        _output_table(status_info)
+
+
+def _output_json(status_info: Dict[str, Any]) -> None:
+    """Format and output JSON data."""
+    json_data = {
+        "model_name": status_info["model_name"],
+        "model_status": status_info["status"],
+        "base_url": status_info["base_url"],
+    }
+    if status_info["pending_reason"]:
+        json_data["pending_reason"] = status_info["pending_reason"]
+    if status_info["failed_reason"]:
+        json_data["failed_reason"] = status_info["failed_reason"]
+    click.echo(json_data)
+
+
+def _output_table(status_info: Dict[str, Any]) -> None:
+    """Create and display rich table."""
+    table = utils.create_table(key_title="Job Status", value_title="Value")
+    table.add_row("Model Name", status_info["model_name"])
+    table.add_row("Model Status", status_info["status"], style="blue")
+
+    if status_info["pending_reason"]:
+        table.add_row("Pending Reason", status_info["pending_reason"])
+    if status_info["failed_reason"]:
+        table.add_row("Failed Reason", status_info["failed_reason"])
+
+    table.add_row("Base URL", status_info["base_url"])
+    CONSOLE.print(table)


 @cli.command("shutdown")
 @click.argument("slurm_job_id", type=int, nargs=1)
 def shutdown(slurm_job_id: int) -> None:
-    """
-    Shutdown a running model on the cluster
-    """
+    """Shutdown a running model on the cluster."""
     shutdown_cmd = f"scancel {slurm_job_id}"
     utils.run_bash_command(shutdown_cmd)
     click.echo(f"Shutting down model with Slurm Job ID: {slurm_job_id}")

@@ -275,11 +331,9 @@ def shutdown(slurm_job_id: int) -> None:
     help="Output in JSON string",
 )
 def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:
-    """
-    List all available models, or get default setup of a specific model
-    """
+    """List all available models, or get default setup of a specific model."""

-    def list_model(model_name: str, models_df: pl.DataFrame, json_mode: bool):
+    def list_model(model_name: str, models_df: pl.DataFrame, json_mode: bool) -> None:
         if model_name not in models_df["model_name"].to_list():
             raise ValueError(f"Model name {model_name} not found in available models")

@@ -297,7 +351,7 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:
             table.add_row(key, str(value))
         CONSOLE.print(table)

-    def list_all(models_df: pl.DataFrame, json_mode: bool):
+    def list_all(models_df: pl.DataFrame, json_mode: bool) -> None:
         if json_mode:
             click.echo(models_df["model_name"].to_list())
             return

@@ -327,9 +381,12 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:

         for row in models_df.to_dicts():
             panel_color = model_type_colors.get(row["model_type"], "white")
- …
-            f"[magenta]{row['model_family']}[/magenta]
- …
+            if row["model_variant"] == "None":
+                styled_text = f"[magenta]{row['model_family']}[/magenta]"
+            else:
+                styled_text = (
+                    f"[magenta]{row['model_family']}[/magenta]-{row['model_variant']}"
+                )
             panels.append(Panel(styled_text, expand=True, border_style=panel_color))
         CONSOLE.print(Columns(panels, equal=True))

@@ -349,9 +406,7 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:
     help="Path to slurm log directory. This is required if --log-dir was set in model launch",
 )
 def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
-    """
-    Stream performance metrics to the console
-    """
+    """Stream performance metrics to the console."""
     status_cmd = f"scontrol show job {slurm_job_id} --oneliner"
     output = utils.run_bash_command(status_cmd)
     slurm_job_name = output.split(" ")[1].split("=")[1]

@@ -365,13 +420,11 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
         if isinstance(out_logs, str):
             live.update(out_logs)
             break
-        out_logs = cast(list, out_logs)
         latest_metrics = utils.get_latest_metric(out_logs)
         # if latest_metrics is a string, then it is an error message
         if isinstance(latest_metrics, str):
             live.update(latest_metrics)
             break
-        latest_metrics = cast(dict, latest_metrics)
         table = utils.create_table(key_title="Metric", value_title="Value")
         for key, value in latest_metrics.items():
             table.add_row(key, value)

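The `status` command's behaviour is unchanged by this refactor; the single large function is split into the `_get_base_status_data`, `_process_job_state` and `_display_status` helpers shown above. As a hedged usage sketch (the Slurm job ID is a placeholder, and it assumes the subcommand and `--json-mode` flag are exposed under the names used in the signatures above):

```bash
# Illustrative only; 12345678 is a placeholder Slurm job ID.
vec-inf status 12345678              # rich table: model name, status, base URL
vec-inf status 12345678 --json-mode  # dict-style output for scripting
```
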
vec_inf/cli/_utils.py
CHANGED
@@ -1,19 +1,20 @@
+"""Utility functions for the CLI."""
+
 import os
 import subprocess
-from typing import Optional, Union, cast
+from typing import Dict, List, Optional, Tuple, Union, cast

 import polars as pl
 import requests
 from rich.table import Table

+
 MODEL_READY_SIGNATURE = "INFO: Application startup complete."
 SERVER_ADDRESS_SIGNATURE = "Server address: "


 def run_bash_command(command: str) -> str:
-    """
-    Run a bash command and return the output
-    """
+    """Run a bash command and return the output."""
     process = subprocess.Popen(
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
     )

@@ -24,15 +25,13 @@ def run_bash_command(command: str) -> str:
 def read_slurm_log(
     slurm_job_name: str, slurm_job_id: int, slurm_log_type: str, log_dir: Optional[str]
 ) -> Union[list[str], str]:
-    """
-    Read the slurm log file
-    """
+    """Read the slurm log file."""
     if not log_dir:
         models_dir = os.path.join(os.path.expanduser("~"), ".vec-inf-logs")

-        for
-            if
-                log_dir = os.path.join(models_dir,
+        for directory in sorted(os.listdir(models_dir), key=len, reverse=True):
+            if directory in slurm_job_name:
+                log_dir = os.path.join(models_dir, directory)
                 break

     log_dir = cast(str, log_dir)

@@ -53,9 +52,7 @@
 def is_server_running(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
 ) -> Union[str, tuple[str, str]]:
-    """
-    Check if a model is ready to serve requests
-    """
+    """Check if a model is ready to serve requests."""
     log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
     if isinstance(log_content, str):
         return log_content

@@ -72,9 +69,7 @@ def is_server_running(


 def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
-    """
-    Get the base URL of a model
-    """
+    """Get the base URL of a model."""
     log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
     if isinstance(log_content, str):
         return log_content

@@ -87,10 +82,8 @@ def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str])

 def model_health_check(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) ->
-    """
-    Check the health of a running model on the cluster
-    """
+) -> Tuple[str, Union[str, int]]:
+    """Check the health of a running model on the cluster."""
     base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
     if not base_url.startswith("http"):
         return ("FAILED", base_url)

@@ -100,9 +93,8 @@ def model_health_check(
         response = requests.get(health_check_url)
         # Check if the request was successful
         if response.status_code == 200:
-            return "READY"
- …
-        return ("FAILED", response.status_code)
+            return ("READY", response.status_code)
+        return ("FAILED", response.status_code)
     except requests.exceptions.RequestException as e:
         return ("FAILED", str(e))


@@ -110,9 +102,7 @@ def model_health_check(
 def create_table(
     key_title: str = "", value_title: str = "", show_header: bool = True
 ) -> Table:
-    """
-    Create a table for displaying model status
-    """
+    """Create a table for displaying model status."""
     table = Table(show_header=show_header, header_style="bold magenta")
     table.add_column(key_title, style="dim")
     table.add_column(value_title)

@@ -120,30 +110,24 @@ def create_table(


 def load_models_df() -> pl.DataFrame:
-    """
- …
-    """
-    models_df = pl.read_csv(
+    """Load the models dataframe."""
+    return pl.read_csv(
         os.path.join(
             os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
             "models/models.csv",
         )
     )
-    return models_df


-def load_default_args(models_df: pl.DataFrame, model_name: str) ->
-    """
-    Load the default arguments for a model
-    """
+def load_default_args(models_df: pl.DataFrame, model_name: str) -> Dict[str, str]:
+    """Load the default arguments for a model."""
     row_data = models_df.filter(models_df["model_name"] == model_name)
     default_args = row_data.to_dicts()[0]
     default_args.pop("model_name", None)
-    default_args.pop("model_type", None)
     return default_args


-def get_latest_metric(log_lines:
+def get_latest_metric(log_lines: List[str]) -> Union[str, Dict[str, str]]:
     """Read the latest metric entry from the log file."""
     latest_metric = {}

vec_inf/launch_server.sh
CHANGED
@@ -6,6 +6,7 @@ while [[ "$#" -gt 0 ]]; do
     case $1 in
         --model-family) model_family="$2"; shift ;;
         --model-variant) model_variant="$2"; shift ;;
+        --model-type) model_type="$2"; shift ;;
         --partition) partition="$2"; shift ;;
         --qos) qos="$2"; shift ;;
         --time) walltime="$2"; shift ;;

@@ -25,7 +26,7 @@ while [[ "$#" -gt 0 ]]; do
     shift
 done

-required_vars=(model_family model_variant partition qos walltime num_nodes num_gpus max_model_len vocab_size data_type venv log_dir model_weights_parent_dir)
+required_vars=(model_family model_variant model_type partition qos walltime num_nodes num_gpus max_model_len vocab_size data_type venv log_dir model_weights_parent_dir)

 for var in "${required_vars[@]}"; do
     if [ -z "${!var}" ]; then

@@ -36,6 +37,7 @@ done

 export MODEL_FAMILY=$model_family
 export MODEL_VARIANT=$model_variant
+export MODEL_TYPE=$model_type
 export JOB_PARTITION=$partition
 export QOS=$qos
 export WALLTIME=$walltime

@@ -48,6 +50,17 @@ export VENV_BASE=$venv
 export LOG_DIR=$log_dir
 export MODEL_WEIGHTS_PARENT_DIR=$model_weights_parent_dir

+if [[ "$model_type" == "LLM" || "$model_type" == "VLM" ]]; then
+    export VLLM_TASK="generate"
+elif [ "$model_type" == "Reward_Modeling" ]; then
+    export VLLM_TASK="reward"
+elif [ "$model_type" == "Text_Embedding" ]; then
+    export VLLM_TASK="embed"
+else
+    echo "Error: Unknown model_type: $model_type"
+    exit 1
+fi
+
 if [ -n "$max_num_seqs" ]; then
     export VLLM_MAX_NUM_SEQS=$max_num_seqs
 else

@@ -69,6 +82,10 @@ fi
 # ================================= Set default environment variables ======================================
 # Slurm job configuration
 export JOB_NAME="$MODEL_FAMILY-$MODEL_VARIANT"
+if [ "$JOB_NAME" == "DeepSeek-R1-None" ]; then
+    export JOB_NAME=$MODEL_FAMILY
+fi
+
 if [ "$LOG_DIR" = "default" ]; then
     export LOG_DIR="$HOME/.vec-inf-logs/$MODEL_FAMILY"
 fi

@@ -101,6 +118,8 @@ echo Num Nodes: $NUM_NODES
 echo GPUs per Node: $NUM_GPUS
 echo QOS: $QOS
 echo Walltime: $WALLTIME
+echo Model Type: $MODEL_TYPE
+echo Task: $VLLM_TASK
 echo Data Type: $VLLM_DATA_TYPE
 echo Max Model Length: $VLLM_MAX_MODEL_LEN
 echo Max Num Seqs: $VLLM_MAX_NUM_SEQS

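The new branch in `launch_server.sh` maps the `model_type` values used in `models.csv` onto vLLM task names. Restated outside the launcher as a small, purely illustrative sketch that mirrors the logic added above:

```bash
#!/bin/bash
# Illustrative restatement of the new model_type -> VLLM_TASK mapping.
for model_type in LLM VLM Reward_Modeling Text_Embedding; do
    if [[ "$model_type" == "LLM" || "$model_type" == "VLM" ]]; then
        task="generate"
    elif [ "$model_type" == "Reward_Modeling" ]; then
        task="reward"
    elif [ "$model_type" == "Text_Embedding" ]; then
        task="embed"
    fi
    echo "$model_type -> $task"
done
```
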
vec_inf/models/README.md
CHANGED
@@ -148,6 +148,18 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`QwQ-32B-Preview`](https://huggingface.co/Qwen/QwQ-32B-Preview) | 2x a40 | - tokens/s | - tokens/s |

+### [DeepSeek-R1: Distilled Models](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`DeepSeek-R1-Distill-Llama-8B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Llama-70B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) | 4x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-1.5B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-7B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | 2x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | 4x a40 | - tokens/s | - tokens/s |
+
+
 ## Vision Language Models

 ### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)

@@ -194,6 +206,18 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`e5-mistral-7b-instruct`](https://huggingface.co/intfloat/e5-mistral-7b-instruct) | 1x a40 | - tokens/s | - tokens/s |

+### [BAAI: bge](https://huggingface.co/BAAI)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5) | 1x A40 | - tokens/s | - tokens/s |
+
+### [Sentence Transformers: MiniLM](https://huggingface.co/sentence-transformers)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 1x A40 | - tokens/s | - tokens/s |
+
+
+
 ## Reward Modeling Models

 ### [Qwen: Qwen2.5-Math](https://huggingface.co/collections/Qwen/qwen25-math-66eaa240a1b7d5ee65f1da3e)

vec_inf/models/models.csv
CHANGED
@@ -71,3 +71,15 @@ Qwen2.5-Math-RM-72B,Qwen2.5,Math-RM-72B,Reward Modeling,4,1,152064,4096,256,true
 QwQ-32B-Preview,QwQ,32B-Preview,LLM,2,1,152064,32768,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
 Pixtral-12B-2409,Pixtral,12B-2409,VLM,1,1,131072,8192,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
 e5-mistral-7b-instruct,e5,mistral-7b-instruct,Text Embedding,1,1,32000,4096,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+bge-base-en-v1.5,bge,base-en-v1.5,Text Embedding,1,1,30522,512,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+all-MiniLM-L6-v2,all-MiniLM,L6-v2,Text Embedding,1,1,30522,512,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+Llama-3.3-70B-Instruct,Llama-3.3,70B-Instruct,LLM,4,1,128256,65536,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+InternVL2_5-26B,InternVL2_5,26B,VLM,2,1,92553,32768,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+InternVL2_5-38B,InternVL2_5,38B,VLM,4,1,92553,32768,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+Aya-Expanse-32B,Aya-Expanse,32B,LLM,2,1,256000,8192,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Llama-70B,DeepSeek-R1,Distill-Llama-70B,LLM,4,1,128256,65536,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Llama-8B,DeepSeek-R1,Distill-Llama-8B,LLM,1,1,128256,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Qwen-32B,DeepSeek-R1,Distill-Qwen-32B,LLM,4,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Qwen-14B,DeepSeek-R1,Distill-Qwen-14B,LLM,2,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Qwen-7B,DeepSeek-R1,Distill-Qwen-7B,LLM,1,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights
+DeepSeek-R1-Distill-Qwen-1.5B,DeepSeek-R1,Distill-Qwen-1.5B,LLM,1,1,152064,131072,256,true,false,m2,08:00:00,a40,auto,singularity,default,/model-weights

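All twelve new rows are resolved through `load_models_df`/`load_default_args`, so these models become launchable by name with the CSV values as defaults. A usage sketch following the `vec-inf launch <model_name>` form shown in the package README (individual defaults can be overridden with the flags listed by `--help`):

```bash
# Launch two of the newly added models with their models.csv defaults.
vec-inf launch DeepSeek-R1-Distill-Qwen-7B
vec-inf launch bge-base-en-v1.5
```
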
vec_inf/multinode_vllm.slurm
CHANGED
@@ -12,7 +12,7 @@ nvidia-smi
 source ${SRC_DIR}/find_port.sh

 if [ "$VENV_BASE" = "singularity" ]; then
-    export SINGULARITY_IMAGE=/projects/aieng/public/vector-
+    export SINGULARITY_IMAGE=/projects/aieng/public/vector-inference_latest.sif
     export VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
     module load singularity-ce/3.8.2
     singularity exec $SINGULARITY_IMAGE ray stop

@@ -103,6 +103,7 @@ if [ "$VENV_BASE" = "singularity" ]; then
         --max-logprobs ${VLLM_MAX_LOGPROBS} \
         --max-model-len ${VLLM_MAX_MODEL_LEN} \
         --max-num-seqs ${VLLM_MAX_NUM_SEQS} \
+        --task ${VLLM_TASK} \
         ${ENFORCE_EAGER}
 else
     source ${VENV_BASE}/bin/activate

@@ -118,5 +119,6 @@ else
         --max-logprobs ${VLLM_MAX_LOGPROBS} \
         --max-model-len ${VLLM_MAX_MODEL_LEN} \
         --max-num-seqs ${VLLM_MAX_NUM_SEQS} \
+        --task ${VLLM_TASK} \
         ${ENFORCE_EAGER}
 fi

vec_inf/vllm.slurm
CHANGED
@@ -23,7 +23,7 @@ fi

 # Activate vllm venv
 if [ "$VENV_BASE" = "singularity" ]; then
-    export SINGULARITY_IMAGE=/projects/aieng/public/vector-
+    export SINGULARITY_IMAGE=/projects/aieng/public/vector-inference_latest.sif
     export VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
     module load singularity-ce/3.8.2
     singularity exec $SINGULARITY_IMAGE ray stop

@@ -39,6 +39,7 @@ if [ "$VENV_BASE" = "singularity" ]; then
        --trust-remote-code \
        --max-model-len ${VLLM_MAX_MODEL_LEN} \
        --max-num-seqs ${VLLM_MAX_NUM_SEQS} \
+       --task ${VLLM_TASK} \
        ${ENFORCE_EAGER}
 else
     source ${VENV_BASE}/bin/activate

@@ -53,5 +54,6 @@ else
        --trust-remote-code \
        --max-model-len ${VLLM_MAX_MODEL_LEN} \
        --max-num-seqs ${VLLM_MAX_NUM_SEQS} \
+       --task ${VLLM_TASK} \
        ${ENFORCE_EAGER}
 fi

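Both Slurm scripts now forward the task chosen in `launch_server.sh` to the vLLM server via `--task`. The full server invocation sits outside the hunks shown here, so the following is only a hedged sketch of a comparable stand-alone command, using the weights path and limits from the `bge-base-en-v1.5` row added to `models.csv`:

```bash
# Hedged sketch, not the scripts' exact invocation (which is not shown in
# this diff): an OpenAI-compatible vLLM server started in embedding mode.
vllm serve /model-weights/bge-base-en-v1.5 \
    --task embed \
    --max-model-len 512 \
    --max-num-seqs 256 \
    --trust-remote-code
```
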
@@ -1,30 +1,32 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
Requires-Python:
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
5
|
+
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: <3.11,>=3.10
|
|
9
|
+
Requires-Dist: click>=8.1.0
|
|
10
|
+
Requires-Dist: numpy>=1.24.0
|
|
11
|
+
Requires-Dist: polars>=1.15.0
|
|
12
|
+
Requires-Dist: requests>=2.31.0
|
|
13
|
+
Requires-Dist: rich>=13.7.0
|
|
15
14
|
Provides-Extra: dev
|
|
16
|
-
Requires-Dist:
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
Requires-Dist:
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
Requires-Dist: ray (>=2.9.3,<3.0.0) ; extra == "dev"
|
|
21
|
-
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
22
|
-
Requires-Dist: rich (>=13.7.0,<14.0.0)
|
|
23
|
-
Requires-Dist: vllm (>=0.6.0,<0.7.0) ; extra == "dev"
|
|
24
|
-
Requires-Dist: vllm-nccl-cu12 (>=2.18,<2.19) ; extra == "dev"
|
|
15
|
+
Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
|
|
16
|
+
Requires-Dist: ray>=2.40.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: vllm-nccl-cu12<2.19,>=2.18; extra == 'dev'
|
|
18
|
+
Requires-Dist: vllm>=0.7.2; extra == 'dev'
|
|
25
19
|
Description-Content-Type: text/markdown
|
|
26
20
|
|
|
27
21
|
# Vector Inference: Easy inference on Slurm clusters
|
|
22
|
+
|
|
23
|
+
----------------------------------------------------
|
|
24
|
+
|
|
25
|
+
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
|
|
26
|
+
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs_build.yml)
|
|
27
|
+
[](https://codecov.io/github/VectorInstitute/vector-inference)
|
|
28
|
+

|
|
29
|
+
|
|
28
30
|
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **All scripts in this repository runs natively on the Vector Institute cluster environment**. To adapt to other environments, update [`launch_server.sh`](vec_inf/launch_server.sh), [`vllm.slurm`](vec_inf/vllm.slurm), [`multinode_vllm.slurm`](vec_inf/multinode_vllm.slurm) and [`models.csv`](vec_inf/models/models.csv) accordingly.
|
|
29
31
|
|
|
30
32
|
## Installation
|
|
@@ -42,7 +44,7 @@ vec-inf launch Meta-Llama-3.1-8B-Instruct
|
|
|
42
44
|
```
|
|
43
45
|
You should see an output like the following:
|
|
44
46
|
|
|
45
|
-
<img width="
|
|
47
|
+
<img width="600" alt="launch_img" src="https://github.com/user-attachments/assets/ab658552-18b2-47e0-bf70-e539c3b898d5">
|
|
46
48
|
|
|
47
49
|
The model would be launched using the [default parameters](vec_inf/models/models.csv), you can override these values by providing additional parameters, use `--help` to see the full list. You can also launch your own customized model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html), and make sure to follow the instructions below:
|
|
48
50
|
* Your model weights directory naming convention should follow `$MODEL_FAMILY-$MODEL_VARIANT`.
|
|
@@ -117,4 +119,3 @@ If you want to run inference from your local device, you can open a SSH tunnel t
|
|
|
117
119
|
ssh -L 8081:172.17.8.29:8081 username@v.vectorinstitute.ai -N
|
|
118
120
|
```
|
|
119
121
|
Where the last number in the URL is the GPU number (gpu029 in this case). The example provided above is for the vector cluster, change the variables accordingly for your environment
|
|
120
|
-
|
|
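The new metadata pins `Requires-Python` to `>=3.10,<3.11` and lists the runtime dependencies as plain `Requires-Dist` entries. Upgrading to this release is the usual pip install (version pin shown for clarity):

```bash
# Requires a Python 3.10 environment per the Requires-Python pin above.
pip install vec-inf==0.4.1
```
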
vec_inf-0.4.1.dist-info/RECORD
ADDED

@@ -0,0 +1,16 @@
+vec_inf/README.md,sha256=dxX0xKfwLioG0mJ2YFv5JJ5q1m5NlWBrVBOap1wuHfQ,624
+vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
+vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
+vec_inf/launch_server.sh,sha256=WJ7HyOEbknxe7zjF388qgnTqoapl90cUrjsIJQChidc,4714
+vec_inf/multinode_vllm.slurm,sha256=ymyteZWWspNDL0yBjhPNMZRd18Jepbw28HRw0EDuXYY,4201
+vec_inf/vllm.slurm,sha256=64jg8t9FHp4IH5Jc_Vrk0XwSSIrpN4Xjwko6GO7cDXQ,1894
+vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
+vec_inf/cli/_cli.py,sha256=3LZ7RbJsQ3mxHWTtt-34uQNCZ7G9HaJifyfTQw33zuI,14330
+vec_inf/cli/_utils.py,sha256=t_zFDEomSP9eDvad85GlJIFQ7Kl5ZXOxbgbkfMZ3DwA,4802
+vec_inf/models/README.md,sha256=JpQCg5taBuQp4sLmasK7YPjFMZritOAKlfPpEJsOpeQ,16602
+vec_inf/models/models.csv,sha256=xYrNykRu5HabsUjj4bdRI63YuGgCJSZ-ti_nIjuGPCY,11557
+vec_inf-0.4.1.dist-info/METADATA,sha256=yFvkCgCVpYzuZZJmD22BlTYQeTMk8gD6gmYagyTUyog,7375
+vec_inf-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+vec_inf-0.4.1.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
+vec_inf-0.4.1.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
+vec_inf-0.4.1.dist-info/RECORD,,

vec_inf-0.4.0.post1.dist-info/RECORD
REMOVED

@@ -1,16 +0,0 @@
-vec_inf/README.md,sha256=dxX0xKfwLioG0mJ2YFv5JJ5q1m5NlWBrVBOap1wuHfQ,624
-vec_inf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vec_inf/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vec_inf/cli/_cli.py,sha256=TRaY-QSBQ_do9b4R6Pl7fyDlrfuMN8Z8HH_xOCKkVJA,12585
-vec_inf/cli/_utils.py,sha256=sQqi7JdPOb7gfW4EVsXY2yhLUo8xWqxoY1spQ53bag4,4845
-vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
-vec_inf/launch_server.sh,sha256=gFovqXuYiQ8bEc6O31WTMDuBoNj7opB5iVfnCDhz2Nw,4165
-vec_inf/models/README.md,sha256=YNEVTWliHehCpJTq2SXAidqgFl6CWL6GUOnAPksDYFE,14844
-vec_inf/models/models.csv,sha256=f_cNeM7L0-4pgZqYfWilQd12-WVec2IVk6dRq5BE4mE,9875
-vec_inf/multinode_vllm.slurm,sha256=tg0WgLRdpRFD-oT05aucOpe6h2TZiTyYJFTMqSIj-HQ,4154
-vec_inf/vllm.slurm,sha256=lMgBI7r9jUVVhSIdrUH2DdC-Bxz0eyQ8vuB5uwOzWt0,1847
-vec_inf-0.4.0.post1.dist-info/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
-vec_inf-0.4.0.post1.dist-info/METADATA,sha256=Q6KhU-ggnR9FB5YUjWrPwy2MSd_c9GCFXAQqT9YXZOw,7032
-vec_inf-0.4.0.post1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-vec_inf-0.4.0.post1.dist-info/entry_points.txt,sha256=JF4uXsj1H4XacxaBw9f0KN0P0qDzmp7K_1zTEBDappo,48
-vec_inf-0.4.0.post1.dist-info/RECORD,,

{vec_inf-0.4.0.post1.dist-info → vec_inf-0.4.1.dist-info/licenses}/LICENSE
File without changes