vec-inf 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/cli/_cli.py +15 -1
- vec_inf/cli/_helper.py +44 -19
- vec_inf/client/_helper.py +66 -26
- vec_inf/client/_slurm_script_generator.py +36 -19
- vec_inf/client/_slurm_templates.py +20 -3
- vec_inf/client/_utils.py +54 -5
- vec_inf/client/api.py +8 -2
- vec_inf/client/models.py +6 -0
- vec_inf/config/models.yaml +48 -99
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.2.dist-info}/METADATA +4 -3
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.2.dist-info}/RECORD +14 -14
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.2.dist-info}/WHEEL +0 -0
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.2.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.1.dist-info → vec_inf-0.7.2.dist-info}/licenses/LICENSE +0 -0
vec_inf/cli/_cli.py
CHANGED
|
@@ -69,6 +69,16 @@ def cli() -> None:
|
|
|
69
69
|
type=int,
|
|
70
70
|
help="Number of GPUs/node to use, default to suggested resource allocation for model",
|
|
71
71
|
)
|
|
72
|
+
@click.option(
|
|
73
|
+
"--cpus-per-task",
|
|
74
|
+
type=int,
|
|
75
|
+
help="Number of CPU cores per task",
|
|
76
|
+
)
|
|
77
|
+
@click.option(
|
|
78
|
+
"--mem-per-node",
|
|
79
|
+
type=str,
|
|
80
|
+
help="Memory allocation per node in GB format (e.g., '32G')",
|
|
81
|
+
)
|
|
72
82
|
@click.option(
|
|
73
83
|
"--account",
|
|
74
84
|
"-A",
|
|
@@ -165,6 +175,10 @@ def launch(
|
|
|
165
175
|
Number of nodes to use
|
|
166
176
|
- gpus_per_node : int, optional
|
|
167
177
|
Number of GPUs per node
|
|
178
|
+
- cpus_per_task : int, optional
|
|
179
|
+
Number of CPU cores per task
|
|
180
|
+
- mem_per_node : str, optional
|
|
181
|
+
Memory allocation per node in GB format (e.g., '32G')
|
|
168
182
|
- account : str, optional
|
|
169
183
|
Charge resources used by this job to specified account
|
|
170
184
|
- work_dir : str, optional
|
|
@@ -447,7 +461,7 @@ def metrics(slurm_job_id: str) -> None:
|
|
|
447
461
|
metrics_formatter.format_metrics()
|
|
448
462
|
|
|
449
463
|
live.update(metrics_formatter.table)
|
|
450
|
-
time.sleep(
|
|
464
|
+
time.sleep(1)
|
|
451
465
|
except click.ClickException as e:
|
|
452
466
|
raise e
|
|
453
467
|
except Exception as e:
|
vec_inf/cli/_helper.py
CHANGED
|
@@ -36,6 +36,43 @@ class LaunchResponseFormatter:
|
|
|
36
36
|
self.model_name = model_name
|
|
37
37
|
self.params = params
|
|
38
38
|
|
|
39
|
+
def _add_resource_allocation_details(self, table: Table) -> None:
|
|
40
|
+
"""Add resource allocation details to the table."""
|
|
41
|
+
optional_fields = [
|
|
42
|
+
("account", "Account"),
|
|
43
|
+
("work_dir", "Working Directory"),
|
|
44
|
+
("resource_type", "Resource Type"),
|
|
45
|
+
("partition", "Partition"),
|
|
46
|
+
("qos", "QoS"),
|
|
47
|
+
]
|
|
48
|
+
for key, label in optional_fields:
|
|
49
|
+
if self.params.get(key):
|
|
50
|
+
table.add_row(label, self.params[key])
|
|
51
|
+
|
|
52
|
+
def _add_vllm_config(self, table: Table) -> None:
|
|
53
|
+
"""Add vLLM configuration details to the table."""
|
|
54
|
+
if self.params.get("vllm_args"):
|
|
55
|
+
table.add_row("vLLM Arguments:", style="magenta")
|
|
56
|
+
for arg, value in self.params["vllm_args"].items():
|
|
57
|
+
table.add_row(f" {arg}:", str(value))
|
|
58
|
+
|
|
59
|
+
def _add_env_vars(self, table: Table) -> None:
|
|
60
|
+
"""Add environment variable configuration details to the table."""
|
|
61
|
+
if self.params.get("env"):
|
|
62
|
+
table.add_row("Environment Variables", style="magenta")
|
|
63
|
+
for arg, value in self.params["env"].items():
|
|
64
|
+
table.add_row(f" {arg}:", str(value))
|
|
65
|
+
|
|
66
|
+
def _add_bind_paths(self, table: Table) -> None:
|
|
67
|
+
"""Add bind path configuration details to the table."""
|
|
68
|
+
if self.params.get("bind"):
|
|
69
|
+
table.add_row("Bind Paths", style="magenta")
|
|
70
|
+
for path in self.params["bind"].split(","):
|
|
71
|
+
host = target = path
|
|
72
|
+
if ":" in path:
|
|
73
|
+
host, target = path.split(":")
|
|
74
|
+
table.add_row(f" {host}:", target)
|
|
75
|
+
|
|
39
76
|
def format_table_output(self) -> Table:
|
|
40
77
|
"""Format output as rich Table.
|
|
41
78
|
|
|
@@ -59,16 +96,7 @@ class LaunchResponseFormatter:
|
|
|
59
96
|
table.add_row("Vocabulary Size", self.params["vocab_size"])
|
|
60
97
|
|
|
61
98
|
# Add resource allocation details
|
|
62
|
-
|
|
63
|
-
table.add_row("Account", self.params["account"])
|
|
64
|
-
if self.params.get("work_dir"):
|
|
65
|
-
table.add_row("Working Directory", self.params["work_dir"])
|
|
66
|
-
if self.params.get("resource_type"):
|
|
67
|
-
table.add_row("Resource Type", self.params["resource_type"])
|
|
68
|
-
if self.params.get("partition"):
|
|
69
|
-
table.add_row("Partition", self.params["partition"])
|
|
70
|
-
if self.params.get("qos"):
|
|
71
|
-
table.add_row("QoS", self.params["qos"])
|
|
99
|
+
self._add_resource_allocation_details(table)
|
|
72
100
|
table.add_row("Time Limit", self.params["time"])
|
|
73
101
|
table.add_row("Num Nodes", self.params["num_nodes"])
|
|
74
102
|
table.add_row("GPUs/Node", self.params["gpus_per_node"])
|
|
@@ -76,21 +104,18 @@ class LaunchResponseFormatter:
|
|
|
76
104
|
table.add_row("Memory/Node", self.params["mem_per_node"])
|
|
77
105
|
|
|
78
106
|
# Add job config details
|
|
107
|
+
if self.params.get("venv"):
|
|
108
|
+
table.add_row("Virtual Environment", self.params["venv"])
|
|
79
109
|
table.add_row(
|
|
80
110
|
"Model Weights Directory",
|
|
81
111
|
str(Path(self.params["model_weights_parent_dir"], self.model_name)),
|
|
82
112
|
)
|
|
83
113
|
table.add_row("Log Directory", self.params["log_dir"])
|
|
84
114
|
|
|
85
|
-
# Add
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
# Add Environment Variable Configuration Details
|
|
91
|
-
table.add_row("Environment Variables", style="magenta")
|
|
92
|
-
for arg, value in self.params["env"].items():
|
|
93
|
-
table.add_row(f" {arg}:", str(value))
|
|
115
|
+
# Add configuration details
|
|
116
|
+
self._add_vllm_config(table)
|
|
117
|
+
self._add_env_vars(table)
|
|
118
|
+
self._add_bind_paths(table)
|
|
94
119
|
|
|
95
120
|
return table
|
|
96
121
|
|
vec_inf/client/_helper.py
CHANGED
|
@@ -31,6 +31,7 @@ from vec_inf.client._slurm_script_generator import (
|
|
|
31
31
|
BatchSlurmScriptGenerator,
|
|
32
32
|
SlurmScriptGenerator,
|
|
33
33
|
)
|
|
34
|
+
from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME, IMAGE_PATH
|
|
34
35
|
from vec_inf.client.config import ModelConfig
|
|
35
36
|
from vec_inf.client.models import (
|
|
36
37
|
BatchLaunchResponse,
|
|
@@ -195,23 +196,14 @@ class ModelLauncher:
|
|
|
195
196
|
print(f"WARNING: Could not parse env var: {line}")
|
|
196
197
|
return env_vars
|
|
197
198
|
|
|
198
|
-
def
|
|
199
|
-
"""
|
|
200
|
-
|
|
201
|
-
Returns
|
|
202
|
-
-------
|
|
203
|
-
dict[str, Any]
|
|
204
|
-
Dictionary of prepared launch parameters
|
|
199
|
+
def _apply_cli_overrides(self, params: dict[str, Any]) -> None:
|
|
200
|
+
"""Apply CLI argument overrides to params.
|
|
205
201
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
when using multiple GPUs
|
|
202
|
+
Parameters
|
|
203
|
+
----------
|
|
204
|
+
params : dict[str, Any]
|
|
205
|
+
Dictionary of launch parameters to override
|
|
211
206
|
"""
|
|
212
|
-
params = self.model_config.model_dump(exclude_none=True)
|
|
213
|
-
|
|
214
|
-
# Override config defaults with CLI arguments
|
|
215
207
|
if self.kwargs.get("vllm_args"):
|
|
216
208
|
vllm_args = self._process_vllm_args(self.kwargs["vllm_args"])
|
|
217
209
|
for key, value in vllm_args.items():
|
|
@@ -224,13 +216,29 @@ class ModelLauncher:
|
|
|
224
216
|
params["env"][key] = str(value)
|
|
225
217
|
del self.kwargs["env"]
|
|
226
218
|
|
|
219
|
+
if self.kwargs.get("bind") and params.get("bind"):
|
|
220
|
+
params["bind"] = f"{params['bind']},{self.kwargs['bind']}"
|
|
221
|
+
del self.kwargs["bind"]
|
|
222
|
+
|
|
227
223
|
for key, value in self.kwargs.items():
|
|
228
224
|
params[key] = value
|
|
229
225
|
|
|
230
|
-
|
|
231
|
-
|
|
226
|
+
def _validate_resource_allocation(self, params: dict[str, Any]) -> None:
|
|
227
|
+
"""Validate resource allocation and parallelization settings.
|
|
232
228
|
|
|
233
|
-
|
|
229
|
+
Parameters
|
|
230
|
+
----------
|
|
231
|
+
params : dict[str, Any]
|
|
232
|
+
Dictionary of launch parameters to validate
|
|
233
|
+
|
|
234
|
+
Raises
|
|
235
|
+
------
|
|
236
|
+
MissingRequiredFieldsError
|
|
237
|
+
If tensor parallel size is not specified when using multiple GPUs
|
|
238
|
+
ValueError
|
|
239
|
+
If total # of GPUs requested is not a power of two
|
|
240
|
+
If mismatch between total # of GPUs requested and parallelization settings
|
|
241
|
+
"""
|
|
234
242
|
if (
|
|
235
243
|
int(params["gpus_per_node"]) > 1
|
|
236
244
|
and params["vllm_args"].get("--tensor-parallel-size") is None
|
|
@@ -251,19 +259,18 @@ class ModelLauncher:
|
|
|
251
259
|
"Mismatch between total number of GPUs requested and parallelization settings"
|
|
252
260
|
)
|
|
253
261
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
if resource_type:
|
|
257
|
-
params["gres"] = f"gpu:{resource_type}:{params['gpus_per_node']}"
|
|
258
|
-
else:
|
|
259
|
-
params["gres"] = f"gpu:{params['gpus_per_node']}"
|
|
262
|
+
def _setup_log_files(self, params: dict[str, Any]) -> None:
|
|
263
|
+
"""Set up log directory and file paths.
|
|
260
264
|
|
|
261
|
-
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
params : dict[str, Any]
|
|
268
|
+
Dictionary of launch parameters to set up log files
|
|
269
|
+
"""
|
|
262
270
|
params["log_dir"] = Path(params["log_dir"], params["model_family"]).expanduser()
|
|
263
271
|
params["log_dir"].mkdir(parents=True, exist_ok=True)
|
|
264
272
|
params["src_dir"] = SRC_DIR
|
|
265
273
|
|
|
266
|
-
# Construct slurm log file paths
|
|
267
274
|
params["out_file"] = (
|
|
268
275
|
f"{params['log_dir']}/{self.model_name}.%j/{self.model_name}.%j.out"
|
|
269
276
|
)
|
|
@@ -274,6 +281,35 @@ class ModelLauncher:
|
|
|
274
281
|
f"{params['log_dir']}/{self.model_name}.$SLURM_JOB_ID/{self.model_name}.$SLURM_JOB_ID.json"
|
|
275
282
|
)
|
|
276
283
|
|
|
284
|
+
def _get_launch_params(self) -> dict[str, Any]:
|
|
285
|
+
"""Prepare launch parameters, set log dir, and validate required fields.
|
|
286
|
+
|
|
287
|
+
Returns
|
|
288
|
+
-------
|
|
289
|
+
dict[str, Any]
|
|
290
|
+
Dictionary of prepared launch parameters
|
|
291
|
+
"""
|
|
292
|
+
params = self.model_config.model_dump(exclude_none=True)
|
|
293
|
+
|
|
294
|
+
# Override config defaults with CLI arguments
|
|
295
|
+
self._apply_cli_overrides(params)
|
|
296
|
+
|
|
297
|
+
# Check for required fields without default vals, will raise an error if missing
|
|
298
|
+
utils.check_required_fields(params)
|
|
299
|
+
|
|
300
|
+
# Validate resource allocation and parallelization settings
|
|
301
|
+
self._validate_resource_allocation(params)
|
|
302
|
+
|
|
303
|
+
# Convert gpus_per_node and resource_type to gres
|
|
304
|
+
resource_type = params.get("resource_type")
|
|
305
|
+
if resource_type:
|
|
306
|
+
params["gres"] = f"gpu:{resource_type}:{params['gpus_per_node']}"
|
|
307
|
+
else:
|
|
308
|
+
params["gres"] = f"gpu:{params['gpus_per_node']}"
|
|
309
|
+
|
|
310
|
+
# Setup log files
|
|
311
|
+
self._setup_log_files(params)
|
|
312
|
+
|
|
277
313
|
# Convert path to string for JSON serialization
|
|
278
314
|
for field in params:
|
|
279
315
|
if field in ["vllm_args", "env"]:
|
|
@@ -332,6 +368,10 @@ class ModelLauncher:
|
|
|
332
368
|
job_log_dir / f"{self.model_name}.{self.slurm_job_id}.sbatch"
|
|
333
369
|
)
|
|
334
370
|
|
|
371
|
+
# Replace venv with image path if using container
|
|
372
|
+
if self.params["venv"] == CONTAINER_MODULE_NAME:
|
|
373
|
+
self.params["venv"] = IMAGE_PATH
|
|
374
|
+
|
|
335
375
|
with job_json.open("w") as file:
|
|
336
376
|
json.dump(self.params, file, indent=4)
|
|
337
377
|
|
|
@@ -14,6 +14,7 @@ from vec_inf.client._slurm_templates import (
|
|
|
14
14
|
BATCH_SLURM_SCRIPT_TEMPLATE,
|
|
15
15
|
SLURM_SCRIPT_TEMPLATE,
|
|
16
16
|
)
|
|
17
|
+
from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class SlurmScriptGenerator:
|
|
@@ -32,24 +33,35 @@ class SlurmScriptGenerator:
|
|
|
32
33
|
def __init__(self, params: dict[str, Any]):
|
|
33
34
|
self.params = params
|
|
34
35
|
self.is_multinode = int(self.params["num_nodes"]) > 1
|
|
35
|
-
self.use_container =
|
|
36
|
-
self.params["venv"] == "singularity" or self.params["venv"] == "apptainer"
|
|
37
|
-
)
|
|
36
|
+
self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
|
|
38
37
|
self.additional_binds = self.params.get("bind", "")
|
|
39
38
|
if self.additional_binds:
|
|
40
39
|
self.additional_binds = f" --bind {self.additional_binds}"
|
|
41
40
|
self.model_weights_path = str(
|
|
42
41
|
Path(self.params["model_weights_parent_dir"], self.params["model_name"])
|
|
43
42
|
)
|
|
43
|
+
self.env_str = self._generate_env_str()
|
|
44
|
+
|
|
45
|
+
def _generate_env_str(self) -> str:
|
|
46
|
+
"""Generate the environment variables string for the Slurm script.
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
str
|
|
51
|
+
Formatted env vars string for container or shell export commands.
|
|
52
|
+
"""
|
|
44
53
|
env_dict: dict[str, str] = self.params.get("env", {})
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
54
|
+
|
|
55
|
+
if not env_dict:
|
|
56
|
+
return ""
|
|
57
|
+
|
|
58
|
+
if self.use_container:
|
|
59
|
+
# Format for container: --env KEY1=VAL1,KEY2=VAL2
|
|
60
|
+
env_pairs = [f"{key}={val}" for key, val in env_dict.items()]
|
|
61
|
+
return f"--env {','.join(env_pairs)}"
|
|
62
|
+
# Format for shell: export KEY1=VAL1\nexport KEY2=VAL2
|
|
63
|
+
export_lines = [f"export {key}={val}" for key, val in env_dict.items()]
|
|
64
|
+
return "\n".join(export_lines)
|
|
53
65
|
|
|
54
66
|
def _generate_script_content(self) -> str:
|
|
55
67
|
"""Generate the complete Slurm script content.
|
|
@@ -95,7 +107,12 @@ class SlurmScriptGenerator:
|
|
|
95
107
|
server_script = ["\n"]
|
|
96
108
|
if self.use_container:
|
|
97
109
|
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
|
|
98
|
-
|
|
110
|
+
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_env_vars"]))
|
|
111
|
+
else:
|
|
112
|
+
server_script.append(
|
|
113
|
+
SLURM_SCRIPT_TEMPLATE["activate_venv"].format(venv=self.params["venv"])
|
|
114
|
+
)
|
|
115
|
+
server_script.append(self.env_str)
|
|
99
116
|
server_script.append(
|
|
100
117
|
SLURM_SCRIPT_TEMPLATE["imports"].format(src_dir=self.params["src_dir"])
|
|
101
118
|
)
|
|
@@ -112,6 +129,11 @@ class SlurmScriptGenerator:
|
|
|
112
129
|
env_str=self.env_str,
|
|
113
130
|
),
|
|
114
131
|
)
|
|
132
|
+
else:
|
|
133
|
+
server_setup_str = server_setup_str.replace(
|
|
134
|
+
"CONTAINER_PLACEHOLDER",
|
|
135
|
+
"\\",
|
|
136
|
+
)
|
|
115
137
|
else:
|
|
116
138
|
server_setup_str = "\n".join(
|
|
117
139
|
SLURM_SCRIPT_TEMPLATE["server_setup"]["single_node"]
|
|
@@ -145,10 +167,7 @@ class SlurmScriptGenerator:
|
|
|
145
167
|
env_str=self.env_str,
|
|
146
168
|
)
|
|
147
169
|
)
|
|
148
|
-
|
|
149
|
-
launcher_script.append(
|
|
150
|
-
SLURM_SCRIPT_TEMPLATE["activate_venv"].format(venv=self.params["venv"])
|
|
151
|
-
)
|
|
170
|
+
|
|
152
171
|
launcher_script.append(
|
|
153
172
|
"\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format(
|
|
154
173
|
model_weights_path=self.model_weights_path,
|
|
@@ -194,9 +213,7 @@ class BatchSlurmScriptGenerator:
|
|
|
194
213
|
def __init__(self, params: dict[str, Any]):
|
|
195
214
|
self.params = params
|
|
196
215
|
self.script_paths: list[Path] = []
|
|
197
|
-
self.use_container =
|
|
198
|
-
self.params["venv"] == "singularity" or self.params["venv"] == "apptainer"
|
|
199
|
-
)
|
|
216
|
+
self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
|
|
200
217
|
for model_name in self.params["models"]:
|
|
201
218
|
self.params["models"][model_name]["additional_binds"] = ""
|
|
202
219
|
if self.params["models"][model_name].get("bind"):
|
|
@@ -74,7 +74,7 @@ class SlurmScriptTemplate(TypedDict):
|
|
|
74
74
|
shebang: ShebangConfig
|
|
75
75
|
container_setup: list[str]
|
|
76
76
|
imports: str
|
|
77
|
-
|
|
77
|
+
container_env_vars: list[str]
|
|
78
78
|
container_command: str
|
|
79
79
|
activate_venv: str
|
|
80
80
|
server_setup: ServerSetupConfig
|
|
@@ -96,8 +96,8 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
96
96
|
f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
|
|
97
97
|
],
|
|
98
98
|
"imports": "source {src_dir}/find_port.sh",
|
|
99
|
-
"
|
|
100
|
-
f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH
|
|
99
|
+
"container_env_vars": [
|
|
100
|
+
f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp"
|
|
101
101
|
],
|
|
102
102
|
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
|
|
103
103
|
"activate_venv": "source {venv}/bin/activate",
|
|
@@ -112,6 +112,23 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
112
112
|
"nodes_array=($nodes)",
|
|
113
113
|
"head_node=${{nodes_array[0]}}",
|
|
114
114
|
'head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)',
|
|
115
|
+
"\n# Check for RDMA devices and set environment variable accordingly",
|
|
116
|
+
"if ! command -v ibv_devices >/dev/null 2>&1; then",
|
|
117
|
+
' echo "ibv_devices not found; forcing TCP. (No RDMA userland on host?)"',
|
|
118
|
+
" export NCCL_IB_DISABLE=1",
|
|
119
|
+
' export NCCL_ENV_ARG="--env NCCL_IB_DISABLE=1"',
|
|
120
|
+
"else",
|
|
121
|
+
" # Pick GID index based on link layer (IB vs RoCE)",
|
|
122
|
+
' if ibv_devinfo 2>/dev/null | grep -q "link_layer:.*Ethernet"; then',
|
|
123
|
+
" # RoCEv2 typically needs a nonzero GID index; 3 is common, try 2 if your fabric uses it",
|
|
124
|
+
" export NCCL_IB_GID_INDEX={{NCCL_IB_GID_INDEX:-3}}",
|
|
125
|
+
' export NCCL_ENV_ARG="--env NCCL_IB_GID_INDEX={{NCCL_IB_GID_INDEX:-3}}"',
|
|
126
|
+
" else",
|
|
127
|
+
" # Native InfiniBand => GID 0",
|
|
128
|
+
" export NCCL_IB_GID_INDEX={{NCCL_IB_GID_INDEX:-0}}",
|
|
129
|
+
' export NCCL_ENV_ARG="--env NCCL_IB_GID_INDEX={{NCCL_IB_GID_INDEX:-0}}"',
|
|
130
|
+
" fi",
|
|
131
|
+
"fi",
|
|
115
132
|
"\n# Start Ray head node",
|
|
116
133
|
"head_node_port=$(find_available_port $head_node_ip 8080 65535)",
|
|
117
134
|
"ray_head=$head_node_ip:$head_node_port",
|
vec_inf/client/_utils.py
CHANGED
|
@@ -108,15 +108,64 @@ def is_server_running(
|
|
|
108
108
|
if isinstance(log_content, str):
|
|
109
109
|
return log_content
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
# Patterns that indicate fatal errors (not just warnings)
|
|
112
|
+
fatal_error_patterns = [
|
|
113
|
+
"traceback",
|
|
114
|
+
"exception",
|
|
115
|
+
"fatal error",
|
|
116
|
+
"critical error",
|
|
117
|
+
"failed to",
|
|
118
|
+
"could not",
|
|
119
|
+
"unable to",
|
|
120
|
+
"error:",
|
|
121
|
+
]
|
|
122
|
+
|
|
123
|
+
# Patterns to ignore (non-fatal warnings/info messages)
|
|
124
|
+
ignore_patterns = [
|
|
125
|
+
"deprecated",
|
|
126
|
+
"futurewarning",
|
|
127
|
+
"userwarning",
|
|
128
|
+
"deprecationwarning",
|
|
129
|
+
"slurmstepd: error:", # SLURM cancellation messages (often after server started)
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
ready_signature_found = False
|
|
133
|
+
fatal_error_line = None
|
|
112
134
|
|
|
113
135
|
for line in log_content:
|
|
114
|
-
|
|
115
|
-
|
|
136
|
+
line_lower = line.lower()
|
|
137
|
+
|
|
138
|
+
# Check for ready signature first - if found, server is running
|
|
116
139
|
if MODEL_READY_SIGNATURE in line:
|
|
117
|
-
|
|
140
|
+
ready_signature_found = True
|
|
141
|
+
# Continue checking to see if there are errors after startup
|
|
142
|
+
|
|
143
|
+
# Check for fatal errors (only if we haven't seen ready signature yet)
|
|
144
|
+
if not ready_signature_found:
|
|
145
|
+
# Skip lines that match ignore patterns
|
|
146
|
+
if any(ignore_pattern in line_lower for ignore_pattern in ignore_patterns):
|
|
147
|
+
continue
|
|
118
148
|
|
|
119
|
-
|
|
149
|
+
# Check for fatal error patterns
|
|
150
|
+
for pattern in fatal_error_patterns:
|
|
151
|
+
if pattern in line_lower:
|
|
152
|
+
# Additional check: skip if it's part of a warning message
|
|
153
|
+
# (warnings often contain "error:" but aren't fatal)
|
|
154
|
+
if "warning" in line_lower and "error:" in line_lower:
|
|
155
|
+
continue
|
|
156
|
+
fatal_error_line = line.strip("\n")
|
|
157
|
+
break
|
|
158
|
+
|
|
159
|
+
# If we found a fatal error, mark as failed
|
|
160
|
+
if fatal_error_line:
|
|
161
|
+
return (ModelStatus.FAILED, fatal_error_line)
|
|
162
|
+
|
|
163
|
+
# If ready signature was found and no fatal errors, server is running
|
|
164
|
+
if ready_signature_found:
|
|
165
|
+
return "RUNNING"
|
|
166
|
+
|
|
167
|
+
# Otherwise, still launching
|
|
168
|
+
return ModelStatus.LAUNCHING
|
|
120
169
|
|
|
121
170
|
|
|
122
171
|
def get_base_url(slurm_job_name: str, slurm_job_id: str, log_dir: str) -> str:
|
vec_inf/client/api.py
CHANGED
|
@@ -81,7 +81,7 @@ class VecInfClient:
|
|
|
81
81
|
|
|
82
82
|
def __init__(self) -> None:
|
|
83
83
|
"""Initialize the Vector Inference client."""
|
|
84
|
-
|
|
84
|
+
self._metrics_collectors: dict[str, PerformanceMetricsCollector] = {}
|
|
85
85
|
|
|
86
86
|
def list_models(self) -> list[ModelInfo]:
|
|
87
87
|
"""List all available models.
|
|
@@ -218,7 +218,13 @@ class VecInfClient:
|
|
|
218
218
|
- Performance metrics or error message
|
|
219
219
|
- Timestamp of collection
|
|
220
220
|
"""
|
|
221
|
-
|
|
221
|
+
# Use cached collector to preserve state between calls to compute throughput
|
|
222
|
+
if slurm_job_id not in self._metrics_collectors:
|
|
223
|
+
self._metrics_collectors[slurm_job_id] = PerformanceMetricsCollector(
|
|
224
|
+
slurm_job_id
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
performance_metrics_collector = self._metrics_collectors[slurm_job_id]
|
|
222
228
|
|
|
223
229
|
metrics: Union[dict[str, float], str]
|
|
224
230
|
if not performance_metrics_collector.metrics_url.startswith("http"):
|
vec_inf/client/models.py
CHANGED
|
@@ -194,6 +194,10 @@ class LaunchOptions:
|
|
|
194
194
|
Number of nodes to allocate
|
|
195
195
|
gpus_per_node : int, optional
|
|
196
196
|
Number of GPUs per node
|
|
197
|
+
cpus_per_task : int, optional
|
|
198
|
+
Number of CPUs per task
|
|
199
|
+
mem_per_node : str, optional
|
|
200
|
+
Memory per node
|
|
197
201
|
account : str, optional
|
|
198
202
|
Account name for job scheduling
|
|
199
203
|
work_dir : str, optional
|
|
@@ -232,6 +236,8 @@ class LaunchOptions:
|
|
|
232
236
|
resource_type: Optional[str] = None
|
|
233
237
|
num_nodes: Optional[int] = None
|
|
234
238
|
gpus_per_node: Optional[int] = None
|
|
239
|
+
cpus_per_task: Optional[int] = None
|
|
240
|
+
mem_per_node: Optional[str] = None
|
|
235
241
|
account: Optional[str] = None
|
|
236
242
|
work_dir: Optional[str] = None
|
|
237
243
|
qos: Optional[str] = None
|
vec_inf/config/models.yaml
CHANGED
|
@@ -12,7 +12,6 @@ models:
|
|
|
12
12
|
--pipeline-parallel-size: 2
|
|
13
13
|
--tensor-parallel-size: 4
|
|
14
14
|
--max-model-len: 65536
|
|
15
|
-
--max-num-seqs: 256
|
|
16
15
|
c4ai-command-r-08-2024:
|
|
17
16
|
model_family: c4ai-command-r
|
|
18
17
|
model_variant: 08-2024
|
|
@@ -25,7 +24,6 @@ models:
|
|
|
25
24
|
vllm_args:
|
|
26
25
|
--tensor-parallel-size: 2
|
|
27
26
|
--max-model-len: 32768
|
|
28
|
-
--max-num-seqs: 256
|
|
29
27
|
CodeLlama-7b-hf:
|
|
30
28
|
model_family: CodeLlama
|
|
31
29
|
model_variant: 7b-hf
|
|
@@ -37,7 +35,6 @@ models:
|
|
|
37
35
|
resource_type: l40s
|
|
38
36
|
vllm_args:
|
|
39
37
|
--max-model-len: 16384
|
|
40
|
-
--max-num-seqs: 256
|
|
41
38
|
CodeLlama-7b-Instruct-hf:
|
|
42
39
|
model_family: CodeLlama
|
|
43
40
|
model_variant: 7b-Instruct-hf
|
|
@@ -49,7 +46,6 @@ models:
|
|
|
49
46
|
resource_type: l40s
|
|
50
47
|
vllm_args:
|
|
51
48
|
--max-model-len: 16384
|
|
52
|
-
--max-num-seqs: 256
|
|
53
49
|
CodeLlama-13b-hf:
|
|
54
50
|
model_family: CodeLlama
|
|
55
51
|
model_variant: 13b-hf
|
|
@@ -61,7 +57,6 @@ models:
|
|
|
61
57
|
resource_type: l40s
|
|
62
58
|
vllm_args:
|
|
63
59
|
--max-model-len: 16384
|
|
64
|
-
--max-num-seqs: 256
|
|
65
60
|
CodeLlama-13b-Instruct-hf:
|
|
66
61
|
model_family: CodeLlama
|
|
67
62
|
model_variant: 13b-Instruct-hf
|
|
@@ -73,7 +68,6 @@ models:
|
|
|
73
68
|
resource_type: l40s
|
|
74
69
|
vllm_args:
|
|
75
70
|
--max-model-len: 16384
|
|
76
|
-
--max-num-seqs: 256
|
|
77
71
|
CodeLlama-34b-hf:
|
|
78
72
|
model_family: CodeLlama
|
|
79
73
|
model_variant: 34b-hf
|
|
@@ -86,7 +80,6 @@ models:
|
|
|
86
80
|
vllm_args:
|
|
87
81
|
--tensor-parallel-size: 2
|
|
88
82
|
--max-model-len: 16384
|
|
89
|
-
--max-num-seqs: 256
|
|
90
83
|
CodeLlama-34b-Instruct-hf:
|
|
91
84
|
model_family: CodeLlama
|
|
92
85
|
model_variant: 34b-Instruct-hf
|
|
@@ -99,7 +92,6 @@ models:
|
|
|
99
92
|
vllm_args:
|
|
100
93
|
--tensor-parallel-size: 2
|
|
101
94
|
--max-model-len: 16384
|
|
102
|
-
--max-num-seqs: 256
|
|
103
95
|
CodeLlama-70b-hf:
|
|
104
96
|
model_family: CodeLlama
|
|
105
97
|
model_variant: 70b-hf
|
|
@@ -112,7 +104,6 @@ models:
|
|
|
112
104
|
vllm_args:
|
|
113
105
|
--tensor-parallel-size: 4
|
|
114
106
|
--max-model-len: 4096
|
|
115
|
-
--max-num-seqs: 256
|
|
116
107
|
CodeLlama-70b-Instruct-hf:
|
|
117
108
|
model_family: CodeLlama
|
|
118
109
|
model_variant: 70b-Instruct-hf
|
|
@@ -125,7 +116,17 @@ models:
|
|
|
125
116
|
vllm_args:
|
|
126
117
|
--tensor-parallel-size: 4
|
|
127
118
|
--max-model-len: 4096
|
|
128
|
-
|
|
119
|
+
gemma-2-2b-it:
|
|
120
|
+
model_family: gemma-2
|
|
121
|
+
model_variant: 2b-it
|
|
122
|
+
model_type: LLM
|
|
123
|
+
gpus_per_node: 1
|
|
124
|
+
num_nodes: 1
|
|
125
|
+
vocab_size: 256000
|
|
126
|
+
time: 08:00:00
|
|
127
|
+
resource_type: l40s
|
|
128
|
+
vllm_args:
|
|
129
|
+
--max-model-len: 4096
|
|
129
130
|
gemma-2-9b:
|
|
130
131
|
model_family: gemma-2
|
|
131
132
|
model_variant: 9b
|
|
@@ -137,7 +138,6 @@ models:
|
|
|
137
138
|
resource_type: l40s
|
|
138
139
|
vllm_args:
|
|
139
140
|
--max-model-len: 4096
|
|
140
|
-
--max-num-seqs: 256
|
|
141
141
|
gemma-2-9b-it:
|
|
142
142
|
model_family: gemma-2
|
|
143
143
|
model_variant: 9b-it
|
|
@@ -149,7 +149,6 @@ models:
|
|
|
149
149
|
resource_type: l40s
|
|
150
150
|
vllm_args:
|
|
151
151
|
--max-model-len: 4096
|
|
152
|
-
--max-num-seqs: 256
|
|
153
152
|
gemma-2-27b:
|
|
154
153
|
model_family: gemma-2
|
|
155
154
|
model_variant: 27b
|
|
@@ -162,7 +161,6 @@ models:
|
|
|
162
161
|
vllm_args:
|
|
163
162
|
--tensor-parallel-size: 2
|
|
164
163
|
--max-model-len: 4096
|
|
165
|
-
--max-num-seqs: 256
|
|
166
164
|
gemma-2-27b-it:
|
|
167
165
|
model_family: gemma-2
|
|
168
166
|
model_variant: 27b-it
|
|
@@ -175,7 +173,6 @@ models:
|
|
|
175
173
|
vllm_args:
|
|
176
174
|
--tensor-parallel-size: 2
|
|
177
175
|
--max-model-len: 4096
|
|
178
|
-
--max-num-seqs: 256
|
|
179
176
|
Llama-2-7b-hf:
|
|
180
177
|
model_family: Llama-2
|
|
181
178
|
model_variant: 7b-hf
|
|
@@ -187,7 +184,6 @@ models:
|
|
|
187
184
|
resource_type: l40s
|
|
188
185
|
vllm_args:
|
|
189
186
|
--max-model-len: 4096
|
|
190
|
-
--max-num-seqs: 256
|
|
191
187
|
Llama-2-7b-chat-hf:
|
|
192
188
|
model_family: Llama-2
|
|
193
189
|
model_variant: 7b-chat-hf
|
|
@@ -199,7 +195,6 @@ models:
|
|
|
199
195
|
resource_type: l40s
|
|
200
196
|
vllm_args:
|
|
201
197
|
--max-model-len: 4096
|
|
202
|
-
--max-num-seqs: 256
|
|
203
198
|
Llama-2-13b-hf:
|
|
204
199
|
model_family: Llama-2
|
|
205
200
|
model_variant: 13b-hf
|
|
@@ -211,7 +206,6 @@ models:
|
|
|
211
206
|
resource_type: l40s
|
|
212
207
|
vllm_args:
|
|
213
208
|
--max-model-len: 4096
|
|
214
|
-
--max-num-seqs: 256
|
|
215
209
|
Llama-2-13b-chat-hf:
|
|
216
210
|
model_family: Llama-2
|
|
217
211
|
model_variant: 13b-chat-hf
|
|
@@ -223,7 +217,6 @@ models:
|
|
|
223
217
|
resource_type: l40s
|
|
224
218
|
vllm_args:
|
|
225
219
|
--max-model-len: 4096
|
|
226
|
-
--max-num-seqs: 256
|
|
227
220
|
Llama-2-70b-hf:
|
|
228
221
|
model_family: Llama-2
|
|
229
222
|
model_variant: 70b-hf
|
|
@@ -236,7 +229,6 @@ models:
|
|
|
236
229
|
vllm_args:
|
|
237
230
|
--tensor-parallel-size: 4
|
|
238
231
|
--max-model-len: 4096
|
|
239
|
-
--max-num-seqs: 256
|
|
240
232
|
Llama-2-70b-chat-hf:
|
|
241
233
|
model_family: Llama-2
|
|
242
234
|
model_variant: 70b-chat-hf
|
|
@@ -249,7 +241,6 @@ models:
|
|
|
249
241
|
vllm_args:
|
|
250
242
|
--tensor-parallel-size: 4
|
|
251
243
|
--max-model-len: 4096
|
|
252
|
-
--max-num-seqs: 256
|
|
253
244
|
llava-1.5-7b-hf:
|
|
254
245
|
model_family: llava-1.5
|
|
255
246
|
model_variant: 7b-hf
|
|
@@ -261,7 +252,6 @@ models:
|
|
|
261
252
|
resource_type: l40s
|
|
262
253
|
vllm_args:
|
|
263
254
|
--max-model-len: 4096
|
|
264
|
-
--max-num-seqs: 256
|
|
265
255
|
llava-1.5-13b-hf:
|
|
266
256
|
model_family: llava-1.5
|
|
267
257
|
model_variant: 13b-hf
|
|
@@ -273,7 +263,6 @@ models:
|
|
|
273
263
|
resource_type: l40s
|
|
274
264
|
vllm_args:
|
|
275
265
|
--max-model-len: 4096
|
|
276
|
-
--max-num-seqs: 256
|
|
277
266
|
llava-v1.6-mistral-7b-hf:
|
|
278
267
|
model_family: llava-v1.6
|
|
279
268
|
model_variant: mistral-7b-hf
|
|
@@ -285,7 +274,6 @@ models:
|
|
|
285
274
|
resource_type: l40s
|
|
286
275
|
vllm_args:
|
|
287
276
|
--max-model-len: 32768
|
|
288
|
-
--max-num-seqs: 256
|
|
289
277
|
llava-v1.6-34b-hf:
|
|
290
278
|
model_family: llava-v1.6
|
|
291
279
|
model_variant: 34b-hf
|
|
@@ -298,7 +286,6 @@ models:
|
|
|
298
286
|
vllm_args:
|
|
299
287
|
--tensor-parallel-size: 2
|
|
300
288
|
--max-model-len: 4096
|
|
301
|
-
--max-num-seqs: 256
|
|
302
289
|
Meta-Llama-3-8B:
|
|
303
290
|
model_family: Meta-Llama-3
|
|
304
291
|
model_variant: 8B
|
|
@@ -310,7 +297,6 @@ models:
|
|
|
310
297
|
resource_type: l40s
|
|
311
298
|
vllm_args:
|
|
312
299
|
--max-model-len: 8192
|
|
313
|
-
--max-num-seqs: 256
|
|
314
300
|
Meta-Llama-3-8B-Instruct:
|
|
315
301
|
model_family: Meta-Llama-3
|
|
316
302
|
model_variant: 8B-Instruct
|
|
@@ -322,7 +308,6 @@ models:
|
|
|
322
308
|
resource_type: l40s
|
|
323
309
|
vllm_args:
|
|
324
310
|
--max-model-len: 8192
|
|
325
|
-
--max-num-seqs: 256
|
|
326
311
|
Meta-Llama-3-70B:
|
|
327
312
|
model_family: Meta-Llama-3
|
|
328
313
|
model_variant: 70B
|
|
@@ -335,7 +320,6 @@ models:
|
|
|
335
320
|
vllm_args:
|
|
336
321
|
--tensor-parallel-size: 4
|
|
337
322
|
--max-model-len: 8192
|
|
338
|
-
--max-num-seqs: 256
|
|
339
323
|
Meta-Llama-3-70B-Instruct:
|
|
340
324
|
model_family: Meta-Llama-3
|
|
341
325
|
model_variant: 70B-Instruct
|
|
@@ -348,7 +332,6 @@ models:
|
|
|
348
332
|
vllm_args:
|
|
349
333
|
--tensor-parallel-size: 4
|
|
350
334
|
--max-model-len: 8192
|
|
351
|
-
--max-num-seqs: 256
|
|
352
335
|
Meta-Llama-3.1-8B:
|
|
353
336
|
model_family: Meta-Llama-3.1
|
|
354
337
|
model_variant: 8B
|
|
@@ -360,7 +343,6 @@ models:
|
|
|
360
343
|
resource_type: l40s
|
|
361
344
|
vllm_args:
|
|
362
345
|
--max-model-len: 131072
|
|
363
|
-
--max-num-seqs: 256
|
|
364
346
|
Meta-Llama-3.1-8B-Instruct:
|
|
365
347
|
model_family: Meta-Llama-3.1
|
|
366
348
|
model_variant: 8B-Instruct
|
|
@@ -372,7 +354,6 @@ models:
|
|
|
372
354
|
resource_type: l40s
|
|
373
355
|
vllm_args:
|
|
374
356
|
--max-model-len: 131072
|
|
375
|
-
--max-num-seqs: 256
|
|
376
357
|
Meta-Llama-3.1-70B:
|
|
377
358
|
model_family: Meta-Llama-3.1
|
|
378
359
|
model_variant: 70B
|
|
@@ -385,7 +366,6 @@ models:
|
|
|
385
366
|
vllm_args:
|
|
386
367
|
--tensor-parallel-size: 4
|
|
387
368
|
--max-model-len: 65536
|
|
388
|
-
--max-num-seqs: 256
|
|
389
369
|
Meta-Llama-3.1-70B-Instruct:
|
|
390
370
|
model_family: Meta-Llama-3.1
|
|
391
371
|
model_variant: 70B-Instruct
|
|
@@ -398,7 +378,6 @@ models:
|
|
|
398
378
|
vllm_args:
|
|
399
379
|
--tensor-parallel-size: 4
|
|
400
380
|
--max-model-len: 65536
|
|
401
|
-
--max-num-seqs: 256
|
|
402
381
|
Meta-Llama-3.1-405B-Instruct:
|
|
403
382
|
model_family: Meta-Llama-3.1
|
|
404
383
|
model_variant: 405B-Instruct
|
|
@@ -406,14 +385,12 @@ models:
|
|
|
406
385
|
gpus_per_node: 4
|
|
407
386
|
num_nodes: 8
|
|
408
387
|
vocab_size: 128256
|
|
409
|
-
|
|
410
|
-
time: 02:00:00
|
|
388
|
+
time: 08:00:00
|
|
411
389
|
resource_type: l40s
|
|
412
390
|
vllm_args:
|
|
413
391
|
--pipeline-parallel-size: 8
|
|
414
392
|
--tensor-parallel-size: 4
|
|
415
393
|
--max-model-len: 16384
|
|
416
|
-
--max-num-seqs: 256
|
|
417
394
|
Mistral-7B-Instruct-v0.1:
|
|
418
395
|
model_family: Mistral
|
|
419
396
|
model_variant: 7B-Instruct-v0.1
|
|
@@ -425,7 +402,6 @@ models:
|
|
|
425
402
|
resource_type: l40s
|
|
426
403
|
vllm_args:
|
|
427
404
|
--max-model-len: 32768
|
|
428
|
-
--max-num-seqs: 256
|
|
429
405
|
Mistral-7B-Instruct-v0.2:
|
|
430
406
|
model_family: Mistral
|
|
431
407
|
model_variant: 7B-Instruct-v0.2
|
|
@@ -437,7 +413,6 @@ models:
|
|
|
437
413
|
resource_type: l40s
|
|
438
414
|
vllm_args:
|
|
439
415
|
--max-model-len: 32768
|
|
440
|
-
--max-num-seqs: 256
|
|
441
416
|
Mistral-7B-v0.3:
|
|
442
417
|
model_family: Mistral
|
|
443
418
|
model_variant: 7B-v0.3
|
|
@@ -449,7 +424,6 @@ models:
|
|
|
449
424
|
resource_type: l40s
|
|
450
425
|
vllm_args:
|
|
451
426
|
--max-model-len: 32768
|
|
452
|
-
--max-num-seqs: 256
|
|
453
427
|
Mistral-7B-Instruct-v0.3:
|
|
454
428
|
model_family: Mistral
|
|
455
429
|
model_variant: 7B-Instruct-v0.3
|
|
@@ -461,7 +435,6 @@ models:
|
|
|
461
435
|
resource_type: l40s
|
|
462
436
|
vllm_args:
|
|
463
437
|
--max-model-len: 32768
|
|
464
|
-
--max-num-seqs: 256
|
|
465
438
|
Mistral-Large-Instruct-2407:
|
|
466
439
|
model_family: Mistral
|
|
467
440
|
model_variant: Large-Instruct-2407
|
|
@@ -475,7 +448,6 @@ models:
|
|
|
475
448
|
--pipeline-parallel-size: 2
|
|
476
449
|
--tensor-parallel-size: 4
|
|
477
450
|
--max-model-len: 32768
|
|
478
|
-
--max-num-seqs: 256
|
|
479
451
|
Mistral-Large-Instruct-2411:
|
|
480
452
|
model_family: Mistral
|
|
481
453
|
model_variant: Large-Instruct-2411
|
|
@@ -489,7 +461,6 @@ models:
|
|
|
489
461
|
--pipeline-parallel-size: 2
|
|
490
462
|
--tensor-parallel-size: 4
|
|
491
463
|
--max-model-len: 32768
|
|
492
|
-
--max-num-seqs: 256
|
|
493
464
|
Mixtral-8x7B-Instruct-v0.1:
|
|
494
465
|
model_family: Mixtral
|
|
495
466
|
model_variant: 8x7B-Instruct-v0.1
|
|
@@ -502,7 +473,6 @@ models:
|
|
|
502
473
|
vllm_args:
|
|
503
474
|
--tensor-parallel-size: 4
|
|
504
475
|
--max-model-len: 32768
|
|
505
|
-
--max-num-seqs: 256
|
|
506
476
|
Mixtral-8x22B-v0.1:
|
|
507
477
|
model_family: Mixtral
|
|
508
478
|
model_variant: 8x22B-v0.1
|
|
@@ -516,7 +486,6 @@ models:
|
|
|
516
486
|
--pipeline-parallel-size: 2
|
|
517
487
|
--tensor-parallel-size: 4
|
|
518
488
|
--max-model-len: 65536
|
|
519
|
-
--max-num-seqs: 256
|
|
520
489
|
Mixtral-8x22B-Instruct-v0.1:
|
|
521
490
|
model_family: Mixtral
|
|
522
491
|
model_variant: 8x22B-Instruct-v0.1
|
|
@@ -530,7 +499,6 @@ models:
|
|
|
530
499
|
--pipeline-parallel-size: 2
|
|
531
500
|
--tensor-parallel-size: 4
|
|
532
501
|
--max-model-len: 65536
|
|
533
|
-
--max-num-seqs: 256
|
|
534
502
|
Phi-3-medium-128k-instruct:
|
|
535
503
|
model_family: Phi-3
|
|
536
504
|
model_variant: medium-128k-instruct
|
|
@@ -543,7 +511,6 @@ models:
|
|
|
543
511
|
vllm_args:
|
|
544
512
|
--tensor-parallel-size: 2
|
|
545
513
|
--max-model-len: 131072
|
|
546
|
-
--max-num-seqs: 256
|
|
547
514
|
Phi-3-vision-128k-instruct:
|
|
548
515
|
model_family: Phi-3-vision
|
|
549
516
|
model_variant: 128k-instruct
|
|
@@ -556,20 +523,6 @@ models:
|
|
|
556
523
|
vllm_args:
|
|
557
524
|
--tensor-parallel-size: 2
|
|
558
525
|
--max-model-len: 65536
|
|
559
|
-
--max-num-seqs: 256
|
|
560
|
-
Llama3-OpenBioLLM-70B:
|
|
561
|
-
model_family: Llama3-OpenBioLLM
|
|
562
|
-
model_variant: 70B
|
|
563
|
-
model_type: LLM
|
|
564
|
-
gpus_per_node: 4
|
|
565
|
-
num_nodes: 1
|
|
566
|
-
vocab_size: 128256
|
|
567
|
-
time: 08:00:00
|
|
568
|
-
resource_type: l40s
|
|
569
|
-
vllm_args:
|
|
570
|
-
--tensor-parallel-size: 4
|
|
571
|
-
--max-model-len: 8192
|
|
572
|
-
--max-num-seqs: 256
|
|
573
526
|
Llama-3.1-Nemotron-70B-Instruct-HF:
|
|
574
527
|
model_family: Llama-3.1-Nemotron
|
|
575
528
|
model_variant: 70B-Instruct-HF
|
|
@@ -582,7 +535,6 @@ models:
|
|
|
582
535
|
vllm_args:
|
|
583
536
|
--tensor-parallel-size: 4
|
|
584
537
|
--max-model-len: 65536
|
|
585
|
-
--max-num-seqs: 256
|
|
586
538
|
Llama-3.2-1B:
|
|
587
539
|
model_family: Llama-3.2
|
|
588
540
|
model_variant: 1B
|
|
@@ -594,7 +546,6 @@ models:
|
|
|
594
546
|
resource_type: l40s
|
|
595
547
|
vllm_args:
|
|
596
548
|
--max-model-len: 131072
|
|
597
|
-
--max-num-seqs: 256
|
|
598
549
|
Llama-3.2-1B-Instruct:
|
|
599
550
|
model_family: Llama-3.2
|
|
600
551
|
model_variant: 1B-Instruct
|
|
@@ -606,7 +557,6 @@ models:
|
|
|
606
557
|
resource_type: l40s
|
|
607
558
|
vllm_args:
|
|
608
559
|
--max-model-len: 131072
|
|
609
|
-
--max-num-seqs: 256
|
|
610
560
|
Llama-3.2-3B:
|
|
611
561
|
model_family: Llama-3.2
|
|
612
562
|
model_variant: 3B
|
|
@@ -618,7 +568,6 @@ models:
|
|
|
618
568
|
resource_type: l40s
|
|
619
569
|
vllm_args:
|
|
620
570
|
--max-model-len: 131072
|
|
621
|
-
--max-num-seqs: 256
|
|
622
571
|
Llama-3.2-3B-Instruct:
|
|
623
572
|
model_family: Llama-3.2
|
|
624
573
|
model_variant: 3B-Instruct
|
|
@@ -630,7 +579,6 @@ models:
|
|
|
630
579
|
resource_type: l40s
|
|
631
580
|
vllm_args:
|
|
632
581
|
--max-model-len: 131072
|
|
633
|
-
--max-num-seqs: 256
|
|
634
582
|
Llama-3.2-11B-Vision:
|
|
635
583
|
model_family: Llama-3.2
|
|
636
584
|
model_variant: 11B-Vision
|
|
@@ -698,7 +646,6 @@ models:
|
|
|
698
646
|
resource_type: l40s
|
|
699
647
|
vllm_args:
|
|
700
648
|
--max-model-len: 32768
|
|
701
|
-
--max-num-seqs: 256
|
|
702
649
|
Qwen2.5-1.5B-Instruct:
|
|
703
650
|
model_family: Qwen2.5
|
|
704
651
|
model_variant: 1.5B-Instruct
|
|
@@ -710,7 +657,6 @@ models:
|
|
|
710
657
|
resource_type: l40s
|
|
711
658
|
vllm_args:
|
|
712
659
|
--max-model-len: 32768
|
|
713
|
-
--max-num-seqs: 256
|
|
714
660
|
Qwen2.5-3B-Instruct:
|
|
715
661
|
model_family: Qwen2.5
|
|
716
662
|
model_variant: 3B-Instruct
|
|
@@ -722,7 +668,6 @@ models:
|
|
|
722
668
|
resource_type: l40s
|
|
723
669
|
vllm_args:
|
|
724
670
|
--max-model-len: 32768
|
|
725
|
-
--max-num-seqs: 256
|
|
726
671
|
Qwen2.5-7B-Instruct:
|
|
727
672
|
model_family: Qwen2.5
|
|
728
673
|
model_variant: 7B-Instruct
|
|
@@ -734,7 +679,6 @@ models:
|
|
|
734
679
|
resource_type: l40s
|
|
735
680
|
vllm_args:
|
|
736
681
|
--max-model-len: 32768
|
|
737
|
-
--max-num-seqs: 256
|
|
738
682
|
Qwen2.5-14B-Instruct:
|
|
739
683
|
model_family: Qwen2.5
|
|
740
684
|
model_variant: 14B-Instruct
|
|
@@ -746,7 +690,6 @@ models:
|
|
|
746
690
|
resource_type: l40s
|
|
747
691
|
vllm_args:
|
|
748
692
|
--max-model-len: 32768
|
|
749
|
-
--max-num-seqs: 256
|
|
750
693
|
Qwen2.5-32B-Instruct:
|
|
751
694
|
model_family: Qwen2.5
|
|
752
695
|
model_variant: 32B-Instruct
|
|
@@ -759,7 +702,6 @@ models:
|
|
|
759
702
|
vllm_args:
|
|
760
703
|
--tensor-parallel-size: 2
|
|
761
704
|
--max-model-len: 32768
|
|
762
|
-
--max-num-seqs: 256
|
|
763
705
|
Qwen2.5-72B-Instruct:
|
|
764
706
|
model_family: Qwen2.5
|
|
765
707
|
model_variant: 72B-Instruct
|
|
@@ -772,7 +714,6 @@ models:
|
|
|
772
714
|
vllm_args:
|
|
773
715
|
--tensor-parallel-size: 4
|
|
774
716
|
--max-model-len: 16384
|
|
775
|
-
--max-num-seqs: 256
|
|
776
717
|
Qwen2.5-Math-1.5B-Instruct:
|
|
777
718
|
model_family: Qwen2.5
|
|
778
719
|
model_variant: Math-1.5B-Instruct
|
|
@@ -784,7 +725,6 @@ models:
|
|
|
784
725
|
resource_type: l40s
|
|
785
726
|
vllm_args:
|
|
786
727
|
--max-model-len: 4096
|
|
787
|
-
--max-num-seqs: 256
|
|
788
728
|
Qwen2.5-Math-7B-Instruct:
|
|
789
729
|
model_family: Qwen2.5
|
|
790
730
|
model_variant: Math-7B-Instruct
|
|
@@ -796,7 +736,6 @@ models:
|
|
|
796
736
|
resource_type: l40s
|
|
797
737
|
vllm_args:
|
|
798
738
|
--max-model-len: 4096
|
|
799
|
-
--max-num-seqs: 256
|
|
800
739
|
Qwen2.5-Math-72B-Instruct:
|
|
801
740
|
model_family: Qwen2.5
|
|
802
741
|
model_variant: Math-72B-Instruct
|
|
@@ -809,7 +748,6 @@ models:
|
|
|
809
748
|
vllm_args:
|
|
810
749
|
--tensor-parallel-size: 4
|
|
811
750
|
--max-model-len: 4096
|
|
812
|
-
--max-num-seqs: 256
|
|
813
751
|
Qwen2.5-Coder-7B-Instruct:
|
|
814
752
|
model_family: Qwen2.5
|
|
815
753
|
model_variant: Coder-7B-Instruct
|
|
@@ -821,7 +759,6 @@ models:
|
|
|
821
759
|
resource_type: l40s
|
|
822
760
|
vllm_args:
|
|
823
761
|
--max-model-len: 32768
|
|
824
|
-
--max-num-seqs: 256
|
|
825
762
|
Qwen2.5-Math-RM-72B:
|
|
826
763
|
model_family: Qwen2.5
|
|
827
764
|
model_variant: Math-RM-72B
|
|
@@ -834,7 +771,6 @@ models:
|
|
|
834
771
|
vllm_args:
|
|
835
772
|
--tensor-parallel-size: 4
|
|
836
773
|
--max-model-len: 4096
|
|
837
|
-
--max-num-seqs: 256
|
|
838
774
|
Qwen2.5-Math-PRM-7B:
|
|
839
775
|
model_family: Qwen2.5
|
|
840
776
|
model_variant: Math-PRM-7B
|
|
@@ -846,7 +782,6 @@ models:
|
|
|
846
782
|
resource_type: l40s
|
|
847
783
|
vllm_args:
|
|
848
784
|
--max-model-len: 4096
|
|
849
|
-
--max-num-seqs: 256
|
|
850
785
|
QwQ-32B:
|
|
851
786
|
model_family: QwQ
|
|
852
787
|
model_variant: 32B
|
|
@@ -859,7 +794,6 @@ models:
|
|
|
859
794
|
vllm_args:
|
|
860
795
|
--tensor-parallel-size: 2
|
|
861
796
|
--max-model-len: 32768
|
|
862
|
-
--max-num-seqs: 256
|
|
863
797
|
Pixtral-12B-2409:
|
|
864
798
|
model_family: Pixtral
|
|
865
799
|
model_variant: 12B-2409
|
|
@@ -871,7 +805,6 @@ models:
|
|
|
871
805
|
resource_type: l40s
|
|
872
806
|
vllm_args:
|
|
873
807
|
--max-model-len: 8192
|
|
874
|
-
--max-num-seqs: 256
|
|
875
808
|
e5-mistral-7b-instruct:
|
|
876
809
|
model_family: e5
|
|
877
810
|
model_variant: mistral-7b-instruct
|
|
@@ -883,7 +816,6 @@ models:
|
|
|
883
816
|
resource_type: l40s
|
|
884
817
|
vllm_args:
|
|
885
818
|
--max-model-len: 4096
|
|
886
|
-
--max-num-seqs: 256
|
|
887
819
|
bge-base-en-v1.5:
|
|
888
820
|
model_family: bge
|
|
889
821
|
model_variant: base-en-v1.5
|
|
@@ -895,7 +827,6 @@ models:
|
|
|
895
827
|
resource_type: l40s
|
|
896
828
|
vllm_args:
|
|
897
829
|
--max-model-len: 512
|
|
898
|
-
--max-num-seqs: 256
|
|
899
830
|
all-MiniLM-L6-v2:
|
|
900
831
|
model_family: all-MiniLM
|
|
901
832
|
model_variant: L6-v2
|
|
@@ -907,7 +838,6 @@ models:
|
|
|
907
838
|
resource_type: l40s
|
|
908
839
|
vllm_args:
|
|
909
840
|
--max-model-len: 512
|
|
910
|
-
--max-num-seqs: 256
|
|
911
841
|
Llama-3.3-70B-Instruct:
|
|
912
842
|
model_family: Llama-3.3
|
|
913
843
|
model_variant: 70B-Instruct
|
|
@@ -920,7 +850,6 @@ models:
|
|
|
920
850
|
vllm_args:
|
|
921
851
|
--tensor-parallel-size: 4
|
|
922
852
|
--max-model-len: 65536
|
|
923
|
-
--max-num-seqs: 256
|
|
924
853
|
InternVL2_5-26B:
|
|
925
854
|
model_family: InternVL2_5
|
|
926
855
|
model_variant: 26B
|
|
@@ -933,7 +862,6 @@ models:
|
|
|
933
862
|
vllm_args:
|
|
934
863
|
--tensor-parallel-size: 2
|
|
935
864
|
--max-model-len: 32768
|
|
936
|
-
--max-num-seqs: 256
|
|
937
865
|
InternVL2_5-38B:
|
|
938
866
|
model_family: InternVL2_5
|
|
939
867
|
model_variant: 38B
|
|
@@ -946,7 +874,6 @@ models:
|
|
|
946
874
|
vllm_args:
|
|
947
875
|
--tensor-parallel-size: 4
|
|
948
876
|
--max-model-len: 32768
|
|
949
|
-
--max-num-seqs: 256
|
|
950
877
|
Aya-Expanse-32B:
|
|
951
878
|
model_family: Aya-Expanse
|
|
952
879
|
model_variant: 32B
|
|
@@ -959,7 +886,6 @@ models:
|
|
|
959
886
|
vllm_args:
|
|
960
887
|
--tensor-parallel-size: 2
|
|
961
888
|
--max-model-len: 8192
|
|
962
|
-
--max-num-seqs: 256
|
|
963
889
|
DeepSeek-R1-Distill-Llama-70B:
|
|
964
890
|
model_family: DeepSeek-R1
|
|
965
891
|
model_variant: Distill-Llama-70B
|
|
@@ -972,7 +898,6 @@ models:
|
|
|
972
898
|
vllm_args:
|
|
973
899
|
--tensor-parallel-size: 4
|
|
974
900
|
--max-model-len: 65536
|
|
975
|
-
--max-num-seqs: 256
|
|
976
901
|
DeepSeek-R1-Distill-Llama-8B:
|
|
977
902
|
model_family: DeepSeek-R1
|
|
978
903
|
model_variant: Distill-Llama-8B
|
|
@@ -984,7 +909,6 @@ models:
|
|
|
984
909
|
resource_type: l40s
|
|
985
910
|
vllm_args:
|
|
986
911
|
--max-model-len: 131072
|
|
987
|
-
--max-num-seqs: 256
|
|
988
912
|
DeepSeek-R1-Distill-Qwen-32B:
|
|
989
913
|
model_family: DeepSeek-R1
|
|
990
914
|
model_variant: Distill-Qwen-32B
|
|
@@ -997,7 +921,6 @@ models:
|
|
|
997
921
|
vllm_args:
|
|
998
922
|
--tensor-parallel-size: 2
|
|
999
923
|
--max-model-len: 65536
|
|
1000
|
-
--max-num-seqs: 256
|
|
1001
924
|
DeepSeek-R1-Distill-Qwen-14B:
|
|
1002
925
|
model_family: DeepSeek-R1
|
|
1003
926
|
model_variant: Distill-Qwen-14B
|
|
@@ -1009,7 +932,6 @@ models:
|
|
|
1009
932
|
resource_type: l40s
|
|
1010
933
|
vllm_args:
|
|
1011
934
|
--max-model-len: 65536
|
|
1012
|
-
--max-num-seqs: 256
|
|
1013
935
|
DeepSeek-R1-Distill-Qwen-7B:
|
|
1014
936
|
model_family: DeepSeek-R1
|
|
1015
937
|
model_variant: Distill-Qwen-7B
|
|
@@ -1021,7 +943,6 @@ models:
|
|
|
1021
943
|
resource_type: l40s
|
|
1022
944
|
vllm_args:
|
|
1023
945
|
--max-model-len: 131072
|
|
1024
|
-
--max-num-seqs: 256
|
|
1025
946
|
DeepSeek-R1-Distill-Qwen-1.5B:
|
|
1026
947
|
model_family: DeepSeek-R1
|
|
1027
948
|
model_variant: Distill-Qwen-1.5B
|
|
@@ -1033,7 +954,6 @@ models:
|
|
|
1033
954
|
resource_type: l40s
|
|
1034
955
|
vllm_args:
|
|
1035
956
|
--max-model-len: 131072
|
|
1036
|
-
--max-num-seqs: 256
|
|
1037
957
|
Phi-3.5-vision-instruct:
|
|
1038
958
|
model_family: Phi-3.5-vision
|
|
1039
959
|
model_variant: instruct
|
|
@@ -1046,7 +966,6 @@ models:
|
|
|
1046
966
|
vllm_args:
|
|
1047
967
|
--tensor-parallel-size: 2
|
|
1048
968
|
--max-model-len: 65536
|
|
1049
|
-
--max-num-seqs: 256
|
|
1050
969
|
InternVL2_5-8B:
|
|
1051
970
|
model_family: InternVL2_5
|
|
1052
971
|
model_variant: 8B
|
|
@@ -1058,7 +977,6 @@ models:
|
|
|
1058
977
|
resource_type: l40s
|
|
1059
978
|
vllm_args:
|
|
1060
979
|
--max-model-len: 32768
|
|
1061
|
-
--max-num-seqs: 256
|
|
1062
980
|
glm-4v-9b:
|
|
1063
981
|
model_family: glm-4v
|
|
1064
982
|
model_variant: 9b
|
|
@@ -1070,7 +988,6 @@ models:
|
|
|
1070
988
|
resource_type: l40s
|
|
1071
989
|
vllm_args:
|
|
1072
990
|
--max-model-len: 8192
|
|
1073
|
-
--max-num-seqs: 256
|
|
1074
991
|
Molmo-7B-D-0924:
|
|
1075
992
|
model_family: Molmo
|
|
1076
993
|
model_variant: 7B-D-0924
|
|
@@ -1082,7 +999,6 @@ models:
|
|
|
1082
999
|
resource_type: l40s
|
|
1083
1000
|
vllm_args:
|
|
1084
1001
|
--max-model-len: 4096
|
|
1085
|
-
--max-num-seqs: 256
|
|
1086
1002
|
deepseek-vl2:
|
|
1087
1003
|
model_family: deepseek-vl2
|
|
1088
1004
|
model_type: VLM
|
|
@@ -1094,7 +1010,6 @@ models:
|
|
|
1094
1010
|
vllm_args:
|
|
1095
1011
|
--tensor-parallel-size: 2
|
|
1096
1012
|
--max-model-len: 4096
|
|
1097
|
-
--max-num-seqs: 256
|
|
1098
1013
|
deepseek-vl2-small:
|
|
1099
1014
|
model_family: deepseek-vl2
|
|
1100
1015
|
model_variant: small
|
|
@@ -1106,7 +1021,17 @@ models:
|
|
|
1106
1021
|
resource_type: l40s
|
|
1107
1022
|
vllm_args:
|
|
1108
1023
|
--max-model-len: 4096
|
|
1109
|
-
|
|
1024
|
+
Qwen3-8B:
|
|
1025
|
+
model_family: Qwen3
|
|
1026
|
+
model_variant: 8B
|
|
1027
|
+
model_type: LLM
|
|
1028
|
+
gpus_per_node: 1
|
|
1029
|
+
num_nodes: 1
|
|
1030
|
+
vocab_size: 151936
|
|
1031
|
+
time: 08:00:00
|
|
1032
|
+
resource_type: l40s
|
|
1033
|
+
vllm_args:
|
|
1034
|
+
--max-model-len: 40960
|
|
1110
1035
|
Qwen3-14B:
|
|
1111
1036
|
model_family: Qwen3
|
|
1112
1037
|
model_variant: 14B
|
|
@@ -1118,4 +1043,28 @@ models:
|
|
|
1118
1043
|
resource_type: l40s
|
|
1119
1044
|
vllm_args:
|
|
1120
1045
|
--max-model-len: 40960
|
|
1121
|
-
|
|
1046
|
+
Qwen3-32B:
|
|
1047
|
+
model_family: Qwen3
|
|
1048
|
+
model_variant: 32B
|
|
1049
|
+
model_type: LLM
|
|
1050
|
+
gpus_per_node: 2
|
|
1051
|
+
num_nodes: 1
|
|
1052
|
+
vocab_size: 151936
|
|
1053
|
+
time: 08:00:00
|
|
1054
|
+
resource_type: l40s
|
|
1055
|
+
vllm_args:
|
|
1056
|
+
--tensor-parallel-size: 2
|
|
1057
|
+
--max-model-len: 40960
|
|
1058
|
+
gpt-oss-120b:
|
|
1059
|
+
model_family: gpt-oss
|
|
1060
|
+
model_variant: 120b
|
|
1061
|
+
model_type: LLM
|
|
1062
|
+
gpus_per_node: 4
|
|
1063
|
+
num_nodes: 2
|
|
1064
|
+
vocab_size: 201088
|
|
1065
|
+
time: 08:00:00
|
|
1066
|
+
resource_type: l40s
|
|
1067
|
+
vllm_args:
|
|
1068
|
+
--tensor-parallel-size: 4
|
|
1069
|
+
--pipeline-parallel-size: 2
|
|
1070
|
+
--max-model-len: 40960
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
5
|
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -13,9 +13,10 @@ Requires-Dist: requests>=2.31.0
|
|
|
13
13
|
Requires-Dist: rich>=13.7.0
|
|
14
14
|
Provides-Extra: dev
|
|
15
15
|
Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: flashinfer-python>=0.4.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: ray[default]>=2.50.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: sglang>=0.5.0; extra == 'dev'
|
|
17
19
|
Requires-Dist: torch>=2.7.0; extra == 'dev'
|
|
18
|
-
Requires-Dist: vllm-nccl-cu12<2.19,>=2.18; extra == 'dev'
|
|
19
20
|
Requires-Dist: vllm>=0.10.0; extra == 'dev'
|
|
20
21
|
Requires-Dist: xgrammar>=0.1.11; extra == 'dev'
|
|
21
22
|
Description-Content-Type: text/markdown
|
|
@@ -2,26 +2,26 @@ vec_inf/README.md,sha256=WyvjbSs5Eh5fp8u66bgOaO3FQKP2U7m_HbLgqTHs_ng,1322
|
|
|
2
2
|
vec_inf/__init__.py,sha256=bHwSIz9lebYuxIemni-lP0h3gwJHVbJnwExQKGJWw_Q,23
|
|
3
3
|
vec_inf/find_port.sh,sha256=bGQ6LYSFVSsfDIGatrSg5YvddbZfaPL0R-Bjo4KYD6I,1088
|
|
4
4
|
vec_inf/cli/__init__.py,sha256=5XIvGQCOnaGl73XMkwetjC-Ul3xuXGrWDXdYJ3aUzvU,27
|
|
5
|
-
vec_inf/cli/_cli.py,sha256=
|
|
6
|
-
vec_inf/cli/_helper.py,sha256=
|
|
5
|
+
vec_inf/cli/_cli.py,sha256=0YfxtPT_Nq5gvIol9eWmw5yW9AT1ghf_E49R9pD7UG4,16213
|
|
6
|
+
vec_inf/cli/_helper.py,sha256=0_onclvxxpDTp33ODYc19RbZ2aIhXuMTC9v19q8ZhIo,17473
|
|
7
7
|
vec_inf/cli/_utils.py,sha256=23vSbmvNOWY1-W1aOAwYqNDkDDmx-5UVlCiXAtxUZ8A,1057
|
|
8
8
|
vec_inf/cli/_vars.py,sha256=V6DrJs_BuUa4yNcbBSSnMwpcyXwEBsizy3D0ubIg2fA,777
|
|
9
9
|
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
10
|
vec_inf/client/_client_vars.py,sha256=1D-bX9dS0-pFImLvgWt2hUnwJiz-VaxuLb2HIfPML8I,2408
|
|
11
11
|
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
-
vec_inf/client/_helper.py,sha256=
|
|
13
|
-
vec_inf/client/_slurm_script_generator.py,sha256=
|
|
14
|
-
vec_inf/client/_slurm_templates.py,sha256=
|
|
12
|
+
vec_inf/client/_helper.py,sha256=hb6m5TLwcGE0grCu5-UCUkWbByV-G5h8gA87Yzct6rk,37170
|
|
13
|
+
vec_inf/client/_slurm_script_generator.py,sha256=L6tqn71kNJ2I0xYipFh_ZxIAG8znpXhTpUxTU8LJIa4,13988
|
|
14
|
+
vec_inf/client/_slurm_templates.py,sha256=GxVNClkgggoJN2pT1AjK7CQCAErfKRMIs97Vlhxs9u8,9349
|
|
15
15
|
vec_inf/client/_slurm_vars.py,sha256=sgP__XhpE1K7pvOzVFmotUXmINYPcOuFP-zGaePT5Iw,2910
|
|
16
|
-
vec_inf/client/_utils.py,sha256=
|
|
17
|
-
vec_inf/client/api.py,sha256=
|
|
16
|
+
vec_inf/client/_utils.py,sha256=_ZBmic0XvJ4vpdIuXDi6KO5iL2rbhIpFQT01EWGItN4,14296
|
|
17
|
+
vec_inf/client/api.py,sha256=lkVWCme-HmMJMqp8JbtjkBVL_MSPsCC_IBL9FBw3Um8,12011
|
|
18
18
|
vec_inf/client/config.py,sha256=VU4h2iqL0rxYAqGw2HBF_l6QvvSDJy5M79IgX5G2PW4,5830
|
|
19
|
-
vec_inf/client/models.py,sha256=
|
|
19
|
+
vec_inf/client/models.py,sha256=jGNPOj1uPPBV7xdGy3HFv2ZwpJOGCsU8qm7pE2Rnnes,7498
|
|
20
20
|
vec_inf/config/README.md,sha256=TvZOqZyTUaAFr71hC7GVgg6QUw80AXREyq8wS4D-F30,528
|
|
21
21
|
vec_inf/config/environment.yaml,sha256=oEDp85hUERJO9NNn4wYhcgunnmkln50GNHDzG_3isMw,678
|
|
22
|
-
vec_inf/config/models.yaml,sha256=
|
|
23
|
-
vec_inf-0.7.
|
|
24
|
-
vec_inf-0.7.
|
|
25
|
-
vec_inf-0.7.
|
|
26
|
-
vec_inf-0.7.
|
|
27
|
-
vec_inf-0.7.
|
|
22
|
+
vec_inf/config/models.yaml,sha256=PSDR29zI8xld32Vm6dhgCIRHPEkBhwQx7-d_uFlEAM8,24764
|
|
23
|
+
vec_inf-0.7.2.dist-info/METADATA,sha256=ljs9hao8q4igLERrjGL5u1vZ_n7DMrr8XnBHzybPE2Y,10099
|
|
24
|
+
vec_inf-0.7.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
+
vec_inf-0.7.2.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
+
vec_inf-0.7.2.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
+
vec_inf-0.7.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|