vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +2 -1
- vec_inf/cli/_cli.py +43 -12
- vec_inf/cli/_helper.py +79 -12
- vec_inf/cli/_vars.py +37 -22
- vec_inf/client/_client_vars.py +31 -1
- vec_inf/client/_helper.py +154 -49
- vec_inf/client/_slurm_script_generator.py +109 -43
- vec_inf/client/_slurm_templates.py +110 -48
- vec_inf/client/_slurm_vars.py +13 -4
- vec_inf/client/_utils.py +13 -7
- vec_inf/client/api.py +47 -0
- vec_inf/client/config.py +17 -7
- vec_inf/client/models.py +25 -19
- vec_inf/config/README.md +1 -1
- vec_inf/config/environment.yaml +9 -2
- vec_inf/config/models.yaml +184 -368
- vec_inf/find_port.sh +10 -1
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/METADATA +17 -16
- vec_inf-0.8.0.dist-info/RECORD +27 -0
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/WHEEL +1 -1
- vec_inf-0.7.2.dist-info/RECORD +0 -27
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -9,7 +9,7 @@ from typing import TypedDict
|
|
|
9
9
|
from vec_inf.client._slurm_vars import (
|
|
10
10
|
CONTAINER_LOAD_CMD,
|
|
11
11
|
CONTAINER_MODULE_NAME,
|
|
12
|
-
|
|
12
|
+
PYTHON_VERSION,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
@@ -38,12 +38,33 @@ class ServerSetupConfig(TypedDict):
|
|
|
38
38
|
----------
|
|
39
39
|
single_node : list[str]
|
|
40
40
|
Setup commands for single-node deployments
|
|
41
|
-
|
|
42
|
-
Setup commands for multi-node deployments
|
|
41
|
+
multinode_vllm : list[str]
|
|
42
|
+
Setup commands for multi-node vLLM deployments
|
|
43
|
+
multinode_sglang : list[str]
|
|
44
|
+
Setup commands for multi-node SGLang deployments
|
|
43
45
|
"""
|
|
44
46
|
|
|
45
47
|
single_node: list[str]
|
|
46
|
-
|
|
48
|
+
multinode_vllm: list[str]
|
|
49
|
+
multinode_sglang: list[str]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class LaunchCmdConfig(TypedDict):
|
|
53
|
+
"""TypedDict for launch command configuration.
|
|
54
|
+
|
|
55
|
+
Parameters
|
|
56
|
+
----------
|
|
57
|
+
vllm : list[str]
|
|
58
|
+
Launch commands for vLLM inference server
|
|
59
|
+
sglang : list[str]
|
|
60
|
+
Launch commands for SGLang inference server
|
|
61
|
+
sglang_multinode : list[str]
|
|
62
|
+
Launch commands for multi-node SGLang inference server
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
vllm: list[str]
|
|
66
|
+
sglang: list[str]
|
|
67
|
+
sglang_multinode: list[str]
|
|
47
68
|
|
|
48
69
|
|
|
49
70
|
class SlurmScriptTemplate(TypedDict):
|
|
@@ -57,56 +78,54 @@ class SlurmScriptTemplate(TypedDict):
|
|
|
57
78
|
Commands for container setup
|
|
58
79
|
imports : str
|
|
59
80
|
Import statements and source commands
|
|
81
|
+
bind_path : str
|
|
82
|
+
Bind path environment variable for the container
|
|
60
83
|
container_command : str
|
|
61
84
|
Template for container execution command
|
|
62
85
|
activate_venv : str
|
|
63
86
|
Template for virtual environment activation
|
|
64
87
|
server_setup : ServerSetupConfig
|
|
65
88
|
Server initialization commands for different deployment modes
|
|
66
|
-
|
|
67
|
-
Commands to find available ports for
|
|
89
|
+
find_server_port : list[str]
|
|
90
|
+
Commands to find available ports for inference server
|
|
68
91
|
write_to_json : list[str]
|
|
69
92
|
Commands to write server configuration to JSON
|
|
70
|
-
launch_cmd :
|
|
71
|
-
|
|
93
|
+
launch_cmd : LaunchCmdConfig
|
|
94
|
+
Inference server launch commands
|
|
72
95
|
"""
|
|
73
96
|
|
|
74
97
|
shebang: ShebangConfig
|
|
75
98
|
container_setup: list[str]
|
|
76
99
|
imports: str
|
|
77
|
-
|
|
100
|
+
bind_path: str
|
|
78
101
|
container_command: str
|
|
79
102
|
activate_venv: str
|
|
80
103
|
server_setup: ServerSetupConfig
|
|
81
|
-
|
|
104
|
+
find_server_port: list[str]
|
|
82
105
|
write_to_json: list[str]
|
|
83
|
-
launch_cmd:
|
|
106
|
+
launch_cmd: LaunchCmdConfig
|
|
84
107
|
|
|
85
108
|
|
|
86
109
|
SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
87
110
|
"shebang": {
|
|
88
111
|
"base": "#!/bin/bash",
|
|
89
112
|
"multinode": [
|
|
90
|
-
"#SBATCH --
|
|
91
|
-
"#SBATCH --tasks-per-node=1",
|
|
113
|
+
"#SBATCH --ntasks-per-node=1",
|
|
92
114
|
],
|
|
93
115
|
},
|
|
94
116
|
"container_setup": [
|
|
95
117
|
CONTAINER_LOAD_CMD,
|
|
96
|
-
f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
|
|
97
118
|
],
|
|
98
119
|
"imports": "source {src_dir}/find_port.sh",
|
|
99
|
-
"
|
|
100
|
-
|
|
101
|
-
],
|
|
102
|
-
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
|
|
120
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
|
|
121
|
+
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
|
|
103
122
|
"activate_venv": "source {venv}/bin/activate",
|
|
104
123
|
"server_setup": {
|
|
105
124
|
"single_node": [
|
|
106
125
|
"\n# Find available port",
|
|
107
|
-
"
|
|
126
|
+
"head_node=${SLURMD_NODENAME}",
|
|
108
127
|
],
|
|
109
|
-
"
|
|
128
|
+
"multinode_vllm": [
|
|
110
129
|
"\n# Get list of nodes",
|
|
111
130
|
'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
|
|
112
131
|
"nodes_array=($nodes)",
|
|
@@ -130,7 +149,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
130
149
|
" fi",
|
|
131
150
|
"fi",
|
|
132
151
|
"\n# Start Ray head node",
|
|
133
|
-
"head_node_port=$(find_available_port $
|
|
152
|
+
"head_node_port=$(find_available_port $head_node 8080 65535)",
|
|
134
153
|
"ray_head=$head_node_ip:$head_node_port",
|
|
135
154
|
'echo "Ray Head IP: $ray_head"',
|
|
136
155
|
'echo "Starting HEAD at $head_node"',
|
|
@@ -151,10 +170,19 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
151
170
|
" sleep 5",
|
|
152
171
|
"done",
|
|
153
172
|
],
|
|
173
|
+
"multinode_sglang": [
|
|
174
|
+
"\n# Set NCCL initialization address using the hostname of the head node",
|
|
175
|
+
'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
|
|
176
|
+
"nodes_array=($nodes)",
|
|
177
|
+
"head_node=${nodes_array[0]}",
|
|
178
|
+
"NCCL_PORT=$(find_available_port $head_node 8000 65535)",
|
|
179
|
+
'NCCL_INIT_ADDR="${head_node}:${NCCL_PORT}"',
|
|
180
|
+
'echo "[INFO] NCCL_INIT_ADDR: $NCCL_INIT_ADDR"',
|
|
181
|
+
],
|
|
154
182
|
},
|
|
155
|
-
"
|
|
156
|
-
"\
|
|
157
|
-
'server_address="http://${
|
|
183
|
+
"find_server_port": [
|
|
184
|
+
"\nserver_port_number=$(find_available_port $head_node 8080 65535)",
|
|
185
|
+
'server_address="http://${head_node}:${server_port_number}/v1"',
|
|
158
186
|
],
|
|
159
187
|
"write_to_json": [
|
|
160
188
|
'\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
|
|
@@ -163,12 +191,39 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
163
191
|
' "$json_path" > temp.json \\',
|
|
164
192
|
' && mv temp.json "$json_path"',
|
|
165
193
|
],
|
|
166
|
-
"launch_cmd":
|
|
167
|
-
"vllm
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
194
|
+
"launch_cmd": {
|
|
195
|
+
"vllm": [
|
|
196
|
+
"vllm serve {model_weights_path} \\",
|
|
197
|
+
" --served-model-name {model_name} \\",
|
|
198
|
+
' --host "0.0.0.0" \\',
|
|
199
|
+
" --port $server_port_number \\",
|
|
200
|
+
],
|
|
201
|
+
"sglang": [
|
|
202
|
+
f"{PYTHON_VERSION} -m sglang.launch_server \\",
|
|
203
|
+
" --model-path {model_weights_path} \\",
|
|
204
|
+
" --served-model-name {model_name} \\",
|
|
205
|
+
' --host "0.0.0.0" \\',
|
|
206
|
+
" --port $server_port_number \\",
|
|
207
|
+
],
|
|
208
|
+
"sglang_multinode": [
|
|
209
|
+
"for ((i = 0; i < $SLURM_JOB_NUM_NODES; i++)); do",
|
|
210
|
+
" node_i=${{nodes_array[$i]}}",
|
|
211
|
+
' echo "Launching SGLang server on $node_i"',
|
|
212
|
+
' srun --ntasks=1 --nodes=1 -w "$node_i" \\',
|
|
213
|
+
" CONTAINER_PLACEHOLDER",
|
|
214
|
+
f" {PYTHON_VERSION} -m sglang.launch_server \\",
|
|
215
|
+
" --model-path {model_weights_path} \\",
|
|
216
|
+
" --served-model-name {model_name} \\",
|
|
217
|
+
' --host "0.0.0.0" \\',
|
|
218
|
+
" --port $server_port_number \\",
|
|
219
|
+
' --nccl-init-addr "$NCCL_INIT_ADDR" \\',
|
|
220
|
+
" --nnodes {num_nodes} \\",
|
|
221
|
+
' --node-rank "$i" \\',
|
|
222
|
+
"SGLANG_ARGS_PLACEHOLDER &",
|
|
223
|
+
"done",
|
|
224
|
+
"\nwait",
|
|
225
|
+
],
|
|
226
|
+
},
|
|
172
227
|
}
|
|
173
228
|
|
|
174
229
|
|
|
@@ -184,7 +239,7 @@ class BatchSlurmScriptTemplate(TypedDict):
|
|
|
184
239
|
permission_update : str
|
|
185
240
|
Command to update permissions of the script
|
|
186
241
|
launch_model_scripts : list[str]
|
|
187
|
-
Commands to
|
|
242
|
+
Commands to run server launch scripts
|
|
188
243
|
"""
|
|
189
244
|
|
|
190
245
|
shebang: str
|
|
@@ -215,36 +270,34 @@ class BatchModelLaunchScriptTemplate(TypedDict):
|
|
|
215
270
|
Shebang line for the script
|
|
216
271
|
container_setup : list[str]
|
|
217
272
|
Commands for container setup
|
|
218
|
-
|
|
219
|
-
|
|
273
|
+
bind_path : str
|
|
274
|
+
Bind path environment variable for the container
|
|
220
275
|
server_address_setup : list[str]
|
|
221
276
|
Commands to setup the server address
|
|
222
277
|
launch_cmd : list[str]
|
|
223
|
-
Commands to launch the
|
|
278
|
+
Commands to launch the inference server
|
|
224
279
|
container_command : str
|
|
225
280
|
Commands to setup the container command
|
|
226
281
|
"""
|
|
227
282
|
|
|
228
283
|
shebang: str
|
|
229
284
|
container_setup: str
|
|
230
|
-
|
|
285
|
+
bind_path: str
|
|
231
286
|
server_address_setup: list[str]
|
|
232
287
|
write_to_json: list[str]
|
|
233
|
-
launch_cmd: list[str]
|
|
288
|
+
launch_cmd: dict[str, list[str]]
|
|
234
289
|
container_command: str
|
|
235
290
|
|
|
236
291
|
|
|
237
292
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
|
|
238
293
|
"shebang": "#!/bin/bash\n",
|
|
239
294
|
"container_setup": f"{CONTAINER_LOAD_CMD}\n",
|
|
240
|
-
"
|
|
241
|
-
f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
|
|
242
|
-
],
|
|
295
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
|
|
243
296
|
"server_address_setup": [
|
|
244
297
|
"source {src_dir}/find_port.sh",
|
|
245
298
|
"head_node_ip=${{SLURMD_NODENAME}}",
|
|
246
|
-
"
|
|
247
|
-
'server_address="http://${{head_node_ip}}:${{
|
|
299
|
+
"server_port_number=$(find_available_port $head_node_ip 8080 65535)",
|
|
300
|
+
'server_address="http://${{head_node_ip}}:${{server_port_number}}/v1"\n',
|
|
248
301
|
"echo $server_address\n",
|
|
249
302
|
],
|
|
250
303
|
"write_to_json": [
|
|
@@ -255,11 +308,20 @@ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
|
|
|
255
308
|
' "$json_path" > temp_{model_name}.json \\',
|
|
256
309
|
' && mv temp_{model_name}.json "$json_path"\n',
|
|
257
310
|
],
|
|
258
|
-
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --
|
|
259
|
-
"launch_cmd":
|
|
260
|
-
"vllm
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
311
|
+
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {{image_path}} \\",
|
|
312
|
+
"launch_cmd": {
|
|
313
|
+
"vllm": [
|
|
314
|
+
"vllm serve {model_weights_path} \\",
|
|
315
|
+
" --served-model-name {model_name} \\",
|
|
316
|
+
' --host "0.0.0.0" \\',
|
|
317
|
+
" --port $server_port_number \\",
|
|
318
|
+
],
|
|
319
|
+
"sglang": [
|
|
320
|
+
f"{PYTHON_VERSION} -m sglang.launch_server \\",
|
|
321
|
+
" --model-path {model_weights_path} \\",
|
|
322
|
+
" --served-model-name {model_name} \\",
|
|
323
|
+
' --host "0.0.0.0" \\',
|
|
324
|
+
" --port $server_port_number \\",
|
|
325
|
+
],
|
|
326
|
+
},
|
|
265
327
|
}
|
vec_inf/client/_slurm_vars.py
CHANGED
|
@@ -52,7 +52,11 @@ def load_env_config() -> dict[str, Any]:
|
|
|
52
52
|
_config = load_env_config()
|
|
53
53
|
|
|
54
54
|
# Extract path values
|
|
55
|
-
IMAGE_PATH =
|
|
55
|
+
IMAGE_PATH = {
|
|
56
|
+
"vllm": _config["paths"]["vllm_image_path"],
|
|
57
|
+
"sglang": _config["paths"]["sglang_image_path"],
|
|
58
|
+
}
|
|
59
|
+
CACHED_MODEL_CONFIG_PATH = Path(_config["paths"]["cached_model_config_path"])
|
|
56
60
|
|
|
57
61
|
# Extract containerization info
|
|
58
62
|
CONTAINER_LOAD_CMD = _config["containerization"]["module_load_cmd"]
|
|
@@ -78,9 +82,14 @@ RESOURCE_TYPE: TypeAlias = create_literal_type( # type: ignore[valid-type]
|
|
|
78
82
|
_config["allowed_values"]["resource_type"]
|
|
79
83
|
)
|
|
80
84
|
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
|
|
85
|
+
# Model types available derived from the cached model config
|
|
86
|
+
MODEL_TYPES: TypeAlias = create_literal_type(_config["model_types"]) # type: ignore[valid-type]
|
|
87
|
+
|
|
88
|
+
# Required arguments for launching jobs and corresponding environment variables
|
|
89
|
+
REQUIRED_ARGS: dict[str, str | None] = _config["required_args"]
|
|
90
|
+
|
|
91
|
+
# Running sglang requires python version
|
|
92
|
+
PYTHON_VERSION: str = _config["python_version"]
|
|
84
93
|
|
|
85
94
|
# Extract default arguments
|
|
86
95
|
DEFAULT_ARGS: dict[str, str] = _config["default_args"]
|
vec_inf/client/_utils.py
CHANGED
|
@@ -16,7 +16,7 @@ import yaml
|
|
|
16
16
|
|
|
17
17
|
from vec_inf.client._client_vars import MODEL_READY_SIGNATURE
|
|
18
18
|
from vec_inf.client._exceptions import MissingRequiredFieldsError
|
|
19
|
-
from vec_inf.client._slurm_vars import
|
|
19
|
+
from vec_inf.client._slurm_vars import CACHED_MODEL_CONFIG_PATH, REQUIRED_ARGS
|
|
20
20
|
from vec_inf.client.config import ModelConfig
|
|
21
21
|
from vec_inf.client.models import ModelStatus
|
|
22
22
|
|
|
@@ -77,7 +77,7 @@ def read_slurm_log(
|
|
|
77
77
|
json_content: dict[str, str] = json.load(file)
|
|
78
78
|
return json_content
|
|
79
79
|
else:
|
|
80
|
-
with file_path.open("r") as file:
|
|
80
|
+
with file_path.open("r", errors="replace") as file:
|
|
81
81
|
return file.readlines()
|
|
82
82
|
except FileNotFoundError:
|
|
83
83
|
return f"LOG FILE NOT FOUND: {file_path}"
|
|
@@ -249,7 +249,7 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:
|
|
|
249
249
|
-----
|
|
250
250
|
Configuration is loaded from:
|
|
251
251
|
1. User path: specified by config_path
|
|
252
|
-
2. Default path: package's config/models.yaml or
|
|
252
|
+
2. Default path: package's config/models.yaml or CACHED_MODEL_CONFIG_PATH if exists
|
|
253
253
|
3. Environment variable: specified by VEC_INF_CONFIG environment variable
|
|
254
254
|
and merged with default config
|
|
255
255
|
|
|
@@ -303,8 +303,8 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:
|
|
|
303
303
|
|
|
304
304
|
# 2. Otherwise, load default config
|
|
305
305
|
default_path = (
|
|
306
|
-
|
|
307
|
-
if
|
|
306
|
+
CACHED_MODEL_CONFIG_PATH
|
|
307
|
+
if CACHED_MODEL_CONFIG_PATH.exists()
|
|
308
308
|
else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
|
|
309
309
|
)
|
|
310
310
|
config = load_yaml_config(default_path)
|
|
@@ -436,7 +436,7 @@ def find_matching_dirs(
|
|
|
436
436
|
return matched
|
|
437
437
|
|
|
438
438
|
|
|
439
|
-
def check_required_fields(params: dict[str, Any]) ->
|
|
439
|
+
def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
|
|
440
440
|
"""Check for required fields without default vals and their corresponding env vars.
|
|
441
441
|
|
|
442
442
|
Parameters
|
|
@@ -444,12 +444,18 @@ def check_required_fields(params: dict[str, Any]) -> None:
|
|
|
444
444
|
params : dict[str, Any]
|
|
445
445
|
Dictionary of parameters to check.
|
|
446
446
|
"""
|
|
447
|
+
env_overrides: dict[str, str] = {}
|
|
448
|
+
|
|
449
|
+
if not REQUIRED_ARGS:
|
|
450
|
+
return env_overrides
|
|
447
451
|
for arg in REQUIRED_ARGS:
|
|
448
452
|
if not params.get(arg):
|
|
449
|
-
default_value = os.getenv(REQUIRED_ARGS[arg])
|
|
453
|
+
default_value = os.getenv(str(REQUIRED_ARGS[arg]))
|
|
450
454
|
if default_value:
|
|
451
455
|
params[arg] = default_value
|
|
456
|
+
env_overrides[arg] = default_value
|
|
452
457
|
else:
|
|
453
458
|
raise MissingRequiredFieldsError(
|
|
454
459
|
f"{arg} is required, please set it in the command arguments or environment variables"
|
|
455
460
|
)
|
|
461
|
+
return env_overrides
|
vec_inf/client/api.py
CHANGED
|
@@ -10,7 +10,9 @@ vec_inf.client._helper : Helper classes for model inference server management
|
|
|
10
10
|
vec_inf.client.models : Data models for API responses
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
+
import re
|
|
13
14
|
import shutil
|
|
15
|
+
import subprocess
|
|
14
16
|
import time
|
|
15
17
|
import warnings
|
|
16
18
|
from pathlib import Path
|
|
@@ -181,6 +183,51 @@ class VecInfClient:
|
|
|
181
183
|
)
|
|
182
184
|
return model_launcher.launch()
|
|
183
185
|
|
|
186
|
+
def fetch_running_jobs(self) -> list[str]:
|
|
187
|
+
"""
|
|
188
|
+
Fetch the list of running vec-inf job IDs for the current user.
|
|
189
|
+
|
|
190
|
+
Returns
|
|
191
|
+
-------
|
|
192
|
+
list[str]
|
|
193
|
+
List of matching job IDs; empty list if the user has no running vec-inf jobs.
|
|
194
|
+
"""
|
|
195
|
+
try:
|
|
196
|
+
res = subprocess.run(
|
|
197
|
+
["squeue", "--me", "--noheader"],
|
|
198
|
+
capture_output=True,
|
|
199
|
+
text=True,
|
|
200
|
+
check=True,
|
|
201
|
+
)
|
|
202
|
+
job_ids = [
|
|
203
|
+
ln.strip().split()[0] for ln in res.stdout.splitlines() if ln.strip()
|
|
204
|
+
]
|
|
205
|
+
|
|
206
|
+
if not job_ids:
|
|
207
|
+
return []
|
|
208
|
+
|
|
209
|
+
# For each job, fetch the full JobName and filter by suffix
|
|
210
|
+
matching_ids = []
|
|
211
|
+
for jid in job_ids:
|
|
212
|
+
try:
|
|
213
|
+
sctl = subprocess.run(
|
|
214
|
+
["scontrol", "show", "job", "-o", jid],
|
|
215
|
+
capture_output=True,
|
|
216
|
+
text=True,
|
|
217
|
+
check=True,
|
|
218
|
+
)
|
|
219
|
+
m = re.search(r"\bJobName=([^\s]+)", sctl.stdout)
|
|
220
|
+
if m and m.group(1).endswith("-vec-inf"):
|
|
221
|
+
matching_ids.append(jid)
|
|
222
|
+
except subprocess.CalledProcessError:
|
|
223
|
+
# Job might have finished between squeue and scontrol; skip
|
|
224
|
+
continue
|
|
225
|
+
|
|
226
|
+
return matching_ids
|
|
227
|
+
|
|
228
|
+
except subprocess.CalledProcessError as e:
|
|
229
|
+
raise SlurmJobError(f"Error running slurm command: {e}") from e
|
|
230
|
+
|
|
184
231
|
def get_status(self, slurm_job_id: str) -> StatusResponse:
|
|
185
232
|
"""Get the status of a running model.
|
|
186
233
|
|
vec_inf/client/config.py
CHANGED
|
@@ -8,13 +8,13 @@ from pathlib import Path
|
|
|
8
8
|
from typing import Any, Optional, Union
|
|
9
9
|
|
|
10
10
|
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
-
from typing_extensions import Literal
|
|
12
11
|
|
|
13
12
|
from vec_inf.client._slurm_vars import (
|
|
14
13
|
DEFAULT_ARGS,
|
|
15
14
|
MAX_CPUS_PER_TASK,
|
|
16
15
|
MAX_GPUS_PER_NODE,
|
|
17
16
|
MAX_NUM_NODES,
|
|
17
|
+
MODEL_TYPES,
|
|
18
18
|
PARTITION,
|
|
19
19
|
QOS,
|
|
20
20
|
RESOURCE_TYPE,
|
|
@@ -66,8 +66,12 @@ class ModelConfig(BaseModel):
|
|
|
66
66
|
Directory path for storing logs
|
|
67
67
|
model_weights_parent_dir : Path, optional
|
|
68
68
|
Base directory containing model weights
|
|
69
|
+
engine: str, optional
|
|
70
|
+
Inference engine to be used, supports 'vllm' and 'sglang'
|
|
69
71
|
vllm_args : dict[str, Any], optional
|
|
70
72
|
Additional arguments for vLLM engine configuration
|
|
73
|
+
sglang_args : dict[str, Any], optional
|
|
74
|
+
Additional arguments for SGLang engine configuration
|
|
71
75
|
|
|
72
76
|
Notes
|
|
73
77
|
-----
|
|
@@ -75,14 +79,16 @@ class ModelConfig(BaseModel):
|
|
|
75
79
|
configured to be immutable (frozen) and ignores extra fields.
|
|
76
80
|
"""
|
|
77
81
|
|
|
82
|
+
model_config = ConfigDict(
|
|
83
|
+
extra="ignore", str_strip_whitespace=True, validate_default=True, frozen=True
|
|
84
|
+
)
|
|
85
|
+
|
|
78
86
|
model_name: str = Field(..., min_length=3, pattern=r"^[a-zA-Z0-9\-_\.]+$")
|
|
79
87
|
model_family: str = Field(..., min_length=2)
|
|
80
88
|
model_variant: Optional[str] = Field(
|
|
81
89
|
default=None, description="Specific variant/version of the model family"
|
|
82
90
|
)
|
|
83
|
-
model_type:
|
|
84
|
-
..., description="Type of model architecture"
|
|
85
|
-
)
|
|
91
|
+
model_type: MODEL_TYPES = Field(..., description="Type of model architecture")
|
|
86
92
|
gpus_per_node: int = Field(
|
|
87
93
|
..., gt=0, le=MAX_GPUS_PER_NODE, description="GPUs per node"
|
|
88
94
|
)
|
|
@@ -148,12 +154,16 @@ class ModelConfig(BaseModel):
|
|
|
148
154
|
default=Path(DEFAULT_ARGS["model_weights_parent_dir"]),
|
|
149
155
|
description="Base directory for model weights",
|
|
150
156
|
)
|
|
157
|
+
engine: Optional[str] = Field(
|
|
158
|
+
default="vllm",
|
|
159
|
+
description="Inference engine to be used, supports 'vllm' and 'sglang'",
|
|
160
|
+
)
|
|
151
161
|
vllm_args: Optional[dict[str, Any]] = Field(
|
|
152
162
|
default={}, description="vLLM engine arguments"
|
|
153
163
|
)
|
|
164
|
+
sglang_args: Optional[dict[str, Any]] = Field(
|
|
165
|
+
default={}, description="SGLang engine arguments"
|
|
166
|
+
)
|
|
154
167
|
env: Optional[dict[str, Any]] = Field(
|
|
155
168
|
default={}, description="Environment variables to be set"
|
|
156
169
|
)
|
|
157
|
-
model_config = ConfigDict(
|
|
158
|
-
extra="forbid", str_strip_whitespace=True, validate_default=True, frozen=True
|
|
159
|
-
)
|
vec_inf/client/models.py
CHANGED
|
@@ -25,7 +25,9 @@ ModelInfo : dataclass
|
|
|
25
25
|
|
|
26
26
|
from dataclasses import dataclass, field
|
|
27
27
|
from enum import Enum
|
|
28
|
-
from typing import Any, Optional, Union
|
|
28
|
+
from typing import Any, Optional, Union, get_args
|
|
29
|
+
|
|
30
|
+
from vec_inf.client._slurm_vars import MODEL_TYPES
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class ModelStatus(str, Enum):
|
|
@@ -55,25 +57,23 @@ class ModelStatus(str, Enum):
|
|
|
55
57
|
UNAVAILABLE = "UNAVAILABLE"
|
|
56
58
|
|
|
57
59
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
+
# Extract model type values from the Literal type
|
|
61
|
+
_MODEL_TYPE_VALUES = get_args(MODEL_TYPES)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _model_type_to_enum_name(model_type: str) -> str:
|
|
65
|
+
"""Convert a model type string to a valid enum attribute name."""
|
|
66
|
+
# Convert to uppercase and replace hyphens with underscores
|
|
67
|
+
return model_type.upper().replace("-", "_")
|
|
60
68
|
|
|
61
|
-
Attributes
|
|
62
|
-
----------
|
|
63
|
-
LLM : str
|
|
64
|
-
Large Language Model
|
|
65
|
-
VLM : str
|
|
66
|
-
Vision Language Model
|
|
67
|
-
TEXT_EMBEDDING : str
|
|
68
|
-
Text Embedding Model
|
|
69
|
-
REWARD_MODELING : str
|
|
70
|
-
Reward Modeling Model
|
|
71
|
-
"""
|
|
72
69
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
70
|
+
# Create ModelType enum dynamically from MODEL_TYPES
|
|
71
|
+
ModelType = Enum( # type: ignore[misc]
|
|
72
|
+
"ModelType",
|
|
73
|
+
{_model_type_to_enum_name(mt): mt for mt in _MODEL_TYPE_VALUES},
|
|
74
|
+
type=str,
|
|
75
|
+
module=__name__,
|
|
76
|
+
)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
@dataclass
|
|
@@ -222,8 +222,12 @@ class LaunchOptions:
|
|
|
222
222
|
Directory for logs
|
|
223
223
|
model_weights_parent_dir : str, optional
|
|
224
224
|
Parent directory containing model weights
|
|
225
|
+
engine: str, optional
|
|
226
|
+
Inference engine to use
|
|
225
227
|
vllm_args : str, optional
|
|
226
|
-
|
|
228
|
+
vLLM engine arguments
|
|
229
|
+
sglang_args : str, optional
|
|
230
|
+
SGLang engine arguments
|
|
227
231
|
env : str, optional
|
|
228
232
|
Environment variables to be set
|
|
229
233
|
config : str, optional
|
|
@@ -250,7 +254,9 @@ class LaunchOptions:
|
|
|
250
254
|
venv: Optional[str] = None
|
|
251
255
|
log_dir: Optional[str] = None
|
|
252
256
|
model_weights_parent_dir: Optional[str] = None
|
|
257
|
+
engine: Optional[str] = None
|
|
253
258
|
vllm_args: Optional[str] = None
|
|
259
|
+
sglang_args: Optional[str] = None
|
|
254
260
|
env: Optional[str] = None
|
|
255
261
|
config: Optional[str] = None
|
|
256
262
|
|
vec_inf/config/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Configs
|
|
2
2
|
|
|
3
3
|
* [`environment.yaml`](environment.yaml): Configuration for the Slurm cluster environment, including image paths, resource availability, default values, etc.
|
|
4
|
-
* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as
|
|
4
|
+
* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as inference engine arguments.
|
|
5
5
|
|
|
6
6
|
**NOTE**: These configs act as last-resort fallbacks in the `vec-inf` package; they will be updated to match the latest cached config on the Vector Killarney cluster with each new package version release.
|
vec_inf/config/environment.yaml
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
paths:
|
|
2
|
-
image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif"
|
|
2
|
+
image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif" # Maintains backwards compatibility
|
|
3
|
+
vllm_image_path: "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif"
|
|
4
|
+
sglang_image_path: "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif"
|
|
5
|
+
cached_model_config_path: "/model-weights/vec-inf-shared/models.yaml"
|
|
3
6
|
|
|
4
7
|
containerization:
|
|
5
8
|
module_load_cmd: "module load apptainer"
|
|
@@ -19,13 +22,17 @@ required_args:
|
|
|
19
22
|
account: "VEC_INF_ACCOUNT"
|
|
20
23
|
work_dir: "VEC_INF_WORK_DIR"
|
|
21
24
|
|
|
25
|
+
python_version: "python3.12"
|
|
26
|
+
|
|
27
|
+
model_types: ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"] # Derived from models.yaml
|
|
28
|
+
|
|
22
29
|
default_args:
|
|
23
30
|
cpus_per_task: "16"
|
|
24
31
|
mem_per_node: "64G"
|
|
25
32
|
time: "08:00:00"
|
|
26
33
|
qos: ""
|
|
27
34
|
partition: ""
|
|
28
|
-
resource_type: ""
|
|
35
|
+
resource_type: "l40s"
|
|
29
36
|
exclude: ""
|
|
30
37
|
nodelist: ""
|
|
31
38
|
bind: ""
|