vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ from typing import TypedDict
 from vec_inf.client._slurm_vars import (
     CONTAINER_LOAD_CMD,
     CONTAINER_MODULE_NAME,
-    IMAGE_PATH,
+    PYTHON_VERSION,
 )


@@ -38,12 +38,33 @@ class ServerSetupConfig(TypedDict):
     ----------
     single_node : list[str]
         Setup commands for single-node deployments
-    multinode : list[str]
-        Setup commands for multi-node deployments, including Ray initialization
+    multinode_vllm : list[str]
+        Setup commands for multi-node vLLM deployments
+    multinode_sglang : list[str]
+        Setup commands for multi-node SGLang deployments
     """

     single_node: list[str]
-    multinode: list[str]
+    multinode_vllm: list[str]
+    multinode_sglang: list[str]
+
+
+class LaunchCmdConfig(TypedDict):
+    """TypedDict for launch command configuration.
+
+    Parameters
+    ----------
+    vllm : list[str]
+        Launch commands for vLLM inference server
+    sglang : list[str]
+        Launch commands for SGLang inference server
+    sglang_multinode : list[str]
+        Launch commands for multi-node SGLang inference server
+    """
+
+    vllm: list[str]
+    sglang: list[str]
+    sglang_multinode: list[str]


 class SlurmScriptTemplate(TypedDict):
@@ -57,56 +78,54 @@ class SlurmScriptTemplate(TypedDict):
         Commands for container setup
     imports : str
         Import statements and source commands
+    bind_path : str
+        Bind path environment variable for the container
     container_command : str
         Template for container execution command
     activate_venv : str
         Template for virtual environment activation
     server_setup : ServerSetupConfig
         Server initialization commands for different deployment modes
-    find_vllm_port : list[str]
-        Commands to find available ports for vLLM server
+    find_server_port : list[str]
+        Commands to find available ports for inference server
     write_to_json : list[str]
         Commands to write server configuration to JSON
-    launch_cmd : list[str]
-        vLLM server launch commands
+    launch_cmd : LaunchCmdConfig
+        Inference server launch commands
     """

     shebang: ShebangConfig
     container_setup: list[str]
     imports: str
-    container_env_vars: list[str]
+    bind_path: str
     container_command: str
     activate_venv: str
     server_setup: ServerSetupConfig
-    find_vllm_port: list[str]
+    find_server_port: list[str]
     write_to_json: list[str]
-    launch_cmd: list[str]
+    launch_cmd: LaunchCmdConfig


 SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
     "shebang": {
         "base": "#!/bin/bash",
         "multinode": [
-            "#SBATCH --exclusive",
-            "#SBATCH --tasks-per-node=1",
+            "#SBATCH --ntasks-per-node=1",
         ],
     },
     "container_setup": [
         CONTAINER_LOAD_CMD,
-        f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
     ],
     "imports": "source {src_dir}/find_port.sh",
-    "container_env_vars": [
-        f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp"
-    ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
+    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
     "activate_venv": "source {venv}/bin/activate",
     "server_setup": {
         "single_node": [
             "\n# Find available port",
-            "head_node_ip=${SLURMD_NODENAME}",
+            "head_node=${SLURMD_NODENAME}",
         ],
-        "multinode": [
+        "multinode_vllm": [
             "\n# Get list of nodes",
             'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
             "nodes_array=($nodes)",
@@ -130,7 +149,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
             "    fi",
             "fi",
             "\n# Start Ray head node",
-            "head_node_port=$(find_available_port $head_node_ip 8080 65535)",
+            "head_node_port=$(find_available_port $head_node 8080 65535)",
             "ray_head=$head_node_ip:$head_node_port",
             'echo "Ray Head IP: $ray_head"',
             'echo "Starting HEAD at $head_node"',
@@ -151,10 +170,19 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
             "    sleep 5",
             "done",
         ],
+        "multinode_sglang": [
+            "\n# Set NCCL initialization address using the hostname of the head node",
+            'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
+            "nodes_array=($nodes)",
+            "head_node=${nodes_array[0]}",
+            "NCCL_PORT=$(find_available_port $head_node 8000 65535)",
+            'NCCL_INIT_ADDR="${head_node}:${NCCL_PORT}"',
+            'echo "[INFO] NCCL_INIT_ADDR: $NCCL_INIT_ADDR"',
+        ],
     },
-    "find_vllm_port": [
-        "\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
-        'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
+    "find_server_port": [
+        "\nserver_port_number=$(find_available_port $head_node 8080 65535)",
+        'server_address="http://${head_node}:${server_port_number}/v1"',
     ],
     "write_to_json": [
         '\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
@@ -163,12 +191,39 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
         '    "$json_path" > temp.json \\',
         '    && mv temp.json "$json_path"',
     ],
-    "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
-        "    --served-model-name {model_name} \\",
-        '    --host "0.0.0.0" \\',
-        "    --port $vllm_port_number \\",
-    ],
+    "launch_cmd": {
+        "vllm": [
+            "vllm serve {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+        "sglang": [
+            f"{PYTHON_VERSION} -m sglang.launch_server \\",
+            "    --model-path {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+        "sglang_multinode": [
+            "for ((i = 0; i < $SLURM_JOB_NUM_NODES; i++)); do",
+            "    node_i=${{nodes_array[$i]}}",
+            '    echo "Launching SGLang server on $node_i"',
+            '    srun --ntasks=1 --nodes=1 -w "$node_i" \\',
+            "        CONTAINER_PLACEHOLDER",
+            f"        {PYTHON_VERSION} -m sglang.launch_server \\",
+            "            --model-path {model_weights_path} \\",
+            "            --served-model-name {model_name} \\",
+            '            --host "0.0.0.0" \\',
+            "            --port $server_port_number \\",
+            '            --nccl-init-addr "$NCCL_INIT_ADDR" \\',
+            "            --nnodes {num_nodes} \\",
+            '            --node-rank "$i" \\',
+            "SGLANG_ARGS_PLACEHOLDER &",
+            "done",
+            "\nwait",
+        ],
+    },
 }


@@ -184,7 +239,7 @@ class BatchSlurmScriptTemplate(TypedDict):
     permission_update : str
         Command to update permissions of the script
     launch_model_scripts : list[str]
-        Commands to launch the vLLM server
+        Commands to run server launch scripts
     """

     shebang: str
@@ -215,36 +270,34 @@ class BatchModelLaunchScriptTemplate(TypedDict):
         Shebang line for the script
     container_setup : list[str]
         Commands for container setup
-    env_vars : list[str]
-        Environment variables to set
+    bind_path : str
+        Bind path environment variable for the container
     server_address_setup : list[str]
         Commands to setup the server address
     launch_cmd : list[str]
-        Commands to launch the vLLM server
+        Commands to launch the inference server
     container_command : str
         Commands to setup the container command
     """

     shebang: str
     container_setup: str
-    env_vars: list[str]
+    bind_path: str
     server_address_setup: list[str]
     write_to_json: list[str]
-    launch_cmd: list[str]
+    launch_cmd: dict[str, list[str]]
     container_command: str


 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
     "shebang": "#!/bin/bash\n",
     "container_setup": f"{CONTAINER_LOAD_CMD}\n",
-    "env_vars": [
-        f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
-    ],
+    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
     "server_address_setup": [
         "source {src_dir}/find_port.sh",
         "head_node_ip=${{SLURMD_NODENAME}}",
-        "vllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
-        'server_address="http://${{head_node_ip}}:${{vllm_port_number}}/v1"\n',
+        "server_port_number=$(find_available_port $head_node_ip 8080 65535)",
+        'server_address="http://${{head_node_ip}}:${{server_port_number}}/v1"\n',
         "echo $server_address\n",
     ],
     "write_to_json": [
@@ -255,11 +308,20 @@ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
         '    "$json_path" > temp_{model_name}.json \\',
         '    && mv temp_{model_name}.json "$json_path"\n',
     ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
-    "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
-        "    --served-model-name {model_name} \\",
-        '    --host "0.0.0.0" \\',
-        "    --port $vllm_port_number \\",
-    ],
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {{image_path}} \\",
+    "launch_cmd": {
+        "vllm": [
+            "vllm serve {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+        "sglang": [
+            f"{PYTHON_VERSION} -m sglang.launch_server \\",
+            "    --model-path {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+    },
 }
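
The launch commands above are stored as per-engine lists of shell lines with `{placeholder}` fields. A minimal sketch of how a script generator could select and render one of them; the `LAUNCH_CMDS` excerpt and `render_launch_cmd` helper are hypothetical, not part of the package:

```python
# Hypothetical sketch: pick and render per-engine launch command lines.
LAUNCH_CMDS = {  # excerpt mirroring SLURM_SCRIPT_TEMPLATE["launch_cmd"]
    "vllm": [
        "vllm serve {model_weights_path} \\",
        "    --served-model-name {model_name} \\",
    ],
    "sglang": [
        "python3.12 -m sglang.launch_server \\",
        "    --model-path {model_weights_path} \\",
    ],
}

def render_launch_cmd(engine: str, **fields: str) -> str:
    """Join the engine's shell lines and fill the {placeholder} fields."""
    return "\n".join(line.format(**fields) for line in LAUNCH_CMDS[engine])

print(render_launch_cmd("sglang", model_weights_path="/weights/Llama-3.1-8B"))
```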
vec_inf/client/_slurm_vars.py CHANGED
@@ -52,7 +52,11 @@ def load_env_config() -> dict[str, Any]:
 _config = load_env_config()

 # Extract path values
-IMAGE_PATH = _config["paths"]["image_path"]
+IMAGE_PATH = {
+    "vllm": _config["paths"]["vllm_image_path"],
+    "sglang": _config["paths"]["sglang_image_path"],
+}
+CACHED_MODEL_CONFIG_PATH = Path(_config["paths"]["cached_model_config_path"])

 # Extract containerization info
 CONTAINER_LOAD_CMD = _config["containerization"]["module_load_cmd"]
@@ -78,9 +82,14 @@ RESOURCE_TYPE: TypeAlias = create_literal_type(  # type: ignore[valid-type]
     _config["allowed_values"]["resource_type"]
 )

-# Extract required arguments, for launching jobs that don't have a default value and
-# their corresponding environment variables
-REQUIRED_ARGS: dict[str, str] = _config["required_args"]
+# Model types available derived from the cached model config
+MODEL_TYPES: TypeAlias = create_literal_type(_config["model_types"])  # type: ignore[valid-type]
+
+# Required arguments for launching jobs and corresponding environment variables
+REQUIRED_ARGS: dict[str, str | None] = _config["required_args"]
+
+# Running sglang requires python version
+PYTHON_VERSION: str = _config["python_version"]

 # Extract default arguments
 DEFAULT_ARGS: dict[str, str] = _config["default_args"]
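
`MODEL_TYPES` is now built at import time from config values via `create_literal_type`. A hedged sketch of the general technique; the package's actual helper may differ in detail:

```python
# Sketch (assumed): build a typing.Literal from runtime config values.
from typing import Literal, get_args

def create_literal_type(values: list[str]):
    # Literal[("a", "b")] is equivalent to Literal["a", "b"]
    return Literal[tuple(values)]

MODEL_TYPES = create_literal_type(
    ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"]
)
assert get_args(MODEL_TYPES) == (
    "LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"
)
```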
vec_inf/client/_utils.py CHANGED
@@ -16,7 +16,7 @@ import yaml

 from vec_inf.client._client_vars import MODEL_READY_SIGNATURE
 from vec_inf.client._exceptions import MissingRequiredFieldsError
-from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR, REQUIRED_ARGS
+from vec_inf.client._slurm_vars import CACHED_MODEL_CONFIG_PATH, REQUIRED_ARGS
 from vec_inf.client.config import ModelConfig
 from vec_inf.client.models import ModelStatus

@@ -77,7 +77,7 @@ def read_slurm_log(
             json_content: dict[str, str] = json.load(file)
             return json_content
         else:
-            with file_path.open("r") as file:
+            with file_path.open("r", errors="replace") as file:
                 return file.readlines()
     except FileNotFoundError:
         return f"LOG FILE NOT FOUND: {file_path}"
@@ -249,7 +249,7 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:
     -----
     Configuration is loaded from:
     1. User path: specified by config_path
-    2. Default path: package's config/models.yaml or CACHED_CONFIG if it exists
+    2. Default path: package's config/models.yaml or CACHED_MODEL_CONFIG_PATH if exists
     3. Environment variable: specified by VEC_INF_CONFIG environment variable
        and merged with default config

@@ -303,8 +303,8 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:

     # 2. Otherwise, load default config
     default_path = (
-        CACHED_CONFIG_DIR / "models.yaml"
-        if CACHED_CONFIG_DIR.exists()
+        CACHED_MODEL_CONFIG_PATH
+        if CACHED_MODEL_CONFIG_PATH.exists()
         else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )
     config = load_yaml_config(default_path)
@@ -436,7 +436,7 @@ def find_matching_dirs(
     return matched


-def check_required_fields(params: dict[str, Any]) -> None:
+def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
     """Check for required fields without default vals and their corresponding env vars.

     Parameters
@@ -444,12 +444,18 @@ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
     params : dict[str, Any]
         Dictionary of parameters to check.
     """
+    env_overrides: dict[str, str] = {}
+
+    if not REQUIRED_ARGS:
+        return env_overrides
     for arg in REQUIRED_ARGS:
         if not params.get(arg):
-            default_value = os.getenv(REQUIRED_ARGS[arg])
+            default_value = os.getenv(str(REQUIRED_ARGS[arg]))
             if default_value:
                 params[arg] = default_value
+                env_overrides[arg] = default_value
             else:
                 raise MissingRequiredFieldsError(
                     f"{arg} is required, please set it in the command arguments or environment variables"
                 )
+    return env_overrides
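
`check_required_fields` now fills missing fields from environment variables in place and reports which fields it filled. A hedged usage sketch; the values are illustrative, and the `account`/`work_dir` mapping assumes the shipped `environment.yaml`:

```python
# Illustrative sketch: with REQUIRED_ARGS = {"account": "VEC_INF_ACCOUNT",
# "work_dir": "VEC_INF_WORK_DIR"}, unset params are filled from the env.
import os

from vec_inf.client._utils import check_required_fields

os.environ["VEC_INF_ACCOUNT"] = "my-account"   # hypothetical values
os.environ["VEC_INF_WORK_DIR"] = "/scratch/me"

params = {"account": None, "work_dir": None}
env_overrides = check_required_fields(params)

assert params["account"] == "my-account"       # mutated in place
assert env_overrides == {"account": "my-account", "work_dir": "/scratch/me"}
```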
vec_inf/client/api.py CHANGED
@@ -10,7 +10,9 @@ vec_inf.client._helper : Helper classes for model inference server management
 vec_inf.client.models : Data models for API responses
 """

+import re
 import shutil
+import subprocess
 import time
 import warnings
 from pathlib import Path
@@ -181,6 +183,51 @@ class VecInfClient:
         )
         return model_launcher.launch()

+    def fetch_running_jobs(self) -> list[str]:
+        """
+        Fetch the list of running vec-inf job IDs for the current user.
+
+        Returns
+        -------
+        list[str]
+            List of matching job names; empty list if squeue unavailable.
+        """
+        try:
+            res = subprocess.run(
+                ["squeue", "--me", "--noheader"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            job_ids = [
+                ln.strip().split()[0] for ln in res.stdout.splitlines() if ln.strip()
+            ]
+
+            if not job_ids:
+                return []
+
+            # For each job, fetch the full JobName and filter by suffix
+            matching_ids = []
+            for jid in job_ids:
+                try:
+                    sctl = subprocess.run(
+                        ["scontrol", "show", "job", "-o", jid],
+                        capture_output=True,
+                        text=True,
+                        check=True,
+                    )
+                    m = re.search(r"\bJobName=([^\s]+)", sctl.stdout)
+                    if m and m.group(1).endswith("-vec-inf"):
+                        matching_ids.append(jid)
+                except subprocess.CalledProcessError:
+                    # Job might have finished between squeue and scontrol; skip
+                    continue
+
+            return matching_ids
+
+        except subprocess.CalledProcessError as e:
+            raise SlurmJobError(f"Error running slurm command: {e}") from e
+
     def get_status(self, slurm_job_id: str) -> StatusResponse:
         """Get the status of a running model.

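
A usage sketch for the new helper; it assumes a Slurm environment with `squeue`/`scontrol` on PATH, jobs whose `JobName` ends in `-vec-inf`, and a no-argument `VecInfClient` constructor:

```python
# Hedged usage sketch for fetch_running_jobs on a Slurm cluster.
from vec_inf.client.api import VecInfClient

client = VecInfClient()
for job_id in client.fetch_running_jobs():
    # Each ID belongs to a running job launched by vec-inf
    print(job_id, client.get_status(job_id))
```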
vec_inf/client/config.py CHANGED
@@ -8,13 +8,13 @@ from pathlib import Path
 from typing import Any, Optional, Union

 from pydantic import BaseModel, ConfigDict, Field
-from typing_extensions import Literal

 from vec_inf.client._slurm_vars import (
     DEFAULT_ARGS,
     MAX_CPUS_PER_TASK,
     MAX_GPUS_PER_NODE,
     MAX_NUM_NODES,
+    MODEL_TYPES,
     PARTITION,
     QOS,
     RESOURCE_TYPE,
@@ -66,8 +66,12 @@ class ModelConfig(BaseModel):
         Directory path for storing logs
     model_weights_parent_dir : Path, optional
         Base directory containing model weights
+    engine : str, optional
+        Inference engine to be used, supports 'vllm' and 'sglang'
     vllm_args : dict[str, Any], optional
         Additional arguments for vLLM engine configuration
+    sglang_args : dict[str, Any], optional
+        Additional arguments for SGLang engine configuration

     Notes
     -----
@@ -75,14 +79,16 @@ class ModelConfig(BaseModel):
     configured to be immutable (frozen) and forbids extra fields.
     """

+    model_config = ConfigDict(
+        extra="ignore", str_strip_whitespace=True, validate_default=True, frozen=True
+    )
+
     model_name: str = Field(..., min_length=3, pattern=r"^[a-zA-Z0-9\-_\.]+$")
     model_family: str = Field(..., min_length=2)
     model_variant: Optional[str] = Field(
         default=None, description="Specific variant/version of the model family"
     )
-    model_type: Literal["LLM", "VLM", "Text_Embedding", "Reward_Modeling"] = Field(
-        ..., description="Type of model architecture"
-    )
+    model_type: MODEL_TYPES = Field(..., description="Type of model architecture")
     gpus_per_node: int = Field(
         ..., gt=0, le=MAX_GPUS_PER_NODE, description="GPUs per node"
     )
@@ -148,12 +154,16 @@ class ModelConfig(BaseModel):
         default=Path(DEFAULT_ARGS["model_weights_parent_dir"]),
         description="Base directory for model weights",
     )
+    engine: Optional[str] = Field(
+        default="vllm",
+        description="Inference engine to be used, supports 'vllm' and 'sglang'",
+    )
     vllm_args: Optional[dict[str, Any]] = Field(
         default={}, description="vLLM engine arguments"
     )
+    sglang_args: Optional[dict[str, Any]] = Field(
+        default={}, description="SGLang engine arguments"
+    )
     env: Optional[dict[str, Any]] = Field(
         default={}, description="Environment variables to be set"
     )
-    model_config = ConfigDict(
-        extra="forbid", str_strip_whitespace=True, validate_default=True, frozen=True
-    )
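
Note the switch from `extra="forbid"` to `extra="ignore"`: unknown keys in a model config are now silently dropped instead of raising. A minimal standalone pydantic v2 sketch of that difference; the `Forbid`/`Ignore` models are illustrative, not from the package:

```python
# Minimal pydantic v2 sketch of the extra-fields behavior change.
from pydantic import BaseModel, ConfigDict, ValidationError

class Forbid(BaseModel):
    model_config = ConfigDict(extra="forbid")
    name: str

class Ignore(BaseModel):
    model_config = ConfigDict(extra="ignore")
    name: str

print(Ignore(name="m", new_key=1))    # new_key silently dropped: name='m'
try:
    Forbid(name="m", new_key=1)
except ValidationError as err:
    print(err.errors()[0]["type"])    # extra_forbidden
```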
vec_inf/client/models.py CHANGED
@@ -25,7 +25,9 @@ ModelInfo : dataclass

 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, get_args
+
+from vec_inf.client._slurm_vars import MODEL_TYPES


 class ModelStatus(str, Enum):
@@ -55,25 +57,23 @@ class ModelStatus(str, Enum):
     UNAVAILABLE = "UNAVAILABLE"


-class ModelType(str, Enum):
-    """Enum representing the possible model types.
+# Extract model type values from the Literal type
+_MODEL_TYPE_VALUES = get_args(MODEL_TYPES)
+
+
+def _model_type_to_enum_name(model_type: str) -> str:
+    """Convert a model type string to a valid enum attribute name."""
+    # Convert to uppercase and replace hyphens with underscores
+    return model_type.upper().replace("-", "_")

-    Attributes
-    ----------
-    LLM : str
-        Large Language Model
-    VLM : str
-        Vision Language Model
-    TEXT_EMBEDDING : str
-        Text Embedding Model
-    REWARD_MODELING : str
-        Reward Modeling Model
-    """

-    LLM = "LLM"
-    VLM = "VLM"
-    TEXT_EMBEDDING = "Text_Embedding"
-    REWARD_MODELING = "Reward_Modeling"
+# Create ModelType enum dynamically from MODEL_TYPES
+ModelType = Enum(  # type: ignore[misc]
+    "ModelType",
+    {_model_type_to_enum_name(mt): mt for mt in _MODEL_TYPE_VALUES},
+    type=str,
+    module=__name__,
+)


 @dataclass
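
The functional `Enum` API used above yields the same members as the old hand-written class, plus any new types picked up from the config. A quick self-contained check; the value list here mirrors `environment.yaml`:

```python
# Dynamic ModelType is behaviorally equivalent to the old hand-written enum.
from enum import Enum

values = ("LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR")
ModelType = Enum(
    "ModelType",
    {v.upper().replace("-", "_"): v for v in values},
    type=str,  # str mixin, matching the old `class ModelType(str, Enum)`
)

assert ModelType.TEXT_EMBEDDING.value == "Text_Embedding"
assert ModelType.OCR.value == "OCR"     # new member from the config list
assert isinstance(ModelType.LLM, str)
```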
@@ -222,8 +222,12 @@ class LaunchOptions:
     Directory for logs
     model_weights_parent_dir : str, optional
         Parent directory containing model weights
+    engine : str, optional
+        Inference engine to use
     vllm_args : str, optional
-        Additional arguments for vLLM
+        vLLM engine arguments
+    sglang_args : str, optional
+        SGLang engine arguments
     env : str, optional
         Environment variables to be set
     config : str, optional
@@ -250,7 +254,9 @@ class LaunchOptions:
     venv: Optional[str] = None
     log_dir: Optional[str] = None
     model_weights_parent_dir: Optional[str] = None
+    engine: Optional[str] = None
     vllm_args: Optional[str] = None
+    sglang_args: Optional[str] = None
     env: Optional[str] = None
     config: Optional[str] = None

vec_inf/config/README.md CHANGED
@@ -1,6 +1,6 @@
 # Configs

 * [`environment.yaml`](environment.yaml): Configuration for the Slurm cluster environment, including image paths, resource availabilities, default value, and etc.
-* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as `vllm serve` arguments.
+* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as inference engine arguments.

 **NOTE**: These configs acts as last resort fallbacks in the `vec-inf` package, they will be updated to match the latest cached config on the Vector Killarney cluster with each new package version release.
vec_inf/config/environment.yaml CHANGED
@@ -1,5 +1,8 @@
 paths:
-  image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif"
+  image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif" # Maintains backwards compatibility
+  vllm_image_path: "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif"
+  sglang_image_path: "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif"
+  cached_model_config_path: "/model-weights/vec-inf-shared/models.yaml"

 containerization:
   module_load_cmd: "module load apptainer"
@@ -19,13 +22,17 @@ required_args:
   account: "VEC_INF_ACCOUNT"
   work_dir: "VEC_INF_WORK_DIR"

+python_version: "python3.12"
+
+model_types: ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"] # Derived from models.yaml
+
 default_args:
   cpus_per_task: "16"
   mem_per_node: "64G"
   time: "08:00:00"
   qos: ""
   partition: ""
-  resource_type: ""
+  resource_type: "l40s"
   exclude: ""
   nodelist: ""
   bind: ""
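
With `IMAGE_PATH` now a per-engine mapping and `{image_path}` substituted into the container command templates, image selection presumably keys off a model's `engine` field. A hedged sketch of that wiring; the `container_command` helper is hypothetical, and `apptainer` is assumed from `module_load_cmd`:

```python
# Hedged sketch (the package's actual wiring may differ): pick the container
# image for a model's engine and render the exec command template.
IMAGE_PATH = {
    "vllm": "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif",
    "sglang": "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif",
}

def container_command(engine: str) -> str:
    template = "apptainer exec --nv --containall {image_path} \\"
    return template.format(image_path=IMAGE_PATH[engine])

print(container_command("sglang"))
```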