vec-inf 0.7.3__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/client/models.py CHANGED
@@ -25,7 +25,9 @@ ModelInfo : dataclass
 
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, get_args
+
+from vec_inf.client._slurm_vars import MODEL_TYPES
 
 
 class ModelStatus(str, Enum):
@@ -55,25 +57,23 @@ class ModelStatus(str, Enum):
     UNAVAILABLE = "UNAVAILABLE"
 
 
-class ModelType(str, Enum):
-    """Enum representing the possible model types.
+# Extract model type values from the Literal type
+_MODEL_TYPE_VALUES = get_args(MODEL_TYPES)
+
+
+def _model_type_to_enum_name(model_type: str) -> str:
+    """Convert a model type string to a valid enum attribute name."""
+    # Convert to uppercase and replace hyphens with underscores
+    return model_type.upper().replace("-", "_")
 
-    Attributes
-    ----------
-    LLM : str
-        Large Language Model
-    VLM : str
-        Vision Language Model
-    TEXT_EMBEDDING : str
-        Text Embedding Model
-    REWARD_MODELING : str
-        Reward Modeling Model
-    """
 
-    LLM = "LLM"
-    VLM = "VLM"
-    TEXT_EMBEDDING = "Text_Embedding"
-    REWARD_MODELING = "Reward_Modeling"
+# Create ModelType enum dynamically from MODEL_TYPES
+ModelType = Enum(  # type: ignore[misc]
+    "ModelType",
+    {_model_type_to_enum_name(mt): mt for mt in _MODEL_TYPE_VALUES},
+    type=str,
+    module=__name__,
+)
 
 
 @dataclass
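For readers less familiar with the functional `Enum` API used in the hunk above, the following standalone sketch reproduces the pattern. `vec_inf.client._slurm_vars.MODEL_TYPES` is not shown in this diff, so a stand-in `Literal` with the values from the `model_types` list added to `environment.yaml` (further down) is assumed.

```python
from enum import Enum
from typing import Literal, get_args

# Stand-in for vec_inf.client._slurm_vars.MODEL_TYPES (not shown in this diff);
# values mirror the model_types list added to environment.yaml below.
MODEL_TYPES = Literal["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"]


def _model_type_to_enum_name(model_type: str) -> str:
    """Convert a model type string to a valid enum attribute name."""
    return model_type.upper().replace("-", "_")


# Functional Enum API: member names are the sanitized strings, member values
# keep the original casing, and type=str makes members behave like str.
ModelType = Enum(
    "ModelType",
    {_model_type_to_enum_name(mt): mt for mt in get_args(MODEL_TYPES)},
    type=str,
)

assert ModelType.TEXT_EMBEDDING.value == "Text_Embedding"  # same value as the old enum
assert ModelType("VLM") is ModelType.VLM  # lookup by value still works
```

The apparent intent is that model types added to the cluster configuration surface as `ModelType` members without further code changes, rather than requiring the enum to be edited by hand.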
@@ -222,8 +222,12 @@ class LaunchOptions:
         Directory for logs
     model_weights_parent_dir : str, optional
         Parent directory containing model weights
+    engine: str, optional
+        Inference engine to use
     vllm_args : str, optional
-        Additional arguments for vLLM
+        vLLM engine arguments
+    sglang_args : str, optional
+        SGLang engine arguments
     env : str, optional
         Environment variables to be set
     config : str, optional
@@ -250,7 +254,9 @@ class LaunchOptions:
     venv: Optional[str] = None
     log_dir: Optional[str] = None
     model_weights_parent_dir: Optional[str] = None
+    engine: Optional[str] = None
     vllm_args: Optional[str] = None
+    sglang_args: Optional[str] = None
     env: Optional[str] = None
     config: Optional[str] = None
 
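A minimal sketch of how the new `LaunchOptions` fields might be populated, assuming (as the hunk above suggests) that the remaining fields also default to `None`; the lowercase engine names and the argument strings are illustrative guesses, not confirmed by this diff.

```python
from vec_inf.client.models import LaunchOptions

# Assumed usage: engine selects the backend, and the matching *_args string
# carries engine-specific flags; both values here are illustrative only.
vllm_options = LaunchOptions(engine="vllm", vllm_args="--max-model-len=8192")
sglang_options = LaunchOptions(engine="sglang", sglang_args="--context-length 8192")
```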
vec_inf/config/README.md CHANGED
@@ -1,6 +1,6 @@
 # Configs
 
 * [`environment.yaml`](environment.yaml): Configuration for the Slurm cluster environment, including image paths, resource availabilities, default value, and etc.
-* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as `vllm serve` arguments.
+* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as inference engine arguments.
 
 **NOTE**: These configs acts as last resort fallbacks in the `vec-inf` package, they will be updated to match the latest cached config on the Vector Killarney cluster with each new package version release.
vec_inf/config/environment.yaml CHANGED
@@ -1,5 +1,8 @@
 paths:
-  image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif"
+  image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif" # Maintains backwards compatibility
+  vllm_image_path: "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif"
+  sglang_image_path: "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif"
+  cached_model_config_path: "/model-weights/vec-inf-shared/models.yaml"
 
 containerization:
   module_load_cmd: "module load apptainer"
@@ -19,13 +22,17 @@ required_args:
   account: "VEC_INF_ACCOUNT"
   work_dir: "VEC_INF_WORK_DIR"
 
+python_version: "python3.12"
+
+model_types: ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"] # Derived from models.yaml
+
 default_args:
   cpus_per_task: "16"
   mem_per_node: "64G"
   time: "08:00:00"
   qos: ""
   partition: ""
-  resource_type: ""
+  resource_type: "l40s"
   exclude: ""
   nodelist: ""
   bind: ""
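The per-engine image paths pair naturally with the new `engine` launch option. The helper below is hypothetical (not part of vec-inf) and only illustrates one way the lookup could work, keeping the legacy `image_path` as the backwards-compatible fallback noted in the comment above.

```python
# Hypothetical helper (not part of vec-inf): resolve the container image for a
# given engine, falling back to the legacy image_path key.
def _select_image_path(engine: str, paths: dict[str, str]) -> str:
    return paths.get(f"{engine}_image_path", paths["image_path"])


paths = {
    "image_path": "/model-weights/vec-inf-shared/vector-inference_latest.sif",
    "vllm_image_path": "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif",
    "sglang_image_path": "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif",
}
assert _select_image_path("sglang", paths).endswith("sglang_latest.sif")
```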