vec-inf 0.7.3__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/cli/_cli.py +19 -3
- vec_inf/cli/_helper.py +23 -12
- vec_inf/cli/_vars.py +37 -22
- vec_inf/client/_client_vars.py +31 -1
- vec_inf/client/_helper.py +157 -44
- vec_inf/client/_slurm_script_generator.py +87 -30
- vec_inf/client/_slurm_templates.py +104 -40
- vec_inf/client/_slurm_vars.py +13 -4
- vec_inf/client/_utils.py +10 -7
- vec_inf/client/config.py +17 -7
- vec_inf/client/models.py +25 -19
- vec_inf/config/README.md +1 -1
- vec_inf/config/environment.yaml +9 -2
- vec_inf/config/models.yaml +223 -364
- {vec_inf-0.7.3.dist-info → vec_inf-0.8.1.dist-info}/METADATA +19 -15
- vec_inf-0.8.1.dist-info/RECORD +27 -0
- vec_inf-0.7.3.dist-info/RECORD +0 -27
- {vec_inf-0.7.3.dist-info → vec_inf-0.8.1.dist-info}/WHEEL +0 -0
- {vec_inf-0.7.3.dist-info → vec_inf-0.8.1.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.3.dist-info → vec_inf-0.8.1.dist-info}/licenses/LICENSE +0 -0
vec_inf/client/models.py
CHANGED
|
@@ -25,7 +25,9 @@ ModelInfo : dataclass
|
|
|
25
25
|
|
|
26
26
|
from dataclasses import dataclass, field
|
|
27
27
|
from enum import Enum
|
|
28
|
-
from typing import Any, Optional, Union
|
|
28
|
+
from typing import Any, Optional, Union, get_args
|
|
29
|
+
|
|
30
|
+
from vec_inf.client._slurm_vars import MODEL_TYPES
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class ModelStatus(str, Enum):
|
|
@@ -55,25 +57,23 @@ class ModelStatus(str, Enum):
|
|
|
55
57
|
UNAVAILABLE = "UNAVAILABLE"
|
|
56
58
|
|
|
57
59
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
+
# Extract model type values from the Literal type
|
|
61
|
+
_MODEL_TYPE_VALUES = get_args(MODEL_TYPES)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _model_type_to_enum_name(model_type: str) -> str:
|
|
65
|
+
"""Convert a model type string to a valid enum attribute name."""
|
|
66
|
+
# Convert to uppercase and replace hyphens with underscores
|
|
67
|
+
return model_type.upper().replace("-", "_")
|
|
60
68
|
|
|
61
|
-
Attributes
|
|
62
|
-
----------
|
|
63
|
-
LLM : str
|
|
64
|
-
Large Language Model
|
|
65
|
-
VLM : str
|
|
66
|
-
Vision Language Model
|
|
67
|
-
TEXT_EMBEDDING : str
|
|
68
|
-
Text Embedding Model
|
|
69
|
-
REWARD_MODELING : str
|
|
70
|
-
Reward Modeling Model
|
|
71
|
-
"""
|
|
72
69
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
70
|
+
# Create ModelType enum dynamically from MODEL_TYPES
|
|
71
|
+
ModelType = Enum( # type: ignore[misc]
|
|
72
|
+
"ModelType",
|
|
73
|
+
{_model_type_to_enum_name(mt): mt for mt in _MODEL_TYPE_VALUES},
|
|
74
|
+
type=str,
|
|
75
|
+
module=__name__,
|
|
76
|
+
)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
@dataclass
|
|
@@ -222,8 +222,12 @@ class LaunchOptions:
|
|
|
222
222
|
Directory for logs
|
|
223
223
|
model_weights_parent_dir : str, optional
|
|
224
224
|
Parent directory containing model weights
|
|
225
|
+
engine : str, optional
|
|
226
|
+
Inference engine to use
|
|
225
227
|
vllm_args : str, optional
|
|
226
|
-
|
|
228
|
+
vLLM engine arguments
|
|
229
|
+
sglang_args : str, optional
|
|
230
|
+
SGLang engine arguments
|
|
227
231
|
env : str, optional
|
|
228
232
|
Environment variables to be set
|
|
229
233
|
config : str, optional
|
|
@@ -250,7 +254,9 @@ class LaunchOptions:
|
|
|
250
254
|
venv: Optional[str] = None
|
|
251
255
|
log_dir: Optional[str] = None
|
|
252
256
|
model_weights_parent_dir: Optional[str] = None
|
|
257
|
+
engine: Optional[str] = None
|
|
253
258
|
vllm_args: Optional[str] = None
|
|
259
|
+
sglang_args: Optional[str] = None
|
|
254
260
|
env: Optional[str] = None
|
|
255
261
|
config: Optional[str] = None
|
|
256
262
|
|
vec_inf/config/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Configs
|
|
2
2
|
|
|
3
3
|
* [`environment.yaml`](environment.yaml): Configuration for the Slurm cluster environment, including image paths, resource availability, default values, etc.
|
|
4
|
-
* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as
|
|
4
|
+
* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as inference engine arguments.
|
|
5
5
|
|
|
6
6
|
**NOTE**: These configs act as last-resort fallbacks in the `vec-inf` package; they will be updated to match the latest cached config on the Vector Killarney cluster with each new package version release.
|
vec_inf/config/environment.yaml
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
paths:
|
|
2
|
-
image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif"
|
|
2
|
+
image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif" # Maintains backwards compatibility
|
|
3
|
+
vllm_image_path: "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif"
|
|
4
|
+
sglang_image_path: "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif"
|
|
5
|
+
cached_model_config_path: "/model-weights/vec-inf-shared/models.yaml"
|
|
3
6
|
|
|
4
7
|
containerization:
|
|
5
8
|
module_load_cmd: "module load apptainer"
|
|
@@ -19,13 +22,17 @@ required_args:
|
|
|
19
22
|
account: "VEC_INF_ACCOUNT"
|
|
20
23
|
work_dir: "VEC_INF_WORK_DIR"
|
|
21
24
|
|
|
25
|
+
python_version: "python3.12"
|
|
26
|
+
|
|
27
|
+
model_types: ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"] # Derived from models.yaml
|
|
28
|
+
|
|
22
29
|
default_args:
|
|
23
30
|
cpus_per_task: "16"
|
|
24
31
|
mem_per_node: "64G"
|
|
25
32
|
time: "08:00:00"
|
|
26
33
|
qos: ""
|
|
27
34
|
partition: ""
|
|
28
|
-
resource_type: ""
|
|
35
|
+
resource_type: "l40s"
|
|
29
36
|
exclude: ""
|
|
30
37
|
nodelist: ""
|
|
31
38
|
bind: ""
|