vec-inf 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/cli/_cli.py +79 -287
- vec_inf/cli/_config.py +87 -0
- vec_inf/cli/_helper.py +675 -0
- vec_inf/cli/_utils.py +77 -62
- vec_inf/{models → config}/README.md +30 -0
- vec_inf/config/models.yaml +1274 -0
- vec_inf/multinode_vllm.slurm +61 -31
- vec_inf/vllm.slurm +55 -24
- vec_inf-0.5.0.dist-info/METADATA +210 -0
- vec_inf-0.5.0.dist-info/RECORD +17 -0
- vec_inf/launch_server.sh +0 -145
- vec_inf/models/models.csv +0 -85
- vec_inf-0.4.1.dist-info/METADATA +0 -121
- vec_inf-0.4.1.dist-info/RECORD +0 -16
- {vec_inf-0.4.1.dist-info → vec_inf-0.5.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.5.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.5.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/cli/_utils.py
CHANGED
@@ -1,52 +1,73 @@
 """Utility functions for the CLI."""

+import json
 import os
 import subprocess
-from
+from pathlib import Path
+from typing import Any, Optional, Union, cast

-import polars as pl
 import requests
+import yaml
 from rich.table import Table

+from vec_inf.cli._config import ModelConfig
+

 MODEL_READY_SIGNATURE = "INFO: Application startup complete."
-
+CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")


-def run_bash_command(command: str) -> str:
+def run_bash_command(command: str) -> tuple[str, str]:
     """Run a bash command and return the output."""
     process = subprocess.Popen(
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
     )
-
-    return stdout
+    return process.communicate()


 def read_slurm_log(
-    slurm_job_name: str,
-
+    slurm_job_name: str,
+    slurm_job_id: int,
+    slurm_log_type: str,
+    log_dir: Optional[Union[str, Path]],
+) -> Union[list[str], str, dict[str, str]]:
     """Read the slurm log file."""
     if not log_dir:
-
-
-
-
-
+        # Default log directory
+        models_dir = Path.home() / ".vec-inf-logs"
+        if not models_dir.exists():
+            return "LOG DIR NOT FOUND"
+        # Iterate over all dirs in models_dir, sorted by dir name length in desc order
+        for directory in sorted(
+            [d for d in models_dir.iterdir() if d.is_dir()],
+            key=lambda d: len(d.name),
+            reverse=True,
+        ):
+            if directory.name in slurm_job_name:
+                log_dir = directory
                 break
+    else:
+        log_dir = Path(log_dir)

-    log_dir
+    # If log_dir is still not set, then didn't find the log dir at default location
+    if not log_dir:
+        return "LOG DIR NOT FOUND"

     try:
-        file_path =
-        log_dir
-        f"{slurm_job_name}.{slurm_job_id}
+        file_path = (
+            log_dir
+            / Path(f"{slurm_job_name}.{slurm_job_id}")
+            / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
         )
-
-
+        if slurm_log_type == "json":
+            with file_path.open("r") as file:
+                json_content: dict[str, str] = json.load(file)
+                return json_content
+        else:
+            with file_path.open("r") as file:
+                return file.readlines()
     except FileNotFoundError:
-
-        return "LOG_FILE_NOT_FOUND"
-    return lines
+        return f"LOG FILE NOT FOUND: {file_path}"


 def is_server_running(
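Note: `run_bash_command` now returns both output streams as a `(stdout, stderr)` tuple instead of a single string, so 0.4.x callers that bound a single return value need updating. A minimal sketch of an adapted caller (the `squeue` command is only illustrative, not taken from the diff):

```python
# Hypothetical caller updated for the 0.5.0 signature shown above.
from vec_inf.cli._utils import run_bash_command

stdout, stderr = run_bash_command("squeue --me")  # 0.4.1 returned stdout only
if stderr:
    print(f"Command reported an error: {stderr}")
else:
    print(stdout)
```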
@@ -70,19 +91,17 @@ def is_server_running(

 def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
     """Get the base URL of a model."""
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "
+    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
     if isinstance(log_content, str):
         return log_content

-
-
-        return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
-    return "URL_NOT_FOUND"
+    server_addr = cast(dict[str, str], log_content).get("server_address")
+    return server_addr if server_addr else "URL NOT FOUND"


 def model_health_check(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) ->
+) -> tuple[str, Union[str, int]]:
     """Check the health of a running model on the cluster."""
     base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
     if not base_url.startswith("http"):
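Note: `get_base_url` no longer scans the text log for a server-address signature; it reads the new `.json` log through `read_slurm_log` and looks up a `server_address` key. A minimal sketch of that contract (the file contents are a hypothetical example; the only key the diff shows being read is `server_address`):

```python
import json

# Hypothetical contents of the <job_name>.<job_id>.json log file.
log_content = json.loads('{"server_address": "http://gpu001:8080/v1"}')

# Mirrors the lookup get_base_url performs on the parsed log.
server_addr = log_content.get("server_address")
print(server_addr if server_addr else "URL NOT FOUND")
```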
@@ -109,39 +128,35 @@ def create_table(
     return table


-def
-    """Load the
-
-
-
-
-    )
+def load_config() -> list[ModelConfig]:
+    """Load the model configuration."""
+    default_path = (
+        CACHED_CONFIG
+        if CACHED_CONFIG.exists()
+        else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return f"[red]Error reading log file: {e}[/red]"
-
-    return latest_metric
+    config: dict[str, Any] = {}
+    with open(default_path) as f:
+        config = yaml.safe_load(f) or {}
+
+    user_path = os.getenv("VEC_INF_CONFIG")
+    if user_path:
+        user_path_obj = Path(user_path)
+        if user_path_obj.exists():
+            with open(user_path_obj) as f:
+                user_config = yaml.safe_load(f) or {}
+            for name, data in user_config.get("models", {}).items():
+                if name in config.get("models", {}):
+                    config["models"][name].update(data)
+                else:
+                    config.setdefault("models", {})[name] = data
+        else:
+            print(
+                f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
+            )
+
+    return [
+        ModelConfig(model_name=name, **model_data)
+        for name, model_data in config.get("models", {}).items()
+    ]
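Note: the CSV-based registry of 0.4.1 (`models.csv` read via `polars`) is replaced by `load_config`, which prefers the shared cluster copy of `models.yaml`, falls back to the packaged one, and lets a YAML file pointed to by `VEC_INF_CONFIG` shallow-merge per-model overrides on top. A runnable sketch of the merge semantics (the model names and fields below are illustrative, not taken from the packaged `models.yaml`):

```python
import yaml

# Illustrative default and user configs; the real keys live in
# vec_inf/config/models.yaml and are not shown in this diff.
default_cfg = yaml.safe_load("""
models:
  example-model-7B:
    num_gpus: 1
    partition: a40
""")
user_cfg = yaml.safe_load("""
models:
  example-model-7B:
    num_gpus: 2      # overrides the default value
  my-local-model:    # unknown names are added alongside the defaults
    num_gpus: 1
""")

# Same shallow per-model update that load_config applies.
for name, data in user_cfg.get("models", {}).items():
    if name in default_cfg.get("models", {}):
        default_cfg["models"][name].update(data)
    else:
        default_cfg.setdefault("models", {})[name] = data

print(default_cfg["models"]["example-model-7B"])
# -> {'num_gpus': 2, 'partition': 'a40'}
```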
vec_inf/{models → config}/README.md
RENAMED
@@ -162,6 +162,13 @@ More profiling metrics coming soon!

 ## Vision Language Models

+### [allenai: Molmo](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`Molmo-7B-D-0924`](https://huggingface.co/allenai/Molmo-7B-D-0924) | 1x a40 | - tokens/s | - tokens/s |
+
+
 ### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)

 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
@@ -181,6 +188,7 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Phi-3-vision-128k-instruct`](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) | 2x a40 | - tokens/s | - tokens/s |
+| [`Phi-3.5-vision-instruct`](https://huggingface.co/microsoft/Phi-3.5-vision-instruct) | 2x a40 | - tokens/s | - tokens/s |

 ### [Meta: Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)

@@ -199,6 +207,27 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`Pixtral-12B-2409`](https://huggingface.co/mistralai/Pixtral-12B-2409) | 1x a40 | - tokens/s | - tokens/s |

+### [OpenGVLab: InternVL2.5](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`InternVL2_5-8B`](https://huggingface.co/OpenGVLab/InternVL2_5-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-26B`](https://huggingface.co/OpenGVLab/InternVL2_5-26B) | 2x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-38B`](https://huggingface.co/OpenGVLab/InternVL2_5-38B) | 4x a40 | - tokens/s | - tokens/s |
+
+### [THUDM: GLM-4](https://huggingface.co/collections/THUDM/glm-4-665fcf188c414b03c2f7e3b7)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`glm-4v-9b`](https://huggingface.co/THUDM/glm-4v-9b) | 1x a40 | - tokens/s | - tokens/s |
+
+### [DeepSeek: DeepSeek-VL2](https://huggingface.co/collections/deepseek-ai/deepseek-vl2-675c22accc456d3beb4613ab)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`deepseek-vl2`](https://huggingface.co/deepseek-ai/deepseek-vl2) | 2x a40 | - tokens/s | - tokens/s |
+| [`deepseek-vl2-small`](https://huggingface.co/deepseek-ai/deepseek-vl2-small) | 1x a40 | - tokens/s | - tokens/s |
+
+
 ## Text Embedding Models

 ### [Liang Wang: e5](https://huggingface.co/intfloat)
@@ -225,3 +254,4 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Qwen2.5-Math-RM-72B`](https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B) | 4x a40 | - tokens/s | - tokens/s |
+| [`Qwen2.5-Math-PRM-7B`](https://huggingface.co/Qwen/Qwen2.5-Math-PRM-7B) | 1x a40 | - tokens/s | - tokens/s |