vec-inf 0.4.0.post1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/__init__.py +1 -0
- vec_inf/cli/__init__.py +1 -0
- vec_inf/cli/_cli.py +88 -243
- vec_inf/cli/_config.py +87 -0
- vec_inf/cli/_helper.py +675 -0
- vec_inf/cli/_utils.py +88 -89
- vec_inf/{models → config}/README.md +54 -0
- vec_inf/config/models.yaml +1274 -0
- vec_inf/multinode_vllm.slurm +61 -29
- vec_inf/vllm.slurm +55 -22
- vec_inf-0.5.0.dist-info/METADATA +210 -0
- vec_inf-0.5.0.dist-info/RECORD +17 -0
- {vec_inf-0.4.0.post1.dist-info → vec_inf-0.5.0.dist-info}/WHEEL +1 -1
- vec_inf-0.5.0.dist-info/entry_points.txt +2 -0
- vec_inf/launch_server.sh +0 -126
- vec_inf/models/models.csv +0 -73
- vec_inf-0.4.0.post1.dist-info/METADATA +0 -120
- vec_inf-0.4.0.post1.dist-info/RECORD +0 -16
- vec_inf-0.4.0.post1.dist-info/entry_points.txt +0 -3
- {vec_inf-0.4.0.post1.dist-info → vec_inf-0.5.0.dist-info/licenses}/LICENSE +0 -0
vec_inf/cli/_utils.py
CHANGED
|
@@ -1,61 +1,79 @@
|
|
|
1
|
+
"""Utility functions for the CLI."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
1
4
|
import os
|
|
2
5
|
import subprocess
|
|
3
|
-
from
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Optional, Union, cast
|
|
4
8
|
|
|
5
|
-
import polars as pl
|
|
6
9
|
import requests
|
|
10
|
+
import yaml
|
|
7
11
|
from rich.table import Table
|
|
8
12
|
|
|
13
|
+
from vec_inf.cli._config import ModelConfig
|
|
14
|
+
|
|
15
|
+
|
|
9
16
|
MODEL_READY_SIGNATURE = "INFO: Application startup complete."
|
|
10
|
-
|
|
17
|
+
CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")
|
|
11
18
|
|
|
12
19
|
|
|
13
|
-
def run_bash_command(command: str) -> str:
|
|
14
|
-
"""
|
|
15
|
-
Run a bash command and return the output
|
|
16
|
-
"""
|
|
20
|
+
def run_bash_command(command: str) -> tuple[str, str]:
|
|
21
|
+
"""Run a bash command and return the output."""
|
|
17
22
|
process = subprocess.Popen(
|
|
18
23
|
command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
|
|
19
24
|
)
|
|
20
|
-
|
|
21
|
-
return stdout
|
|
25
|
+
return process.communicate()
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
def read_slurm_log(
|
|
25
|
-
slurm_job_name: str,
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
slurm_job_name: str,
|
|
30
|
+
slurm_job_id: int,
|
|
31
|
+
slurm_log_type: str,
|
|
32
|
+
log_dir: Optional[Union[str, Path]],
|
|
33
|
+
) -> Union[list[str], str, dict[str, str]]:
|
|
34
|
+
"""Read the slurm log file."""
|
|
30
35
|
if not log_dir:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
# Default log directory
|
|
37
|
+
models_dir = Path.home() / ".vec-inf-logs"
|
|
38
|
+
if not models_dir.exists():
|
|
39
|
+
return "LOG DIR NOT FOUND"
|
|
40
|
+
# Iterate over all dirs in models_dir, sorted by dir name length in desc order
|
|
41
|
+
for directory in sorted(
|
|
42
|
+
[d for d in models_dir.iterdir() if d.is_dir()],
|
|
43
|
+
key=lambda d: len(d.name),
|
|
44
|
+
reverse=True,
|
|
45
|
+
):
|
|
46
|
+
if directory.name in slurm_job_name:
|
|
47
|
+
log_dir = directory
|
|
36
48
|
break
|
|
49
|
+
else:
|
|
50
|
+
log_dir = Path(log_dir)
|
|
37
51
|
|
|
38
|
-
log_dir
|
|
52
|
+
# If log_dir is still not set, no matching log dir was found at the default location
|
|
53
|
+
if not log_dir:
|
|
54
|
+
return "LOG DIR NOT FOUND"
|
|
39
55
|
|
|
40
56
|
try:
|
|
41
|
-
file_path =
|
|
42
|
-
log_dir
|
|
43
|
-
f"{slurm_job_name}.{slurm_job_id}
|
|
57
|
+
file_path = (
|
|
58
|
+
log_dir
|
|
59
|
+
/ Path(f"{slurm_job_name}.{slurm_job_id}")
|
|
60
|
+
/ f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
|
|
44
61
|
)
|
|
45
|
-
|
|
46
|
-
|
|
62
|
+
if slurm_log_type == "json":
|
|
63
|
+
with file_path.open("r") as file:
|
|
64
|
+
json_content: dict[str, str] = json.load(file)
|
|
65
|
+
return json_content
|
|
66
|
+
else:
|
|
67
|
+
with file_path.open("r") as file:
|
|
68
|
+
return file.readlines()
|
|
47
69
|
except FileNotFoundError:
|
|
48
|
-
|
|
49
|
-
return "LOG_FILE_NOT_FOUND"
|
|
50
|
-
return lines
|
|
70
|
+
return f"LOG FILE NOT FOUND: {file_path}"
|
|
51
71
|
|
|
52
72
|
|
|
53
73
|
def is_server_running(
|
|
54
74
|
slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
|
|
55
75
|
) -> Union[str, tuple[str, str]]:
|
|
56
|
-
"""
|
|
57
|
-
Check if a model is ready to serve requests
|
|
58
|
-
"""
|
|
76
|
+
"""Check if a model is ready to serve requests."""
|
|
59
77
|
log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
|
|
60
78
|
if isinstance(log_content, str):
|
|
61
79
|
return log_content
|
|
@@ -72,25 +90,19 @@ def is_server_running(
|
|
|
72
90
|
|
|
73
91
|
|
|
74
92
|
def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
|
|
75
|
-
"""
|
|
76
|
-
|
|
77
|
-
"""
|
|
78
|
-
log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
|
|
93
|
+
"""Get the base URL of a model."""
|
|
94
|
+
log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
|
|
79
95
|
if isinstance(log_content, str):
|
|
80
96
|
return log_content
|
|
81
97
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
|
|
85
|
-
return "URL_NOT_FOUND"
|
|
98
|
+
server_addr = cast(dict[str, str], log_content).get("server_address")
|
|
99
|
+
return server_addr if server_addr else "URL NOT FOUND"
|
|
86
100
|
|
|
87
101
|
|
|
88
102
|
def model_health_check(
|
|
89
103
|
slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
|
|
90
|
-
) ->
|
|
91
|
-
"""
|
|
92
|
-
Check the health of a running model on the cluster
|
|
93
|
-
"""
|
|
104
|
+
) -> tuple[str, Union[str, int]]:
|
|
105
|
+
"""Check the health of a running model on the cluster."""
|
|
94
106
|
base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
|
|
95
107
|
if not base_url.startswith("http"):
|
|
96
108
|
return ("FAILED", base_url)
|
|
@@ -100,9 +112,8 @@ def model_health_check(
|
|
|
100
112
|
response = requests.get(health_check_url)
|
|
101
113
|
# Check if the request was successful
|
|
102
114
|
if response.status_code == 200:
|
|
103
|
-
return "READY"
|
|
104
|
-
|
|
105
|
-
return ("FAILED", response.status_code)
|
|
115
|
+
return ("READY", response.status_code)
|
|
116
|
+
return ("FAILED", response.status_code)
|
|
106
117
|
except requests.exceptions.RequestException as e:
|
|
107
118
|
return ("FAILED", str(e))
|
|
108
119
|
|
|
@@ -110,54 +121,42 @@ def model_health_check(
|
|
|
110
121
|
def create_table(
|
|
111
122
|
key_title: str = "", value_title: str = "", show_header: bool = True
|
|
112
123
|
) -> Table:
|
|
113
|
-
"""
|
|
114
|
-
Create a table for displaying model status
|
|
115
|
-
"""
|
|
124
|
+
"""Create a table for displaying model status."""
|
|
116
125
|
table = Table(show_header=show_header, header_style="bold magenta")
|
|
117
126
|
table.add_column(key_title, style="dim")
|
|
118
127
|
table.add_column(value_title)
|
|
119
128
|
return table
|
|
120
129
|
|
|
121
130
|
|
|
122
|
-
def
|
|
123
|
-
"""
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
|
|
129
|
-
"models/models.csv",
|
|
130
|
-
)
|
|
131
|
+
def load_config() -> list[ModelConfig]:
|
|
132
|
+
"""Load the model configuration."""
|
|
133
|
+
default_path = (
|
|
134
|
+
CACHED_CONFIG
|
|
135
|
+
if CACHED_CONFIG.exists()
|
|
136
|
+
else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
|
|
131
137
|
)
|
|
132
|
-
return models_df
|
|
133
138
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
""
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
latest_metric[key] = value
|
|
159
|
-
break
|
|
160
|
-
except Exception as e:
|
|
161
|
-
return f"[red]Error reading log file: {e}[/red]"
|
|
162
|
-
|
|
163
|
-
return latest_metric
|
|
139
|
+
config: dict[str, Any] = {}
|
|
140
|
+
with open(default_path) as f:
|
|
141
|
+
config = yaml.safe_load(f) or {}
|
|
142
|
+
|
|
143
|
+
user_path = os.getenv("VEC_INF_CONFIG")
|
|
144
|
+
if user_path:
|
|
145
|
+
user_path_obj = Path(user_path)
|
|
146
|
+
if user_path_obj.exists():
|
|
147
|
+
with open(user_path_obj) as f:
|
|
148
|
+
user_config = yaml.safe_load(f) or {}
|
|
149
|
+
for name, data in user_config.get("models", {}).items():
|
|
150
|
+
if name in config.get("models", {}):
|
|
151
|
+
config["models"][name].update(data)
|
|
152
|
+
else:
|
|
153
|
+
config.setdefault("models", {})[name] = data
|
|
154
|
+
else:
|
|
155
|
+
print(
|
|
156
|
+
f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
return [
|
|
160
|
+
ModelConfig(model_name=name, **model_data)
|
|
161
|
+
for name, model_data in config.get("models", {}).items()
|
|
162
|
+
]
|
|
@@ -148,8 +148,27 @@ More profiling metrics coming soon!
|
|
|
148
148
|
|:----------:|:----------:|:----------:|:----------:|
|
|
149
149
|
| [`QwQ-32B-Preview`](https://huggingface.co/Qwen/QwQ-32B-Preview) | 2x a40 | - tokens/s | - tokens/s |
|
|
150
150
|
|
|
151
|
+
### [DeepSeek-R1: Distilled Models](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d)
|
|
152
|
+
|
|
153
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
154
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
155
|
+
| [`DeepSeek-R1-Distill-Llama-8B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | 1x a40 | - tokens/s | - tokens/s |
|
|
156
|
+
| [`DeepSeek-R1-Distill-Llama-70B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) | 4x a40 | - tokens/s | - tokens/s |
|
|
157
|
+
| [`DeepSeek-R1-Distill-Qwen-1.5B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | 1x a40 | - tokens/s | - tokens/s |
|
|
158
|
+
| [`DeepSeek-R1-Distill-Qwen-7B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1x a40 | - tokens/s | - tokens/s |
|
|
159
|
+
| [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | 2x a40 | - tokens/s | - tokens/s |
|
|
160
|
+
| [`DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | 4x a40 | - tokens/s | - tokens/s |
|
|
161
|
+
|
|
162
|
+
|
|
151
163
|
## Vision Language Models
|
|
152
164
|
|
|
165
|
+
### [allenai: Molmo](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19)
|
|
166
|
+
|
|
167
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
168
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
169
|
+
| [`Molmo-7B-D-0924`](https://huggingface.co/allenai/Molmo-7B-D-0924) | 1x a40 | - tokens/s | - tokens/s |
|
|
170
|
+
|
|
171
|
+
|
|
153
172
|
### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)
|
|
154
173
|
|
|
155
174
|
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
@@ -169,6 +188,7 @@ More profiling metrics coming soon!
|
|
|
169
188
|
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
170
189
|
|:----------:|:----------:|:----------:|:----------:|
|
|
171
190
|
| [`Phi-3-vision-128k-instruct`](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) | 2x a40 | - tokens/s | - tokens/s |
|
|
191
|
+
| [`Phi-3.5-vision-instruct`](https://huggingface.co/microsoft/Phi-3.5-vision-instruct) | 2x a40 | - tokens/s | - tokens/s |
|
|
172
192
|
|
|
173
193
|
### [Meta: Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)
|
|
174
194
|
|
|
@@ -187,6 +207,27 @@ More profiling metrics coming soon!
|
|
|
187
207
|
|:----------:|:----------:|:----------:|:----------:|
|
|
188
208
|
| [`Pixtral-12B-2409`](https://huggingface.co/mistralai/Pixtral-12B-2409) | 1x a40 | - tokens/s | - tokens/s |
|
|
189
209
|
|
|
210
|
+
### [OpenGVLab: InternVL2.5](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c)
|
|
211
|
+
|
|
212
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
213
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
214
|
+
| [`InternVL2_5-8B`](https://huggingface.co/OpenGVLab/InternVL2_5-8B) | 1x a40 | - tokens/s | - tokens/s |
|
|
215
|
+
| [`InternVL2_5-26B`](https://huggingface.co/OpenGVLab/InternVL2_5-26B) | 2x a40 | - tokens/s | - tokens/s |
|
|
216
|
+
| [`InternVL2_5-38B`](https://huggingface.co/OpenGVLab/InternVL2_5-38B) | 4x a40 | - tokens/s | - tokens/s |
|
|
217
|
+
|
|
218
|
+
### [THUDM: GLM-4](https://huggingface.co/collections/THUDM/glm-4-665fcf188c414b03c2f7e3b7)
|
|
219
|
+
|
|
220
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
221
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
222
|
+
| [`glm-4v-9b`](https://huggingface.co/THUDM/glm-4v-9b) | 1x a40 | - tokens/s | - tokens/s |
|
|
223
|
+
|
|
224
|
+
### [DeepSeek: DeepSeek-VL2](https://huggingface.co/collections/deepseek-ai/deepseek-vl2-675c22accc456d3beb4613ab)
|
|
225
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
226
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
227
|
+
| [`deepseek-vl2`](https://huggingface.co/deepseek-ai/deepseek-vl2) | 2x a40 | - tokens/s | - tokens/s |
|
|
228
|
+
| [`deepseek-vl2-small`](https://huggingface.co/deepseek-ai/deepseek-vl2-small) | 1x a40 | - tokens/s | - tokens/s |
|
|
229
|
+
|
|
230
|
+
|
|
190
231
|
## Text Embedding Models
|
|
191
232
|
|
|
192
233
|
### [Liang Wang: e5](https://huggingface.co/intfloat)
|
|
@@ -194,6 +235,18 @@ More profiling metrics coming soon!
|
|
|
194
235
|
|:----------:|:----------:|:----------:|:----------:|
|
|
195
236
|
| [`e5-mistral-7b-instruct`](https://huggingface.co/intfloat/e5-mistral-7b-instruct) | 1x a40 | - tokens/s | - tokens/s |
|
|
196
237
|
|
|
238
|
+
### [BAAI: bge](https://huggingface.co/BAAI)
|
|
239
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
240
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
241
|
+
| [`bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5) | 1x a40 | - tokens/s | - tokens/s |
|
|
242
|
+
|
|
243
|
+
### [Sentence Transformers: MiniLM](https://huggingface.co/sentence-transformers)
|
|
244
|
+
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
245
|
+
|:----------:|:----------:|:----------:|:----------:|
|
|
246
|
+
| [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 1x a40 | - tokens/s | - tokens/s |
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
|
|
197
250
|
## Reward Modeling Models
|
|
198
251
|
|
|
199
252
|
### [Qwen: Qwen2.5-Math](https://huggingface.co/collections/Qwen/qwen25-math-66eaa240a1b7d5ee65f1da3e)
|
|
@@ -201,3 +254,4 @@ More profiling metrics coming soon!
|
|
|
201
254
|
| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
|
|
202
255
|
|:----------:|:----------:|:----------:|:----------:|
|
|
203
256
|
| [`Qwen2.5-Math-RM-72B`](https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B) | 4x a40 | - tokens/s | - tokens/s |
|
|
257
|
+
| [`Qwen2.5-Math-PRM-7B`](https://huggingface.co/Qwen/Qwen2.5-Math-PRM-7B) | 1x a40 | - tokens/s | - tokens/s |
|