vec-inf 0.4.0.post1__py3-none-any.whl → 0.5.0__py3-none-any.whl

vec_inf/cli/_utils.py CHANGED
@@ -1,61 +1,79 @@
+"""Utility functions for the CLI."""
+
+import json
 import os
 import subprocess
-from typing import Optional, Union, cast
+from pathlib import Path
+from typing import Any, Optional, Union, cast
 
-import polars as pl
 import requests
+import yaml
 from rich.table import Table
 
+from vec_inf.cli._config import ModelConfig
+
+
 MODEL_READY_SIGNATURE = "INFO: Application startup complete."
-SERVER_ADDRESS_SIGNATURE = "Server address: "
+CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")
 
 
-def run_bash_command(command: str) -> str:
-    """
-    Run a bash command and return the output
-    """
+def run_bash_command(command: str) -> tuple[str, str]:
+    """Run a bash command and return the output."""
     process = subprocess.Popen(
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
-    stdout, _ = process.communicate()
-    return stdout
+    return process.communicate()
 
 
 def read_slurm_log(
-    slurm_job_name: str, slurm_job_id: int, slurm_log_type: str, log_dir: Optional[str]
-) -> Union[list[str], str]:
-    """
-    Read the slurm log file
-    """
+    slurm_job_name: str,
+    slurm_job_id: int,
+    slurm_log_type: str,
+    log_dir: Optional[Union[str, Path]],
+) -> Union[list[str], str, dict[str, str]]:
+    """Read the slurm log file."""
     if not log_dir:
-        models_dir = os.path.join(os.path.expanduser("~"), ".vec-inf-logs")
-
-        for dir in sorted(os.listdir(models_dir), key=len, reverse=True):
-            if dir in slurm_job_name:
-                log_dir = os.path.join(models_dir, dir)
+        # Default log directory
+        models_dir = Path.home() / ".vec-inf-logs"
+        if not models_dir.exists():
+            return "LOG DIR NOT FOUND"
+        # Iterate over all dirs in models_dir, sorted by dir name length in descending order
+        for directory in sorted(
+            [d for d in models_dir.iterdir() if d.is_dir()],
+            key=lambda d: len(d.name),
+            reverse=True,
+        ):
+            if directory.name in slurm_job_name:
+                log_dir = directory
                 break
+    else:
+        log_dir = Path(log_dir)
 
-    log_dir = cast(str, log_dir)
+    # If log_dir is still not set, the log dir was not found at the default location
+    if not log_dir:
+        return "LOG DIR NOT FOUND"
 
     try:
-        file_path = os.path.join(
-            log_dir,
-            f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}",
+        file_path = (
+            log_dir
+            / Path(f"{slurm_job_name}.{slurm_job_id}")
+            / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
         )
-        with open(file_path, "r") as file:
-            lines = file.readlines()
+        if slurm_log_type == "json":
+            with file_path.open("r") as file:
+                json_content: dict[str, str] = json.load(file)
+                return json_content
+        else:
+            with file_path.open("r") as file:
+                return file.readlines()
     except FileNotFoundError:
-        print(f"Could not find file: {file_path}")
-        return "LOG_FILE_NOT_FOUND"
-    return lines
+        return f"LOG FILE NOT FOUND: {file_path}"
 
 
 def is_server_running(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
 ) -> Union[str, tuple[str, str]]:
-    """
-    Check if a model is ready to serve requests
-    """
+    """Check if a model is ready to serve requests."""
     log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
     if isinstance(log_content, str):
         return log_content
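
Note: two call-site-visible changes land in this hunk: `run_bash_command` now returns a `(stdout, stderr)` tuple, and `read_slurm_log` resolves logs from a per-job subdirectory and can return parsed JSON. A minimal usage sketch, where the job name, job ID, and directory layout are illustrative assumptions rather than values taken from the diff:

```python
from vec_inf.cli._utils import read_slurm_log, run_bash_command

# run_bash_command now returns (stdout, stderr) instead of stdout alone.
stdout, stderr = run_bash_command("squeue --me")

# read_slurm_log now expects a per-job subdirectory, i.e. for a hypothetical
# job "Meta-Llama-3.1-8B" with Slurm job ID 1234 and log type "err" it reads:
#   <log_dir>/Meta-Llama-3.1-8B.1234/Meta-Llama-3.1-8B.1234.err
lines = read_slurm_log("Meta-Llama-3.1-8B", 1234, "err", None)
```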
@@ -72,25 +90,19 @@ def is_server_running(
 
 
 def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
-    """
-    Get the base URL of a model
-    """
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
+    """Get the base URL of a model."""
+    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
     if isinstance(log_content, str):
         return log_content
 
-    for line in log_content:
-        if SERVER_ADDRESS_SIGNATURE in line:
-            return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
-    return "URL_NOT_FOUND"
+    server_addr = cast(dict[str, str], log_content).get("server_address")
+    return server_addr if server_addr else "URL NOT FOUND"
 
 
 def model_health_check(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) -> Union[str, tuple[str, Union[str, int]]]:
-    """
-    Check the health of a running model on the cluster
-    """
+) -> tuple[str, Union[str, int]]:
+    """Check the health of a running model on the cluster."""
     base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
     if not base_url.startswith("http"):
         return ("FAILED", base_url)
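
Note: `get_base_url` now reads the new `json` log type instead of scanning the `.out` log for `SERVER_ADDRESS_SIGNATURE`. A hedged sketch of the JSON shape this implies; only the `server_address` key is actually read, and the value below is a placeholder:

```python
# Assumed contents of the new `.json` log file (illustrative endpoint).
log_content = {"server_address": "http://gpu001:8080/v1"}

# Mirrors the lookup in get_base_url above.
server_addr = log_content.get("server_address")
base_url = server_addr if server_addr else "URL NOT FOUND"
```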
@@ -100,9 +112,8 @@ def model_health_check(
         response = requests.get(health_check_url)
         # Check if the request was successful
         if response.status_code == 200:
-            return "READY"
-        else:
-            return ("FAILED", response.status_code)
+            return ("READY", response.status_code)
+        return ("FAILED", response.status_code)
     except requests.exceptions.RequestException as e:
         return ("FAILED", str(e))
 
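Note: `model_health_check` now returns a `(status, detail)` tuple in every branch, including the ready case. A usage sketch under that assumption (the job name and ID are hypothetical):

```python
from vec_inf.cli._utils import model_health_check

status, detail = model_health_check("Meta-Llama-3.1-8B", 1234, None)
if status == "READY":
    print(f"Server healthy (HTTP {detail})")
else:
    print(f"Health check failed: {detail}")
```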
@@ -110,54 +121,42 @@ def model_health_check(
 def create_table(
     key_title: str = "", value_title: str = "", show_header: bool = True
 ) -> Table:
-    """
-    Create a table for displaying model status
-    """
+    """Create a table for displaying model status."""
     table = Table(show_header=show_header, header_style="bold magenta")
     table.add_column(key_title, style="dim")
     table.add_column(value_title)
     return table
 
 
-def load_models_df() -> pl.DataFrame:
-    """
-    Load the models dataframe
-    """
-    models_df = pl.read_csv(
-        os.path.join(
-            os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
-            "models/models.csv",
-        )
+def load_config() -> list[ModelConfig]:
+    """Load the model configuration."""
+    default_path = (
+        CACHED_CONFIG
+        if CACHED_CONFIG.exists()
+        else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )
-    return models_df
 
-
-def load_default_args(models_df: pl.DataFrame, model_name: str) -> dict:
-    """
-    Load the default arguments for a model
-    """
-    row_data = models_df.filter(models_df["model_name"] == model_name)
-    default_args = row_data.to_dicts()[0]
-    default_args.pop("model_name", None)
-    default_args.pop("model_type", None)
-    return default_args
-
-
-def get_latest_metric(log_lines: list[str]) -> dict | str:
-    """Read the latest metric entry from the log file."""
-    latest_metric = {}
-
-    try:
-        for line in reversed(log_lines):
-            if "Avg prompt throughput" in line:
-                # Parse the metric values from the line
-                metrics_str = line.split("] ")[1].strip().strip(".")
-                metrics_list = metrics_str.split(", ")
-                for metric in metrics_list:
-                    key, value = metric.split(": ")
-                    latest_metric[key] = value
-                break
-    except Exception as e:
-        return f"[red]Error reading log file: {e}[/red]"
-
-    return latest_metric
+    config: dict[str, Any] = {}
+    with open(default_path) as f:
+        config = yaml.safe_load(f) or {}
+
+    user_path = os.getenv("VEC_INF_CONFIG")
+    if user_path:
+        user_path_obj = Path(user_path)
+        if user_path_obj.exists():
+            with open(user_path_obj) as f:
+                user_config = yaml.safe_load(f) or {}
+            for name, data in user_config.get("models", {}).items():
+                if name in config.get("models", {}):
+                    config["models"][name].update(data)
+                else:
+                    config.setdefault("models", {})[name] = data
+        else:
+            print(
+                f"WARNING: Could not find user config: {user_path}, reverting to the default config at {default_path}"
+            )
+
+    return [
+        ModelConfig(model_name=name, **model_data)
+        for name, model_data in config.get("models", {}).items()
+    ]
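
Note: `load_config` replaces the old CSV-backed `load_models_df`/`load_default_args` pair. It loads the cluster-cached `models.yaml` when present (falling back to the copy bundled with the package), then applies a per-model overlay from the file named by the `VEC_INF_CONFIG` environment variable. A minimal sketch of the merge semantics with made-up model names and keys (the real schema lives in `vec_inf/cli/_config.py`, which this diff does not show):

```python
# Made-up entries to illustrate the merge in load_config; key names here are
# assumptions, not the actual ModelConfig schema.
config = {"models": {"model-a": {"num_gpus": 1, "partition": "a40"}}}
user_config = {"models": {"model-a": {"num_gpus": 2}, "model-b": {"num_gpus": 1}}}

for name, data in user_config.get("models", {}).items():
    if name in config.get("models", {}):
        config["models"][name].update(data)  # existing entry: per-key override
    else:
        config.setdefault("models", {})[name] = data  # new entry: added wholesale

assert config["models"]["model-a"] == {"num_gpus": 2, "partition": "a40"}
assert "model-b" in config["models"]
```

The merge is shallow: a user entry overrides matching top-level keys but keeps the defaults for keys it does not mention.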
@@ -148,8 +148,27 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`QwQ-32B-Preview`](https://huggingface.co/Qwen/QwQ-32B-Preview) | 2x a40 | - tokens/s | - tokens/s |
 
+### [DeepSeek-R1: Distilled Models](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`DeepSeek-R1-Distill-Llama-8B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Llama-70B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) | 4x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-1.5B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-7B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | 2x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | 4x a40 | - tokens/s | - tokens/s |
+
+
 ## Vision Language Models
 
+### [allenai: Molmo](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`Molmo-7B-D-0924`](https://huggingface.co/allenai/Molmo-7B-D-0924) | 1x a40 | - tokens/s | - tokens/s |
+
+
 ### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)
 
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
@@ -169,6 +188,7 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Phi-3-vision-128k-instruct`](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) | 2x a40 | - tokens/s | - tokens/s |
+| [`Phi-3.5-vision-instruct`](https://huggingface.co/microsoft/Phi-3.5-vision-instruct) | 2x a40 | - tokens/s | - tokens/s |
 
 ### [Meta: Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)
 
@@ -187,6 +207,27 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`Pixtral-12B-2409`](https://huggingface.co/mistralai/Pixtral-12B-2409) | 1x a40 | - tokens/s | - tokens/s |
 
+### [OpenGVLab: InternVL2.5](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`InternVL2_5-8B`](https://huggingface.co/OpenGVLab/InternVL2_5-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-26B`](https://huggingface.co/OpenGVLab/InternVL2_5-26B) | 2x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-38B`](https://huggingface.co/OpenGVLab/InternVL2_5-38B) | 4x a40 | - tokens/s | - tokens/s |
+
+### [THUDM: GLM-4](https://huggingface.co/collections/THUDM/glm-4-665fcf188c414b03c2f7e3b7)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`glm-4v-9b`](https://huggingface.co/THUDM/glm-4v-9b) | 1x a40 | - tokens/s | - tokens/s |
+
+### [DeepSeek: DeepSeek-VL2](https://huggingface.co/collections/deepseek-ai/deepseek-vl2-675c22accc456d3beb4613ab)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`deepseek-vl2`](https://huggingface.co/deepseek-ai/deepseek-vl2) | 2x a40 | - tokens/s | - tokens/s |
+| [`deepseek-vl2-small`](https://huggingface.co/deepseek-ai/deepseek-vl2-small) | 1x a40 | - tokens/s | - tokens/s |
+
+
 ## Text Embedding Models
 
 ### [Liang Wang: e5](https://huggingface.co/intfloat)
@@ -194,6 +235,18 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`e5-mistral-7b-instruct`](https://huggingface.co/intfloat/e5-mistral-7b-instruct) | 1x a40 | - tokens/s | - tokens/s |
 
+### [BAAI: bge](https://huggingface.co/BAAI)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5) | 1x a40 | - tokens/s | - tokens/s |
+
+### [Sentence Transformers: MiniLM](https://huggingface.co/sentence-transformers)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 1x a40 | - tokens/s | - tokens/s |
+
+
+
 ## Reward Modeling Models
 
 ### [Qwen: Qwen2.5-Math](https://huggingface.co/collections/Qwen/qwen25-math-66eaa240a1b7d5ee65f1da3e)
@@ -201,3 +254,4 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Qwen2.5-Math-RM-72B`](https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B) | 4x a40 | - tokens/s | - tokens/s |
+| [`Qwen2.5-Math-PRM-7B`](https://huggingface.co/Qwen/Qwen2.5-Math-PRM-7B) | 1x a40 | - tokens/s | - tokens/s |