vec-inf 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/cli/_utils.py CHANGED
@@ -1,52 +1,73 @@
1
1
  """Utility functions for the CLI."""
2
2
 
3
+ import json
3
4
  import os
4
5
  import subprocess
5
- from typing import Dict, List, Optional, Tuple, Union, cast
6
+ from pathlib import Path
7
+ from typing import Any, Optional, Union, cast
6
8
 
7
- import polars as pl
8
9
  import requests
10
+ import yaml
9
11
  from rich.table import Table
10
12
 
13
+ from vec_inf.cli._config import ModelConfig
14
+
11
15
 
12
16
  MODEL_READY_SIGNATURE = "INFO: Application startup complete."
13
- SERVER_ADDRESS_SIGNATURE = "Server address: "
17
+ CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")
14
18
 
15
19
 
16
- def run_bash_command(command: str) -> str:
20
+ def run_bash_command(command: str) -> tuple[str, str]:
17
21
  """Run a bash command and return the output."""
18
22
  process = subprocess.Popen(
19
23
  command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
20
24
  )
21
- stdout, _ = process.communicate()
22
- return stdout
25
+ return process.communicate()
23
26
 
24
27
 
25
28
  def read_slurm_log(
26
- slurm_job_name: str, slurm_job_id: int, slurm_log_type: str, log_dir: Optional[str]
27
- ) -> Union[list[str], str]:
29
+ slurm_job_name: str,
30
+ slurm_job_id: int,
31
+ slurm_log_type: str,
32
+ log_dir: Optional[Union[str, Path]],
33
+ ) -> Union[list[str], str, dict[str, str]]:
28
34
  """Read the slurm log file."""
29
35
  if not log_dir:
30
- models_dir = os.path.join(os.path.expanduser("~"), ".vec-inf-logs")
31
-
32
- for directory in sorted(os.listdir(models_dir), key=len, reverse=True):
33
- if directory in slurm_job_name:
34
- log_dir = os.path.join(models_dir, directory)
36
+ # Default log directory
37
+ models_dir = Path.home() / ".vec-inf-logs"
38
+ if not models_dir.exists():
39
+ return "LOG DIR NOT FOUND"
40
+ # Iterate over all dirs in models_dir, sorted by dir name length in desc order
41
+ for directory in sorted(
42
+ [d for d in models_dir.iterdir() if d.is_dir()],
43
+ key=lambda d: len(d.name),
44
+ reverse=True,
45
+ ):
46
+ if directory.name in slurm_job_name:
47
+ log_dir = directory
35
48
  break
49
+ else:
50
+ log_dir = Path(log_dir)
36
51
 
37
- log_dir = cast(str, log_dir)
52
+ # If log_dir is still not set, no matching log dir was found at the default location
53
+ if not log_dir:
54
+ return "LOG DIR NOT FOUND"
38
55
 
39
56
  try:
40
- file_path = os.path.join(
41
- log_dir,
42
- f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}",
57
+ file_path = (
58
+ log_dir
59
+ / Path(f"{slurm_job_name}.{slurm_job_id}")
60
+ / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
43
61
  )
44
- with open(file_path, "r") as file:
45
- lines = file.readlines()
62
+ if slurm_log_type == "json":
63
+ with file_path.open("r") as file:
64
+ json_content: dict[str, str] = json.load(file)
65
+ return json_content
66
+ else:
67
+ with file_path.open("r") as file:
68
+ return file.readlines()
46
69
  except FileNotFoundError:
47
- print(f"Could not find file: {file_path}")
48
- return "LOG_FILE_NOT_FOUND"
49
- return lines
70
+ return f"LOG FILE NOT FOUND: {file_path}"
50
71
 
51
72
 
52
73
  def is_server_running(
@@ -70,19 +91,17 @@ def is_server_running(
70
91
 
71
92
  def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
72
93
  """Get the base URL of a model."""
73
- log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
94
+ log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
74
95
  if isinstance(log_content, str):
75
96
  return log_content
76
97
 
77
- for line in log_content:
78
- if SERVER_ADDRESS_SIGNATURE in line:
79
- return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
80
- return "URL_NOT_FOUND"
98
+ server_addr = cast(dict[str, str], log_content).get("server_address")
99
+ return server_addr if server_addr else "URL NOT FOUND"
81
100
 
82
101
 
83
102
  def model_health_check(
84
103
  slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
85
- ) -> Tuple[str, Union[str, int]]:
104
+ ) -> tuple[str, Union[str, int]]:
86
105
  """Check the health of a running model on the cluster."""
87
106
  base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
88
107
  if not base_url.startswith("http"):
@@ -109,39 +128,35 @@ def create_table(
109
128
  return table
110
129
 
111
130
 
112
- def load_models_df() -> pl.DataFrame:
113
- """Load the models dataframe."""
114
- return pl.read_csv(
115
- os.path.join(
116
- os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
117
- "models/models.csv",
118
- )
131
+ def load_config() -> list[ModelConfig]:
132
+ """Load the model configuration."""
133
+ default_path = (
134
+ CACHED_CONFIG
135
+ if CACHED_CONFIG.exists()
136
+ else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
119
137
  )
120
138
 
121
-
122
- def load_default_args(models_df: pl.DataFrame, model_name: str) -> Dict[str, str]:
123
- """Load the default arguments for a model."""
124
- row_data = models_df.filter(models_df["model_name"] == model_name)
125
- default_args = row_data.to_dicts()[0]
126
- default_args.pop("model_name", None)
127
- return default_args
128
-
129
-
130
- def get_latest_metric(log_lines: List[str]) -> Union[str, Dict[str, str]]:
131
- """Read the latest metric entry from the log file."""
132
- latest_metric = {}
133
-
134
- try:
135
- for line in reversed(log_lines):
136
- if "Avg prompt throughput" in line:
137
- # Parse the metric values from the line
138
- metrics_str = line.split("] ")[1].strip().strip(".")
139
- metrics_list = metrics_str.split(", ")
140
- for metric in metrics_list:
141
- key, value = metric.split(": ")
142
- latest_metric[key] = value
143
- break
144
- except Exception as e:
145
- return f"[red]Error reading log file: {e}[/red]"
146
-
147
- return latest_metric
139
+ config: dict[str, Any] = {}
140
+ with open(default_path) as f:
141
+ config = yaml.safe_load(f) or {}
142
+
143
+ user_path = os.getenv("VEC_INF_CONFIG")
144
+ if user_path:
145
+ user_path_obj = Path(user_path)
146
+ if user_path_obj.exists():
147
+ with open(user_path_obj) as f:
148
+ user_config = yaml.safe_load(f) or {}
149
+ for name, data in user_config.get("models", {}).items():
150
+ if name in config.get("models", {}):
151
+ config["models"][name].update(data)
152
+ else:
153
+ config.setdefault("models", {})[name] = data
154
+ else:
155
+ print(
156
+ f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
157
+ )
158
+
159
+ return [
160
+ ModelConfig(model_name=name, **model_data)
161
+ for name, model_data in config.get("models", {}).items()
162
+ ]
@@ -162,6 +162,13 @@ More profiling metrics coming soon!
162
162
 
163
163
  ## Vision Language Models
164
164
 
165
+ ### [allenai: Molmo](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19)
166
+
167
+ | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
168
+ |:----------:|:----------:|:----------:|:----------:|
169
+ | [`Molmo-7B-D-0924`](https://huggingface.co/allenai/Molmo-7B-D-0924) | 1x a40 | - tokens/s | - tokens/s |
170
+
171
+
165
172
  ### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)
166
173
 
167
174
  | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
@@ -181,6 +188,7 @@ More profiling metrics coming soon!
181
188
  | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
182
189
  |:----------:|:----------:|:----------:|:----------:|
183
190
  | [`Phi-3-vision-128k-instruct`](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) | 2x a40 | - tokens/s | - tokens/s |
191
+ | [`Phi-3.5-vision-instruct`](https://huggingface.co/microsoft/Phi-3.5-vision-instruct) | 2x a40 | - tokens/s | - tokens/s |
184
192
 
185
193
  ### [Meta: Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)
186
194
 
@@ -199,6 +207,27 @@ More profiling metrics coming soon!
199
207
  |:----------:|:----------:|:----------:|:----------:|
200
208
  | [`Pixtral-12B-2409`](https://huggingface.co/mistralai/Pixtral-12B-2409) | 1x a40 | - tokens/s | - tokens/s |
201
209
 
210
+ ### [OpenGVLab: InternVL2.5](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c)
211
+
212
+ | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
213
+ |:----------:|:----------:|:----------:|:----------:|
214
+ | [`InternVL2_5-8B`](https://huggingface.co/OpenGVLab/InternVL2_5-8B) | 1x a40 | - tokens/s | - tokens/s |
215
+ | [`InternVL2_5-26B`](https://huggingface.co/OpenGVLab/InternVL2_5-26B) | 2x a40 | - tokens/s | - tokens/s |
216
+ | [`InternVL2_5-38B`](https://huggingface.co/OpenGVLab/InternVL2_5-38B) | 4x a40 | - tokens/s | - tokens/s |
217
+
218
+ ### [THUDM: GLM-4](https://huggingface.co/collections/THUDM/glm-4-665fcf188c414b03c2f7e3b7)
219
+
220
+ | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
221
+ |:----------:|:----------:|:----------:|:----------:|
222
+ | [`glm-4v-9b`](https://huggingface.co/THUDM/glm-4v-9b) | 1x a40 | - tokens/s | - tokens/s |
223
+
224
+ ### [DeepSeek: DeepSeek-VL2](https://huggingface.co/collections/deepseek-ai/deepseek-vl2-675c22accc456d3beb4613ab)
225
+ | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
226
+ |:----------:|:----------:|:----------:|:----------:|
227
+ | [`deepseek-vl2`](https://huggingface.co/deepseek-ai/deepseek-vl2) | 2x a40 | - tokens/s | - tokens/s |
228
+ | [`deepseek-vl2-small`](https://huggingface.co/deepseek-ai/deepseek-vl2-small) | 1x a40 | - tokens/s | - tokens/s |
229
+
230
+
202
231
  ## Text Embedding Models
203
232
 
204
233
  ### [Liang Wang: e5](https://huggingface.co/intfloat)
@@ -225,3 +254,4 @@ More profiling metrics coming soon!
225
254
  | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
226
255
  |:----------:|:----------:|:----------:|:----------:|
227
256
  | [`Qwen2.5-Math-RM-72B`](https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B) | 4x a40 | - tokens/s | - tokens/s |
257
+ | [`Qwen2.5-Math-PRM-7B`](https://huggingface.co/Qwen/Qwen2.5-Math-PRM-7B) | 1x a40 | - tokens/s | - tokens/s |