vec-inf 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/cli/_cli.py +79 -287
- vec_inf/cli/_config.py +87 -0
- vec_inf/cli/_helper.py +675 -0
- vec_inf/cli/_utils.py +77 -62
- vec_inf/{models → config}/README.md +30 -0
- vec_inf/config/models.yaml +1274 -0
- vec_inf/multinode_vllm.slurm +61 -31
- vec_inf/vllm.slurm +55 -24
- vec_inf-0.5.0.dist-info/METADATA +210 -0
- vec_inf-0.5.0.dist-info/RECORD +17 -0
- vec_inf/launch_server.sh +0 -145
- vec_inf/models/models.csv +0 -85
- vec_inf-0.4.1.dist-info/METADATA +0 -121
- vec_inf-0.4.1.dist-info/RECORD +0 -16
- {vec_inf-0.4.1.dist-info → vec_inf-0.5.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.5.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.5.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/cli/_utils.py
CHANGED
@@ -1,52 +1,73 @@
 """Utility functions for the CLI."""

+import json
 import os
 import subprocess
-from
+from pathlib import Path
+from typing import Any, Optional, Union, cast

-import polars as pl
 import requests
+import yaml
 from rich.table import Table

+from vec_inf.cli._config import ModelConfig
+

 MODEL_READY_SIGNATURE = "INFO: Application startup complete."
-
+CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")


-def run_bash_command(command: str) -> str:
+def run_bash_command(command: str) -> tuple[str, str]:
     """Run a bash command and return the output."""
     process = subprocess.Popen(
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
     )
-
-    return stdout
+    return process.communicate()


 def read_slurm_log(
-    slurm_job_name: str,
-
+    slurm_job_name: str,
+    slurm_job_id: int,
+    slurm_log_type: str,
+    log_dir: Optional[Union[str, Path]],
+) -> Union[list[str], str, dict[str, str]]:
     """Read the slurm log file."""
     if not log_dir:
-
-
-
-
-
+        # Default log directory
+        models_dir = Path.home() / ".vec-inf-logs"
+        if not models_dir.exists():
+            return "LOG DIR NOT FOUND"
+        # Iterate over all dirs in models_dir, sorted by dir name length in desc order
+        for directory in sorted(
+            [d for d in models_dir.iterdir() if d.is_dir()],
+            key=lambda d: len(d.name),
+            reverse=True,
+        ):
+            if directory.name in slurm_job_name:
+                log_dir = directory
                 break
+    else:
+        log_dir = Path(log_dir)

-    log_dir
+    # If log_dir is still not set, then didn't find the log dir at default location
+    if not log_dir:
+        return "LOG DIR NOT FOUND"

     try:
-        file_path =
-        log_dir
-        f"{slurm_job_name}.{slurm_job_id}
+        file_path = (
+            log_dir
+            / Path(f"{slurm_job_name}.{slurm_job_id}")
+            / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
         )
-
-
+        if slurm_log_type == "json":
+            with file_path.open("r") as file:
+                json_content: dict[str, str] = json.load(file)
+                return json_content
+        else:
+            with file_path.open("r") as file:
+                return file.readlines()
     except FileNotFoundError:
-
-        return "LOG_FILE_NOT_FOUND"
-    return lines
+        return f"LOG FILE NOT FOUND: {file_path}"


 def is_server_running(
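Note: `run_bash_command` now returns both output streams as a `(stdout, stderr)` tuple instead of a single string, so 0.4.x callers that bound a single return value need updating. A minimal sketch of an adapted caller (the `squeue` command is only illustrative, not taken from the diff):

```python
# Hypothetical caller updated for the 0.5.0 signature shown above.
from vec_inf.cli._utils import run_bash_command

stdout, stderr = run_bash_command("squeue --me")  # 0.4.1 returned stdout only
if stderr:
    print(f"Command reported an error: {stderr}")
else:
    print(stdout)
```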
@@ -70,19 +91,17 @@ def is_server_running(

 def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
     """Get the base URL of a model."""
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "
+    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
     if isinstance(log_content, str):
         return log_content

-
-
-        return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
-    return "URL_NOT_FOUND"
+    server_addr = cast(dict[str, str], log_content).get("server_address")
+    return server_addr if server_addr else "URL NOT FOUND"


 def model_health_check(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) ->
+) -> tuple[str, Union[str, int]]:
     """Check the health of a running model on the cluster."""
     base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
     if not base_url.startswith("http"):
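Note: `get_base_url` no longer scans the text log for a server-address signature; it reads the new `.json` log through `read_slurm_log` and looks up a `server_address` key. A minimal sketch of that contract (the file contents are a hypothetical example; the only key the diff shows being read is `server_address`):

```python
import json

# Hypothetical contents of the <job_name>.<job_id>.json log file.
log_content = json.loads('{"server_address": "http://gpu001:8080/v1"}')

# Mirrors the lookup get_base_url performs on the parsed log.
server_addr = log_content.get("server_address")
print(server_addr if server_addr else "URL NOT FOUND")
```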
@@ -109,39 +128,35 @@ def create_table(
     return table


-def
-    """Load the
-
-
-
-
-    )
+def load_config() -> list[ModelConfig]:
+    """Load the model configuration."""
+    default_path = (
+        CACHED_CONFIG
+        if CACHED_CONFIG.exists()
+        else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return f"[red]Error reading log file: {e}[/red]"
-
-    return latest_metric
+    config: dict[str, Any] = {}
+    with open(default_path) as f:
+        config = yaml.safe_load(f) or {}
+
+    user_path = os.getenv("VEC_INF_CONFIG")
+    if user_path:
+        user_path_obj = Path(user_path)
+        if user_path_obj.exists():
+            with open(user_path_obj) as f:
+                user_config = yaml.safe_load(f) or {}
+            for name, data in user_config.get("models", {}).items():
+                if name in config.get("models", {}):
+                    config["models"][name].update(data)
+                else:
+                    config.setdefault("models", {})[name] = data
+        else:
+            print(
+                f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
+            )
+
+    return [
+        ModelConfig(model_name=name, **model_data)
+        for name, model_data in config.get("models", {}).items()
+    ]
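Note: the CSV-based registry of 0.4.1 (`models.csv` read via `polars`) is replaced by `load_config`, which prefers the shared cluster copy of `models.yaml`, falls back to the packaged one, and lets a YAML file pointed to by `VEC_INF_CONFIG` shallow-merge per-model overrides on top. A runnable sketch of the merge semantics (the model names and fields below are illustrative, not taken from the packaged `models.yaml`):

```python
import yaml

# Illustrative default and user configs; the real keys live in
# vec_inf/config/models.yaml and are not shown in this diff.
default_cfg = yaml.safe_load("""
models:
  example-model-7B:
    num_gpus: 1
    partition: a40
""")
user_cfg = yaml.safe_load("""
models:
  example-model-7B:
    num_gpus: 2      # overrides the default value
  my-local-model:    # unknown names are added alongside the defaults
    num_gpus: 1
""")

# Same shallow per-model update that load_config applies.
for name, data in user_cfg.get("models", {}).items():
    if name in default_cfg.get("models", {}):
        default_cfg["models"][name].update(data)
    else:
        default_cfg.setdefault("models", {})[name] = data

print(default_cfg["models"]["example-model-7B"])
# -> {'num_gpus': 2, 'partition': 'a40'}
```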
vec_inf/{models → config}/README.md
RENAMED
@@ -162,6 +162,13 @@ More profiling metrics coming soon!

 ## Vision Language Models

+### [allenai: Molmo](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`Molmo-7B-D-0924`](https://huggingface.co/allenai/Molmo-7B-D-0924) | 1x a40 | - tokens/s | - tokens/s |
+
+
 ### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)

 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
@@ -181,6 +188,7 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Phi-3-vision-128k-instruct`](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) | 2x a40 | - tokens/s | - tokens/s |
+| [`Phi-3.5-vision-instruct`](https://huggingface.co/microsoft/Phi-3.5-vision-instruct) | 2x a40 | - tokens/s | - tokens/s |

 ### [Meta: Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)

@@ -199,6 +207,27 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`Pixtral-12B-2409`](https://huggingface.co/mistralai/Pixtral-12B-2409) | 1x a40 | - tokens/s | - tokens/s |

+### [OpenGVLab: InternVL2.5](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`InternVL2_5-8B`](https://huggingface.co/OpenGVLab/InternVL2_5-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-26B`](https://huggingface.co/OpenGVLab/InternVL2_5-26B) | 2x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-38B`](https://huggingface.co/OpenGVLab/InternVL2_5-38B) | 4x a40 | - tokens/s | - tokens/s |
+
+### [THUDM: GLM-4](https://huggingface.co/collections/THUDM/glm-4-665fcf188c414b03c2f7e3b7)
+
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`glm-4v-9b`](https://huggingface.co/THUDM/glm-4v-9b) | 1x a40 | - tokens/s | - tokens/s |
+
+### [DeepSeek: DeepSeek-VL2](https://huggingface.co/collections/deepseek-ai/deepseek-vl2-675c22accc456d3beb4613ab)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`deepseek-vl2`](https://huggingface.co/deepseek-ai/deepseek-vl2) | 2x a40 | - tokens/s | - tokens/s |
+| [`deepseek-vl2-small`](https://huggingface.co/deepseek-ai/deepseek-vl2-small) | 1x a40 | - tokens/s | - tokens/s |
+
+
 ## Text Embedding Models

 ### [Liang Wang: e5](https://huggingface.co/intfloat)
@@ -225,3 +254,4 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Qwen2.5-Math-RM-72B`](https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B) | 4x a40 | - tokens/s | - tokens/s |
+| [`Qwen2.5-Math-PRM-7B`](https://huggingface.co/Qwen/Qwen2.5-Math-PRM-7B) | 1x a40 | - tokens/s | - tokens/s |