PyPI - vec-inf - Versions diffs - 0.4.0.post1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

vec-inf 0.4.0.post1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (20) hide show

vec_inf/__init__.py +1 -0
vec_inf/cli/__init__.py +1 -0
vec_inf/cli/_cli.py +88 -243
vec_inf/cli/_config.py +87 -0
vec_inf/cli/_helper.py +675 -0
vec_inf/cli/_utils.py +88 -89
vec_inf/{models → config}/README.md +54 -0
vec_inf/config/models.yaml +1274 -0
vec_inf/multinode_vllm.slurm +61 -29
vec_inf/vllm.slurm +55 -22
vec_inf-0.5.0.dist-info/METADATA +210 -0
vec_inf-0.5.0.dist-info/RECORD +17 -0
{vec_inf-0.4.0.post1.dist-info → vec_inf-0.5.0.dist-info}/WHEEL +1 -1
vec_inf-0.5.0.dist-info/entry_points.txt +2 -0
vec_inf/launch_server.sh +0 -126
vec_inf/models/models.csv +0 -73
vec_inf-0.4.0.post1.dist-info/METADATA +0 -120
vec_inf-0.4.0.post1.dist-info/RECORD +0 -16
vec_inf-0.4.0.post1.dist-info/entry_points.txt +0 -3
{vec_inf-0.4.0.post1.dist-info → vec_inf-0.5.0.dist-info/licenses}/LICENSE +0 -0

vec_inf/cli/_utils.py CHANGED Viewed

@@ -1,61 +1,79 @@
+"""Utility functions for the CLI."""
+import json
 import os
 import subprocess
-from typing import Optional, Union, cast
+from pathlib import Path
+from typing import Any, Optional, Union, cast
-import polars as pl
 import requests
+import yaml
 from rich.table import Table
+from vec_inf.cli._config import ModelConfig
 MODEL_READY_SIGNATURE = "INFO:     Application startup complete."
-SERVER_ADDRESS_SIGNATURE = "Server address: "
+CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")
-def run_bash_command(command: str) -> str:
-    """
-    Run a bash command and return the output
-    """
+def run_bash_command(command: str) -> tuple[str, str]:
+    """Run a bash command and return the output."""
     process = subprocess.Popen(
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
     )
-    stdout, _ = process.communicate()
-    return stdout
+    return process.communicate()
 def read_slurm_log(
-    slurm_job_name: str, slurm_job_id: int, slurm_log_type: str, log_dir: Optional[str]
-) -> Union[list[str], str]:
-    """
-    Read the slurm log file
-    """
+    slurm_job_name: str,
+    slurm_job_id: int,
+    slurm_log_type: str,
+    log_dir: Optional[Union[str, Path]],
+) -> Union[list[str], str, dict[str, str]]:
+    """Read the slurm log file."""
     if not log_dir:
-        models_dir = os.path.join(os.path.expanduser("~"), ".vec-inf-logs")
-        for dir in sorted(os.listdir(models_dir), key=len, reverse=True):
-            if dir in slurm_job_name:
-                log_dir = os.path.join(models_dir, dir)
+        # Default log directory
+        models_dir = Path.home() / ".vec-inf-logs"
+        if not models_dir.exists():
+            return "LOG DIR NOT FOUND"
+        # Iterate over all dirs in models_dir, sorted by dir name length in desc order
+        for directory in sorted(
+            [d for d in models_dir.iterdir() if d.is_dir()],
+            key=lambda d: len(d.name),
+            reverse=True,
+        ):
+            if directory.name in slurm_job_name:
+                log_dir = directory
                 break
+    else:
+        log_dir = Path(log_dir)
-    log_dir = cast(str, log_dir)
+    # If log_dir is still not set, then didn't find the log dir at default location
+    if not log_dir:
+        return "LOG DIR NOT FOUND"
     try:
-        file_path = os.path.join(
-            log_dir,
-            f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}",
+        file_path = (
+            log_dir
+            / Path(f"{slurm_job_name}.{slurm_job_id}")
+            / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
         )
-        with open(file_path, "r") as file:
-            lines = file.readlines()
+        if slurm_log_type == "json":
+            with file_path.open("r") as file:
+                json_content: dict[str, str] = json.load(file)
+                return json_content
+        else:
+            with file_path.open("r") as file:
+                return file.readlines()
     except FileNotFoundError:
-        print(f"Could not find file: {file_path}")
-        return "LOG_FILE_NOT_FOUND"
-    return lines
+        return f"LOG FILE NOT FOUND: {file_path}"
 def is_server_running(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
 ) -> Union[str, tuple[str, str]]:
-    """
-    Check if a model is ready to serve requests
-    """
+    """Check if a model is ready to serve requests."""
     log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
     if isinstance(log_content, str):
         return log_content
@@ -72,25 +90,19 @@ def is_server_running(
 def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
-    """
-    Get the base URL of a model
-    """
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "out", log_dir)
+    """Get the base URL of a model."""
+    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
     if isinstance(log_content, str):
         return log_content
-    for line in log_content:
-        if SERVER_ADDRESS_SIGNATURE in line:
-            return line.split(SERVER_ADDRESS_SIGNATURE)[1].strip("\n")
-    return "URL_NOT_FOUND"
+    server_addr = cast(dict[str, str], log_content).get("server_address")
+    return server_addr if server_addr else "URL NOT FOUND"
 def model_health_check(
     slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) -> Union[str, tuple[str, Union[str, int]]]:
-    """
-    Check the health of a running model on the cluster
-    """
+) -> tuple[str, Union[str, int]]:
+    """Check the health of a running model on the cluster."""
     base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
     if not base_url.startswith("http"):
         return ("FAILED", base_url)
@@ -100,9 +112,8 @@ def model_health_check(
         response = requests.get(health_check_url)
         # Check if the request was successful
         if response.status_code == 200:
-            return "READY"
-        else:
-            return ("FAILED", response.status_code)
+            return ("READY", response.status_code)
+        return ("FAILED", response.status_code)
     except requests.exceptions.RequestException as e:
         return ("FAILED", str(e))
@@ -110,54 +121,42 @@ def model_health_check(
 def create_table(
     key_title: str = "", value_title: str = "", show_header: bool = True
 ) -> Table:
-    """
-    Create a table for displaying model status
-    """
+    """Create a table for displaying model status."""
     table = Table(show_header=show_header, header_style="bold magenta")
     table.add_column(key_title, style="dim")
     table.add_column(value_title)
     return table
-def load_models_df() -> pl.DataFrame:
-    """
-    Load the models dataframe
-    """
-    models_df = pl.read_csv(
-        os.path.join(
-            os.path.dirname(os.path.dirname(os.path.realpath(__file__))),
-            "models/models.csv",
-        )
+def load_config() -> list[ModelConfig]:
+    """Load the model configuration."""
+    default_path = (
+        CACHED_CONFIG
+        if CACHED_CONFIG.exists()
+        else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )
-    return models_df
-def load_default_args(models_df: pl.DataFrame, model_name: str) -> dict:
-    """
-    Load the default arguments for a model
-    """
-    row_data = models_df.filter(models_df["model_name"] == model_name)
-    default_args = row_data.to_dicts()[0]
-    default_args.pop("model_name", None)
-    default_args.pop("model_type", None)
-    return default_args
-def get_latest_metric(log_lines: list[str]) -> dict | str:
-    """Read the latest metric entry from the log file."""
-    latest_metric = {}
-    try:
-        for line in reversed(log_lines):
-            if "Avg prompt throughput" in line:
-                # Parse the metric values from the line
-                metrics_str = line.split("] ")[1].strip().strip(".")
-                metrics_list = metrics_str.split(", ")
-                for metric in metrics_list:
-                    key, value = metric.split(": ")
-                    latest_metric[key] = value
-                break
-    except Exception as e:
-        return f"[red]Error reading log file: {e}[/red]"
-    return latest_metric
+    config: dict[str, Any] = {}
+    with open(default_path) as f:
+        config = yaml.safe_load(f) or {}
+    user_path = os.getenv("VEC_INF_CONFIG")
+    if user_path:
+        user_path_obj = Path(user_path)
+        if user_path_obj.exists():
+            with open(user_path_obj) as f:
+                user_config = yaml.safe_load(f) or {}
+                for name, data in user_config.get("models", {}).items():
+                    if name in config.get("models", {}):
+                        config["models"][name].update(data)
+                    else:
+                        config.setdefault("models", {})[name] = data
+        else:
+            print(
+                f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
+            )
+    return [
+        ModelConfig(model_name=name, **model_data)
+        for name, model_data in config.get("models", {}).items()
+    ]

vec_inf/{models → config}/README.md RENAMED Viewed

@@ -148,8 +148,27 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`QwQ-32B-Preview`](https://huggingface.co/Qwen/QwQ-32B-Preview) | 2x a40 | - tokens/s | - tokens/s |
+### [DeepSeek-R1: Distilled Models](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`DeepSeek-R1-Distill-Llama-8B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Llama-70B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) | 4x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-1.5B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-7B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-14B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | 2x a40 | - tokens/s | - tokens/s |
+| [`DeepSeek-R1-Distill-Qwen-32B`](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) | 4x a40 | - tokens/s | - tokens/s |
 ## Vision Language Models
+### [allenai: Molmo](https://huggingface.co/collections/allenai/molmo-66f379e6fe3b8ef090a8ca19)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`Molmo-7B-D-0924`](https://huggingface.co/allenai/Molmo-7B-D-0924) | 1x a40 | - tokens/s | - tokens/s |
 ### [LLaVa-1.5](https://huggingface.co/collections/llava-hf/llava-15-65f762d5b6941db5c2ba07e0)
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
@@ -169,6 +188,7 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Phi-3-vision-128k-instruct`](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) | 2x a40 | - tokens/s | - tokens/s |
+| [`Phi-3.5-vision-instruct`](https://huggingface.co/microsoft/Phi-3.5-vision-instruct) | 2x a40 | - tokens/s | - tokens/s |
 ### [Meta: Llama 3.2](https://huggingface.co/collections/meta-llama/llama-32-66f448ffc8c32f949b04c8cf)
@@ -187,6 +207,27 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`Pixtral-12B-2409`](https://huggingface.co/mistralai/Pixtral-12B-2409) | 1x a40 | - tokens/s | - tokens/s |
+### [OpenGVLab: InternVL2.5](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`InternVL2_5-8B`](https://huggingface.co/OpenGVLab/InternVL2_5-8B) | 1x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-26B`](https://huggingface.co/OpenGVLab/InternVL2_5-26B) | 2x a40 | - tokens/s | - tokens/s |
+| [`InternVL2_5-38B`](https://huggingface.co/OpenGVLab/InternVL2_5-38B) | 4x a40 | - tokens/s | - tokens/s |
+### [THUDM: GLM-4](https://huggingface.co/collections/THUDM/glm-4-665fcf188c414b03c2f7e3b7)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`glm-4v-9b`](https://huggingface.co/THUDM/glm-4v-9b) | 1x a40 | - tokens/s | - tokens/s |
+### [DeepSeek: DeepSeek-VL2](https://huggingface.co/collections/deepseek-ai/deepseek-vl2-675c22accc456d3beb4613ab)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`deepseek-vl2`](https://huggingface.co/deepseek-ai/deepseek-vl2) | 2x a40 | - tokens/s | - tokens/s |
+| [`deepseek-vl2-small`](https://huggingface.co/deepseek-ai/deepseek-vl2-small) | 1x a40 | - tokens/s | - tokens/s |
 ## Text Embedding Models
 ### [Liang Wang: e5](https://huggingface.co/intfloat)
@@ -194,6 +235,18 @@ More profiling metrics coming soon!
 |:----------:|:----------:|:----------:|:----------:|
 | [`e5-mistral-7b-instruct`](https://huggingface.co/intfloat/e5-mistral-7b-instruct) | 1x a40 | - tokens/s | - tokens/s |
+### [BAAI: bge](https://huggingface.co/BAAI)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`bge-base-en-v1.5`](https://huggingface.co/BAAI/bge-base-en-v1.5) | 1x A40 | - tokens/s | - tokens/s |
+### [Sentence Transformers: MiniLM](https://huggingface.co/sentence-transformers)
+| Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
+|:----------:|:----------:|:----------:|:----------:|
+| [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 1x A40 | - tokens/s | - tokens/s |
 ## Reward Modeling Models
 ### [Qwen: Qwen2.5-Math](https://huggingface.co/collections/Qwen/qwen25-math-66eaa240a1b7d5ee65f1da3e)
@@ -201,3 +254,4 @@ More profiling metrics coming soon!
 | Variant | Suggested resource allocation | Avg prompt throughput | Avg generation throughput |
 |:----------:|:----------:|:----------:|:----------:|
 | [`Qwen2.5-Math-RM-72B`](https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B) | 4x a40 | - tokens/s | - tokens/s |
+| [`Qwen2.5-Math-PRM-7B`](https://huggingface.co/Qwen/Qwen2.5-Math-PRM-7B) | 1x a40 | - tokens/s | - tokens/s |

vec-inf 0.4.0.post1__py3-none-any.whl → 0.5.0__py3-none-any.whl

vec-inf 0.4.0.post1__py3-none-any.whl → 0.5.0__py3-none-any.whl