PyPI - vec-inf - Versions diffs - 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

vec_inf/README.md +2 -1
vec_inf/cli/_cli.py +43 -12
vec_inf/cli/_helper.py +79 -12
vec_inf/cli/_vars.py +37 -22
vec_inf/client/_client_vars.py +31 -1
vec_inf/client/_helper.py +154 -49
vec_inf/client/_slurm_script_generator.py +109 -43
vec_inf/client/_slurm_templates.py +110 -48
vec_inf/client/_slurm_vars.py +13 -4
vec_inf/client/_utils.py +13 -7
vec_inf/client/api.py +47 -0
vec_inf/client/config.py +17 -7
vec_inf/client/models.py +25 -19
vec_inf/config/README.md +1 -1
vec_inf/config/environment.yaml +9 -2
vec_inf/config/models.yaml +184 -368
vec_inf/find_port.sh +10 -1
{vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/METADATA +17 -16
vec_inf-0.8.0.dist-info/RECORD +27 -0
{vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/WHEEL +1 -1
vec_inf-0.7.2.dist-info/RECORD +0 -27
{vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/entry_points.txt +0 -0
{vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/licenses/LICENSE +0 -0

vec_inf/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 * `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server.
 * `batch-launch`: Specify a list of models to launch multiple OpenAI compatible inference servers at the same time.
-* `status`: Check the model status by providing its Slurm job ID.
+* `status`: Check the status of all `vec-inf` jobs, or a specific job by providing its job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
 * `list`: List all available model names, or view the default/cached configuration of a specific model.
@@ -14,6 +14,7 @@ Use `--help` to see all available options
 * `launch_model`: Launch an OpenAI compatible inference server.
 * `batch_launch_models`: Launch multiple OpenAI compatible inference servers.
+* `fetch_running_jobs`: Get the running `vec-inf` job IDs.
 * `get_status`: Get the status of a running model.
 * `get_metrics`: Get the performance metrics of a running model.
 * `shutdown_model`: Shutdown a running model.

vec_inf/cli/_cli.py CHANGED Viewed

@@ -30,6 +30,7 @@ from vec_inf.cli._helper import (
     BatchLaunchResponseFormatter,
     LaunchResponseFormatter,
     ListCmdDisplay,
+    ListStatusDisplay,
     MetricsResponseFormatter,
     StatusResponseFormatter,
 )
@@ -131,10 +132,20 @@ def cli() -> None:
     type=str,
     help="Path to parent directory containing model weights",
 )
+@click.option(
+    "--engine",
+    type=str,
+    help="Inference engine to use, supports 'vllm' and 'sglang'",
+)
 @click.option(
     "--vllm-args",
     type=str,
-    help="vLLM engine arguments to be set, use the format as specified in vLLM documentation and separate arguments with commas, e.g. --vllm-args '--max-model-len=8192,--max-num-seqs=256,--enable-prefix-caching'",
+    help="vLLM engine arguments to be set, use the format as specified in vLLM serve documentation and separate arguments with commas, e.g. --vllm-args '--max-model-len=8192,--max-num-seqs=256,--enable-prefix-caching'",
+)
+@click.option(
+    "--sglang-args",
+    type=str,
+    help="SGLang engine arguments to be set, use the format as specified in SGLang Server Arguments documentation and separate arguments with commas, e.g. --sglang-args '--context-length=8192,--mem-fraction-static=0.85'",
 )
 @click.option(
     "--json-mode",
@@ -149,7 +160,7 @@ def cli() -> None:
 @click.option(
     "--config",
     type=str,
-    help="Path to a model config yaml file to use in place of the default",
+    help="Path to a model config yaml file to use in place of the default, you can also set VEC_INF_MODEL_CONFIG to the path to the model config file",
 )
 def launch(
     model_name: str,
@@ -200,7 +211,9 @@ def launch(
         - model_weights_parent_dir : str, optional
             Path to model weights directory
         - vllm_args : str, optional
-            vLLM engine arguments
+            vllm engine arguments
+        - sglang_args : str, optional
+            sglang engine arguments
         - env : str, optional
             Environment variables
         - config : str, optional
@@ -228,6 +241,10 @@ def launch(
         if json_mode:
             click.echo(json.dumps(launch_response.config))
         else:
+            if launch_response.config.get("engine_inferred"):
+                CONSOLE.print(
+                    "Warning: Inference engine inferred from engine-specific args"
+                )
             launch_formatter = LaunchResponseFormatter(
                 model_name, launch_response.config
             )
@@ -313,14 +330,14 @@ def batch_launch(
         raise click.ClickException(f"Batch launch failed: {str(e)}") from e
-@cli.command("status", help="Check the status of a running model on the cluster.")
-@click.argument("slurm_job_id", type=str, nargs=1)
+@cli.command("status", help="Check the status of running vec-inf jobs on the cluster.")
+@click.argument("slurm_job_id", required=False)
 @click.option(
     "--json-mode",
     is_flag=True,
     help="Output in JSON string",
 )
-def status(slurm_job_id: str, json_mode: bool = False) -> None:
+def status(slurm_job_id: Optional[str] = None, json_mode: bool = False) -> None:
     """Get the status of a running model on the cluster.
     Parameters
@@ -338,14 +355,28 @@ def status(slurm_job_id: str, json_mode: bool = False) -> None:
     try:
         # Start the client and get model inference server status
         client = VecInfClient()
-        status_response = client.get_status(slurm_job_id)
+        if not slurm_job_id:
+            slurm_job_ids = client.fetch_running_jobs()
+            if not slurm_job_ids:
+                click.echo("No running jobs found.")
+                return
+        else:
+            slurm_job_ids = [slurm_job_id]
+        responses = []
+        for job_id in slurm_job_ids:
+            responses.append(client.get_status(job_id))
         # Display status information
-        status_formatter = StatusResponseFormatter(status_response)
-        if json_mode:
-            status_formatter.output_json()
+        if slurm_job_id:
+            status_formatter = StatusResponseFormatter(responses[0])
+            if json_mode:
+                status_formatter.output_json()
+            else:
+                status_info_table = status_formatter.output_table()
+                CONSOLE.print(status_info_table)
         else:
-            status_info_table = status_formatter.output_table()
-            CONSOLE.print(status_info_table)
+            list_status_display = ListStatusDisplay(slurm_job_ids, responses, json_mode)
+            list_status_display.display_multiple_status_output(CONSOLE)
     except click.ClickException as e:
         raise e

vec_inf/cli/_helper.py CHANGED Viewed

@@ -15,7 +15,7 @@ from rich.panel import Panel
 from rich.table import Table
 from vec_inf.cli._utils import create_table
-from vec_inf.cli._vars import MODEL_TYPE_COLORS, MODEL_TYPE_PRIORITY
+from vec_inf.cli._vars import ENGINE_NAME_MAP, MODEL_TYPE_COLORS, MODEL_TYPE_PRIORITY
 from vec_inf.client import ModelConfig, ModelInfo, StatusResponse
@@ -49,11 +49,12 @@ class LaunchResponseFormatter:
             if self.params.get(key):
                 table.add_row(label, self.params[key])
-    def _add_vllm_config(self, table: Table) -> None:
-        """Add vLLM configuration details to the table."""
-        if self.params.get("vllm_args"):
-            table.add_row("vLLM Arguments:", style="magenta")
-            for arg, value in self.params["vllm_args"].items():
+    def _add_engine_config(self, table: Table) -> None:
+        """Add inference engine configuration details to the table."""
+        if self.params.get("engine_args"):
+            engine_name = ENGINE_NAME_MAP[self.params["engine"]]
+            table.add_row(f"{engine_name} Arguments:", style="magenta")
+            for arg, value in self.params["engine_args"].items():
                 table.add_row(f"  {arg}:", str(value))
     def _add_env_vars(self, table: Table) -> None:
@@ -111,9 +112,10 @@ class LaunchResponseFormatter:
             str(Path(self.params["model_weights_parent_dir"], self.model_name)),
         )
         table.add_row("Log Directory", self.params["log_dir"])
+        table.add_row("Inference Engine", ENGINE_NAME_MAP[self.params["engine"]])
         # Add configuration details
-        self._add_vllm_config(table)
+        self._add_engine_config(table)
         self._add_env_vars(table)
         self._add_bind_paths(table)
@@ -185,6 +187,10 @@ class BatchLaunchResponseFormatter:
             table.add_row(
                 "Memory/Node", f"  {self.params['models'][model_name]['mem_per_node']}"
             )
+            table.add_row(
+                "Inference Engine",
+                f"  {ENGINE_NAME_MAP[self.params['models'][model_name]['engine']]}",
+            )
         return table
@@ -251,6 +257,62 @@ class StatusResponseFormatter:
         return table
+class ListStatusDisplay:
+    """CLI Helper class for formatting a list of StatusResponse.
+    A formatter class that handles the presentation of multiple job statuses
+    in a table format.
+    Parameters
+    ----------
+    statuses : list[StatusResponse]
+        List of model status information
+    """
+    def __init__(
+        self,
+        job_ids: list[str],
+        statuses: list[StatusResponse],
+        json_mode: bool = False,
+    ):
+        self.job_ids = job_ids
+        self.statuses = statuses
+        self.json_mode = json_mode
+        self.table = Table(show_header=True, header_style="bold magenta")
+        self.table.add_column("Job ID")
+        self.table.add_column("Model Name")
+        self.table.add_column("Status", style="blue")
+        self.table.add_column("Base URL")
+    def display_multiple_status_output(self, console: Console) -> None:
+        """Format and display all model statuses.
+        Formats each model's status and adds it to the table.
+        """
+        if self.json_mode:
+            json_data = [
+                {
+                    "job_id": status.model_name,
+                    "model_name": status.model_name,
+                    "model_status": status.server_status,
+                    "base_url": status.base_url,
+                }
+                for status in self.statuses
+            ]
+            click.echo(json.dumps(json_data, indent=4))
+            return
+        for i, status in enumerate(self.statuses):
+            self.table.add_row(
+                self.job_ids[i],
+                status.model_name,
+                status.server_status,
+                status.base_url,
+            )
+        console.print(self.table)
 class MetricsResponseFormatter:
     """CLI Helper class for formatting MetricsResponse.
@@ -423,14 +485,19 @@ class ListCmdDisplay:
             )
             return json.dumps(config_dict, indent=4)
+        excluded_list = ["venv", "log_dir"]
         table = create_table(key_title="Model Config", value_title="Value")
         for field, value in config.model_dump().items():
-            if field not in {"venv", "log_dir", "vllm_args"} and value:
+            if "args" in field:
+                if not value:
+                    continue
+                engine_name = ENGINE_NAME_MAP[field.split("_")[0]]
+                table.add_row(f"{engine_name} Arguments:", style="magenta")
+                for engine_arg, engine_value in value.items():
+                    table.add_row(f"  {engine_arg}:", str(engine_value))
+            elif field not in excluded_list and value:
                 table.add_row(field, str(value))
-            if field == "vllm_args":
-                table.add_row("vLLM Arguments:", style="magenta")
-                for vllm_arg, vllm_value in value.items():
-                    table.add_row(f"  {vllm_arg}:", str(vllm_value))
         return table
     def _format_all_models_output(

vec_inf/cli/_vars.py CHANGED Viewed

@@ -1,32 +1,47 @@
 """Constants for CLI rendering.
-This module defines constant mappings for model type priorities and colors
+This module defines mappings for model type priorities, colors, and engine name mappings
 used in the CLI display formatting.
+"""
-Constants
----------
-MODEL_TYPE_PRIORITY : dict
-    Mapping of model types to their display priority (lower numbers shown first)
+from typing import get_args
-MODEL_TYPE_COLORS : dict
-    Mapping of model types to their display colors in Rich
+from vec_inf.client._slurm_vars import MODEL_TYPES
-Notes
------
-These constants are used primarily by the ListCmdDisplay class to ensure
-consistent sorting and color coding of different model types in the CLI output.
-"""
-MODEL_TYPE_PRIORITY = {
-    "LLM": 0,
-    "VLM": 1,
-    "Text_Embedding": 2,
-    "Reward_Modeling": 3,
-}
+# Extract model type values from the Literal type
+_MODEL_TYPES = get_args(MODEL_TYPES)
+# Rich color options (prioritizing current colors, with fallbacks for additional types)
+_RICH_COLORS = [
+    "cyan",
+    "bright_blue",
+    "purple",
+    "bright_magenta",
+    "green",
+    "yellow",
+    "bright_green",
+    "bright_yellow",
+    "red",
+    "bright_red",
+    "blue",
+    "magenta",
+    "bright_cyan",
+    "white",
+    "bright_white",
+]
+# Mapping of model types to their display priority (lower numbers shown first)
+MODEL_TYPE_PRIORITY = {model_type: idx for idx, model_type in enumerate(_MODEL_TYPES)}
+# Mapping of model types to their display colors in Rich
 MODEL_TYPE_COLORS = {
-    "LLM": "cyan",
-    "VLM": "bright_blue",
-    "Text_Embedding": "purple",
-    "Reward_Modeling": "bright_magenta",
+    model_type: _RICH_COLORS[idx % len(_RICH_COLORS)]
+    for idx, model_type in enumerate(_MODEL_TYPES)
+}
+# Inference engine choice and name mapping
+ENGINE_NAME_MAP = {
+    "vllm": "vLLM",
+    "sglang": "SGLang",
 }

vec_inf/client/_client_vars.py CHANGED Viewed

@@ -49,7 +49,7 @@ SLURM_JOB_CONFIG_ARGS = {
     "time": "time",
     "nodes": "num_nodes",
     "exclude": "exclude",
-    "nodelist": "node_list",
+    "nodelist": "nodelist",
     "gres": "gres",
     "cpus-per-task": "cpus_per_task",
     "mem": "mem_per_node",
@@ -61,13 +61,43 @@ SLURM_JOB_CONFIG_ARGS = {
 VLLM_SHORT_TO_LONG_MAP = {
     "-tp": "--tensor-parallel-size",
     "-pp": "--pipeline-parallel-size",
+    "-n": "--nnodes",
+    "-r": "--node-rank",
+    "-dcp": "--decode-context-parallel-size",
+    "-pcp": "--prefill-context-parallel-size",
     "-dp": "--data-parallel-size",
+    "-dpn": "--data-parallel-rank",
+    "-dpr": "--data-parallel-start-rank",
     "-dpl": "--data-parallel-size-local",
     "-dpa": "--data-parallel-address",
     "-dpp": "--data-parallel-rpc-port",
+    "-dpb": "--data-parallel-backend",
+    "-dph": "--data-parallel-hybrid-lb",
+    "-dpe": "--data-parallel-external-lb",
     "-O": "--compilation-config",
     "-q": "--quantization",
 }
+# SGLang engine args mapping between short and long names
+SGLANG_SHORT_TO_LONG_MAP = {
+    "--tp": "--tensor-parallel-size",
+    "--tp-size": "--tensor-parallel-size",
+    "--pp": "--pipeline-parallel-size",
+    "--pp-size": "--pipeline-parallel-size",
+    "--dp": "--data-parallel-size",
+    "--dp-size": "--data-parallel-size",
+    "--ep": "--expert-parallel-size",
+    "--ep-size": "--expert-parallel-expert-size",
+}
+# Mapping of engine short names to their argument mappings
+ENGINE_SHORT_TO_LONG_MAP = {
+    "vllm": VLLM_SHORT_TO_LONG_MAP,
+    "sglang": SGLANG_SHORT_TO_LONG_MAP,
+}
 # Required matching arguments for batch mode
 BATCH_MODE_REQUIRED_MATCHING_ARGS = ["venv", "log_dir"]
+# Supported engines
+SUPPORTED_ENGINES = ["vllm", "sglang"]

vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl