vec-inf 0.4.1 → 0.6.0 (py3-none-any.whl)
- vec_inf/README.md +3 -3
- vec_inf/cli/_cli.py +227 -325
- vec_inf/cli/_helper.py +400 -0
- vec_inf/cli/_utils.py +26 -135
- vec_inf/cli/_vars.py +32 -0
- vec_inf/client/__init__.py +31 -0
- vec_inf/client/_client_vars.py +213 -0
- vec_inf/client/_exceptions.py +37 -0
- vec_inf/client/_helper.py +674 -0
- vec_inf/client/_slurm_script_generator.py +179 -0
- vec_inf/client/_utils.py +287 -0
- vec_inf/client/api.py +302 -0
- vec_inf/client/config.py +128 -0
- vec_inf/client/models.py +225 -0
- vec_inf/client/slurm_vars.py +49 -0
- vec_inf/{models → config}/README.md +30 -12
- vec_inf/config/models.yaml +1300 -0
- vec_inf-0.6.0.dist-info/METADATA +193 -0
- vec_inf-0.6.0.dist-info/RECORD +25 -0
- vec_inf/launch_server.sh +0 -145
- vec_inf/models/models.csv +0 -85
- vec_inf/multinode_vllm.slurm +0 -124
- vec_inf/vllm.slurm +0 -59
- vec_inf-0.4.1.dist-info/METADATA +0 -121
- vec_inf-0.4.1.dist-info/RECORD +0 -16
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/README.md CHANGED

```diff
@@ -1,9 +1,9 @@
 # `vec-inf` Commands
 
-* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server, `--json-mode` supported.
-* `list`: List all available model names, or append a supported model name to view the default configuration, `--json-mode` supported.
-* `metrics`: Streams performance metrics to the console.
+* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server, `--json-mode` supported.
 * `status`: Check the model status by providing its Slurm job ID, `--json-mode` supported.
+* `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
+* `list`: List all available model names, or view the default/cached configuration of a specific model, `--json-mode` supported.
 
 Use `--help` to see all available options
```
vec_inf/cli/_cli.py CHANGED

```diff
@@ -1,17 +1,37 @@
-"""Command line interface for Vector Inference.
+"""Command line interface for Vector Inference.
+
+This module provides the command-line interface for interacting with Vector
+Inference services, including model launching, status checking, metrics
+monitoring, and shutdown operations.
+
+Commands
+--------
+launch
+    Launch a model on the cluster
+status
+    Check the status of a running model
+shutdown
+    Stop a running model
+list
+    List available models or get specific model configuration
+metrics
+    Stream real-time performance metrics
+"""
 
-import os
 import time
-from typing import
+from typing import Optional, Union
 
 import click
-import polars as pl
-from rich.columns import Columns
 from rich.console import Console
 from rich.live import Live
-from rich.panel import Panel
 
-
+from vec_inf.cli._helper import (
+    LaunchResponseFormatter,
+    ListCmdDisplay,
+    MetricsResponseFormatter,
+    StatusResponseFormatter,
+)
+from vec_inf.client import LaunchOptions, VecInfClient
 
 
 CONSOLE = Console()
```
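The import hunk above captures the shape of the 0.6.0 refactor: the CLI drops its own `polars` table handling and Rich panel rendering and delegates all cluster interaction to the new public `vec_inf.client` package, keeping only response formatters in `vec_inf.cli._helper`. Below is a minimal sketch of the programmatic surface the CLI now wraps, using only calls that appear in the hunks that follow; the model name and job ID are hypothetical.

```python
# Hedged sketch of the vec_inf.client API as exercised by this CLI diff.
# Method names are taken from the hunks below; argument values are made up.
from vec_inf.client import LaunchOptions, VecInfClient

client = VecInfClient()

# CLI equivalent: vec-inf launch <model-name> --num-nodes 1 --gpus-per-node 1
options = LaunchOptions(num_nodes=1, gpus_per_node=1)  # fields assumed to mirror CLI flags
response = client.launch_model("some-model", options)  # hypothetical model name
print(response.config)  # the launch command prints this in --json-mode

# CLI equivalent: vec-inf status <slurm-job-id>
status_response = client.get_status(12345678)  # hypothetical Slurm job ID

# CLI equivalent: vec-inf shutdown <slurm-job-id>
client.shutdown_model(12345678)
```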
```diff
@@ -27,21 +47,10 @@ def cli() -> None:
 @click.argument("model-name", type=str, nargs=1)
 @click.option("--model-family", type=str, help="The model family")
 @click.option("--model-variant", type=str, help="The model variant")
-@click.option(
-    "--max-model-len",
-    type=int,
-    help="Model context length. Default value set based on suggested resource allocation.",
-)
-@click.option(
-    "--max-num-seqs",
-    type=int,
-    help="Maximum number of sequences to process in a single request",
-)
 @click.option(
     "--partition",
     type=str,
-
-    help="Type of compute partition, default to a40",
+    help="Type of compute partition",
 )
 @click.option(
     "--num-nodes",
@@ -49,10 +58,15 @@
     help="Number of nodes to use, default to suggested resource allocation for model",
 )
 @click.option(
-    "--
+    "--gpus-per-node",
     type=int,
     help="Number of GPUs/node to use, default to suggested resource allocation for model",
 )
+@click.option(
+    "--account",
+    type=str,
+    help="Charge resources used by this job to specified account.",
+)
 @click.option(
     "--qos",
     type=str,
@@ -63,41 +77,25 @@
     type=str,
     help="Time limit for job, this should comply with QoS limits",
 )
-@click.option(
-    "--vocab-size",
-    type=int,
-    help="Vocabulary size, this option is intended for custom models",
-)
-@click.option(
-    "--data-type", type=str, default="auto", help="Model data type, default to auto"
-)
 @click.option(
     "--venv",
     type=str,
-
-    help="Path to virtual environment, default to preconfigured singularity container",
+    help="Path to virtual environment",
 )
 @click.option(
     "--log-dir",
     type=str,
-
-    help="Path to slurm log directory, default to .vec-inf-logs in user home directory",
+    help="Path to slurm log directory",
 )
 @click.option(
     "--model-weights-parent-dir",
     type=str,
-
-    help="Path to parent directory containing model weights, default to '/model-weights' for supported models",
+    help="Path to parent directory containing model weights",
 )
 @click.option(
-    "--
+    "--vllm-args",
     type=str,
-    help="
-)
-@click.option(
-    "--enforce-eager",
-    type=str,
-    help="Always use eager-mode PyTorch, accepts 'True' or 'False', default to 'False' for custom models if not set",
+    help="vLLM engine arguments to be set, use the format as specified in vLLM documentation and separate arguments with commas, e.g. --vllm-args '--max-model-len=8192,--max-num-seqs=256,--enable-prefix-caching'",
 )
 @click.option(
     "--json-mode",
```
```diff
@@ -106,77 +104,69 @@
 )
 def launch(
     model_name: str,
-
-    model_variant: Optional[str] = None,
-    max_model_len: Optional[int] = None,
-    max_num_seqs: Optional[int] = None,
-    partition: Optional[str] = None,
-    num_nodes: Optional[int] = None,
-    num_gpus: Optional[int] = None,
-    qos: Optional[str] = None,
-    time: Optional[str] = None,
-    vocab_size: Optional[int] = None,
-    data_type: Optional[str] = None,
-    venv: Optional[str] = None,
-    log_dir: Optional[str] = None,
-    model_weights_parent_dir: Optional[str] = None,
-    pipeline_parallelism: Optional[str] = None,
-    enforce_eager: Optional[str] = None,
-    json_mode: bool = False,
+    **cli_kwargs: Optional[Union[str, int, float, bool]],
 ) -> None:
-    """Launch a model on the cluster.
-    [52 lines not captured in this diff view]
+    """Launch a model on the cluster.
+
+    Parameters
+    ----------
+    model_name : str
+        Name of the model to launch
+    **cli_kwargs : dict
+        Additional launch options including:
+        - model_family : str, optional
+            Family/architecture of the model
+        - model_variant : str, optional
+            Specific variant of the model
+        - partition : str, optional
+            Type of compute partition
+        - num_nodes : int, optional
+            Number of nodes to use
+        - gpus_per_node : int, optional
+            Number of GPUs per node
+        - qos : str, optional
+            Quality of service tier
+        - time : str, optional
+            Time limit for job
+        - venv : str, optional
+            Path to virtual environment
+        - log_dir : str, optional
+            Path to SLURM log directory
+        - model_weights_parent_dir : str, optional
+            Path to model weights directory
+        - vllm_args : str, optional
+            vLLM engine arguments
+        - json_mode : bool, optional
+            Output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If launch fails for any reason
+    """
+    try:
+        # Convert cli_kwargs to LaunchOptions
+        json_mode = cli_kwargs["json_mode"]
+        del cli_kwargs["json_mode"]
+
+        launch_options = LaunchOptions(**cli_kwargs)  # type: ignore
+
+        # Start the client and launch model inference server
+        client = VecInfClient()
+        launch_response = client.launch_model(model_name, launch_options)
+
+        # Display launch information
+        launch_formatter = LaunchResponseFormatter(model_name, launch_response.config)
+        if json_mode:
+            click.echo(launch_response.config)
+        else:
+            launch_info_table = launch_formatter.format_table_output()
+            CONSOLE.print(launch_info_table)
+
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Launch failed: {str(e)}") from e
 
 
 @cli.command("status")
```
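The rewritten `launch` replaces the old explicit parameter list with `**cli_kwargs`: click turns each `--flag-name` into a `flag_name` key, `json_mode` is popped for output handling, and the rest is forwarded verbatim to `LaunchOptions(**cli_kwargs)`. A hedged sketch of what a single invocation reduces to (values are illustrative; `LaunchOptions` field names are assumed to mirror the flags, which the direct unpacking implies):

```python
# What `vec-inf launch some-model --num-nodes 2 --gpus-per-node 4 \
#     --vllm-args '--max-model-len=8192,--max-num-seqs=256'`
# boils down to internally, per the hunk above. Values are illustrative.
from vec_inf.client import LaunchOptions, VecInfClient

cli_kwargs = {
    "num_nodes": 2,        # from --num-nodes
    "gpus_per_node": 4,    # from --gpus-per-node
    # comma-separated vLLM engine args, in the format the --vllm-args help text specifies
    "vllm_args": "--max-model-len=8192,--max-num-seqs=256",
}
launch_options = LaunchOptions(**cli_kwargs)
launch_response = VecInfClient().launch_model("some-model", launch_options)
```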
```diff
@@ -194,133 +184,61 @@ def launch(
 def status(
     slurm_job_id: int, log_dir: Optional[str] = None, json_mode: bool = False
 ) -> None:
-    """Get the status of a running model on the cluster.
-    [15 lines not captured in this diff view]
-        job_state = "UNAVAILABLE"
-
-    return {
-        "model_name": job_name,
-        "status": "SHUTDOWN",
-        "base_url": "UNAVAILABLE",
-        "state": job_state,
-        "pending_reason": None,
-        "failed_reason": None,
-    }
-
-
-def _process_job_state(
-    output: str, status_info: Dict[str, Any], slurm_job_id: int, log_dir: Optional[str]
-) -> Dict[str, Any]:
-    """Process different job states and update status information."""
-    if status_info["state"] == "PENDING":
-        _process_pending_state(output, status_info)
-    elif status_info["state"] == "RUNNING":
-        _handle_running_state(status_info, slurm_job_id, log_dir)
-    return status_info
-
-
-def _process_pending_state(output: str, status_info: Dict[str, Any]) -> None:
-    """Handle PENDING job state."""
+    """Get the status of a running model on the cluster.
+
+    Parameters
+    ----------
+    slurm_job_id : int
+        ID of the SLURM job to check
+    log_dir : str, optional
+        Path to SLURM log directory
+    json_mode : bool, default=False
+        Whether to output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If status check fails
+    """
     try:
-        [10 lines not captured in this diff view]
-        server_status = utils.is_server_running(
-            status_info["model_name"], slurm_job_id, log_dir
-        )
-
-        if isinstance(server_status, tuple):
-            status_info["status"], status_info["failed_reason"] = server_status
-            return
-
-        if server_status == "RUNNING":
-            _check_model_health(status_info, slurm_job_id, log_dir)
-        else:
-            status_info["status"] = server_status
-
+        # Start the client and get model inference server status
+        client = VecInfClient()
+        status_response = client.get_status(slurm_job_id, log_dir)
+        # Display status information
+        status_formatter = StatusResponseFormatter(status_response)
+        if json_mode:
+            status_formatter.output_json()
+        else:
+            status_info_table = status_formatter.output_table()
+            CONSOLE.print(status_info_table)
 
-    [4 lines not captured in this diff view]
-    model_status = utils.model_health_check(
-        status_info["model_name"], slurm_job_id, log_dir
-    )
-    status, failed_reason = model_status
-    if status == "READY":
-        status_info["base_url"] = utils.get_base_url(
-            status_info["model_name"], slurm_job_id, log_dir
-        )
-        status_info["status"] = status
-    else:
-        status_info["status"], status_info["failed_reason"] = status, failed_reason
-
-
-def _display_status(status_info: Dict[str, Any], json_mode: bool) -> None:
-    """Display the status information in appropriate format."""
-    if json_mode:
-        _output_json(status_info)
-    else:
-        _output_table(status_info)
-
-
-def _output_json(status_info: Dict[str, Any]) -> None:
-    """Format and output JSON data."""
-    json_data = {
-        "model_name": status_info["model_name"],
-        "model_status": status_info["status"],
-        "base_url": status_info["base_url"],
-    }
-    if status_info["pending_reason"]:
-        json_data["pending_reason"] = status_info["pending_reason"]
-    if status_info["failed_reason"]:
-        json_data["failed_reason"] = status_info["failed_reason"]
-    click.echo(json_data)
-
-
-def _output_table(status_info: Dict[str, Any]) -> None:
-    """Create and display rich table."""
-    table = utils.create_table(key_title="Job Status", value_title="Value")
-    table.add_row("Model Name", status_info["model_name"])
-    table.add_row("Model Status", status_info["status"], style="blue")
-
-    if status_info["pending_reason"]:
-        table.add_row("Pending Reason", status_info["pending_reason"])
-    if status_info["failed_reason"]:
-        table.add_row("Failed Reason", status_info["failed_reason"])
-
-    table.add_row("Base URL", status_info["base_url"])
-    CONSOLE.print(table)
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Status check failed: {str(e)}") from e
 
 
 @cli.command("shutdown")
 @click.argument("slurm_job_id", type=int, nargs=1)
 def shutdown(slurm_job_id: int) -> None:
-    """Shutdown a running model on the cluster.
-    [3 lines not captured in this diff view]
+    """Shutdown a running model on the cluster.
+
+    Parameters
+    ----------
+    slurm_job_id : int
+        ID of the SLURM job to shut down
+
+    Raises
+    ------
+    click.ClickException
+        If shutdown operation fails
+    """
+    try:
+        client = VecInfClient()
+        client.shutdown_model(slurm_job_id)
+        click.echo(f"Shutting down model with Slurm Job ID: {slurm_job_id}")
+    except Exception as e:
+        raise click.ClickException(f"Shutdown failed: {str(e)}") from e
 
 
 @cli.command("list")
```
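`status` and `shutdown` follow the same pattern: the old in-module helpers (`_process_job_state`, `_output_json`, `_output_table`, and friends) are gone, and the CLI only formats what `VecInfClient` returns. A hedged sketch of the same round trip done programmatically, using only the client calls confirmed by the hunk above:

```python
# Hedged sketch: poll a job's status, then shut it down. The Slurm job ID
# is made up, and the response's field names are not visible in this diff.
from vec_inf.client import VecInfClient

client = VecInfClient()
job_id = 12345678

status_response = client.get_status(job_id)  # log_dir is optional, as in the CLI
print(status_response)  # fields aren't shown in this diff, so just print it

client.shutdown_model(job_id)
```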
```diff
@@ -331,107 +249,91 @@ def shutdown(slurm_job_id: int) -> None:
     help="Output in JSON string",
 )
 def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> None:
-    """List all available models, or get default setup of a specific model.
-    [27 lines not captured in this diff view]
-        "Text Embedding": "purple",
-        "Reward Modeling": "bright_magenta",
-    }
-
-    models_df = models_df.with_columns(
-        pl.when(pl.col("model_type") == "LLM")
-        .then(0)
-        .when(pl.col("model_type") == "VLM")
-        .then(1)
-        .when(pl.col("model_type") == "Text Embedding")
-        .then(2)
-        .when(pl.col("model_type") == "Reward Modeling")
-        .then(3)
-        .otherwise(-1)
-        .alias("model_type_order")
-    )
-
-    models_df = models_df.sort("model_type_order")
-    models_df = models_df.drop("model_type_order")
-
-    for row in models_df.to_dicts():
-        panel_color = model_type_colors.get(row["model_type"], "white")
-        if row["model_variant"] == "None":
-            styled_text = f"[magenta]{row['model_family']}[/magenta]"
-        else:
-            styled_text = (
-                f"[magenta]{row['model_family']}[/magenta]-{row['model_variant']}"
-            )
-        panels.append(Panel(styled_text, expand=True, border_style=panel_color))
-    CONSOLE.print(Columns(panels, equal=True))
-
-    models_df = utils.load_models_df()
-
-    if model_name:
-        list_model(model_name, models_df, json_mode)
-    else:
-        list_all(models_df, json_mode)
+    """List all available models, or get default setup of a specific model.
+
+    Parameters
+    ----------
+    model_name : str, optional
+        Name of specific model to get information for
+    json_mode : bool, default=False
+        Whether to output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If list operation fails
+    """
+    try:
+        # Start the client
+        client = VecInfClient()
+        list_display = ListCmdDisplay(CONSOLE, json_mode)
+        if model_name:
+            model_config = client.get_model_config(model_name)
+            list_display.display_single_model_output(model_config)
+        else:
+            model_infos = client.list_models()
+            list_display.display_all_models_output(model_infos)
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"List models failed: {str(e)}") from e
 
 
 @cli.command("metrics")
 @click.argument("slurm_job_id", type=int, nargs=1)
 @click.option(
-    "--log-dir",
-    type=str,
-    help="Path to slurm log directory. This is required if --log-dir was set in model launch",
+    "--log-dir", type=str, help="Path to slurm log directory (if used during launch)"
 )
 def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
-    """Stream performance metrics
-    [25 lines not captured in this diff view]
+    """Stream real-time performance metrics from the model endpoint.
+
+    Parameters
+    ----------
+    slurm_job_id : int
+        ID of the SLURM job to monitor
+    log_dir : str, optional
+        Path to SLURM log directory
+
+    Raises
+    ------
+    click.ClickException
+        If metrics collection fails
+
+    Notes
+    -----
+    This command continuously streams metrics with a 2-second refresh interval
+    until interrupted. If metrics are not available, it will display status
+    information instead.
+    """
+    try:
+        # Start the client and get inference server metrics
+        client = VecInfClient()
+        metrics_response = client.get_metrics(slurm_job_id, log_dir)
+        metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
+
+        # Check if metrics response is ready
+        if isinstance(metrics_response.metrics, str):
+            metrics_formatter.format_failed_metrics(metrics_response.metrics)
+            CONSOLE.print(metrics_formatter.table)
+            return
+
+        with Live(refresh_per_second=1, console=CONSOLE) as live:
+            while True:
+                metrics_response = client.get_metrics(slurm_job_id, log_dir)
+                metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
+
+                if isinstance(metrics_response.metrics, str):
+                    # Show status information if metrics aren't available
+                    metrics_formatter.format_failed_metrics(metrics_response.metrics)
+                else:
+                    metrics_formatter.format_metrics()
+
+                live.update(metrics_formatter.table)
+                time.sleep(2)
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Metrics check failed: {str(e)}") from e
 
 
 if __name__ == "__main__":
```
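One contract worth noting in the new `metrics` path: `metrics_response.metrics` doubles as the error channel. When it is a `str`, the CLI treats it as a failure message and renders it once instead of entering the `Live` loop; otherwise it refreshes every 2 seconds. A hedged polling sketch against that contract (the job ID is illustrative, and the shape of the non-string metrics value is not shown in this diff):

```python
# Hedged sketch of polling metrics with the str-means-unavailable contract
# used by the metrics command above. The Slurm job ID is made up.
import time

from vec_inf.client import VecInfClient

client = VecInfClient()
job_id = 12345678

for _ in range(5):  # a few polls instead of the CLI's endless Live loop
    metrics = client.get_metrics(job_id).metrics
    if isinstance(metrics, str):
        print(f"Metrics not available yet: {metrics}")
    else:
        print(metrics)  # actual metric fields aren't shown in this diff
    time.sleep(2)  # same refresh interval as the CLI
```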