vec-inf 0.6.1-py3-none-any.whl → 0.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +18 -4
- vec_inf/cli/_cli.py +191 -34
- vec_inf/cli/_helper.py +95 -14
- vec_inf/client/_client_vars.py +7 -165
- vec_inf/client/_helper.py +386 -40
- vec_inf/client/_slurm_script_generator.py +204 -36
- vec_inf/client/_slurm_templates.py +248 -0
- vec_inf/client/_slurm_vars.py +86 -0
- vec_inf/client/_utils.py +189 -70
- vec_inf/client/api.py +96 -25
- vec_inf/client/config.py +40 -19
- vec_inf/client/models.py +44 -4
- vec_inf/config/README.md +4 -243
- vec_inf/config/environment.yaml +35 -0
- vec_inf/config/models.yaml +102 -274
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/METADATA +43 -73
- vec_inf-0.7.1.dist-info/RECORD +27 -0
- vec_inf/client/slurm_vars.py +0 -49
- vec_inf-0.6.1.dist-info/RECORD +0 -25
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/WHEEL +0 -0
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.6.1.dist-info → vec_inf-0.7.1.dist-info}/licenses/LICENSE +0 -0
vec_inf/README.md
CHANGED
@@ -1,9 +1,23 @@
-
+## `vec-inf` CLI Commands
 
-* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server
-* `
+* `launch`: Specify a model family and other optional parameters to launch an OpenAI compatible inference server.
+* `batch-launch`: Specify a list of models to launch multiple OpenAI compatible inference servers at the same time.
+* `status`: Check the model status by providing its Slurm job ID.
 * `metrics`: Streams performance metrics to the console.
 * `shutdown`: Shutdown a model by providing its Slurm job ID.
-* `list`: List all available model names, or view the default/cached configuration of a specific model
+* `list`: List all available model names, or view the default/cached configuration of a specific model.
+* `cleanup`: Remove old log directories. You can filter by `--model-family`, `--model-name`, `--job-id`, and/or `--before-job-id`. Use `--dry-run` to preview what would be deleted.
 
 Use `--help` to see all available options
+
+## `VecInfClient` API
+
+* `launch_model`: Launch an OpenAI compatible inference server.
+* `batch_launch_models`: Launch multiple OpenAI compatible inference servers.
+* `get_status`: Get the status of a running model.
+* `get_metrics`: Get the performance metrics of a running model.
+* `shutdown_model`: Shutdown a running model.
+* `list_models`: List all available models.
+* `get_model_config`: Get the configuration for a specific model.
+* `wait_until_ready`: Wait until a model is ready or fails.
+* `cleanup_logs`: Remove logs from the log directory.
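The `VecInfClient` methods listed above cover the same lifecycle as the CLI commands. Below is a minimal sketch that strings them together. The import path, the call signatures, the example model name, and the `slurm_job_id` key are assumptions; only the method names and the `.config` and `.metrics` response attributes are confirmed by this diff.

```python
# Minimal sketch of the programmatic flow, assuming VecInfClient is exposed
# from vec_inf.client.api and that launch_model's options argument is optional.
from vec_inf.client.api import VecInfClient

client = VecInfClient()

# Launch one server; the model name here is a placeholder.
launch_response = client.launch_model("Meta-Llama-3.1-8B-Instruct")
job_id = launch_response.config["slurm_job_id"]  # assumed key

# Block until the server reports ready (or the Slurm job fails).
client.wait_until_ready(job_id)

# Poll once for performance metrics, then shut the server down.
metrics_response = client.get_metrics(job_id)
print(metrics_response.metrics)
client.shutdown_model(job_id)
```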
vec_inf/cli/_cli.py
CHANGED
@@ -27,6 +27,7 @@ from rich.console import Console
 from rich.live import Live
 
 from vec_inf.cli._helper import (
+    BatchLaunchResponseFormatter,
     LaunchResponseFormatter,
     ListCmdDisplay,
     MetricsResponseFormatter,
@@ -44,14 +45,19 @@ def cli() -> None:
     pass
 
 
-@cli.command("launch")
+@cli.command("launch", help="Launch a model on the cluster.")
 @click.argument("model-name", type=str, nargs=1)
 @click.option("--model-family", type=str, help="The model family")
 @click.option("--model-variant", type=str, help="The model variant")
 @click.option(
     "--partition",
     type=str,
-    help="Type of
+    help="Type of Slurm partition",
+)
+@click.option(
+    "--resource-type",
+    type=str,
+    help="Type of resource to request for the job",
 )
 @click.option(
     "--num-nodes",
@@ -65,9 +71,16 @@ def cli() -> None:
 )
 @click.option(
     "--account",
+    "-A",
     type=str,
     help="Charge resources used by this job to specified account.",
 )
+@click.option(
+    "--work-dir",
+    "-D",
+    type=str,
+    help="Set working directory for the batch job",
+)
 @click.option(
     "--qos",
     type=str,
@@ -79,14 +92,14 @@ def cli() -> None:
     help="Exclude certain nodes from the resources granted to the job",
 )
 @click.option(
-    "--
+    "--nodelist",
     type=str,
     help="Request a specific list of nodes for deployment",
 )
 @click.option(
     "--bind",
     type=str,
-    help="Additional binds for the
+    help="Additional binds for the container as a comma separated list of bind paths",
 )
 @click.option(
     "--time",
@@ -118,6 +131,16 @@ def cli() -> None:
     is_flag=True,
     help="Output in JSON string",
 )
+@click.option(
+    "--env",
+    type=str,
+    help="Environment variables to be set. Seperate variables with commas. Can also include path to a file containing environment variables seperated by newlines. e.g. --env 'TRITON_CACHE_DIR=/scratch/.cache/triton,my_custom_vars_file.env'",
+)
+@click.option(
+    "--config",
+    type=str,
+    help="Path to a model config yaml file to use in place of the default",
+)
 def launch(
     model_name: str,
     **cli_kwargs: Optional[Union[str, int, float, bool]],
@@ -135,21 +158,25 @@ def launch(
     - model_variant : str, optional
         Specific variant of the model
     - partition : str, optional
-        Type of
+        Type of Slurm partition
+    - resource_type : str, optional
+        Type of resource to request for the job
     - num_nodes : int, optional
         Number of nodes to use
     - gpus_per_node : int, optional
         Number of GPUs per node
     - account : str, optional
         Charge resources used by this job to specified account
+    - work_dir : str, optional
+        Set working directory for the batch job
     - qos : str, optional
         Quality of service tier
     - exclude : str, optional
         Exclude certain nodes from the resources granted to the job
-    -
+    - nodelist : str, optional
         Request a specific list of nodes for deployment
     - bind : str, optional
-        Additional binds for the
+        Additional binds for the container as a comma separated list of bind paths
     - time : str, optional
         Time limit for job
     - venv : str, optional
@@ -160,6 +187,10 @@ def launch(
         Path to model weights directory
     - vllm_args : str, optional
         vLLM engine arguments
+    - env : str, optional
+        Environment variables
+    - config : str, optional
+        Path to custom model config yaml file
     - json_mode : bool, optional
         Output in JSON format
 
@@ -180,11 +211,12 @@ def launch(
         launch_response = client.launch_model(model_name, launch_options)
 
         # Display launch information
-        launch_formatter = LaunchResponseFormatter(model_name, launch_response.config)
-
         if json_mode:
             click.echo(json.dumps(launch_response.config))
         else:
+            launch_formatter = LaunchResponseFormatter(
+                model_name, launch_response.config
+            )
             launch_info_table = launch_formatter.format_table_output()
             CONSOLE.print(launch_info_table)
 
@@ -194,29 +226,93 @@ def launch(
         raise click.ClickException(f"Launch failed: {str(e)}") from e
 
 
-@cli.command(
-
+@cli.command(
+    "batch-launch",
+    help="Launch multiple models in a batch, separate model names with spaces.",
+)
+@click.argument("model-names", type=str, nargs=-1)
 @click.option(
-    "--
+    "--batch-config",
+    type=str,
+    help="Model configuration for batch launch",
+)
+@click.option(
+    "--account",
+    "-A",
     type=str,
-    help="
+    help="Charge resources used by this job to specified account.",
+)
+@click.option(
+    "--work-dir",
+    "-D",
+    type=str,
+    help="Set working directory for the batch job",
 )
 @click.option(
     "--json-mode",
     is_flag=True,
     help="Output in JSON string",
 )
-def
-
+def batch_launch(
+    model_names: tuple[str, ...],
+    batch_config: Optional[str] = None,
+    account: Optional[str] = None,
+    work_dir: Optional[str] = None,
+    json_mode: Optional[bool] = False,
 ) -> None:
+    """Launch multiple models in a batch.
+
+    Parameters
+    ----------
+    model_names : tuple[str, ...]
+        Names of the models to launch
+    batch_config : str
+        Model configuration for batch launch
+    json_mode : bool, default=False
+        Whether to output in JSON format
+
+    Raises
+    ------
+    click.ClickException
+        If batch launch fails
+    """
+    try:
+        # Start the client and launch models in batch mode
+        client = VecInfClient()
+        batch_launch_response = client.batch_launch_models(
+            list(model_names), batch_config, account, work_dir
+        )
+
+        # Display batch launch information
+        if json_mode:
+            click.echo(json.dumps(batch_launch_response.config, indent=4))
+        else:
+            batch_launch_formatter = BatchLaunchResponseFormatter(
+                batch_launch_response.config
+            )
+            batch_launch_info_table = batch_launch_formatter.format_table_output()
+            CONSOLE.print(batch_launch_info_table)
+
+    except click.ClickException as e:
+        raise e
+    except Exception as e:
+        raise click.ClickException(f"Batch launch failed: {str(e)}") from e
+
+
+@cli.command("status", help="Check the status of a running model on the cluster.")
+@click.argument("slurm_job_id", type=str, nargs=1)
+@click.option(
+    "--json-mode",
+    is_flag=True,
+    help="Output in JSON string",
+)
+def status(slurm_job_id: str, json_mode: bool = False) -> None:
     """Get the status of a running model on the cluster.
 
     Parameters
     ----------
-    slurm_job_id :
+    slurm_job_id : str
         ID of the SLURM job to check
-    log_dir : str, optional
-        Path to SLURM log directory
     json_mode : bool, default=False
         Whether to output in JSON format
 
@@ -228,7 +324,7 @@ def status(
     try:
         # Start the client and get model inference server status
         client = VecInfClient()
-        status_response = client.get_status(slurm_job_id
+        status_response = client.get_status(slurm_job_id)
         # Display status information
         status_formatter = StatusResponseFormatter(status_response)
         if json_mode:
@@ -243,14 +339,14 @@ def status(
         raise click.ClickException(f"Status check failed: {str(e)}") from e
 
 
-@cli.command("shutdown")
-@click.argument("slurm_job_id", type=
-def shutdown(slurm_job_id:
+@cli.command("shutdown", help="Shutdown a running model on the cluster.")
+@click.argument("slurm_job_id", type=str, nargs=1)
+def shutdown(slurm_job_id: str) -> None:
     """Shutdown a running model on the cluster.
 
     Parameters
     ----------
-    slurm_job_id :
+    slurm_job_id : str
         ID of the SLURM job to shut down
 
     Raises
@@ -266,7 +362,7 @@ def shutdown(slurm_job_id: int) -> None:
         raise click.ClickException(f"Shutdown failed: {str(e)}") from e
 
 
-@cli.command("list")
+@cli.command("list", help="List available models or get specific model configuration.")
 @click.argument("model-name", required=False)
 @click.option(
     "--json-mode",
@@ -304,20 +400,17 @@ def list_models(model_name: Optional[str] = None, json_mode: bool = False) -> No
         raise click.ClickException(f"List models failed: {str(e)}") from e
 
 
-@cli.command(
-
-@click.option(
-    "--log-dir", type=str, help="Path to slurm log directory (if used during launch)"
+@cli.command(
+    "metrics", help="Stream real-time performance metrics from the model endpoint."
 )
-
+@click.argument("slurm_job_id", type=str, nargs=1)
+def metrics(slurm_job_id: str) -> None:
     """Stream real-time performance metrics from the model endpoint.
 
     Parameters
     ----------
-    slurm_job_id :
+    slurm_job_id : str
         ID of the SLURM job to monitor
-    log_dir : str, optional
-        Path to SLURM log directory
 
     Raises
     ------
@@ -333,7 +426,7 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
     try:
         # Start the client and get inference server metrics
         client = VecInfClient()
-        metrics_response = client.get_metrics(slurm_job_id
+        metrics_response = client.get_metrics(slurm_job_id)
         metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
 
         # Check if metrics response is ready
@@ -344,7 +437,7 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
 
         with Live(refresh_per_second=1, console=CONSOLE) as live:
             while True:
-                metrics_response = client.get_metrics(slurm_job_id
+                metrics_response = client.get_metrics(slurm_job_id)
                 metrics_formatter = MetricsResponseFormatter(metrics_response.metrics)
 
                 if isinstance(metrics_response.metrics, str):
@@ -361,5 +454,69 @@ def metrics(slurm_job_id: int, log_dir: Optional[str] = None) -> None:
         raise click.ClickException(f"Metrics check failed: {str(e)}") from e
 
 
+@cli.command("cleanup", help="Clean up log files based on optional filters.")
+@click.option("--log-dir", type=str, help="Path to SLURM log directory")
+@click.option("--model-family", type=str, help="Filter by model family")
+@click.option("--model-name", type=str, help="Filter by model name")
+@click.option(
+    "--job-id", type=int, help="Only remove logs with this exact SLURM job ID"
+)
+@click.option(
+    "--before-job-id",
+    type=int,
+    help="Remove logs with job ID less than this value",
+)
+@click.option("--dry-run", is_flag=True, help="List matching logs without deleting")
+def cleanup_logs_cli(
+    log_dir: Optional[str],
+    model_family: Optional[str],
+    model_name: Optional[str],
+    job_id: Optional[int],
+    before_job_id: Optional[int],
+    dry_run: bool,
+) -> None:
+    """Clean up log files based on optional filters.
+
+    Parameters
+    ----------
+    log_dir : str or Path, optional
+        Root directory containing log files. Defaults to ~/.vec-inf-logs.
+    model_family : str, optional
+        Only delete logs for this model family.
+    model_name : str, optional
+        Only delete logs for this model name.
+    job_id : int, optional
+        If provided, only match directories with this exact SLURM job ID.
+    before_job_id : int, optional
+        If provided, only delete logs with job ID less than this value.
+    dry_run : bool
+        If True, return matching files without deleting them.
+    """
+    try:
+        client = VecInfClient()
+        matched = client.cleanup_logs(
+            log_dir=log_dir,
+            model_family=model_family,
+            model_name=model_name,
+            job_id=job_id,
+            before_job_id=before_job_id,
+            dry_run=dry_run,
+        )
+
+        if not matched:
+            if dry_run:
+                click.echo("Dry run: no matching log directories found.")
+            else:
+                click.echo("No matching log directories were deleted.")
+        elif dry_run:
+            click.echo(f"Dry run: {len(matched)} directories would be deleted:")
+            for f in matched:
+                click.echo(f"  - {f}")
+        else:
+            click.echo(f"Deleted {len(matched)} log directory(ies).")
+    except Exception as e:
+        raise click.ClickException(f"Cleanup failed: {str(e)}") from e
+
+
 if __name__ == "__main__":
     cli()
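The new `cleanup` command shown above is a thin wrapper over `VecInfClient.cleanup_logs`, which returns the list of matched log directories and only deletes them when `dry_run` is false. The sketch below mirrors that call directly; the filter values are hypothetical and the import path for `VecInfClient` is an assumption.

```python
# Mirrors the keyword arguments used in cleanup_logs_cli above; the filter
# values are invented, and the VecInfClient import path is assumed.
from vec_inf.client.api import VecInfClient

client = VecInfClient()
matched = client.cleanup_logs(
    log_dir=None,            # per the docstring, defaults to ~/.vec-inf-logs
    model_family="llama",    # hypothetical filters
    model_name=None,
    job_id=None,
    before_job_id=1234567,
    dry_run=True,            # report matches without deleting anything
)
for path in matched:
    print(path)
```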
vec_inf/cli/_helper.py
CHANGED
@@ -4,6 +4,7 @@ This module provides formatting and display classes for the command-line interfa
 handling the presentation of model information, status updates, and metrics.
 """
 
+import json
 from pathlib import Path
 from typing import Any, Union
 
@@ -27,9 +28,8 @@ class LaunchResponseFormatter:
     Parameters
     ----------
     model_name : str
-        Name of the launched model
-
-        Launch parameters and configuration
+        Name of the launched model params : dict[str, Any] Launch parameters and
+        configuration
     """
 
     def __init__(self, model_name: str, params: dict[str, Any]):
@@ -59,8 +59,16 @@ class LaunchResponseFormatter:
         table.add_row("Vocabulary Size", self.params["vocab_size"])
 
         # Add resource allocation details
-
-
+        if self.params.get("account"):
+            table.add_row("Account", self.params["account"])
+        if self.params.get("work_dir"):
+            table.add_row("Working Directory", self.params["work_dir"])
+        if self.params.get("resource_type"):
+            table.add_row("Resource Type", self.params["resource_type"])
+        if self.params.get("partition"):
+            table.add_row("Partition", self.params["partition"])
+        if self.params.get("qos"):
+            table.add_row("QoS", self.params["qos"])
         table.add_row("Time Limit", self.params["time"])
         table.add_row("Num Nodes", self.params["num_nodes"])
         table.add_row("GPUs/Node", self.params["gpus_per_node"])
@@ -79,6 +87,80 @@ class LaunchResponseFormatter:
         for arg, value in self.params["vllm_args"].items():
             table.add_row(f" {arg}:", str(value))
 
+        # Add Environment Variable Configuration Details
+        table.add_row("Environment Variables", style="magenta")
+        for arg, value in self.params["env"].items():
+            table.add_row(f" {arg}:", str(value))
+
+        return table
+
+
+class BatchLaunchResponseFormatter:
+    """CLI Helper class for formatting BatchLaunchResponse.
+
+    A formatter class that handles the presentation of batch launch information
+    in both table and JSON formats.
+
+    Parameters
+    ----------
+    params : dict[str, Any]
+        Configuration for the batch launch
+    """
+
+    def __init__(self, params: dict[str, Any]):
+        self.params = params
+
+    def format_table_output(self) -> Table:
+        """Format output as rich Table.
+
+        Returns
+        -------
+        Table
+            Rich table containing formatted batch launch information including:
+            - Job configuration
+            - Model details
+            - Resource allocation
+            - vLLM configuration
+        """
+        table = create_table(key_title="Job Config", value_title="Value")
+        # Add key information with consistent styling
+        table.add_row("Slurm Job ID", self.params["slurm_job_id"], style="blue")
+        table.add_row("Slurm Job Name", self.params["slurm_job_name"], style="blue")
+        if self.params.get("account"):
+            table.add_row("Account", self.params["account"], style="blue")
+        if self.params.get("work_dir"):
+            table.add_row("Working Directory", self.params["work_dir"], style="blue")
+        table.add_row("Log Directory", self.params["log_dir"], style="blue")
+        for model_name in self.params["models"]:
+            table.add_row("Model Name", model_name, style="magenta")
+            # Add resource allocation details
+            if self.params["models"][model_name].get("resource_type"):
+                table.add_row(
+                    "Resource Type",
+                    f" {self.params['models'][model_name]['resource_type']}",
+                )
+            if self.params["models"][model_name].get("partition"):
+                table.add_row(
+                    "Partition", f" {self.params['models'][model_name]['partition']}"
+                )
+            if self.params["models"][model_name].get("qos"):
+                table.add_row("QoS", f" {self.params['models'][model_name]['qos']}")
+            table.add_row(
+                "Time Limit", f" {self.params['models'][model_name]['time']}"
+            )
+            table.add_row(
+                "Num Nodes", f" {self.params['models'][model_name]['num_nodes']}"
+            )
+            table.add_row(
+                "GPUs/Node", f" {self.params['models'][model_name]['gpus_per_node']}"
+            )
+            table.add_row(
+                "CPUs/Task", f" {self.params['models'][model_name]['cpus_per_task']}"
+            )
+            table.add_row(
+                "Memory/Node", f" {self.params['models'][model_name]['mem_per_node']}"
+            )
+
         return table
 
 
@@ -116,7 +198,8 @@ class StatusResponseFormatter:
             json_data["pending_reason"] = self.status_info.pending_reason
         if self.status_info.failed_reason:
             json_data["failed_reason"] = self.status_info.failed_reason
-
+
+        click.echo(json.dumps(json_data, indent=4))
 
     def output_table(self) -> Table:
         """Create and display rich table.
@@ -292,9 +375,7 @@ class ListCmdDisplay:
         self.model_config = None
        self.model_names: list[str] = []
 
-    def _format_single_model_output(
-        self, config: ModelConfig
-    ) -> Union[dict[str, Any], Table]:
+    def _format_single_model_output(self, config: ModelConfig) -> Union[str, Table]:
         """Format output table for a single model.
 
         Parameters
@@ -304,8 +385,8 @@ class ListCmdDisplay:
 
         Returns
         -------
-        Union[
-            Either a
+        Union[str, Table]
+            Either a JSON string for JSON output or a Rich table
         """
         if self.json_mode:
             # Exclude non-essential fields from JSON output
@@ -315,11 +396,11 @@ class ListCmdDisplay:
             config_dict["model_weights_parent_dir"] = str(
                 config_dict["model_weights_parent_dir"]
             )
-            return config_dict
+            return json.dumps(config_dict, indent=4)
 
         table = create_table(key_title="Model Config", value_title="Value")
         for field, value in config.model_dump().items():
-            if field not in {"venv", "log_dir", "vllm_args"}:
+            if field not in {"venv", "log_dir", "vllm_args"} and value:
                 table.add_row(field, str(value))
             if field == "vllm_args":
                 table.add_row("vLLM Arguments:", style="magenta")
@@ -394,7 +475,7 @@ class ListCmdDisplay:
         """
         if self.json_mode:
             model_names = [info.name for info in model_infos]
-            click.echo(model_names)
+            click.echo(json.dumps(model_names, indent=4))
         else:
             panels = self._format_all_models_output(model_infos)
             self.console.print(Columns(panels, equal=True))