vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ from typing import TypedDict
 from vec_inf.client._slurm_vars import (
     CONTAINER_LOAD_CMD,
     CONTAINER_MODULE_NAME,
-    IMAGE_PATH,
+    PYTHON_VERSION,
 )


@@ -38,12 +38,33 @@ class ServerSetupConfig(TypedDict):
     ----------
     single_node : list[str]
         Setup commands for single-node deployments
-    multinode : list[str]
-        Setup commands for multi-node deployments, including Ray initialization
+    multinode_vllm : list[str]
+        Setup commands for multi-node vLLM deployments
+    multinode_sglang : list[str]
+        Setup commands for multi-node SGLang deployments
     """

     single_node: list[str]
-    multinode: list[str]
+    multinode_vllm: list[str]
+    multinode_sglang: list[str]
+
+
+class LaunchCmdConfig(TypedDict):
+    """TypedDict for launch command configuration.
+
+    Parameters
+    ----------
+    vllm : list[str]
+        Launch commands for vLLM inference server
+    sglang : list[str]
+        Launch commands for SGLang inference server
+    sglang_multinode : list[str]
+        Launch commands for multi-node SGLang inference server
+    """
+
+    vllm: list[str]
+    sglang: list[str]
+    sglang_multinode: list[str]


 class SlurmScriptTemplate(TypedDict):
@@ -57,56 +78,54 @@ class SlurmScriptTemplate(TypedDict):
         Commands for container setup
     imports : str
         Import statements and source commands
+    bind_path : str
+        Bind path environment variable for the container
     container_command : str
         Template for container execution command
     activate_venv : str
         Template for virtual environment activation
     server_setup : ServerSetupConfig
         Server initialization commands for different deployment modes
-    find_vllm_port : list[str]
-        Commands to find available ports for vLLM server
+    find_server_port : list[str]
+        Commands to find available ports for inference server
     write_to_json : list[str]
         Commands to write server configuration to JSON
-    launch_cmd : list[str]
-        vLLM server launch commands
+    launch_cmd : LaunchCmdConfig
+        Inference server launch commands
     """

     shebang: ShebangConfig
     container_setup: list[str]
     imports: str
-    container_env_vars: list[str]
+    bind_path: str
     container_command: str
     activate_venv: str
     server_setup: ServerSetupConfig
-    find_vllm_port: list[str]
+    find_server_port: list[str]
     write_to_json: list[str]
-    launch_cmd: list[str]
+    launch_cmd: LaunchCmdConfig


 SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
     "shebang": {
         "base": "#!/bin/bash",
         "multinode": [
-            "#SBATCH --exclusive",
-            "#SBATCH --tasks-per-node=1",
+            "#SBATCH --ntasks-per-node=1",
         ],
     },
     "container_setup": [
         CONTAINER_LOAD_CMD,
-        f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
     ],
     "imports": "source {src_dir}/find_port.sh",
-    "container_env_vars": [
-        f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp"
-    ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
+    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
     "activate_venv": "source {venv}/bin/activate",
     "server_setup": {
         "single_node": [
             "\n# Find available port",
-            "head_node_ip=${SLURMD_NODENAME}",
+            "head_node=${SLURMD_NODENAME}",
         ],
-        "multinode": [
+        "multinode_vllm": [
             "\n# Get list of nodes",
             'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
             "nodes_array=($nodes)",
@@ -130,7 +149,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
             "    fi",
             "fi",
             "\n# Start Ray head node",
-            "head_node_port=$(find_available_port $head_node_ip 8080 65535)",
+            "head_node_port=$(find_available_port $head_node 8080 65535)",
             "ray_head=$head_node_ip:$head_node_port",
             'echo "Ray Head IP: $ray_head"',
             'echo "Starting HEAD at $head_node"',
@@ -151,10 +170,19 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
             "    sleep 5",
             "done",
         ],
+        "multinode_sglang": [
+            "\n# Set NCCL initialization address using the hostname of the head node",
+            'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
+            "nodes_array=($nodes)",
+            "head_node=${nodes_array[0]}",
+            "NCCL_PORT=$(find_available_port $head_node 8000 65535)",
+            'NCCL_INIT_ADDR="${head_node}:${NCCL_PORT}"',
+            'echo "[INFO] NCCL_INIT_ADDR: $NCCL_INIT_ADDR"',
+        ],
     },
-    "find_vllm_port": [
-        "\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
-        'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
+    "find_server_port": [
+        "\nserver_port_number=$(find_available_port $head_node 8080 65535)",
+        'server_address="http://${head_node}:${server_port_number}/v1"',
     ],
     "write_to_json": [
         '\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
@@ -163,12 +191,39 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
         '    "$json_path" > temp.json \\',
         '    && mv temp.json "$json_path"',
     ],
-    "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
-        "    --served-model-name {model_name} \\",
-        '    --host "0.0.0.0" \\',
-        "    --port $vllm_port_number \\",
-    ],
+    "launch_cmd": {
+        "vllm": [
+            "vllm serve {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+        "sglang": [
+            f"{PYTHON_VERSION} -m sglang.launch_server \\",
+            "    --model-path {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+        "sglang_multinode": [
+            "for ((i = 0; i < $SLURM_JOB_NUM_NODES; i++)); do",
+            "    node_i=${{nodes_array[$i]}}",
+            '    echo "Launching SGLang server on $node_i"',
+            '    srun --ntasks=1 --nodes=1 -w "$node_i" \\',
+            "        CONTAINER_PLACEHOLDER",
+            f"        {PYTHON_VERSION} -m sglang.launch_server \\",
+            "            --model-path {model_weights_path} \\",
+            "            --served-model-name {model_name} \\",
+            '            --host "0.0.0.0" \\',
+            "            --port $server_port_number \\",
+            '            --nccl-init-addr "$NCCL_INIT_ADDR" \\',
+            "            --nnodes {num_nodes} \\",
+            '            --node-rank "$i" \\',
+            "SGLANG_ARGS_PLACEHOLDER &",
+            "done",
+            "\nwait",
+        ],
+    },
 }


@@ -184,7 +239,7 @@ class BatchSlurmScriptTemplate(TypedDict):
     permission_update : str
         Command to update permissions of the script
     launch_model_scripts : list[str]
-        Commands to launch the vLLM server
+        Commands to run server launch scripts
     """

     shebang: str
@@ -215,36 +270,34 @@ class BatchModelLaunchScriptTemplate(TypedDict):
         Shebang line for the script
     container_setup : list[str]
         Commands for container setup
-    env_vars : list[str]
-        Environment variables to set
+    bind_path : str
+        Bind path environment variable for the container
     server_address_setup : list[str]
         Commands to setup the server address
     launch_cmd : list[str]
-        Commands to launch the vLLM server
+        Commands to launch the inference server
     container_command : str
         Commands to setup the container command
     """

     shebang: str
     container_setup: str
-    env_vars: list[str]
+    bind_path: str
     server_address_setup: list[str]
     write_to_json: list[str]
-    launch_cmd: list[str]
+    launch_cmd: dict[str, list[str]]
     container_command: str


 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
     "shebang": "#!/bin/bash\n",
     "container_setup": f"{CONTAINER_LOAD_CMD}\n",
-    "env_vars": [
-        f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
-    ],
+    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
     "server_address_setup": [
         "source {src_dir}/find_port.sh",
         "head_node_ip=${{SLURMD_NODENAME}}",
-        "vllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
-        'server_address="http://${{head_node_ip}}:${{vllm_port_number}}/v1"\n',
+        "server_port_number=$(find_available_port $head_node_ip 8080 65535)",
+        'server_address="http://${{head_node_ip}}:${{server_port_number}}/v1"\n',
         "echo $server_address\n",
     ],
     "write_to_json": [
@@ -255,11 +308,20 @@ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
         '    "$json_path" > temp_{model_name}.json \\',
         '    && mv temp_{model_name}.json "$json_path"\n',
     ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
-    "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
-        "    --served-model-name {model_name} \\",
-        '    --host "0.0.0.0" \\',
-        "    --port $vllm_port_number \\",
-    ],
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {{image_path}} \\",
+    "launch_cmd": {
+        "vllm": [
+            "vllm serve {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+        "sglang": [
+            f"{PYTHON_VERSION} -m sglang.launch_server \\",
+            "    --model-path {model_weights_path} \\",
+            "    --served-model-name {model_name} \\",
+            '    --host "0.0.0.0" \\',
+            "    --port $server_port_number \\",
+        ],
+    },
 }
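
The launch commands above are stored as per-engine lists of shell lines with `{placeholder}` fields. A minimal sketch of how a script generator could select and render one of them; the `LAUNCH_CMDS` excerpt and `render_launch_cmd` helper are hypothetical, not part of the package:

```python
# Hypothetical sketch: pick and render per-engine launch command lines.
LAUNCH_CMDS = {  # excerpt mirroring SLURM_SCRIPT_TEMPLATE["launch_cmd"]
    "vllm": [
        "vllm serve {model_weights_path} \\",
        "    --served-model-name {model_name} \\",
    ],
    "sglang": [
        "python3.12 -m sglang.launch_server \\",
        "    --model-path {model_weights_path} \\",
    ],
}

def render_launch_cmd(engine: str, **fields: str) -> str:
    """Join the engine's shell lines and fill the {placeholder} fields."""
    return "\n".join(line.format(**fields) for line in LAUNCH_CMDS[engine])

print(render_launch_cmd("sglang", model_weights_path="/weights/Llama-3.1-8B"))
```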
vec_inf/client/_slurm_vars.py CHANGED
@@ -52,7 +52,11 @@ def load_env_config() -> dict[str, Any]:
 _config = load_env_config()

 # Extract path values
-IMAGE_PATH = _config["paths"]["image_path"]
+IMAGE_PATH = {
+    "vllm": _config["paths"]["vllm_image_path"],
+    "sglang": _config["paths"]["sglang_image_path"],
+}
+CACHED_MODEL_CONFIG_PATH = Path(_config["paths"]["cached_model_config_path"])

 # Extract containerization info
 CONTAINER_LOAD_CMD = _config["containerization"]["module_load_cmd"]
@@ -78,9 +82,14 @@ RESOURCE_TYPE: TypeAlias = create_literal_type(  # type: ignore[valid-type]
     _config["allowed_values"]["resource_type"]
 )

-# Extract required arguments, for launching jobs that don't have a default value and
-# their corresponding environment variables
-REQUIRED_ARGS: dict[str, str] = _config["required_args"]
+# Model types available derived from the cached model config
+MODEL_TYPES: TypeAlias = create_literal_type(_config["model_types"])  # type: ignore[valid-type]
+
+# Required arguments for launching jobs and corresponding environment variables
+REQUIRED_ARGS: dict[str, str | None] = _config["required_args"]
+
+# Running sglang requires python version
+PYTHON_VERSION: str = _config["python_version"]

 # Extract default arguments
 DEFAULT_ARGS: dict[str, str] = _config["default_args"]
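
`MODEL_TYPES` is now built at import time from config values via `create_literal_type`. A hedged sketch of the general technique; the package's actual helper may differ in detail:

```python
# Sketch (assumed): build a typing.Literal from runtime config values.
from typing import Literal, get_args

def create_literal_type(values: list[str]):
    # Literal[("a", "b")] is equivalent to Literal["a", "b"]
    return Literal[tuple(values)]

MODEL_TYPES = create_literal_type(
    ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"]
)
assert get_args(MODEL_TYPES) == (
    "LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"
)
```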
vec_inf/client/_utils.py CHANGED
@@ -16,7 +16,7 @@ import yaml

 from vec_inf.client._client_vars import MODEL_READY_SIGNATURE
 from vec_inf.client._exceptions import MissingRequiredFieldsError
-from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR, REQUIRED_ARGS
+from vec_inf.client._slurm_vars import CACHED_MODEL_CONFIG_PATH, REQUIRED_ARGS
 from vec_inf.client.config import ModelConfig
 from vec_inf.client.models import ModelStatus

@@ -77,7 +77,7 @@ def read_slurm_log(
             json_content: dict[str, str] = json.load(file)
             return json_content
         else:
-            with file_path.open("r") as file:
+            with file_path.open("r", errors="replace") as file:
                 return file.readlines()
     except FileNotFoundError:
         return f"LOG FILE NOT FOUND: {file_path}"
@@ -249,7 +249,7 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:
     -----
     Configuration is loaded from:
     1. User path: specified by config_path
-    2. Default path: package's config/models.yaml or CACHED_CONFIG if it exists
+    2. Default path: package's config/models.yaml or CACHED_MODEL_CONFIG_PATH if exists
     3. Environment variable: specified by VEC_INF_CONFIG environment variable
        and merged with default config

@@ -303,8 +303,8 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:

     # 2. Otherwise, load default config
     default_path = (
-        CACHED_CONFIG_DIR / "models.yaml"
-        if CACHED_CONFIG_DIR.exists()
+        CACHED_MODEL_CONFIG_PATH
+        if CACHED_MODEL_CONFIG_PATH.exists()
         else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )
     config = load_yaml_config(default_path)
@@ -436,7 +436,7 @@ def find_matching_dirs(
     return matched


-def check_required_fields(params: dict[str, Any]) -> None:
+def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
     """Check for required fields without default vals and their corresponding env vars.

     Parameters
@@ -444,12 +444,18 @@ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
     params : dict[str, Any]
         Dictionary of parameters to check.
     """
+    env_overrides: dict[str, str] = {}
+
+    if not REQUIRED_ARGS:
+        return env_overrides
     for arg in REQUIRED_ARGS:
         if not params.get(arg):
-            default_value = os.getenv(REQUIRED_ARGS[arg])
+            default_value = os.getenv(str(REQUIRED_ARGS[arg]))
             if default_value:
                 params[arg] = default_value
+                env_overrides[arg] = default_value
             else:
                 raise MissingRequiredFieldsError(
                     f"{arg} is required, please set it in the command arguments or environment variables"
                 )
+    return env_overrides
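
`check_required_fields` now fills missing fields from environment variables in place and reports which fields it filled. A hedged usage sketch; the values are illustrative, and the `account`/`work_dir` mapping assumes the shipped `environment.yaml`:

```python
# Illustrative sketch: with REQUIRED_ARGS = {"account": "VEC_INF_ACCOUNT",
# "work_dir": "VEC_INF_WORK_DIR"}, unset params are filled from the env.
import os

from vec_inf.client._utils import check_required_fields

os.environ["VEC_INF_ACCOUNT"] = "my-account"   # hypothetical values
os.environ["VEC_INF_WORK_DIR"] = "/scratch/me"

params = {"account": None, "work_dir": None}
env_overrides = check_required_fields(params)

assert params["account"] == "my-account"       # mutated in place
assert env_overrides == {"account": "my-account", "work_dir": "/scratch/me"}
```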
vec_inf/client/api.py CHANGED
@@ -10,7 +10,9 @@ vec_inf.client._helper : Helper classes for model inference server management
 vec_inf.client.models : Data models for API responses
 """

+import re
 import shutil
+import subprocess
 import time
 import warnings
 from pathlib import Path
@@ -181,6 +183,51 @@ class VecInfClient:
         )
         return model_launcher.launch()

+    def fetch_running_jobs(self) -> list[str]:
+        """
+        Fetch the list of running vec-inf job IDs for the current user.
+
+        Returns
+        -------
+        list[str]
+            List of matching job names; empty list if squeue unavailable.
+        """
+        try:
+            res = subprocess.run(
+                ["squeue", "--me", "--noheader"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            job_ids = [
+                ln.strip().split()[0] for ln in res.stdout.splitlines() if ln.strip()
+            ]
+
+            if not job_ids:
+                return []
+
+            # For each job, fetch the full JobName and filter by suffix
+            matching_ids = []
+            for jid in job_ids:
+                try:
+                    sctl = subprocess.run(
+                        ["scontrol", "show", "job", "-o", jid],
+                        capture_output=True,
+                        text=True,
+                        check=True,
+                    )
+                    m = re.search(r"\bJobName=([^\s]+)", sctl.stdout)
+                    if m and m.group(1).endswith("-vec-inf"):
+                        matching_ids.append(jid)
+                except subprocess.CalledProcessError:
+                    # Job might have finished between squeue and scontrol; skip
+                    continue
+
+            return matching_ids
+
+        except subprocess.CalledProcessError as e:
+            raise SlurmJobError(f"Error running slurm command: {e}") from e
+
     def get_status(self, slurm_job_id: str) -> StatusResponse:
         """Get the status of a running model.

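
A usage sketch for the new helper; it assumes a Slurm environment with `squeue`/`scontrol` on PATH, jobs whose `JobName` ends in `-vec-inf`, and a no-argument `VecInfClient` constructor:

```python
# Hedged usage sketch for fetch_running_jobs on a Slurm cluster.
from vec_inf.client.api import VecInfClient

client = VecInfClient()
for job_id in client.fetch_running_jobs():
    # Each ID belongs to a running job launched by vec-inf
    print(job_id, client.get_status(job_id))
```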
vec_inf/client/config.py CHANGED
@@ -8,13 +8,13 @@ from pathlib import Path
 from typing import Any, Optional, Union

 from pydantic import BaseModel, ConfigDict, Field
-from typing_extensions import Literal

 from vec_inf.client._slurm_vars import (
     DEFAULT_ARGS,
     MAX_CPUS_PER_TASK,
     MAX_GPUS_PER_NODE,
     MAX_NUM_NODES,
+    MODEL_TYPES,
     PARTITION,
     QOS,
     RESOURCE_TYPE,
@@ -66,8 +66,12 @@ class ModelConfig(BaseModel):
         Directory path for storing logs
     model_weights_parent_dir : Path, optional
         Base directory containing model weights
+    engine : str, optional
+        Inference engine to be used, supports 'vllm' and 'sglang'
     vllm_args : dict[str, Any], optional
         Additional arguments for vLLM engine configuration
+    sglang_args : dict[str, Any], optional
+        Additional arguments for SGLang engine configuration

     Notes
     -----
@@ -75,14 +79,16 @@ class ModelConfig(BaseModel):
     configured to be immutable (frozen) and forbids extra fields.
     """

+    model_config = ConfigDict(
+        extra="ignore", str_strip_whitespace=True, validate_default=True, frozen=True
+    )
+
     model_name: str = Field(..., min_length=3, pattern=r"^[a-zA-Z0-9\-_\.]+$")
     model_family: str = Field(..., min_length=2)
     model_variant: Optional[str] = Field(
         default=None, description="Specific variant/version of the model family"
     )
-    model_type: Literal["LLM", "VLM", "Text_Embedding", "Reward_Modeling"] = Field(
-        ..., description="Type of model architecture"
-    )
+    model_type: MODEL_TYPES = Field(..., description="Type of model architecture")
     gpus_per_node: int = Field(
         ..., gt=0, le=MAX_GPUS_PER_NODE, description="GPUs per node"
     )
@@ -148,12 +154,16 @@ class ModelConfig(BaseModel):
         default=Path(DEFAULT_ARGS["model_weights_parent_dir"]),
         description="Base directory for model weights",
     )
+    engine: Optional[str] = Field(
+        default="vllm",
+        description="Inference engine to be used, supports 'vllm' and 'sglang'",
+    )
     vllm_args: Optional[dict[str, Any]] = Field(
         default={}, description="vLLM engine arguments"
     )
+    sglang_args: Optional[dict[str, Any]] = Field(
+        default={}, description="SGLang engine arguments"
+    )
     env: Optional[dict[str, Any]] = Field(
         default={}, description="Environment variables to be set"
     )
-    model_config = ConfigDict(
-        extra="forbid", str_strip_whitespace=True, validate_default=True, frozen=True
-    )
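
Note the switch from `extra="forbid"` to `extra="ignore"`: unknown keys in a model config are now silently dropped instead of raising. A minimal standalone pydantic v2 sketch of that difference; the `Forbid`/`Ignore` models are illustrative, not from the package:

```python
# Minimal pydantic v2 sketch of the extra-fields behavior change.
from pydantic import BaseModel, ConfigDict, ValidationError

class Forbid(BaseModel):
    model_config = ConfigDict(extra="forbid")
    name: str

class Ignore(BaseModel):
    model_config = ConfigDict(extra="ignore")
    name: str

print(Ignore(name="m", new_key=1))    # new_key silently dropped: name='m'
try:
    Forbid(name="m", new_key=1)
except ValidationError as err:
    print(err.errors()[0]["type"])    # extra_forbidden
```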
vec_inf/client/models.py CHANGED
@@ -25,7 +25,9 @@ ModelInfo : dataclass

 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Optional, Union
+from typing import Any, Optional, Union, get_args
+
+from vec_inf.client._slurm_vars import MODEL_TYPES


 class ModelStatus(str, Enum):
@@ -55,25 +57,23 @@ class ModelStatus(str, Enum):
     UNAVAILABLE = "UNAVAILABLE"


-class ModelType(str, Enum):
-    """Enum representing the possible model types.
+# Extract model type values from the Literal type
+_MODEL_TYPE_VALUES = get_args(MODEL_TYPES)
+
+
+def _model_type_to_enum_name(model_type: str) -> str:
+    """Convert a model type string to a valid enum attribute name."""
+    # Convert to uppercase and replace hyphens with underscores
+    return model_type.upper().replace("-", "_")

-    Attributes
-    ----------
-    LLM : str
-        Large Language Model
-    VLM : str
-        Vision Language Model
-    TEXT_EMBEDDING : str
-        Text Embedding Model
-    REWARD_MODELING : str
-        Reward Modeling Model
-    """

-    LLM = "LLM"
-    VLM = "VLM"
-    TEXT_EMBEDDING = "Text_Embedding"
-    REWARD_MODELING = "Reward_Modeling"
+# Create ModelType enum dynamically from MODEL_TYPES
+ModelType = Enum(  # type: ignore[misc]
+    "ModelType",
+    {_model_type_to_enum_name(mt): mt for mt in _MODEL_TYPE_VALUES},
+    type=str,
+    module=__name__,
+)


 @dataclass
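
The functional `Enum` API used above yields the same members as the old hand-written class, plus any new types picked up from the config. A quick self-contained check; the value list here mirrors `environment.yaml`:

```python
# Dynamic ModelType is behaviorally equivalent to the old hand-written enum.
from enum import Enum

values = ("LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR")
ModelType = Enum(
    "ModelType",
    {v.upper().replace("-", "_"): v for v in values},
    type=str,  # str mixin, matching the old `class ModelType(str, Enum)`
)

assert ModelType.TEXT_EMBEDDING.value == "Text_Embedding"
assert ModelType.OCR.value == "OCR"     # new member from the config list
assert isinstance(ModelType.LLM, str)
```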
@@ -222,8 +222,12 @@ class LaunchOptions:
     Directory for logs
     model_weights_parent_dir : str, optional
         Parent directory containing model weights
+    engine : str, optional
+        Inference engine to use
     vllm_args : str, optional
-        Additional arguments for vLLM
+        vLLM engine arguments
+    sglang_args : str, optional
+        SGLang engine arguments
     env : str, optional
         Environment variables to be set
     config : str, optional
@@ -250,7 +254,9 @@ class LaunchOptions:
     venv: Optional[str] = None
     log_dir: Optional[str] = None
     model_weights_parent_dir: Optional[str] = None
+    engine: Optional[str] = None
     vllm_args: Optional[str] = None
+    sglang_args: Optional[str] = None
     env: Optional[str] = None
     config: Optional[str] = None

vec_inf/config/README.md CHANGED
@@ -1,6 +1,6 @@
 # Configs

 * [`environment.yaml`](environment.yaml): Configuration for the Slurm cluster environment, including image paths, resource availabilities, default value, and etc.
-* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as `vllm serve` arguments.
+* [`models.yaml`](models.yaml): Configuration for launching model inference servers, including Slurm parameters as well as inference engine arguments.

 **NOTE**: These configs acts as last resort fallbacks in the `vec-inf` package, they will be updated to match the latest cached config on the Vector Killarney cluster with each new package version release.
vec_inf/config/environment.yaml CHANGED
@@ -1,5 +1,8 @@
 paths:
-  image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif"
+  image_path: "/model-weights/vec-inf-shared/vector-inference_latest.sif" # Maintains backwards compatibility
+  vllm_image_path: "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif"
+  sglang_image_path: "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif"
+  cached_model_config_path: "/model-weights/vec-inf-shared/models.yaml"

 containerization:
   module_load_cmd: "module load apptainer"
@@ -19,13 +22,17 @@ required_args:
   account: "VEC_INF_ACCOUNT"
   work_dir: "VEC_INF_WORK_DIR"

+python_version: "python3.12"
+
+model_types: ["LLM", "VLM", "Text_Embedding", "Reward_Modeling", "OCR"] # Derived from models.yaml
+
 default_args:
   cpus_per_task: "16"
   mem_per_node: "64G"
   time: "08:00:00"
   qos: ""
   partition: ""
-  resource_type: ""
+  resource_type: "l40s"
   exclude: ""
   nodelist: ""
   bind: ""
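
With `IMAGE_PATH` now a per-engine mapping and `{image_path}` substituted into the container command templates, image selection presumably keys off a model's `engine` field. A hedged sketch of that wiring; the `container_command` helper is hypothetical, and `apptainer` is assumed from `module_load_cmd`:

```python
# Hedged sketch (the package's actual wiring may differ): pick the container
# image for a model's engine and render the exec command template.
IMAGE_PATH = {
    "vllm": "/model-weights/vec-inf-shared/vector-inference-vllm_latest.sif",
    "sglang": "/model-weights/vec-inf-shared/vector-inference-sglang_latest.sif",
}

def container_command(engine: str) -> str:
    template = "apptainer exec --nv --containall {image_path} \\"
    return template.format(image_path=IMAGE_PATH[engine])

print(container_command("sglang"))
```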