vec-inf 0.7.3__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
-"""Class for generating Slurm scripts to run vLLM servers.
+"""Class for generating Slurm scripts to run inference servers.
 
-This module provides functionality to generate Slurm scripts for running vLLM servers
-in both single-node and multi-node configurations.
+This module provides functionality to generate Slurm scripts for running inference
+servers in both single-node and multi-node configurations.
 """
 
 from datetime import datetime
@@ -14,11 +14,11 @@ from vec_inf.client._slurm_templates import (
     BATCH_SLURM_SCRIPT_TEMPLATE,
     SLURM_SCRIPT_TEMPLATE,
 )
-from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME
+from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME, IMAGE_PATH
 
 
 class SlurmScriptGenerator:
-    """A class to generate Slurm scripts for running vLLM servers.
+    """A class to generate Slurm scripts for running inference servers.
 
     This class handles the generation of Slurm scripts for both single-node and
     multi-node configurations, supporting different virtualization environments
@@ -32,6 +32,7 @@ class SlurmScriptGenerator:
 
     def __init__(self, params: dict[str, Any]):
         self.params = params
+        self.engine = params.get("engine", "vllm")
         self.is_multinode = int(self.params["num_nodes"]) > 1
         self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
         self.additional_binds = (
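
The new engine field is the single switch that drives template selection downstream. A minimal sketch of the dispatch it enables (standalone, not package code; the params values are made up):

    # Sketch of the engine/topology dispatch introduced in this release.
    params = {"engine": "sglang", "num_nodes": 2}

    engine = params.get("engine", "vllm")   # same default as __init__ above
    is_multinode = int(params["num_nodes"]) > 1

    if is_multinode and engine == "vllm":
        setup_key = "multinode_vllm"        # Ray-based setup
    elif is_multinode and engine == "sglang":
        setup_key = "multinode_sglang"      # NCCL init-address setup
    else:
        setup_key = "single_node"
    print(setup_key)  # -> multinode_sglang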
@@ -111,6 +112,7 @@ class SlurmScriptGenerator:
         server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
         server_script.append(
             SLURM_SCRIPT_TEMPLATE["bind_path"].format(
+                work_dir=self.params.get("work_dir", str(Path.home())),
                 model_weights_path=self.model_weights_path,
                 additional_binds=self.additional_binds,
             )
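
The bind path now also mounts a per-user cache directory, {work_dir}/.vec-inf-cache, over $HOME/.cache inside the container. A quick sketch of the rendered export line, using the bind_path template from this diff (paths are invented, and APPTAINER is an assumed container module name):

    bind_path_template = (
        "export APPTAINER_BINDPATH=$APPTAINER_BINDPATH,/dev,/tmp,"
        "{work_dir}/.vec-inf-cache:$HOME/.cache,{model_weights_path}{additional_binds}"
    )
    print(bind_path_template.format(
        work_dir="/scratch/alice",           # params.get("work_dir", str(Path.home()))
        model_weights_path="/models/Meta-Llama-3-8B",
        additional_binds="",
    ))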
@@ -123,16 +125,17 @@ class SlurmScriptGenerator:
         server_script.append(
             SLURM_SCRIPT_TEMPLATE["imports"].format(src_dir=self.params["src_dir"])
         )
-        if self.is_multinode:
+
+        if self.is_multinode and self.engine == "vllm":
             server_setup_str = "\n".join(
-                SLURM_SCRIPT_TEMPLATE["server_setup"]["multinode"]
+                SLURM_SCRIPT_TEMPLATE["server_setup"]["multinode_vllm"]
             ).format(gpus_per_node=self.params["gpus_per_node"])
             if self.use_container:
                 server_setup_str = server_setup_str.replace(
                     "CONTAINER_PLACEHOLDER",
                     SLURM_SCRIPT_TEMPLATE["container_command"].format(
-                        model_weights_path=self.model_weights_path,
                         env_str=self.env_str,
+                        image_path=IMAGE_PATH[self.engine],
                     ),
                 )
             else:
@@ -140,12 +143,16 @@ class SlurmScriptGenerator:
                     "CONTAINER_PLACEHOLDER",
                     "\\",
                 )
+        elif self.is_multinode and self.engine == "sglang":
+            server_setup_str = "\n".join(
+                SLURM_SCRIPT_TEMPLATE["server_setup"]["multinode_sglang"]
+            )
         else:
             server_setup_str = "\n".join(
                 SLURM_SCRIPT_TEMPLATE["server_setup"]["single_node"]
             )
         server_script.append(server_setup_str)
-        server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["find_vllm_port"]))
+        server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["find_server_port"]))
         server_script.append(
             "\n".join(SLURM_SCRIPT_TEMPLATE["write_to_json"]).format(
                 log_dir=self.params["log_dir"], model_name=self.params["model_name"]
@@ -154,38 +161,85 @@ class SlurmScriptGenerator:
         return "\n".join(server_script)
 
     def _generate_launch_cmd(self) -> str:
-        """Generate the vLLM server launch command.
+        """Generate the inference server launch command.
 
-        Creates the command to launch the vLLM server, handling different virtualization
-        environments (venv or singularity/apptainer).
+        Creates the command to launch the inference server, handling different
+        virtualization environments (venv or singularity/apptainer).
 
         Returns
         -------
         str
             Server launch command.
         """
-        launcher_script = ["\n"]
+        if self.is_multinode and self.engine == "sglang":
+            return self._generate_multinode_sglang_launch_cmd()
+
+        launch_cmd = ["\n"]
         if self.use_container:
-            launcher_script.append(
+            launch_cmd.append(
                 SLURM_SCRIPT_TEMPLATE["container_command"].format(
-                    model_weights_path=self.model_weights_path,
                     env_str=self.env_str,
+                    image_path=IMAGE_PATH[self.engine],
                 )
             )
 
-        launcher_script.append(
-            "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format(
+        launch_cmd.append(
+            "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"][self.engine]).format(  # type: ignore[literal-required]
                 model_weights_path=self.model_weights_path,
                 model_name=self.params["model_name"],
             )
         )
 
-        for arg, value in self.params["vllm_args"].items():
+        for arg, value in self.params["engine_args"].items():
+            if isinstance(value, bool):
+                launch_cmd.append(f" {arg} \\")
+            else:
+                launch_cmd.append(f" {arg} {value} \\")
+
+        # A known bug in vLLM requires setting backend to ray for multi-node
+        # Remove this when the bug is fixed
+        if self.is_multinode:
+            launch_cmd.append(" --distributed-executor-backend ray \\")
+
+        return "\n".join(launch_cmd).rstrip(" \\")
+
+    def _generate_multinode_sglang_launch_cmd(self) -> str:
+        """Generate the launch command for multi-node sglang setup.
+
+        Returns
+        -------
+        str
+            Multi-node sglang launch command.
+        """
+        launch_cmd = "\n" + "\n".join(
+            SLURM_SCRIPT_TEMPLATE["launch_cmd"]["sglang_multinode"]
+        ).format(
+            num_nodes=self.params["num_nodes"],
+            model_weights_path=self.model_weights_path,
+            model_name=self.params["model_name"],
+        )
+
+        container_placeholder = "\\"
+        if self.use_container:
+            container_placeholder = SLURM_SCRIPT_TEMPLATE["container_command"].format(
+                env_str=self.env_str,
+                image_path=IMAGE_PATH[self.engine],
+            )
+        launch_cmd = launch_cmd.replace(
+            "CONTAINER_PLACEHOLDER",
+            container_placeholder,
+        )
+
+        engine_arg_str = ""
+        for arg, value in self.params["engine_args"].items():
             if isinstance(value, bool):
-                launcher_script.append(f" {arg} \\")
+                engine_arg_str += f" {arg} \\\n"
             else:
-                launcher_script.append(f" {arg} {value} \\")
-        return "\n".join(launcher_script)
+                engine_arg_str += f" {arg} {value} \\\n"
+
+        return launch_cmd.replace(
+            "SGLANG_ARGS_PLACEHOLDER", engine_arg_str.rstrip("\\\n")
+        )
 
     def write_to_log_dir(self) -> Path:
         """Write the generated Slurm script to the log directory.
@@ -212,7 +266,7 @@ class BatchSlurmScriptGenerator:
     """A class to generate Slurm scripts for batch mode.
 
     This class handles the generation of Slurm scripts for batch mode, which
-    launches multiple vLLM servers with different configurations in parallel.
+    launches multiple inference servers with different configurations in parallel.
     """
 
     def __init__(self, params: dict[str, Any]):
@@ -246,7 +300,7 @@ class BatchSlurmScriptGenerator:
         return script_path
 
     def _generate_model_launch_script(self, model_name: str) -> Path:
-        """Generate the bash script for launching individual vLLM servers.
+        """Generate the bash script for launching individual inference servers.
 
         Parameters
         ----------
@@ -256,7 +310,7 @@ class BatchSlurmScriptGenerator:
         Returns
         -------
         Path
-            The bash script path for launching the vLLM server.
+            The bash script path for launching the inference server.
         """
         # Generate the bash script content
         script_content = []
@@ -266,6 +320,7 @@ class BatchSlurmScriptGenerator:
         script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
         script_content.append(
             BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
+                work_dir=self.params.get("work_dir", str(Path.home())),
                 model_weights_path=model_params["model_weights_path"],
                 additional_binds=model_params["additional_binds"],
             )
@@ -286,21 +341,23 @@ class BatchSlurmScriptGenerator:
         if self.use_container:
             script_content.append(
                 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format(
-                    model_weights_path=model_params["model_weights_path"],
+                    image_path=IMAGE_PATH[model_params["engine"]],
                 )
             )
         script_content.append(
-            "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format(
+            "\n".join(
+                BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"][model_params["engine"]]
+            ).format(
                 model_weights_path=model_params["model_weights_path"],
                 model_name=model_name,
             )
         )
-        for arg, value in model_params["vllm_args"].items():
+        for arg, value in model_params["engine_args"].items():
             if isinstance(value, bool):
                 script_content.append(f" {arg} \\")
             else:
                 script_content.append(f" {arg} {value} \\")
-        script_content[-1] = script_content[-1].replace("\\", "")
+        script_content[-1] = script_content[-1].rstrip(" \\")
         # Write the bash script to the log directory
         launch_script_path = self._write_to_log_dir(
             script_content, f"launch_{model_name}.sh"
@@ -339,12 +396,12 @@ class BatchSlurmScriptGenerator:
         return "\n".join(shebang)
 
     def generate_batch_slurm_script(self) -> Path:
-        """Generate the Slurm script for launching multiple vLLM servers in batch mode.
+        """Generate the Slurm script for launching multiple inference servers in batch.
 
         Returns
         -------
         Path
-            The Slurm script for launching multiple vLLM servers in batch mode.
+            The Slurm script for launching multiple inference servers in batch.
         """
         script_content = []
 
@@ -9,7 +9,7 @@ from typing import TypedDict
 from vec_inf.client._slurm_vars import (
     CONTAINER_LOAD_CMD,
     CONTAINER_MODULE_NAME,
-    IMAGE_PATH,
+    PYTHON_VERSION,
 )
 
 
@@ -38,12 +38,33 @@ class ServerSetupConfig(TypedDict):
     ----------
     single_node : list[str]
         Setup commands for single-node deployments
-    multinode : list[str]
-        Setup commands for multi-node deployments, including Ray initialization
+    multinode_vllm : list[str]
+        Setup commands for multi-node vLLM deployments
+    multinode_sglang : list[str]
+        Setup commands for multi-node SGLang deployments
     """
 
     single_node: list[str]
-    multinode: list[str]
+    multinode_vllm: list[str]
+    multinode_sglang: list[str]
+
+
+class LaunchCmdConfig(TypedDict):
+    """TypedDict for launch command configuration.
+
+    Parameters
+    ----------
+    vllm : list[str]
+        Launch commands for vLLM inference server
+    sglang : list[str]
+        Launch commands for SGLang inference server
+    sglang_multinode : list[str]
+        Launch commands for multi-node SGLang inference server
+    """
+
+    vllm: list[str]
+    sglang: list[str]
+    sglang_multinode: list[str]
 
 
 class SlurmScriptTemplate(TypedDict):
@@ -65,12 +86,12 @@ class SlurmScriptTemplate(TypedDict):
         Template for virtual environment activation
     server_setup : ServerSetupConfig
         Server initialization commands for different deployment modes
-    find_vllm_port : list[str]
-        Commands to find available ports for vLLM server
+    find_server_port : list[str]
+        Commands to find available ports for inference server
     write_to_json : list[str]
         Commands to write server configuration to JSON
-    launch_cmd : list[str]
-        vLLM server launch commands
+    launch_cmd : LaunchCmdConfig
+        Inference server launch commands
     """
 
     shebang: ShebangConfig
@@ -80,33 +101,31 @@ class SlurmScriptTemplate(TypedDict):
     container_command: str
     activate_venv: str
     server_setup: ServerSetupConfig
-    find_vllm_port: list[str]
+    find_server_port: list[str]
     write_to_json: list[str]
-    launch_cmd: list[str]
+    launch_cmd: LaunchCmdConfig
 
 
 SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
     "shebang": {
         "base": "#!/bin/bash",
         "multinode": [
-            "#SBATCH --exclusive",
-            "#SBATCH --tasks-per-node=1",
+            "#SBATCH --ntasks-per-node=1",
         ],
     },
     "container_setup": [
         CONTAINER_LOAD_CMD,
-        f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
     ],
     "imports": "source {src_dir}/find_port.sh",
-    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\",
+    "bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
    "activate_venv": "source {venv}/bin/activate",
    "server_setup": {
        "single_node": [
            "\n# Find available port",
-            "head_node_ip=${SLURMD_NODENAME}",
+            "head_node=${SLURMD_NODENAME}",
        ],
-        "multinode": [
+        "multinode_vllm": [
            "\n# Get list of nodes",
            'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
            "nodes_array=($nodes)",
@@ -130,7 +149,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
             " fi",
             "fi",
             "\n# Start Ray head node",
-            "head_node_port=$(find_available_port $head_node_ip 8080 65535)",
+            "head_node_port=$(find_available_port $head_node 8080 65535)",
             "ray_head=$head_node_ip:$head_node_port",
             'echo "Ray Head IP: $ray_head"',
             'echo "Starting HEAD at $head_node"',
@@ -151,10 +170,19 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
             " sleep 5",
             "done",
         ],
+        "multinode_sglang": [
+            "\n# Set NCCL initialization address using the hostname of the head node",
+            'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
+            "nodes_array=($nodes)",
+            "head_node=${nodes_array[0]}",
+            "NCCL_PORT=$(find_available_port $head_node 8000 65535)",
+            'NCCL_INIT_ADDR="${head_node}:${NCCL_PORT}"',
+            'echo "[INFO] NCCL_INIT_ADDR: $NCCL_INIT_ADDR"',
+        ],
     },
-    "find_vllm_port": [
-        "\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
-        'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
+    "find_server_port": [
+        "\nserver_port_number=$(find_available_port $head_node 8080 65535)",
+        'server_address="http://${head_node}:${server_port_number}/v1"',
     ],
     "write_to_json": [
         '\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
@@ -163,12 +191,39 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
         ' "$json_path" > temp.json \\',
         ' && mv temp.json "$json_path"',
     ],
-    "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
-        " --served-model-name {model_name} \\",
-        ' --host "0.0.0.0" \\',
-        " --port $vllm_port_number \\",
-    ],
+    "launch_cmd": {
+        "vllm": [
+            "vllm serve {model_weights_path} \\",
+            " --served-model-name {model_name} \\",
+            ' --host "0.0.0.0" \\',
+            " --port $server_port_number \\",
+        ],
+        "sglang": [
+            f"{PYTHON_VERSION} -m sglang.launch_server \\",
+            " --model-path {model_weights_path} \\",
+            " --served-model-name {model_name} \\",
+            ' --host "0.0.0.0" \\',
+            " --port $server_port_number \\",
+        ],
+        "sglang_multinode": [
+            "for ((i = 0; i < $SLURM_JOB_NUM_NODES; i++)); do",
+            " node_i=${{nodes_array[$i]}}",
+            ' echo "Launching SGLang server on $node_i"',
+            ' srun --ntasks=1 --nodes=1 -w "$node_i" \\',
+            " CONTAINER_PLACEHOLDER",
+            f" {PYTHON_VERSION} -m sglang.launch_server \\",
+            " --model-path {model_weights_path} \\",
+            " --served-model-name {model_name} \\",
+            ' --host "0.0.0.0" \\',
+            " --port $server_port_number \\",
+            ' --nccl-init-addr "$NCCL_INIT_ADDR" \\',
+            " --nnodes {num_nodes} \\",
+            ' --node-rank "$i" \\',
+            "SGLANG_ARGS_PLACEHOLDER &",
+            "done",
+            "\nwait",
+        ],
+    },
 }
 
 
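The sglang_multinode entry is rendered in two passes: .format() fills the model fields (the doubled braces keep bash expansions like ${nodes_array[$i]} literal), then the two placeholders are string-replaced. A sketch of that rendering, assuming vec-inf 0.8.1 is installed (argument values are made up):

    from vec_inf.client._slurm_templates import SLURM_SCRIPT_TEMPLATE

    rendered = "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]["sglang_multinode"]).format(
        num_nodes=2,
        model_weights_path="/models/Meta-Llama-3-8B",
        model_name="Meta-Llama-3-8B",
    )
    rendered = rendered.replace("CONTAINER_PLACEHOLDER", "\\")          # no-container case
    rendered = rendered.replace("SGLANG_ARGS_PLACEHOLDER", " --tp 2")   # sample extra args
    print(rendered)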
@@ -184,7 +239,7 @@ class BatchSlurmScriptTemplate(TypedDict):
     permission_update : str
         Command to update permissions of the script
     launch_model_scripts : list[str]
-        Commands to launch the vLLM server
+        Commands to run server launch scripts
     """
 
     shebang: str
@@ -220,7 +275,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
     server_address_setup : list[str]
         Commands to setup the server address
     launch_cmd : list[str]
-        Commands to launch the vLLM server
+        Commands to launch the inference server
     container_command : str
         Commands to setup the container command
     """
@@ -230,19 +285,19 @@ class BatchModelLaunchScriptTemplate(TypedDict):
     bind_path: str
     server_address_setup: list[str]
     write_to_json: list[str]
-    launch_cmd: list[str]
+    launch_cmd: dict[str, list[str]]
     container_command: str
 
 
 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
     "shebang": "#!/bin/bash\n",
     "container_setup": f"{CONTAINER_LOAD_CMD}\n",
-    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
+    "bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
     "server_address_setup": [
         "source {src_dir}/find_port.sh",
         "head_node_ip=${{SLURMD_NODENAME}}",
-        "vllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
-        'server_address="http://${{head_node_ip}}:${{vllm_port_number}}/v1"\n',
+        "server_port_number=$(find_available_port $head_node_ip 8080 65535)",
+        'server_address="http://${{head_node_ip}}:${{server_port_number}}/v1"\n',
         "echo $server_address\n",
     ],
     "write_to_json": [
@@ -253,11 +308,20 @@ BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
         ' "$json_path" > temp_{model_name}.json \\',
         ' && mv temp_{model_name}.json "$json_path"\n',
     ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {IMAGE_PATH} \\",
-    "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
-        " --served-model-name {model_name} \\",
-        ' --host "0.0.0.0" \\',
-        " --port $vllm_port_number \\",
-    ],
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {{image_path}} \\",
+    "launch_cmd": {
+        "vllm": [
+            "vllm serve {model_weights_path} \\",
+            " --served-model-name {model_name} \\",
+            ' --host "0.0.0.0" \\',
+            " --port $server_port_number \\",
+        ],
+        "sglang": [
+            f"{PYTHON_VERSION} -m sglang.launch_server \\",
+            " --model-path {model_weights_path} \\",
+            " --served-model-name {model_name} \\",
+            ' --host "0.0.0.0" \\',
+            " --port $server_port_number \\",
+        ],
+    },
 }
@@ -52,7 +52,11 @@ def load_env_config() -> dict[str, Any]:
 _config = load_env_config()
 
 # Extract path values
-IMAGE_PATH = _config["paths"]["image_path"]
+IMAGE_PATH = {
+    "vllm": _config["paths"]["vllm_image_path"],
+    "sglang": _config["paths"]["sglang_image_path"],
+}
+CACHED_MODEL_CONFIG_PATH = Path(_config["paths"]["cached_model_config_path"])
 
 # Extract containerization info
 CONTAINER_LOAD_CMD = _config["containerization"]["module_load_cmd"]
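
IMAGE_PATH is now an engine-to-image mapping rather than a single path, and the cached model config location is exposed as a Path. A sketch of the "paths" section the environment config is now expected to carry (key names come from this diff; the values are placeholders):

    from pathlib import Path

    _config = {
        "paths": {
            "vllm_image_path": "/opt/images/vllm.sif",        # placeholder path
            "sglang_image_path": "/opt/images/sglang.sif",    # placeholder path
            "cached_model_config_path": "/opt/vec-inf/models.yaml",
        }
    }
    IMAGE_PATH = {
        "vllm": _config["paths"]["vllm_image_path"],
        "sglang": _config["paths"]["sglang_image_path"],
    }
    CACHED_MODEL_CONFIG_PATH = Path(_config["paths"]["cached_model_config_path"])
    print(IMAGE_PATH["sglang"])  # -> /opt/images/sglang.sif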
@@ -78,9 +82,14 @@ RESOURCE_TYPE: TypeAlias = create_literal_type( # type: ignore[valid-type]
     _config["allowed_values"]["resource_type"]
 )
 
-# Extract required arguments, for launching jobs that don't have a default value and
-# their corresponding environment variables
-REQUIRED_ARGS: dict[str, str] = _config["required_args"]
+# Model types available derived from the cached model config
+MODEL_TYPES: TypeAlias = create_literal_type(_config["model_types"])  # type: ignore[valid-type]
+
+# Required arguments for launching jobs and corresponding environment variables
+REQUIRED_ARGS: dict[str, str | None] = _config["required_args"]
+
+# Running sglang requires python version
+PYTHON_VERSION: str = _config["python_version"]
 
 # Extract default arguments
 DEFAULT_ARGS: dict[str, str] = _config["default_args"]
vec_inf/client/_utils.py CHANGED
@@ -16,7 +16,7 @@ import yaml
 
 from vec_inf.client._client_vars import MODEL_READY_SIGNATURE
 from vec_inf.client._exceptions import MissingRequiredFieldsError
-from vec_inf.client._slurm_vars import CACHED_CONFIG_DIR, REQUIRED_ARGS
+from vec_inf.client._slurm_vars import CACHED_MODEL_CONFIG_PATH, REQUIRED_ARGS
 from vec_inf.client.config import ModelConfig
 from vec_inf.client.models import ModelStatus
 
@@ -77,7 +77,7 @@ def read_slurm_log(
             json_content: dict[str, str] = json.load(file)
             return json_content
         else:
-            with file_path.open("r") as file:
+            with file_path.open("r", errors="replace") as file:
                 return file.readlines()
     except FileNotFoundError:
         return f"LOG FILE NOT FOUND: {file_path}"
@@ -249,7 +249,7 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:
     -----
     Configuration is loaded from:
     1. User path: specified by config_path
-    2. Default path: package's config/models.yaml or CACHED_CONFIG if it exists
+    2. Default path: package's config/models.yaml or CACHED_MODEL_CONFIG_PATH if exists
     3. Environment variable: specified by VEC_INF_CONFIG environment variable
        and merged with default config
 
@@ -303,8 +303,8 @@ def load_config(config_path: Optional[str] = None) -> list[ModelConfig]:
 
     # 2. Otherwise, load default config
     default_path = (
-        CACHED_CONFIG_DIR / "models.yaml"
-        if CACHED_CONFIG_DIR.exists()
+        CACHED_MODEL_CONFIG_PATH
+        if CACHED_MODEL_CONFIG_PATH.exists()
         else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
     )
     config = load_yaml_config(default_path)
@@ -444,10 +444,13 @@ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
     params : dict[str, Any]
         Dictionary of parameters to check.
     """
-    env_overrides = {}
+    env_overrides: dict[str, str] = {}
+
+    if not REQUIRED_ARGS:
+        return env_overrides
     for arg in REQUIRED_ARGS:
         if not params.get(arg):
-            default_value = os.getenv(REQUIRED_ARGS[arg])
+            default_value = os.getenv(str(REQUIRED_ARGS[arg]))
             if default_value:
                 params[arg] = default_value
                 env_overrides[arg] = default_value
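
The loop fills missing required params from environment variables named in REQUIRED_ARGS, which may now be empty or carry None values (hence the guard and the str() cast). A standalone sketch with a hypothetical mapping entry:

    import os

    REQUIRED_ARGS: dict[str, str | None] = {"account": "VEC_INF_ACCOUNT"}  # hypothetical entry
    params: dict[str, str | None] = {"account": None}
    env_overrides: dict[str, str] = {}

    os.environ["VEC_INF_ACCOUNT"] = "my-slurm-account"
    for arg in REQUIRED_ARGS:
        if not params.get(arg):
            default_value = os.getenv(str(REQUIRED_ARGS[arg]))
            if default_value:
                params[arg] = default_value
                env_overrides[arg] = default_value
    print(env_overrides)  # {'account': 'my-slurm-account'}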
vec_inf/client/config.py CHANGED
@@ -8,13 +8,13 @@ from pathlib import Path
 from typing import Any, Optional, Union
 
 from pydantic import BaseModel, ConfigDict, Field
-from typing_extensions import Literal
 
 from vec_inf.client._slurm_vars import (
     DEFAULT_ARGS,
     MAX_CPUS_PER_TASK,
     MAX_GPUS_PER_NODE,
     MAX_NUM_NODES,
+    MODEL_TYPES,
     PARTITION,
     QOS,
     RESOURCE_TYPE,
@@ -66,8 +66,12 @@ class ModelConfig(BaseModel):
         Directory path for storing logs
     model_weights_parent_dir : Path, optional
         Base directory containing model weights
+    engine: str, optional
+        Inference engine to be used, supports 'vllm' and 'sglang'
     vllm_args : dict[str, Any], optional
         Additional arguments for vLLM engine configuration
+    sglang_args : dict[str, Any], optional
+        Additional arguments for SGLang engine configuration
 
     Notes
     -----
@@ -75,14 +79,16 @@ class ModelConfig(BaseModel):
     configured to be immutable (frozen) and forbids extra fields.
     """
 
+    model_config = ConfigDict(
+        extra="ignore", str_strip_whitespace=True, validate_default=True, frozen=True
+    )
+
     model_name: str = Field(..., min_length=3, pattern=r"^[a-zA-Z0-9\-_\.]+$")
     model_family: str = Field(..., min_length=2)
     model_variant: Optional[str] = Field(
         default=None, description="Specific variant/version of the model family"
     )
-    model_type: Literal["LLM", "VLM", "Text_Embedding", "Reward_Modeling"] = Field(
-        ..., description="Type of model architecture"
-    )
+    model_type: MODEL_TYPES = Field(..., description="Type of model architecture")
     gpus_per_node: int = Field(
         ..., gt=0, le=MAX_GPUS_PER_NODE, description="GPUs per node"
     )
@@ -148,12 +154,16 @@ class ModelConfig(BaseModel):
         default=Path(DEFAULT_ARGS["model_weights_parent_dir"]),
         description="Base directory for model weights",
     )
+    engine: Optional[str] = Field(
+        default="vllm",
+        description="Inference engine to be used, supports 'vllm' and 'sglang'",
+    )
     vllm_args: Optional[dict[str, Any]] = Field(
         default={}, description="vLLM engine arguments"
     )
+    sglang_args: Optional[dict[str, Any]] = Field(
+        default={}, description="SGLang engine arguments"
+    )
     env: Optional[dict[str, Any]] = Field(
         default={}, description="Environment variables to be set"
     )
-    model_config = ConfigDict(
-        extra="forbid", str_strip_whitespace=True, validate_default=True, frozen=True
-    )
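
With both vllm_args and sglang_args on the model config, callers need to pick the dict matching the selected engine when building the engine_args the generators consume. A hypothetical helper showing that wiring (glue code, not part of the package):

    from typing import Any

    def select_engine_args(config: dict[str, Any]) -> dict[str, Any]:
        """Pick the per-engine args dict; hypothetical glue, not package code."""
        engine = config.get("engine") or "vllm"   # matches the Field default above
        key = "sglang_args" if engine == "sglang" else "vllm_args"
        return config.get(key) or {}

    cfg = {"engine": "sglang", "sglang_args": {"--mem-fraction-static": 0.85}}
    print(select_engine_args(cfg))  # {'--mem-fraction-static': 0.85}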