vec-inf: 0.5.0-py3-none-any.whl → 0.6.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/cli/_utils.py CHANGED
@@ -1,162 +1,38 @@
-"""Utility functions for the CLI."""
+"""Helper functions for the CLI.
 
-import json
-import os
-import subprocess
-from pathlib import Path
-from typing import Any, Optional, Union, cast
+This module provides utility functions for creating consistent table displays
+in the command-line interface.
+"""
 
-import requests
-import yaml
 from rich.table import Table
 
-from vec_inf.cli._config import ModelConfig
-
-
-MODEL_READY_SIGNATURE = "INFO: Application startup complete."
-CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")
-
-
-def run_bash_command(command: str) -> tuple[str, str]:
-    """Run a bash command and return the output."""
-    process = subprocess.Popen(
-        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
-    )
-    return process.communicate()
-
-
-def read_slurm_log(
-    slurm_job_name: str,
-    slurm_job_id: int,
-    slurm_log_type: str,
-    log_dir: Optional[Union[str, Path]],
-) -> Union[list[str], str, dict[str, str]]:
-    """Read the slurm log file."""
-    if not log_dir:
-        # Default log directory
-        models_dir = Path.home() / ".vec-inf-logs"
-        if not models_dir.exists():
-            return "LOG DIR NOT FOUND"
-        # Iterate over all dirs in models_dir, sorted by dir name length in desc order
-        for directory in sorted(
-            [d for d in models_dir.iterdir() if d.is_dir()],
-            key=lambda d: len(d.name),
-            reverse=True,
-        ):
-            if directory.name in slurm_job_name:
-                log_dir = directory
-                break
-    else:
-        log_dir = Path(log_dir)
-
-    # If log_dir is still not set, then didn't find the log dir at default location
-    if not log_dir:
-        return "LOG DIR NOT FOUND"
-
-    try:
-        file_path = (
-            log_dir
-            / Path(f"{slurm_job_name}.{slurm_job_id}")
-            / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
-        )
-        if slurm_log_type == "json":
-            with file_path.open("r") as file:
-                json_content: dict[str, str] = json.load(file)
-                return json_content
-        else:
-            with file_path.open("r") as file:
-                return file.readlines()
-    except FileNotFoundError:
-        return f"LOG FILE NOT FOUND: {file_path}"
-
-
-def is_server_running(
-    slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) -> Union[str, tuple[str, str]]:
-    """Check if a model is ready to serve requests."""
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
-    if isinstance(log_content, str):
-        return log_content
-
-    status: Union[str, tuple[str, str]] = "LAUNCHING"
-
-    for line in log_content:
-        if "error" in line.lower():
-            status = ("FAILED", line.strip("\n"))
-        if MODEL_READY_SIGNATURE in line:
-            status = "RUNNING"
-
-    return status
-
-
-def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
-    """Get the base URL of a model."""
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
-    if isinstance(log_content, str):
-        return log_content
-
-    server_addr = cast(dict[str, str], log_content).get("server_address")
-    return server_addr if server_addr else "URL NOT FOUND"
-
-
-def model_health_check(
-    slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) -> tuple[str, Union[str, int]]:
-    """Check the health of a running model on the cluster."""
-    base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
-    if not base_url.startswith("http"):
-        return ("FAILED", base_url)
-    health_check_url = base_url.replace("v1", "health")
-
-    try:
-        response = requests.get(health_check_url)
-        # Check if the request was successful
-        if response.status_code == 200:
-            return ("READY", response.status_code)
-        return ("FAILED", response.status_code)
-    except requests.exceptions.RequestException as e:
-        return ("FAILED", str(e))
-
 
 def create_table(
     key_title: str = "", value_title: str = "", show_header: bool = True
 ) -> Table:
-    """Create a table for displaying model status."""
+    """Create a table for displaying model status.
+
+    Creates a two-column Rich table with consistent styling for displaying
+    key-value pairs in the CLI.
+
+    Parameters
+    ----------
+    key_title : str, default=""
+        Title for the key column
+    value_title : str, default=""
+        Title for the value column
+    show_header : bool, default=True
+        Whether to display column headers
+
+    Returns
+    -------
+    Table
+        Rich Table instance with configured styling:
+        - Headers in bold magenta
+        - Key column in dim style
+        - Value column in default style
+    """
     table = Table(show_header=show_header, header_style="bold magenta")
     table.add_column(key_title, style="dim")
     table.add_column(value_title)
     return table
-
-
-def load_config() -> list[ModelConfig]:
-    """Load the model configuration."""
-    default_path = (
-        CACHED_CONFIG
-        if CACHED_CONFIG.exists()
-        else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
-    )
-
-    config: dict[str, Any] = {}
-    with open(default_path) as f:
-        config = yaml.safe_load(f) or {}
-
-    user_path = os.getenv("VEC_INF_CONFIG")
-    if user_path:
-        user_path_obj = Path(user_path)
-        if user_path_obj.exists():
-            with open(user_path_obj) as f:
-                user_config = yaml.safe_load(f) or {}
-                for name, data in user_config.get("models", {}).items():
-                    if name in config.get("models", {}):
-                        config["models"][name].update(data)
-                    else:
-                        config.setdefault("models", {})[name] = data
-        else:
-            print(
-                f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
-            )
-
-    return [
-        ModelConfig(model_name=name, **model_data)
-        for name, model_data in config.get("models", {}).items()
-    ]
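
Note: with the status, logging, and config helpers removed (most reappear under the new client package later in this diff), vec_inf/cli/_utils.py is reduced to the create_table helper. A minimal usage sketch, where the Console setup and the row values are illustrative and not from the package:

    from rich.console import Console

    from vec_inf.cli._utils import create_table

    console = Console()
    table = create_table(key_title="Field", value_title="Value")
    table.add_row("Model Name", "example-model")  # example values only
    table.add_row("Status", "RUNNING")
    console.print(table)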
vec_inf/cli/_vars.py ADDED
@@ -0,0 +1,32 @@
+"""Constants for CLI rendering.
+
+This module defines constant mappings for model type priorities and colors
+used in the CLI display formatting.
+
+Constants
+---------
+MODEL_TYPE_PRIORITY : dict
+    Mapping of model types to their display priority (lower numbers shown first)
+
+MODEL_TYPE_COLORS : dict
+    Mapping of model types to their display colors in Rich
+
+Notes
+-----
+These constants are used primarily by the ListCmdDisplay class to ensure
+consistent sorting and color coding of different model types in the CLI output.
+"""
+
+MODEL_TYPE_PRIORITY = {
+    "LLM": 0,
+    "VLM": 1,
+    "Text_Embedding": 2,
+    "Reward_Modeling": 3,
+}
+
+MODEL_TYPE_COLORS = {
+    "LLM": "cyan",
+    "VLM": "bright_blue",
+    "Text_Embedding": "purple",
+    "Reward_Modeling": "bright_magenta",
+}
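
Note: a sketch of how these constants can drive list output, sorting by MODEL_TYPE_PRIORITY and wrapping names in Rich color markup. The models list is made-up example data; the diff itself only defines the two mappings:

    from vec_inf.cli._vars import MODEL_TYPE_COLORS, MODEL_TYPE_PRIORITY

    models = [("gte-large", "Text_Embedding"), ("llama-3", "LLM"), ("llava", "VLM")]

    # Lower priority values sort first, so LLMs appear before VLMs, and so on.
    for name, model_type in sorted(models, key=lambda m: MODEL_TYPE_PRIORITY[m[1]]):
        color = MODEL_TYPE_COLORS[model_type]
        print(f"[{color}]{name}[/{color}]")  # Rich markup, e.g. [cyan]llama-3[/cyan]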
@@ -0,0 +1,31 @@
+"""Programmatic API for Vector Inference.
+
+This module provides a Python API for launching and managing inference servers
+using `vec_inf`. It is an alternative to the command-line interface, and allows
+users direct control over the lifecycle of inference servers via python scripts.
+"""
+
+from vec_inf.client.api import VecInfClient
+from vec_inf.client.config import ModelConfig
+from vec_inf.client.models import (
+    LaunchOptions,
+    LaunchResponse,
+    MetricsResponse,
+    ModelInfo,
+    ModelStatus,
+    ModelType,
+    StatusResponse,
+)
+
+
+__all__ = [
+    "VecInfClient",
+    "LaunchResponse",
+    "StatusResponse",
+    "ModelInfo",
+    "MetricsResponse",
+    "ModelStatus",
+    "ModelType",
+    "LaunchOptions",
+    "ModelConfig",
+]
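
Note: this hunk's file header is missing above; the imports suggest it is the client package's __init__. A sketch of how the re-exported API might be used. Only the exported names appear in this diff, so the method names and the slurm_job_id field below are assumptions to verify against the package documentation:

    from vec_inf.client.api import VecInfClient

    client = VecInfClient()
    # launch_model / get_status are assumed method names, not shown in this diff.
    response = client.launch_model("example-model")    # expected: LaunchResponse
    status = client.get_status(response.slurm_job_id)  # expected: StatusResponse
    print(status)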
@@ -0,0 +1,231 @@
+"""Global variables for Vector Inference.
+
+This module contains configuration constants and templates used throughout the
+Vector Inference package, including SLURM script templates, model configurations,
+and metric definitions.
+
+Constants
+---------
+MODEL_READY_SIGNATURE : str
+    Signature string indicating successful model server startup
+SRC_DIR : str
+    Absolute path to the package source directory
+REQUIRED_FIELDS : set
+    Set of required fields for model configuration
+KEY_METRICS : dict
+    Mapping of vLLM metrics to their human-readable names
+SLURM_JOB_CONFIG_ARGS : dict
+    Mapping of SLURM configuration arguments to their parameter names
+"""
+
+from pathlib import Path
+from typing import TypedDict
+
+from vec_inf.client.slurm_vars import (
+    LD_LIBRARY_PATH,
+    SINGULARITY_IMAGE,
+    SINGULARITY_LOAD_CMD,
+    VLLM_NCCL_SO_PATH,
+)
+
+
+MODEL_READY_SIGNATURE = "INFO: Application startup complete."
+SRC_DIR = str(Path(__file__).parent.parent)
+
+
+# Required fields for model configuration
+REQUIRED_FIELDS = {
+    "model_family",
+    "model_type",
+    "gpus_per_node",
+    "num_nodes",
+    "vocab_size",
+}
+
+# Key production metrics for inference servers
+KEY_METRICS = {
+    "vllm:prompt_tokens_total": "total_prompt_tokens",
+    "vllm:generation_tokens_total": "total_generation_tokens",
+    "vllm:e2e_request_latency_seconds_sum": "request_latency_sum",
+    "vllm:e2e_request_latency_seconds_count": "request_latency_count",
+    "vllm:request_queue_time_seconds_sum": "queue_time_sum",
+    "vllm:request_success_total": "successful_requests_total",
+    "vllm:num_requests_running": "requests_running",
+    "vllm:num_requests_waiting": "requests_waiting",
+    "vllm:num_requests_swapped": "requests_swapped",
+    "vllm:gpu_cache_usage_perc": "gpu_cache_usage",
+    "vllm:cpu_cache_usage_perc": "cpu_cache_usage",
+}
+
+# Slurm job configuration arguments
+SLURM_JOB_CONFIG_ARGS = {
+    "job-name": "model_name",
+    "partition": "partition",
+    "account": "account",
+    "qos": "qos",
+    "time": "time",
+    "nodes": "num_nodes",
+    "exclude": "exclude",
+    "nodelist": "node_list",
+    "gpus-per-node": "gpus_per_node",
+    "cpus-per-task": "cpus_per_task",
+    "mem": "mem_per_node",
+    "output": "out_file",
+    "error": "err_file",
+}
+
+# vLLM engine args mapping between short and long names
+VLLM_SHORT_TO_LONG_MAP = {
+    "-tp": "--tensor-parallel-size",
+    "-pp": "--pipeline-parallel-size",
+    "-dp": "--data-parallel-size",
+    "-dpl": "--data-parallel-size-local",
+    "-dpa": "--data-parallel-address",
+    "-dpp": "--data-parallel-rpc-port",
+    "-O": "--compilation-config",
+    "-q": "--quantization",
+}
+
+
+# Slurm script templates
+class ShebangConfig(TypedDict):
+    """TypedDict for SLURM script shebang configuration.
+
+    Parameters
+    ----------
+    base : str
+        Base shebang line for all SLURM scripts
+    multinode : list[str]
+        Additional SLURM directives for multi-node configurations
+    """
+
+    base: str
+    multinode: list[str]
+
+
+class ServerSetupConfig(TypedDict):
+    """TypedDict for server setup configuration.
+
+    Parameters
+    ----------
+    single_node : list[str]
+        Setup commands for single-node deployments
+    multinode : list[str]
+        Setup commands for multi-node deployments, including Ray initialization
+    """
+
+    single_node: list[str]
+    multinode: list[str]
+
+
+class SlurmScriptTemplate(TypedDict):
+    """TypedDict for complete SLURM script template configuration.
+
+    Parameters
+    ----------
+    shebang : ShebangConfig
+        Shebang and SLURM directive configuration
+    singularity_setup : list[str]
+        Commands for Singularity container setup
+    imports : str
+        Import statements and source commands
+    env_vars : list[str]
+        Environment variables to set
+    singularity_command : str
+        Template for Singularity execution command
+    activate_venv : str
+        Template for virtual environment activation
+    server_setup : ServerSetupConfig
+        Server initialization commands for different deployment modes
+    find_vllm_port : list[str]
+        Commands to find available ports for vLLM server
+    write_to_json : list[str]
+        Commands to write server configuration to JSON
+    launch_cmd : list[str]
+        vLLM server launch commands
+    """
+
+    shebang: ShebangConfig
+    singularity_setup: list[str]
+    imports: str
+    env_vars: list[str]
+    singularity_command: str
+    activate_venv: str
+    server_setup: ServerSetupConfig
+    find_vllm_port: list[str]
+    write_to_json: list[str]
+    launch_cmd: list[str]
+
+
+SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
+    "shebang": {
+        "base": "#!/bin/bash",
+        "multinode": [
+            "#SBATCH --exclusive",
+            "#SBATCH --tasks-per-node=1",
+        ],
+    },
+    "singularity_setup": [
+        SINGULARITY_LOAD_CMD,
+        f"singularity exec {SINGULARITY_IMAGE} ray stop",
+    ],
+    "imports": "source {src_dir}/find_port.sh",
+    "env_vars": [
+        f"export LD_LIBRARY_PATH={LD_LIBRARY_PATH}",
+        f"export VLLM_NCCL_SO_PATH={VLLM_NCCL_SO_PATH}",
+    ],
+    "singularity_command": f"singularity exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {SINGULARITY_IMAGE}",
+    "activate_venv": "source {venv}/bin/activate",
+    "server_setup": {
+        "single_node": [
+            "\n# Find available port",
+            "head_node_ip=${SLURMD_NODENAME}",
+        ],
+        "multinode": [
+            "\n# Get list of nodes",
+            'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
+            "nodes_array=($nodes)",
+            "head_node=${nodes_array[0]}",
+            'head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)',
+            "\n# Start Ray head node",
+            "head_node_port=$(find_available_port $head_node_ip 8080 65535)",
+            "ray_head=$head_node_ip:$head_node_port",
+            'echo "Ray Head IP: $ray_head"',
+            'echo "Starting HEAD at $head_node"',
+            'srun --nodes=1 --ntasks=1 -w "$head_node" \\',
+            "    SINGULARITY_PLACEHOLDER \\",
+            '    ray start --head --node-ip-address="$head_node_ip" --port=$head_node_port \\',
+            '    --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
+            "sleep 10",
+            "\n# Start Ray worker nodes",
+            "worker_num=$((SLURM_JOB_NUM_NODES - 1))",
+            "for ((i = 1; i <= worker_num; i++)); do",
+            "    node_i=${nodes_array[$i]}",
+            '    echo "Starting WORKER $i at $node_i"',
+            '    srun --nodes=1 --ntasks=1 -w "$node_i" \\',
+            "        SINGULARITY_PLACEHOLDER \\",
+            '        ray start --address "$ray_head" \\',
+            '        --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
+            "    sleep 5",
+            "done",
+        ],
+    },
+    "find_vllm_port": [
+        "\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
+        'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
+    ],
+    "write_to_json": [
+        '\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
+        'jq --arg server_addr "$server_address" \\',
+        "    '. + {{\"server_address\": $server_addr}}' \\",
+        '    "$json_path" > temp.json \\',
+        '    && mv temp.json "$json_path"',
+    ],
+    "launch_cmd": [
+        "vllm serve {model_weights_path} \\",
+        "    --served-model-name {model_name} \\",
+        '    --host "0.0.0.0" \\',
+        "    --port $vllm_port_number \\",
+        "    --trust-remote-code \\",
+    ],
+}
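
Note: to make the role of SLURM_JOB_CONFIG_ARGS concrete, here is a sketch of how a launcher could render it into #SBATCH directives. Only the flag-to-parameter mapping comes from the diff; the render function and the params dict are illustrative, and the mapping is copied inline (abridged) because this hunk's file header is missing above:

    # Abridged copy of the mapping shown above (illustrative).
    SLURM_JOB_CONFIG_ARGS = {
        "job-name": "model_name",
        "partition": "partition",
        "nodes": "num_nodes",
        "gpus-per-node": "gpus_per_node",
        "mem": "mem_per_node",
    }


    def render_sbatch_directives(params: dict[str, str]) -> list[str]:
        """Emit one #SBATCH line per configured argument (hypothetical helper)."""
        return [
            f"#SBATCH --{flag}={params[param]}"
            for flag, param in SLURM_JOB_CONFIG_ARGS.items()
            if params.get(param) is not None
        ]


    print("\n".join(render_sbatch_directives({
        "model_name": "example-model",
        "partition": "a40",
        "num_nodes": "1",
        "gpus_per_node": "4",
        "mem_per_node": "64G",
    })))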
@@ -0,0 +1,37 @@
+"""Exceptions for the vector inference package."""
+
+
+class ModelConfigurationError(Exception):
+    """Raised when the model config or weights are missing or invalid."""
+
+    pass
+
+
+class MissingRequiredFieldsError(ValueError):
+    """Raised when required fields are missing from the provided parameters."""
+
+    pass
+
+
+class ModelNotFoundError(KeyError):
+    """Raised when the specified model name is not found in the configuration."""
+
+    pass
+
+
+class SlurmJobError(RuntimeError):
+    """Raised when there's an error with a Slurm job."""
+
+    pass
+
+
+class APIError(Exception):
+    """Base exception for API errors."""
+
+    pass
+
+
+class ServerError(Exception):
+    """Exception raised when there's an error with the inference server."""
+
+    pass
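
Note on the design: several of these exceptions subclass the nearest built-in (ValueError, KeyError, RuntimeError) rather than bare Exception, so callers whose except clauses only know the built-ins still catch the new types. A tiny self-contained sketch:

    class ModelNotFoundError(KeyError):
        """Raised when the specified model name is not found in the configuration."""


    try:
        raise ModelNotFoundError("unknown-model")
    except KeyError as err:  # an except clause written for KeyError catches it too
        print(f"caught: {err!r}")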