vec_inf-0.5.0-py3-none-any.whl → vec_inf-0.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +3 -3
- vec_inf/cli/_cli.py +214 -104
- vec_inf/cli/_helper.py +289 -564
- vec_inf/cli/_utils.py +26 -150
- vec_inf/cli/_vars.py +32 -0
- vec_inf/client/__init__.py +31 -0
- vec_inf/client/_client_vars.py +213 -0
- vec_inf/client/_exceptions.py +37 -0
- vec_inf/client/_helper.py +674 -0
- vec_inf/client/_slurm_script_generator.py +179 -0
- vec_inf/client/_utils.py +287 -0
- vec_inf/client/api.py +302 -0
- vec_inf/client/config.py +128 -0
- vec_inf/client/models.py +225 -0
- vec_inf/client/slurm_vars.py +49 -0
- vec_inf/config/README.md +0 -12
- vec_inf/config/models.yaml +417 -391
- {vec_inf-0.5.0.dist-info → vec_inf-0.6.0.dist-info}/METADATA +44 -61
- vec_inf-0.6.0.dist-info/RECORD +25 -0
- vec_inf/cli/_config.py +0 -87
- vec_inf/multinode_vllm.slurm +0 -154
- vec_inf/vllm.slurm +0 -90
- vec_inf-0.5.0.dist-info/RECORD +0 -17
- {vec_inf-0.5.0.dist-info → vec_inf-0.6.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.5.0.dist-info → vec_inf-0.6.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.5.0.dist-info → vec_inf-0.6.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/cli/_utils.py
CHANGED

@@ -1,162 +1,38 @@
-"""
+"""Helper functions for the CLI.
 
-import json
-import os
-import subprocess
-from pathlib import Path
-from typing import Any, Optional, Union, cast
+This module provides utility functions for creating consistent table displays
+in the command-line interface.
+"""
 
-import requests
-import yaml
 from rich.table import Table
 
-from vec_inf.cli._config import ModelConfig
-
-
-MODEL_READY_SIGNATURE = "INFO: Application startup complete."
-CACHED_CONFIG = Path("/", "model-weights", "vec-inf-shared", "models.yaml")
-
-
-def run_bash_command(command: str) -> tuple[str, str]:
-    """Run a bash command and return the output."""
-    process = subprocess.Popen(
-        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
-    )
-    return process.communicate()
-
-
-def read_slurm_log(
-    slurm_job_name: str,
-    slurm_job_id: int,
-    slurm_log_type: str,
-    log_dir: Optional[Union[str, Path]],
-) -> Union[list[str], str, dict[str, str]]:
-    """Read the slurm log file."""
-    if not log_dir:
-        # Default log directory
-        models_dir = Path.home() / ".vec-inf-logs"
-        if not models_dir.exists():
-            return "LOG DIR NOT FOUND"
-        # Iterate over all dirs in models_dir, sorted by dir name length in desc order
-        for directory in sorted(
-            [d for d in models_dir.iterdir() if d.is_dir()],
-            key=lambda d: len(d.name),
-            reverse=True,
-        ):
-            if directory.name in slurm_job_name:
-                log_dir = directory
-                break
-    else:
-        log_dir = Path(log_dir)
-
-    # If log_dir is still not set, then didn't find the log dir at default location
-    if not log_dir:
-        return "LOG DIR NOT FOUND"
-
-    try:
-        file_path = (
-            log_dir
-            / Path(f"{slurm_job_name}.{slurm_job_id}")
-            / f"{slurm_job_name}.{slurm_job_id}.{slurm_log_type}"
-        )
-        if slurm_log_type == "json":
-            with file_path.open("r") as file:
-                json_content: dict[str, str] = json.load(file)
-                return json_content
-        else:
-            with file_path.open("r") as file:
-                return file.readlines()
-    except FileNotFoundError:
-        return f"LOG FILE NOT FOUND: {file_path}"
-
-
-def is_server_running(
-    slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) -> Union[str, tuple[str, str]]:
-    """Check if a model is ready to serve requests."""
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "err", log_dir)
-    if isinstance(log_content, str):
-        return log_content
-
-    status: Union[str, tuple[str, str]] = "LAUNCHING"
-
-    for line in log_content:
-        if "error" in line.lower():
-            status = ("FAILED", line.strip("\n"))
-        if MODEL_READY_SIGNATURE in line:
-            status = "RUNNING"
-
-    return status
-
-
-def get_base_url(slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]) -> str:
-    """Get the base URL of a model."""
-    log_content = read_slurm_log(slurm_job_name, slurm_job_id, "json", log_dir)
-    if isinstance(log_content, str):
-        return log_content
-
-    server_addr = cast(dict[str, str], log_content).get("server_address")
-    return server_addr if server_addr else "URL NOT FOUND"
-
-
-def model_health_check(
-    slurm_job_name: str, slurm_job_id: int, log_dir: Optional[str]
-) -> tuple[str, Union[str, int]]:
-    """Check the health of a running model on the cluster."""
-    base_url = get_base_url(slurm_job_name, slurm_job_id, log_dir)
-    if not base_url.startswith("http"):
-        return ("FAILED", base_url)
-    health_check_url = base_url.replace("v1", "health")
-
-    try:
-        response = requests.get(health_check_url)
-        # Check if the request was successful
-        if response.status_code == 200:
-            return ("READY", response.status_code)
-        return ("FAILED", response.status_code)
-    except requests.exceptions.RequestException as e:
-        return ("FAILED", str(e))
-
 
 def create_table(
     key_title: str = "", value_title: str = "", show_header: bool = True
 ) -> Table:
-    """Create a table for displaying model status."""
+    """Create a table for displaying model status.
+
+    Creates a two-column Rich table with consistent styling for displaying
+    key-value pairs in the CLI.
+
+    Parameters
+    ----------
+    key_title : str, default=""
+        Title for the key column
+    value_title : str, default=""
+        Title for the value column
+    show_header : bool, default=True
+        Whether to display column headers
+
+    Returns
+    -------
+    Table
+        Rich Table instance with configured styling:
+        - Headers in bold magenta
+        - Key column in dim style
+        - Value column in default style
+    """
     table = Table(show_header=show_header, header_style="bold magenta")
     table.add_column(key_title, style="dim")
     table.add_column(value_title)
     return table
-
-
-def load_config() -> list[ModelConfig]:
-    """Load the model configuration."""
-    default_path = (
-        CACHED_CONFIG
-        if CACHED_CONFIG.exists()
-        else Path(__file__).resolve().parent.parent / "config" / "models.yaml"
-    )
-
-    config: dict[str, Any] = {}
-    with open(default_path) as f:
-        config = yaml.safe_load(f) or {}
-
-    user_path = os.getenv("VEC_INF_CONFIG")
-    if user_path:
-        user_path_obj = Path(user_path)
-        if user_path_obj.exists():
-            with open(user_path_obj) as f:
-                user_config = yaml.safe_load(f) or {}
-            for name, data in user_config.get("models", {}).items():
-                if name in config.get("models", {}):
-                    config["models"][name].update(data)
-                else:
-                    config.setdefault("models", {})[name] = data
-        else:
-            print(
-                f"WARNING: Could not find user config: {user_path}, revert to default config located at {default_path}"
-            )
-
-    return [
-        ModelConfig(model_name=name, **model_data)
-        for name, model_data in config.get("models", {}).items()
-    ]
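For orientation, a minimal usage sketch of the one helper this refactor keeps in _utils.py; the row values are illustrative, not taken from the package:

from rich.console import Console

from vec_inf.cli._utils import create_table

console = Console()
status_table = create_table(key_title="Field", value_title="Value")
status_table.add_row("Model Name", "Meta-Llama-3.1-8B-Instruct")  # example values
status_table.add_row("Status", "RUNNING")
console.print(status_table)  # two-column table with bold magenta headers, dim keys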
vec_inf/cli/_vars.py
ADDED

@@ -0,0 +1,32 @@
+"""Constants for CLI rendering.
+
+This module defines constant mappings for model type priorities and colors
+used in the CLI display formatting.
+
+Constants
+---------
+MODEL_TYPE_PRIORITY : dict
+    Mapping of model types to their display priority (lower numbers shown first)
+
+MODEL_TYPE_COLORS : dict
+    Mapping of model types to their display colors in Rich
+
+Notes
+-----
+These constants are used primarily by the ListCmdDisplay class to ensure
+consistent sorting and color coding of different model types in the CLI output.
+"""
+
+MODEL_TYPE_PRIORITY = {
+    "LLM": 0,
+    "VLM": 1,
+    "Text_Embedding": 2,
+    "Reward_Modeling": 3,
+}
+
+MODEL_TYPE_COLORS = {
+    "LLM": "cyan",
+    "VLM": "bright_blue",
+    "Text_Embedding": "purple",
+    "Reward_Modeling": "bright_magenta",
+}
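A sketch of how these two mappings could drive list output, using made-up model records; per the module's Notes, the real consumer is ListCmdDisplay:

from vec_inf.cli._vars import MODEL_TYPE_COLORS, MODEL_TYPE_PRIORITY

models = [
    {"name": "e5-mistral-7b-instruct", "type": "Text_Embedding"},
    {"name": "Meta-Llama-3.1-8B-Instruct", "type": "LLM"},
]

# Known types sort LLM < VLM < Text_Embedding < Reward_Modeling; unknown types sort last.
models.sort(key=lambda m: MODEL_TYPE_PRIORITY.get(m["type"], len(MODEL_TYPE_PRIORITY)))

for model in models:
    color = MODEL_TYPE_COLORS.get(model["type"], "white")
    print(f"[{color}]{model['name']}[/{color}]")  # Rich console markup string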
vec_inf/client/__init__.py
ADDED

@@ -0,0 +1,31 @@
+"""Programmatic API for Vector Inference.
+
+This module provides a Python API for launching and managing inference servers
+using `vec_inf`. It is an alternative to the command-line interface, and allows
+users direct control over the lifecycle of inference servers via Python scripts.
+"""
+
+from vec_inf.client.api import VecInfClient
+from vec_inf.client.config import ModelConfig
+from vec_inf.client.models import (
+    LaunchOptions,
+    LaunchResponse,
+    MetricsResponse,
+    ModelInfo,
+    ModelStatus,
+    ModelType,
+    StatusResponse,
+)
+
+
+__all__ = [
+    "VecInfClient",
+    "LaunchResponse",
+    "StatusResponse",
+    "ModelInfo",
+    "MetricsResponse",
+    "ModelStatus",
+    "ModelType",
+    "LaunchOptions",
+    "ModelConfig",
+]
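A hedged sketch of the programmatic API this module exports. Only the class and response type names appear in this diff; the method and attribute names below (launch_model, get_status, shutdown_model, slurm_job_id, server_status) are assumptions, so consult vec_inf/client/api.py for the actual interface:

from vec_inf.client import ModelStatus, VecInfClient

client = VecInfClient()
# launch_model / get_status / shutdown_model are assumed method names.
launch = client.launch_model("Meta-Llama-3.1-8B-Instruct")  # -> LaunchResponse
status = client.get_status(launch.slurm_job_id)             # -> StatusResponse
if status.server_status == ModelStatus.READY:
    print("server is up")
client.shutdown_model(launch.slurm_job_id)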
vec_inf/client/_client_vars.py
ADDED

@@ -0,0 +1,213 @@
+"""Global variables for Vector Inference.
+
+This module contains configuration constants and templates used throughout the
+Vector Inference package, including SLURM script templates, model configurations,
+and metric definitions.
+
+Constants
+---------
+MODEL_READY_SIGNATURE : str
+    Signature string indicating successful model server startup
+SRC_DIR : str
+    Absolute path to the package source directory
+REQUIRED_FIELDS : set
+    Set of required fields for model configuration
+KEY_METRICS : dict
+    Mapping of vLLM metrics to their human-readable names
+SLURM_JOB_CONFIG_ARGS : dict
+    Mapping of SLURM configuration arguments to their parameter names
+"""
+
+from pathlib import Path
+from typing import TypedDict
+
+from vec_inf.client.slurm_vars import SINGULARITY_LOAD_CMD
+
+
+MODEL_READY_SIGNATURE = "INFO: Application startup complete."
+SRC_DIR = str(Path(__file__).parent.parent)
+
+
+# Required fields for model configuration
+REQUIRED_FIELDS = {
+    "model_family",
+    "model_type",
+    "gpus_per_node",
+    "num_nodes",
+    "vocab_size",
+}
+
+# Key production metrics for inference servers
+KEY_METRICS = {
+    "vllm:prompt_tokens_total": "total_prompt_tokens",
+    "vllm:generation_tokens_total": "total_generation_tokens",
+    "vllm:e2e_request_latency_seconds_sum": "request_latency_sum",
+    "vllm:e2e_request_latency_seconds_count": "request_latency_count",
+    "vllm:request_queue_time_seconds_sum": "queue_time_sum",
+    "vllm:request_success_total": "successful_requests_total",
+    "vllm:num_requests_running": "requests_running",
+    "vllm:num_requests_waiting": "requests_waiting",
+    "vllm:num_requests_swapped": "requests_swapped",
+    "vllm:gpu_cache_usage_perc": "gpu_cache_usage",
+    "vllm:cpu_cache_usage_perc": "cpu_cache_usage",
+}
+
+# Slurm job configuration arguments
+SLURM_JOB_CONFIG_ARGS = {
+    "job-name": "model_name",
+    "partition": "partition",
+    "account": "account",
+    "qos": "qos",
+    "time": "time",
+    "nodes": "num_nodes",
+    "gpus-per-node": "gpus_per_node",
+    "cpus-per-task": "cpus_per_task",
+    "mem": "mem_per_node",
+    "output": "out_file",
+    "error": "err_file",
+}
+
+# vLLM engine args mapping between short and long names
+VLLM_SHORT_TO_LONG_MAP = {
+    "-tp": "--tensor-parallel-size",
+    "-pp": "--pipeline-parallel-size",
+    "-O": "--compilation-config",
+}
+
+
+# Slurm script templates
+class ShebangConfig(TypedDict):
+    """TypedDict for SLURM script shebang configuration.
+
+    Parameters
+    ----------
+    base : str
+        Base shebang line for all SLURM scripts
+    multinode : list[str]
+        Additional SLURM directives for multi-node configurations
+    """
+
+    base: str
+    multinode: list[str]
+
+
+class ServerSetupConfig(TypedDict):
+    """TypedDict for server setup configuration.
+
+    Parameters
+    ----------
+    single_node : list[str]
+        Setup commands for single-node deployments
+    multinode : list[str]
+        Setup commands for multi-node deployments, including Ray initialization
+    """
+
+    single_node: list[str]
+    multinode: list[str]
+
+
+class SlurmScriptTemplate(TypedDict):
+    """TypedDict for complete SLURM script template configuration.
+
+    Parameters
+    ----------
+    shebang : ShebangConfig
+        Shebang and SLURM directive configuration
+    singularity_setup : list[str]
+        Commands for Singularity container setup
+    imports : str
+        Import statements and source commands
+    singularity_command : str
+        Template for Singularity execution command
+    activate_venv : str
+        Template for virtual environment activation
+    server_setup : ServerSetupConfig
+        Server initialization commands for different deployment modes
+    find_vllm_port : list[str]
+        Commands to find available ports for vLLM server
+    write_to_json : list[str]
+        Commands to write server configuration to JSON
+    launch_cmd : list[str]
+        vLLM server launch commands
+    """
+
+    shebang: ShebangConfig
+    singularity_setup: list[str]
+    imports: str
+    singularity_command: str
+    activate_venv: str
+    server_setup: ServerSetupConfig
+    find_vllm_port: list[str]
+    write_to_json: list[str]
+    launch_cmd: list[str]
+
+
+SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
+    "shebang": {
+        "base": "#!/bin/bash",
+        "multinode": [
+            "#SBATCH --exclusive",
+            "#SBATCH --tasks-per-node=1",
+        ],
+    },
+    "singularity_setup": [
+        SINGULARITY_LOAD_CMD,
+        "singularity exec {singularity_image} ray stop",
+    ],
+    "imports": "source {src_dir}/find_port.sh",
+    "singularity_command": "singularity exec --nv --bind {model_weights_path}:{model_weights_path} --containall {singularity_image}",
+    "activate_venv": "source {venv}/bin/activate",
+    "server_setup": {
+        "single_node": [
+            "\n# Find available port",
+            "head_node_ip=${SLURMD_NODENAME}",
+        ],
+        "multinode": [
+            "\n# Get list of nodes",
+            'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
+            "nodes_array=($nodes)",
+            "head_node=${nodes_array[0]}",
+            'head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)',
+            "\n# Start Ray head node",
+            "head_node_port=$(find_available_port $head_node_ip 8080 65535)",
+            "ray_head=$head_node_ip:$head_node_port",
+            'echo "Ray Head IP: $ray_head"',
+            'echo "Starting HEAD at $head_node"',
+            'srun --nodes=1 --ntasks=1 -w "$head_node" \\',
+            "    SINGULARITY_PLACEHOLDER \\",
+            '    ray start --head --node-ip-address="$head_node_ip" --port=$head_node_port \\',
+            '    --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
+            "sleep 10",
+            "\n# Start Ray worker nodes",
+            "worker_num=$((SLURM_JOB_NUM_NODES - 1))",
+            "for ((i = 1; i <= worker_num; i++)); do",
+            "    node_i=${nodes_array[$i]}",
+            '    echo "Starting WORKER $i at $node_i"',
+            '    srun --nodes=1 --ntasks=1 -w "$node_i" \\',
+            "        SINGULARITY_PLACEHOLDER \\",
+            '        ray start --address "$ray_head" \\',
+            '        --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
+            "    sleep 5",
+            "done",
+        ],
+    },
+    "find_vllm_port": [
+        "\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
+        'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
+    ],
+    "write_to_json": [
+        '\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
+        'jq --arg server_addr "$server_address" \\',
+        "    '. + {{\"server_address\": $server_addr}}' \\",
+        '    "$json_path" > temp.json \\',
+        '    && mv temp.json "$json_path"',
+    ],
+    "launch_cmd": [
+        "python3.10 -m vllm.entrypoints.openai.api_server \\",
+        "    --model {model_weights_path} \\",
+        "    --served-model-name {model_name} \\",
+        '    --host "0.0.0.0" \\',
+        "    --port $vllm_port_number \\",
+        "    --trust-remote-code \\",
+    ],
+}
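To make the SLURM_JOB_CONFIG_ARGS mapping concrete, a minimal sketch of expanding it into #SBATCH directives; the params values are invented, and the real expansion lives in vec_inf/client/_slurm_script_generator.py:

from vec_inf.client._client_vars import SLURM_JOB_CONFIG_ARGS

# Hypothetical resolved launch parameters, for illustration only.
params = {
    "model_name": "Meta-Llama-3.1-8B-Instruct",
    "partition": "a40",
    "num_nodes": 1,
    "gpus_per_node": 1,
    "mem_per_node": "64G",
}

sbatch_lines = [
    f"#SBATCH --{arg}={params[key]}"
    for arg, key in SLURM_JOB_CONFIG_ARGS.items()
    if key in params
]
print("\n".join(sbatch_lines))
# e.g. "#SBATCH --job-name=Meta-Llama-3.1-8B-Instruct", "#SBATCH --partition=a40", ...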
vec_inf/client/_exceptions.py
ADDED

@@ -0,0 +1,37 @@
+"""Exceptions for the vector inference package."""
+
+
+class ModelConfigurationError(Exception):
+    """Raised when the model config or weights are missing or invalid."""
+
+    pass
+
+
+class MissingRequiredFieldsError(ValueError):
+    """Raised when required fields are missing from the provided parameters."""
+
+    pass
+
+
+class ModelNotFoundError(KeyError):
+    """Raised when the specified model name is not found in the configuration."""
+
+    pass
+
+
+class SlurmJobError(RuntimeError):
+    """Raised when there's an error with a Slurm job."""
+
+    pass
+
+
+class APIError(Exception):
+    """Base exception for API errors."""
+
+    pass
+
+
+class ServerError(Exception):
+    """Exception raised when there's an error with the inference server."""
+
+    pass