vec-inf 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +18 -4
- vec_inf/cli/_cli.py +212 -30
- vec_inf/cli/_helper.py +95 -14
- vec_inf/client/_client_vars.py +19 -152
- vec_inf/client/_helper.py +386 -53
- vec_inf/client/_slurm_script_generator.py +210 -43
- vec_inf/client/_slurm_templates.py +248 -0
- vec_inf/client/_slurm_vars.py +82 -0
- vec_inf/client/_utils.py +190 -71
- vec_inf/client/api.py +96 -25
- vec_inf/client/config.py +46 -15
- vec_inf/client/models.py +51 -2
- vec_inf/config/README.md +4 -243
- vec_inf/config/environment.yaml +31 -0
- vec_inf/config/models.yaml +102 -281
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/METADATA +25 -67
- vec_inf-0.7.0.dist-info/RECORD +27 -0
- vec_inf/client/slurm_vars.py +0 -49
- vec_inf-0.6.0.dist-info/RECORD +0 -25
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/WHEEL +0 -0
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.6.0.dist-info → vec_inf-0.7.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/client/_client_vars.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""Global variables for Vector Inference.
|
|
2
2
|
|
|
3
3
|
This module contains configuration constants and templates used throughout the
|
|
4
|
-
Vector Inference package, including
|
|
5
|
-
and metric definitions.
|
|
4
|
+
Vector Inference package, including model configurations, and metric definitions.
|
|
6
5
|
|
|
7
6
|
Constants
|
|
8
7
|
---------
|
|
@@ -10,33 +9,21 @@ MODEL_READY_SIGNATURE : str
|
|
|
10
9
|
Signature string indicating successful model server startup
|
|
11
10
|
SRC_DIR : str
|
|
12
11
|
Absolute path to the package source directory
|
|
13
|
-
REQUIRED_FIELDS : set
|
|
14
|
-
Set of required fields for model configuration
|
|
15
12
|
KEY_METRICS : dict
|
|
16
13
|
Mapping of vLLM metrics to their human-readable names
|
|
17
14
|
SLURM_JOB_CONFIG_ARGS : dict
|
|
18
15
|
Mapping of SLURM configuration arguments to their parameter names
|
|
16
|
+
VLLM_SHORT_TO_LONG_MAP : dict
|
|
17
|
+
Mapping of vLLM short arguments to their long names
|
|
19
18
|
"""
|
|
20
19
|
|
|
21
20
|
from pathlib import Path
|
|
22
|
-
from typing import TypedDict
|
|
23
|
-
|
|
24
|
-
from vec_inf.client.slurm_vars import SINGULARITY_LOAD_CMD
|
|
25
21
|
|
|
26
22
|
|
|
27
23
|
MODEL_READY_SIGNATURE = "INFO: Application startup complete."
|
|
28
24
|
SRC_DIR = str(Path(__file__).parent.parent)
|
|
29
25
|
|
|
30
26
|
|
|
31
|
-
# Required fields for model configuration
|
|
32
|
-
REQUIRED_FIELDS = {
|
|
33
|
-
"model_family",
|
|
34
|
-
"model_type",
|
|
35
|
-
"gpus_per_node",
|
|
36
|
-
"num_nodes",
|
|
37
|
-
"vocab_size",
|
|
38
|
-
}
|
|
39
|
-
|
|
40
27
|
# Key production metrics for inference servers
|
|
41
28
|
KEY_METRICS = {
|
|
42
29
|
"vllm:prompt_tokens_total": "total_prompt_tokens",
|
|
@@ -57,10 +44,13 @@ SLURM_JOB_CONFIG_ARGS = {
|
|
|
57
44
|
"job-name": "model_name",
|
|
58
45
|
"partition": "partition",
|
|
59
46
|
"account": "account",
|
|
47
|
+
"chdir": "work_dir",
|
|
60
48
|
"qos": "qos",
|
|
61
49
|
"time": "time",
|
|
62
50
|
"nodes": "num_nodes",
|
|
63
|
-
"
|
|
51
|
+
"exclude": "exclude",
|
|
52
|
+
"nodelist": "node_list",
|
|
53
|
+
"gres": "gres",
|
|
64
54
|
"cpus-per-task": "cpus_per_task",
|
|
65
55
|
"mem": "mem_per_node",
|
|
66
56
|
"output": "out_file",
|
|
@@ -71,143 +61,20 @@ SLURM_JOB_CONFIG_ARGS = {
|
|
|
71
61
|
VLLM_SHORT_TO_LONG_MAP = {
|
|
72
62
|
"-tp": "--tensor-parallel-size",
|
|
73
63
|
"-pp": "--pipeline-parallel-size",
|
|
64
|
+
"-dp": "--data-parallel-size",
|
|
65
|
+
"-dpl": "--data-parallel-size-local",
|
|
66
|
+
"-dpa": "--data-parallel-address",
|
|
67
|
+
"-dpp": "--data-parallel-rpc-port",
|
|
74
68
|
"-O": "--compilation-config",
|
|
69
|
+
"-q": "--quantization",
|
|
75
70
|
}
|
|
76
71
|
|
|
72
|
+
# Required matching arguments for batch mode
|
|
73
|
+
BATCH_MODE_REQUIRED_MATCHING_ARGS = ["venv", "log_dir"]
|
|
77
74
|
|
|
78
|
-
#
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
----------
|
|
84
|
-
base : str
|
|
85
|
-
Base shebang line for all SLURM scripts
|
|
86
|
-
multinode : list[str]
|
|
87
|
-
Additional SLURM directives for multi-node configurations
|
|
88
|
-
"""
|
|
89
|
-
|
|
90
|
-
base: str
|
|
91
|
-
multinode: list[str]
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class ServerSetupConfig(TypedDict):
|
|
95
|
-
"""TypedDict for server setup configuration.
|
|
96
|
-
|
|
97
|
-
Parameters
|
|
98
|
-
----------
|
|
99
|
-
single_node : list[str]
|
|
100
|
-
Setup commands for single-node deployments
|
|
101
|
-
multinode : list[str]
|
|
102
|
-
Setup commands for multi-node deployments, including Ray initialization
|
|
103
|
-
"""
|
|
104
|
-
|
|
105
|
-
single_node: list[str]
|
|
106
|
-
multinode: list[str]
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
class SlurmScriptTemplate(TypedDict):
|
|
110
|
-
"""TypedDict for complete SLURM script template configuration.
|
|
111
|
-
|
|
112
|
-
Parameters
|
|
113
|
-
----------
|
|
114
|
-
shebang : ShebangConfig
|
|
115
|
-
Shebang and SLURM directive configuration
|
|
116
|
-
singularity_setup : list[str]
|
|
117
|
-
Commands for Singularity container setup
|
|
118
|
-
imports : str
|
|
119
|
-
Import statements and source commands
|
|
120
|
-
singularity_command : str
|
|
121
|
-
Template for Singularity execution command
|
|
122
|
-
activate_venv : str
|
|
123
|
-
Template for virtual environment activation
|
|
124
|
-
server_setup : ServerSetupConfig
|
|
125
|
-
Server initialization commands for different deployment modes
|
|
126
|
-
find_vllm_port : list[str]
|
|
127
|
-
Commands to find available ports for vLLM server
|
|
128
|
-
write_to_json : list[str]
|
|
129
|
-
Commands to write server configuration to JSON
|
|
130
|
-
launch_cmd : list[str]
|
|
131
|
-
vLLM server launch commands
|
|
132
|
-
"""
|
|
133
|
-
|
|
134
|
-
shebang: ShebangConfig
|
|
135
|
-
singularity_setup: list[str]
|
|
136
|
-
imports: str
|
|
137
|
-
singularity_command: str
|
|
138
|
-
activate_venv: str
|
|
139
|
-
server_setup: ServerSetupConfig
|
|
140
|
-
find_vllm_port: list[str]
|
|
141
|
-
write_to_json: list[str]
|
|
142
|
-
launch_cmd: list[str]
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
146
|
-
"shebang": {
|
|
147
|
-
"base": "#!/bin/bash",
|
|
148
|
-
"multinode": [
|
|
149
|
-
"#SBATCH --exclusive",
|
|
150
|
-
"#SBATCH --tasks-per-node=1",
|
|
151
|
-
],
|
|
152
|
-
},
|
|
153
|
-
"singularity_setup": [
|
|
154
|
-
SINGULARITY_LOAD_CMD,
|
|
155
|
-
"singularity exec {singularity_image} ray stop",
|
|
156
|
-
],
|
|
157
|
-
"imports": "source {src_dir}/find_port.sh",
|
|
158
|
-
"singularity_command": "singularity exec --nv --bind {model_weights_path}:{model_weights_path} --containall {singularity_image}",
|
|
159
|
-
"activate_venv": "source {venv}/bin/activate",
|
|
160
|
-
"server_setup": {
|
|
161
|
-
"single_node": [
|
|
162
|
-
"\n# Find available port",
|
|
163
|
-
"head_node_ip=${SLURMD_NODENAME}",
|
|
164
|
-
],
|
|
165
|
-
"multinode": [
|
|
166
|
-
"\n# Get list of nodes",
|
|
167
|
-
'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
|
|
168
|
-
"nodes_array=($nodes)",
|
|
169
|
-
"head_node=${nodes_array[0]}",
|
|
170
|
-
'head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)',
|
|
171
|
-
"\n# Start Ray head node",
|
|
172
|
-
"head_node_port=$(find_available_port $head_node_ip 8080 65535)",
|
|
173
|
-
"ray_head=$head_node_ip:$head_node_port",
|
|
174
|
-
'echo "Ray Head IP: $ray_head"',
|
|
175
|
-
'echo "Starting HEAD at $head_node"',
|
|
176
|
-
'srun --nodes=1 --ntasks=1 -w "$head_node" \\',
|
|
177
|
-
" SINGULARITY_PLACEHOLDER \\",
|
|
178
|
-
' ray start --head --node-ip-address="$head_node_ip" --port=$head_node_port \\',
|
|
179
|
-
' --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
|
|
180
|
-
"sleep 10",
|
|
181
|
-
"\n# Start Ray worker nodes",
|
|
182
|
-
"worker_num=$((SLURM_JOB_NUM_NODES - 1))",
|
|
183
|
-
"for ((i = 1; i <= worker_num; i++)); do",
|
|
184
|
-
" node_i=${nodes_array[$i]}",
|
|
185
|
-
' echo "Starting WORKER $i at $node_i"',
|
|
186
|
-
' srun --nodes=1 --ntasks=1 -w "$node_i" \\',
|
|
187
|
-
" SINGULARITY_PLACEHOLDER \\",
|
|
188
|
-
' ray start --address "$ray_head" \\',
|
|
189
|
-
' --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
|
|
190
|
-
" sleep 5",
|
|
191
|
-
"done",
|
|
192
|
-
],
|
|
193
|
-
},
|
|
194
|
-
"find_vllm_port": [
|
|
195
|
-
"\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
|
|
196
|
-
'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
|
|
197
|
-
],
|
|
198
|
-
"write_to_json": [
|
|
199
|
-
'\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
|
|
200
|
-
'jq --arg server_addr "$server_address" \\',
|
|
201
|
-
" '. + {{\"server_address\": $server_addr}}' \\",
|
|
202
|
-
' "$json_path" > temp.json \\',
|
|
203
|
-
' && mv temp.json "$json_path"',
|
|
204
|
-
],
|
|
205
|
-
"launch_cmd": [
|
|
206
|
-
"python3.10 -m vllm.entrypoints.openai.api_server \\",
|
|
207
|
-
" --model {model_weights_path} \\",
|
|
208
|
-
" --served-model-name {model_name} \\",
|
|
209
|
-
' --host "0.0.0.0" \\',
|
|
210
|
-
" --port $vllm_port_number \\",
|
|
211
|
-
" --trust-remote-code \\",
|
|
212
|
-
],
|
|
75
|
+
# Required arguments for launching jobs that don't have a default value and their
|
|
76
|
+
# corresponding environment variables
|
|
77
|
+
REQUIRED_ARGS = {
|
|
78
|
+
"account": "VEC_INF_ACCOUNT",
|
|
79
|
+
"work_dir": "VEC_INF_WORK_DIR",
|
|
213
80
|
}
|