vec-inf 0.6.1__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,7 @@
1
1
  """Global variables for Vector Inference.
2
2
 
3
3
  This module contains configuration constants and templates used throughout the
4
- Vector Inference package, including SLURM script templates, model configurations,
5
- and metric definitions.
4
+ Vector Inference package, including model configurations and metric definitions.
6
5
 
7
6
  Constants
8
7
  ---------
@@ -10,38 +9,21 @@ MODEL_READY_SIGNATURE : str
10
9
  Signature string indicating successful model server startup
11
10
  SRC_DIR : str
12
11
  Absolute path to the package source directory
13
- REQUIRED_FIELDS : set
14
- Set of required fields for model configuration
15
12
  KEY_METRICS : dict
16
13
  Mapping of vLLM metrics to their human-readable names
17
14
  SLURM_JOB_CONFIG_ARGS : dict
18
15
  Mapping of SLURM configuration arguments to their parameter names
16
+ VLLM_SHORT_TO_LONG_MAP : dict
17
+ Mapping of vLLM short arguments to their long names
19
18
  """
20
19
 
21
20
  from pathlib import Path
22
- from typing import TypedDict
23
-
24
- from vec_inf.client.slurm_vars import (
25
- LD_LIBRARY_PATH,
26
- SINGULARITY_IMAGE,
27
- SINGULARITY_LOAD_CMD,
28
- VLLM_NCCL_SO_PATH,
29
- )
30
21
 
31
22
 
32
23
  MODEL_READY_SIGNATURE = "INFO: Application startup complete."
33
24
  SRC_DIR = str(Path(__file__).parent.parent)
34
25
 
35
26
 
36
- # Required fields for model configuration
37
- REQUIRED_FIELDS = {
38
- "model_family",
39
- "model_type",
40
- "gpus_per_node",
41
- "num_nodes",
42
- "vocab_size",
43
- }
44
-
45
27
  # Key production metrics for inference servers
46
28
  KEY_METRICS = {
47
29
  "vllm:prompt_tokens_total": "total_prompt_tokens",
@@ -62,12 +44,13 @@ SLURM_JOB_CONFIG_ARGS = {
62
44
  "job-name": "model_name",
63
45
  "partition": "partition",
64
46
  "account": "account",
47
+ "chdir": "work_dir",
65
48
  "qos": "qos",
66
49
  "time": "time",
67
50
  "nodes": "num_nodes",
68
51
  "exclude": "exclude",
69
52
  "nodelist": "node_list",
70
- "gpus-per-node": "gpus_per_node",
53
+ "gres": "gres",
71
54
  "cpus-per-task": "cpus_per_task",
72
55
  "mem": "mem_per_node",
73
56
  "output": "out_file",
@@ -86,146 +69,5 @@ VLLM_SHORT_TO_LONG_MAP = {
86
69
  "-q": "--quantization",
87
70
  }
88
71
 
89
-
90
- # Slurm script templates
91
- class ShebangConfig(TypedDict):
92
- """TypedDict for SLURM script shebang configuration.
93
-
94
- Parameters
95
- ----------
96
- base : str
97
- Base shebang line for all SLURM scripts
98
- multinode : list[str]
99
- Additional SLURM directives for multi-node configurations
100
- """
101
-
102
- base: str
103
- multinode: list[str]
104
-
105
-
106
- class ServerSetupConfig(TypedDict):
107
- """TypedDict for server setup configuration.
108
-
109
- Parameters
110
- ----------
111
- single_node : list[str]
112
- Setup commands for single-node deployments
113
- multinode : list[str]
114
- Setup commands for multi-node deployments, including Ray initialization
115
- """
116
-
117
- single_node: list[str]
118
- multinode: list[str]
119
-
120
-
121
- class SlurmScriptTemplate(TypedDict):
122
- """TypedDict for complete SLURM script template configuration.
123
-
124
- Parameters
125
- ----------
126
- shebang : ShebangConfig
127
- Shebang and SLURM directive configuration
128
- singularity_setup : list[str]
129
- Commands for Singularity container setup
130
- imports : str
131
- Import statements and source commands
132
- env_vars : list[str]
133
- Environment variables to set
134
- singularity_command : str
135
- Template for Singularity execution command
136
- activate_venv : str
137
- Template for virtual environment activation
138
- server_setup : ServerSetupConfig
139
- Server initialization commands for different deployment modes
140
- find_vllm_port : list[str]
141
- Commands to find available ports for vLLM server
142
- write_to_json : list[str]
143
- Commands to write server configuration to JSON
144
- launch_cmd : list[str]
145
- vLLM server launch commands
146
- """
147
-
148
- shebang: ShebangConfig
149
- singularity_setup: list[str]
150
- imports: str
151
- env_vars: list[str]
152
- singularity_command: str
153
- activate_venv: str
154
- server_setup: ServerSetupConfig
155
- find_vllm_port: list[str]
156
- write_to_json: list[str]
157
- launch_cmd: list[str]
158
-
159
-
160
- SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
161
- "shebang": {
162
- "base": "#!/bin/bash",
163
- "multinode": [
164
- "#SBATCH --exclusive",
165
- "#SBATCH --tasks-per-node=1",
166
- ],
167
- },
168
- "singularity_setup": [
169
- SINGULARITY_LOAD_CMD,
170
- f"singularity exec {SINGULARITY_IMAGE} ray stop",
171
- ],
172
- "imports": "source {src_dir}/find_port.sh",
173
- "env_vars": [
174
- f"export LD_LIBRARY_PATH={LD_LIBRARY_PATH}",
175
- f"export VLLM_NCCL_SO_PATH={VLLM_NCCL_SO_PATH}",
176
- ],
177
- "singularity_command": f"singularity exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {SINGULARITY_IMAGE}",
178
- "activate_venv": "source {venv}/bin/activate",
179
- "server_setup": {
180
- "single_node": [
181
- "\n# Find available port",
182
- "head_node_ip=${SLURMD_NODENAME}",
183
- ],
184
- "multinode": [
185
- "\n# Get list of nodes",
186
- 'nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST")',
187
- "nodes_array=($nodes)",
188
- "head_node=${nodes_array[0]}",
189
- 'head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)',
190
- "\n# Start Ray head node",
191
- "head_node_port=$(find_available_port $head_node_ip 8080 65535)",
192
- "ray_head=$head_node_ip:$head_node_port",
193
- 'echo "Ray Head IP: $ray_head"',
194
- 'echo "Starting HEAD at $head_node"',
195
- 'srun --nodes=1 --ntasks=1 -w "$head_node" \\',
196
- " SINGULARITY_PLACEHOLDER \\",
197
- ' ray start --head --node-ip-address="$head_node_ip" --port=$head_node_port \\',
198
- ' --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
199
- "sleep 10",
200
- "\n# Start Ray worker nodes",
201
- "worker_num=$((SLURM_JOB_NUM_NODES - 1))",
202
- "for ((i = 1; i <= worker_num; i++)); do",
203
- " node_i=${nodes_array[$i]}",
204
- ' echo "Starting WORKER $i at $node_i"',
205
- ' srun --nodes=1 --ntasks=1 -w "$node_i" \\',
206
- " SINGULARITY_PLACEHOLDER \\",
207
- ' ray start --address "$ray_head" \\',
208
- ' --num-cpus "$SLURM_CPUS_PER_TASK" --num-gpus "$SLURM_GPUS_PER_NODE" --block &',
209
- " sleep 5",
210
- "done",
211
- ],
212
- },
213
- "find_vllm_port": [
214
- "\nvllm_port_number=$(find_available_port $head_node_ip 8080 65535)",
215
- 'server_address="http://${head_node_ip}:${vllm_port_number}/v1"',
216
- ],
217
- "write_to_json": [
218
- '\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
219
- 'jq --arg server_addr "$server_address" \\',
220
- " '. + {{\"server_address\": $server_addr}}' \\",
221
- ' "$json_path" > temp.json \\',
222
- ' && mv temp.json "$json_path"',
223
- ],
224
- "launch_cmd": [
225
- "vllm serve {model_weights_path} \\",
226
- " --served-model-name {model_name} \\",
227
- ' --host "0.0.0.0" \\',
228
- " --port $vllm_port_number \\",
229
- " --trust-remote-code \\",
230
- ],
231
- }
72
+ # Required matching arguments for batch mode
73
+ BATCH_MODE_REQUIRED_MATCHING_ARGS = ["venv", "log_dir"]