vec-inf 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/client/_helper.py +17 -0
- vec_inf/client/_slurm_script_generator.py +2 -0
- vec_inf/client/_slurm_templates.py +2 -2
- vec_inf/config/models.yaml +49 -7
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/METADATA +8 -5
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/RECORD +9 -9
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/WHEEL +0 -0
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/licenses/LICENSE +0 -0
vec_inf/client/_helper.py
CHANGED
|
@@ -358,6 +358,11 @@ class ModelLauncher:
|
|
|
358
358
|
# Check for required fields without default vals, will raise an error if missing
|
|
359
359
|
utils.check_required_fields(params)
|
|
360
360
|
|
|
361
|
+
if not params.get("work_dir"):
|
|
362
|
+
# This is last resort, work dir should always be a required field to avoid
|
|
363
|
+
# blowing up user home directory unless intended
|
|
364
|
+
params["work_dir"] = str(Path.home())
|
|
365
|
+
|
|
361
366
|
# Validate resource allocation and parallelization settings
|
|
362
367
|
self._validate_resource_allocation(params)
|
|
363
368
|
|
|
@@ -404,6 +409,10 @@ class ModelLauncher:
|
|
|
404
409
|
SlurmJobError
|
|
405
410
|
If SLURM job submission fails
|
|
406
411
|
"""
|
|
412
|
+
# Create cache directory if it doesn't exist
|
|
413
|
+
cache_dir = Path(self.params["work_dir"], ".vec-inf-cache").expanduser()
|
|
414
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
415
|
+
|
|
407
416
|
# Build and execute the launch command
|
|
408
417
|
command_output, stderr = utils.run_bash_command(self._build_launch_command())
|
|
409
418
|
|
|
@@ -661,6 +670,10 @@ class BatchModelLauncher:
|
|
|
661
670
|
else:
|
|
662
671
|
params["models"][model_name][arg] = value
|
|
663
672
|
|
|
673
|
+
if not params.get("work_dir"):
|
|
674
|
+
# This is last resort, work dir should always be a required field to avoid
|
|
675
|
+
# blowing up user home directory unless intended
|
|
676
|
+
params["work_dir"] = str(Path.home())
|
|
664
677
|
return params
|
|
665
678
|
|
|
666
679
|
def _build_launch_command(self) -> str:
|
|
@@ -689,6 +702,10 @@ class BatchModelLauncher:
|
|
|
689
702
|
SlurmJobError
|
|
690
703
|
If SLURM job submission fails
|
|
691
704
|
"""
|
|
705
|
+
# Create cache directory if it doesn't exist
|
|
706
|
+
cache_dir = Path(self.params["work_dir"], ".vec-inf-cache").expanduser()
|
|
707
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
708
|
+
|
|
692
709
|
# Build and execute the launch command
|
|
693
710
|
command_output, stderr = utils.run_bash_command(self._build_launch_command())
|
|
694
711
|
|
|
@@ -112,6 +112,7 @@ class SlurmScriptGenerator:
|
|
|
112
112
|
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
|
|
113
113
|
server_script.append(
|
|
114
114
|
SLURM_SCRIPT_TEMPLATE["bind_path"].format(
|
|
115
|
+
work_dir=self.params.get("work_dir", str(Path.home())),
|
|
115
116
|
model_weights_path=self.model_weights_path,
|
|
116
117
|
additional_binds=self.additional_binds,
|
|
117
118
|
)
|
|
@@ -319,6 +320,7 @@ class BatchSlurmScriptGenerator:
|
|
|
319
320
|
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
|
|
320
321
|
script_content.append(
|
|
321
322
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
|
|
323
|
+
work_dir=self.params.get("work_dir", str(Path.home())),
|
|
322
324
|
model_weights_path=model_params["model_weights_path"],
|
|
323
325
|
additional_binds=model_params["additional_binds"],
|
|
324
326
|
)
|
|
@@ -117,7 +117,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
117
117
|
CONTAINER_LOAD_CMD,
|
|
118
118
|
],
|
|
119
119
|
"imports": "source {src_dir}/find_port.sh",
|
|
120
|
-
"bind_path": f"export {
|
|
120
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
|
|
121
121
|
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
|
|
122
122
|
"activate_venv": "source {venv}/bin/activate",
|
|
123
123
|
"server_setup": {
|
|
@@ -292,7 +292,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
|
|
|
292
292
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
|
|
293
293
|
"shebang": "#!/bin/bash\n",
|
|
294
294
|
"container_setup": f"{CONTAINER_LOAD_CMD}\n",
|
|
295
|
-
"bind_path": f"export {
|
|
295
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
|
|
296
296
|
"server_address_setup": [
|
|
297
297
|
"source {src_dir}/find_port.sh",
|
|
298
298
|
"head_node_ip=${{SLURMD_NODENAME}}",
|
vec_inf/config/models.yaml
CHANGED
|
@@ -813,6 +813,13 @@ models:
|
|
|
813
813
|
gpus_per_node: 1
|
|
814
814
|
num_nodes: 1
|
|
815
815
|
vocab_size: 129280
|
|
816
|
+
Qwen3-0.6B:
|
|
817
|
+
model_family: Qwen3
|
|
818
|
+
model_variant: 0.6B
|
|
819
|
+
model_type: LLM
|
|
820
|
+
gpus_per_node: 1
|
|
821
|
+
num_nodes: 1
|
|
822
|
+
vocab_size: 151936
|
|
816
823
|
Qwen3-8B:
|
|
817
824
|
model_family: Qwen3
|
|
818
825
|
model_variant: 8B
|
|
@@ -853,19 +860,19 @@ models:
|
|
|
853
860
|
model_family: Llama-4
|
|
854
861
|
model_variant: Maverick-17B-128E-Instruct
|
|
855
862
|
model_type: VLM
|
|
856
|
-
gpus_per_node:
|
|
857
|
-
num_nodes:
|
|
863
|
+
gpus_per_node: 8
|
|
864
|
+
num_nodes: 2
|
|
858
865
|
resource_type: h100
|
|
859
866
|
cpus_per_task: 6
|
|
860
867
|
mem-per-node: 60G
|
|
861
868
|
vocab_size: 202048
|
|
862
|
-
time:
|
|
869
|
+
time: 08:00:00
|
|
863
870
|
vllm_args:
|
|
864
|
-
--tensor-parallel-size:
|
|
865
|
-
--pipeline-parallel-size:
|
|
871
|
+
--tensor-parallel-size: 8
|
|
872
|
+
--pipeline-parallel-size: 2
|
|
866
873
|
sglang_args:
|
|
867
|
-
--tensor-parallel-size:
|
|
868
|
-
--pipeline-parallel-size:
|
|
874
|
+
--tensor-parallel-size: 8
|
|
875
|
+
--pipeline-parallel-size: 2
|
|
869
876
|
medgemma-4b-it:
|
|
870
877
|
model_family: medgemma
|
|
871
878
|
model_variant: 4b-it
|
|
@@ -884,3 +891,38 @@ models:
|
|
|
884
891
|
--tensor-parallel-size: 2
|
|
885
892
|
sglang_args:
|
|
886
893
|
--tensor-parallel-size: 2
|
|
894
|
+
Kimi-K2-Instruct:
|
|
895
|
+
model_family: Kimi-K2
|
|
896
|
+
model_variant: Instruct
|
|
897
|
+
model_type: LLM
|
|
898
|
+
gpus_per_node: 8
|
|
899
|
+
num_nodes: 2
|
|
900
|
+
resource_type: h100
|
|
901
|
+
cpus_per_task: 6
|
|
902
|
+
mem-per-node: 60G
|
|
903
|
+
vocab_size: 163840
|
|
904
|
+
vllm_args:
|
|
905
|
+
--tensor-parallel-size: 8
|
|
906
|
+
--pipeline-parallel-size: 2
|
|
907
|
+
sglang_args:
|
|
908
|
+
--tensor-parallel-size: 8
|
|
909
|
+
--pipeline-parallel-size: 2
|
|
910
|
+
Kimi-K2.5:
|
|
911
|
+
model_family: Kimi-K2.5
|
|
912
|
+
model_type: LLM
|
|
913
|
+
gpus_per_node: 8
|
|
914
|
+
num_nodes: 1
|
|
915
|
+
resource_type: h100
|
|
916
|
+
cpus_per_task: 6
|
|
917
|
+
mem-per-node: 60G
|
|
918
|
+
vocab_size: 163840
|
|
919
|
+
vllm_args:
|
|
920
|
+
--tensor-parallel-size: 8
|
|
921
|
+
sglang_args:
|
|
922
|
+
--tensor-parallel-size: 8
|
|
923
|
+
whisper-large-v3:
|
|
924
|
+
model_family: whisper-large-v3
|
|
925
|
+
model_type: Audio
|
|
926
|
+
gpus_per_node: 1
|
|
927
|
+
num_nodes: 1
|
|
928
|
+
vocab_size: 51866
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
5
|
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,7 +18,10 @@ Requires-Dist: sglang>=0.5.5; extra == 'sglang'
|
|
|
18
18
|
Requires-Dist: torchao>=0.9.0; extra == 'sglang'
|
|
19
19
|
Provides-Extra: vllm
|
|
20
20
|
Requires-Dist: ray[default]>=2.51.0; extra == 'vllm'
|
|
21
|
+
Requires-Dist: torchcodec<0.10.0,>=0.9.0; extra == 'vllm'
|
|
21
22
|
Requires-Dist: vllm>=0.11.2; extra == 'vllm'
|
|
23
|
+
Requires-Dist: vllm[audio]; extra == 'vllm'
|
|
24
|
+
Requires-Dist: vllm[bench]; extra == 'vllm'
|
|
22
25
|
Description-Content-Type: text/markdown
|
|
23
26
|
|
|
24
27
|
# Vector Inference: Easy inference on Slurm clusters
|
|
@@ -30,11 +33,11 @@ Description-Content-Type: text/markdown
|
|
|
30
33
|
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
|
|
31
34
|
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
|
|
32
35
|
[](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
|
|
33
|
-
[](https://docs.vllm.ai/en/v0.15.0/)
|
|
37
|
+
[](https://docs.sglang.io/index.html)
|
|
35
38
|

|
|
36
39
|
|
|
37
|
-
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.
|
|
40
|
+
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
|
|
38
41
|
|
|
39
42
|
**NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)
|
|
40
43
|
|
|
@@ -72,7 +75,7 @@ You should see an output like the following:
|
|
|
72
75
|
* `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
|
|
73
76
|
* `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by setting environment variable `VEC_INF_WORK_DIR`.
|
|
74
77
|
|
|
75
|
-
Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
|
|
78
|
+
Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts will be written to a log directory (default to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) will be created in your working directory (defaults to your home directory if not specified or required) for torch compile cache.
|
|
76
79
|
|
|
77
80
|
#### Other commands
|
|
78
81
|
|
|
@@ -9,9 +9,9 @@ vec_inf/cli/_vars.py,sha256=ujrBtczo6qgsIyJb9greaInFo1gGvxZ6pga9CaBosPg,1147
|
|
|
9
9
|
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
10
|
vec_inf/client/_client_vars.py,sha256=8TleM3nFsmwqOLX0V0y_vvdyz0SyTyd2m_aPt1SjR1Q,3396
|
|
11
11
|
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
-
vec_inf/client/_helper.py,sha256=
|
|
13
|
-
vec_inf/client/_slurm_script_generator.py,sha256=
|
|
14
|
-
vec_inf/client/_slurm_templates.py,sha256=
|
|
12
|
+
vec_inf/client/_helper.py,sha256=EJFLZt2pIjsxHA9ITplj16RnHQMHAm0GW8iv1GKd6Xk,42324
|
|
13
|
+
vec_inf/client/_slurm_script_generator.py,sha256=YuOHc9c-PMuM4MJJK09TQU4qJrH-E3qhRs75srp1_bs,16365
|
|
14
|
+
vec_inf/client/_slurm_templates.py,sha256=xbuuzY5xK8TYRl_NxWVQ9CmwYDwi7gKJEQ2yc8hXC8s,11931
|
|
15
15
|
vec_inf/client/_slurm_vars.py,sha256=nKVYIUPcCKVLBVXzzMqt6b3BGaGIAX_gIyG28wqb_40,3270
|
|
16
16
|
vec_inf/client/_utils.py,sha256=NU_MZeei_RrHXdVNuymEkd-LWtv4qz3yyfn18JBddoM,14513
|
|
17
17
|
vec_inf/client/api.py,sha256=-vazAWvZp0vsn4jB6R-WdUo5eZ5bR-XJqU6r6qOL16A,13596
|
|
@@ -19,9 +19,9 @@ vec_inf/client/config.py,sha256=dB1getOXYQk4U4ge-x5qglHJlYZ4PHEaKh7rWdwA1Jg,6206
|
|
|
19
19
|
vec_inf/client/models.py,sha256=FFWo3XAIlu754FILnBWxCGtLYqLga1vhiCm8i8uZ0pc,7868
|
|
20
20
|
vec_inf/config/README.md,sha256=LrClRwcA-fR8XgmD9TyunuIzrSme4IAwwXmIf9O00zg,532
|
|
21
21
|
vec_inf/config/environment.yaml,sha256=FspYtoQi5fACmb2ludx5WkDNlex2PtFmoHWMZiDWujI,1092
|
|
22
|
-
vec_inf/config/models.yaml,sha256=
|
|
23
|
-
vec_inf-0.8.
|
|
24
|
-
vec_inf-0.8.
|
|
25
|
-
vec_inf-0.8.
|
|
26
|
-
vec_inf-0.8.
|
|
27
|
-
vec_inf-0.8.
|
|
22
|
+
vec_inf/config/models.yaml,sha256=w-n8j9buBUvuffQpVYZkKPDFE3Fv_O4ZEbYCp-ginYc,22020
|
|
23
|
+
vec_inf-0.8.1.dist-info/METADATA,sha256=VNqMHUaKtFhICQzU9YoTa5iH14V7-GyCg3YUuowYeus,10777
|
|
24
|
+
vec_inf-0.8.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
25
|
+
vec_inf-0.8.1.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
+
vec_inf-0.8.1.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
+
vec_inf-0.8.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|