vec-inf 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vec_inf/client/_helper.py CHANGED
@@ -358,6 +358,11 @@ class ModelLauncher:
358
358
  # Check for required fields without default vals, will raise an error if missing
359
359
  utils.check_required_fields(params)
360
360
 
361
+ if not params.get("work_dir"):
362
+ # This is last resort, work dir should always be a required field to avoid
363
+ # blowing up user home directory unless intended
364
+ params["work_dir"] = str(Path.home())
365
+
361
366
  # Validate resource allocation and parallelization settings
362
367
  self._validate_resource_allocation(params)
363
368
 
@@ -404,6 +409,10 @@ class ModelLauncher:
404
409
  SlurmJobError
405
410
  If SLURM job submission fails
406
411
  """
412
+ # Create cache directory if it doesn't exist
413
+ cache_dir = Path(self.params["work_dir"], ".vec-inf-cache").expanduser()
414
+ cache_dir.mkdir(parents=True, exist_ok=True)
415
+
407
416
  # Build and execute the launch command
408
417
  command_output, stderr = utils.run_bash_command(self._build_launch_command())
409
418
 
@@ -661,6 +670,10 @@ class BatchModelLauncher:
661
670
  else:
662
671
  params["models"][model_name][arg] = value
663
672
 
673
+ if not params.get("work_dir"):
674
+ # This is last resort, work dir should always be a required field to avoid
675
+ # blowing up user home directory unless intended
676
+ params["work_dir"] = str(Path.home())
664
677
  return params
665
678
 
666
679
  def _build_launch_command(self) -> str:
@@ -689,6 +702,10 @@ class BatchModelLauncher:
689
702
  SlurmJobError
690
703
  If SLURM job submission fails
691
704
  """
705
+ # Create cache directory if it doesn't exist
706
+ cache_dir = Path(self.params["work_dir"], ".vec-inf-cache").expanduser()
707
+ cache_dir.mkdir(parents=True, exist_ok=True)
708
+
692
709
  # Build and execute the launch command
693
710
  command_output, stderr = utils.run_bash_command(self._build_launch_command())
694
711
 
@@ -112,6 +112,7 @@ class SlurmScriptGenerator:
112
112
  server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
113
113
  server_script.append(
114
114
  SLURM_SCRIPT_TEMPLATE["bind_path"].format(
115
+ work_dir=self.params.get("work_dir", str(Path.home())),
115
116
  model_weights_path=self.model_weights_path,
116
117
  additional_binds=self.additional_binds,
117
118
  )
@@ -319,6 +320,7 @@ class BatchSlurmScriptGenerator:
319
320
  script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
320
321
  script_content.append(
321
322
  BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
323
+ work_dir=self.params.get("work_dir", str(Path.home())),
322
324
  model_weights_path=model_params["model_weights_path"],
323
325
  additional_binds=model_params["additional_binds"],
324
326
  )
@@ -117,7 +117,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
117
117
  CONTAINER_LOAD_CMD,
118
118
  ],
119
119
  "imports": "source {src_dir}/find_port.sh",
120
- "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
120
+ "bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
121
121
  "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
122
122
  "activate_venv": "source {venv}/bin/activate",
123
123
  "server_setup": {
@@ -292,7 +292,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
292
292
  BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
293
293
  "shebang": "#!/bin/bash\n",
294
294
  "container_setup": f"{CONTAINER_LOAD_CMD}\n",
295
- "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp,{{model_weights_path}}{{additional_binds}}",
295
+ "bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
296
296
  "server_address_setup": [
297
297
  "source {src_dir}/find_port.sh",
298
298
  "head_node_ip=${{SLURMD_NODENAME}}",
@@ -813,6 +813,13 @@ models:
813
813
  gpus_per_node: 1
814
814
  num_nodes: 1
815
815
  vocab_size: 129280
816
+ Qwen3-0.6B:
817
+ model_family: Qwen3
818
+ model_variant: 0.6B
819
+ model_type: LLM
820
+ gpus_per_node: 1
821
+ num_nodes: 1
822
+ vocab_size: 151936
816
823
  Qwen3-8B:
817
824
  model_family: Qwen3
818
825
  model_variant: 8B
@@ -853,19 +860,19 @@ models:
853
860
  model_family: Llama-4
854
861
  model_variant: Maverick-17B-128E-Instruct
855
862
  model_type: VLM
856
- gpus_per_node: 4
857
- num_nodes: 4
863
+ gpus_per_node: 8
864
+ num_nodes: 2
858
865
  resource_type: h100
859
866
  cpus_per_task: 6
860
867
  mem-per-node: 60G
861
868
  vocab_size: 202048
862
- time: 03:00:00
869
+ time: 08:00:00
863
870
  vllm_args:
864
- --tensor-parallel-size: 4
865
- --pipeline-parallel-size: 4
871
+ --tensor-parallel-size: 8
872
+ --pipeline-parallel-size: 2
866
873
  sglang_args:
867
- --tensor-parallel-size: 4
868
- --pipeline-parallel-size: 4
874
+ --tensor-parallel-size: 8
875
+ --pipeline-parallel-size: 2
869
876
  medgemma-4b-it:
870
877
  model_family: medgemma
871
878
  model_variant: 4b-it
@@ -884,3 +891,38 @@ models:
884
891
  --tensor-parallel-size: 2
885
892
  sglang_args:
886
893
  --tensor-parallel-size: 2
894
+ Kimi-K2-Instruct:
895
+ model_family: Kimi-K2
896
+ model_variant: Instruct
897
+ model_type: LLM
898
+ gpus_per_node: 8
899
+ num_nodes: 2
900
+ resource_type: h100
901
+ cpus_per_task: 6
902
+ mem-per-node: 60G
903
+ vocab_size: 163840
904
+ vllm_args:
905
+ --tensor-parallel-size: 8
906
+ --pipeline-parallel-size: 2
907
+ sglang_args:
908
+ --tensor-parallel-size: 8
909
+ --pipeline-parallel-size: 2
910
+ Kimi-K2.5:
911
+ model_family: Kimi-K2.5
912
+ model_type: LLM
913
+ gpus_per_node: 8
914
+ num_nodes: 1
915
+ resource_type: h100
916
+ cpus_per_task: 6
917
+ mem-per-node: 60G
918
+ vocab_size: 163840
919
+ vllm_args:
920
+ --tensor-parallel-size: 8
921
+ sglang_args:
922
+ --tensor-parallel-size: 8
923
+ whisper-large-v3:
924
+ model_family: whisper-large-v3
925
+ model_type: Audio
926
+ gpus_per_node: 1
927
+ num_nodes: 1
928
+ vocab_size: 51866
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vec-inf
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: Efficient LLM inference on Slurm clusters using vLLM.
5
5
  Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
6
6
  License-Expression: MIT
@@ -18,7 +18,10 @@ Requires-Dist: sglang>=0.5.5; extra == 'sglang'
18
18
  Requires-Dist: torchao>=0.9.0; extra == 'sglang'
19
19
  Provides-Extra: vllm
20
20
  Requires-Dist: ray[default]>=2.51.0; extra == 'vllm'
21
+ Requires-Dist: torchcodec<0.10.0,>=0.9.0; extra == 'vllm'
21
22
  Requires-Dist: vllm>=0.11.2; extra == 'vllm'
23
+ Requires-Dist: vllm[audio]; extra == 'vllm'
24
+ Requires-Dist: vllm[bench]; extra == 'vllm'
22
25
  Description-Content-Type: text/markdown
23
26
 
24
27
  # Vector Inference: Easy inference on Slurm clusters
@@ -30,11 +33,11 @@ Description-Content-Type: text/markdown
30
33
  [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
31
34
  [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
32
35
  [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
33
- [![vLLM](https://img.shields.io/badge/vLLM-0.12.0-blue)](https://docs.vllm.ai/en/v0.12.0/)
34
- [![SGLang](https://img.shields.io/badge/SGLang-0.5.5.post3-blue)](https://docs.sglang.io/index.html)
36
+ [![vLLM](https://img.shields.io/badge/vLLM-0.15.0-blue)](https://docs.vllm.ai/en/v0.15.0/)
37
+ [![SGLang](https://img.shields.io/badge/SGLang-0.5.8-blue)](https://docs.sglang.io/index.html)
35
38
  ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)
36
39
 
37
- This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.12.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
40
+ This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
38
41
 
39
42
  **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)
40
43
 
@@ -72,7 +75,7 @@ You should see an output like the following:
72
75
  * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
73
76
  * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by setting environment variable `VEC_INF_WORK_DIR`.
74
77
 
75
- Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
78
+ Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts will be written to a log directory (default to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) will be created in your working directory (defaults to your home directory if not specified or required) for torch compile cache.
76
79
 
77
80
  #### Other commands
78
81
 
@@ -9,9 +9,9 @@ vec_inf/cli/_vars.py,sha256=ujrBtczo6qgsIyJb9greaInFo1gGvxZ6pga9CaBosPg,1147
9
9
  vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
10
10
  vec_inf/client/_client_vars.py,sha256=8TleM3nFsmwqOLX0V0y_vvdyz0SyTyd2m_aPt1SjR1Q,3396
11
11
  vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
12
- vec_inf/client/_helper.py,sha256=veii4dKGpBbPpz_X01rHKi2BtkdBjw8RmXpMBBajsyM,41473
13
- vec_inf/client/_slurm_script_generator.py,sha256=QT36zbdoiADTaUgfe0aYPu0gbN8ctpv_4ElKlBt-Rf0,16217
14
- vec_inf/client/_slurm_templates.py,sha256=XxIPREQKyF3gT3qGTDFsxx-gduiiVX7rPm-vuAVgjiA,11857
12
+ vec_inf/client/_helper.py,sha256=EJFLZt2pIjsxHA9ITplj16RnHQMHAm0GW8iv1GKd6Xk,42324
13
+ vec_inf/client/_slurm_script_generator.py,sha256=YuOHc9c-PMuM4MJJK09TQU4qJrH-E3qhRs75srp1_bs,16365
14
+ vec_inf/client/_slurm_templates.py,sha256=xbuuzY5xK8TYRl_NxWVQ9CmwYDwi7gKJEQ2yc8hXC8s,11931
15
15
  vec_inf/client/_slurm_vars.py,sha256=nKVYIUPcCKVLBVXzzMqt6b3BGaGIAX_gIyG28wqb_40,3270
16
16
  vec_inf/client/_utils.py,sha256=NU_MZeei_RrHXdVNuymEkd-LWtv4qz3yyfn18JBddoM,14513
17
17
  vec_inf/client/api.py,sha256=-vazAWvZp0vsn4jB6R-WdUo5eZ5bR-XJqU6r6qOL16A,13596
@@ -19,9 +19,9 @@ vec_inf/client/config.py,sha256=dB1getOXYQk4U4ge-x5qglHJlYZ4PHEaKh7rWdwA1Jg,6206
19
19
  vec_inf/client/models.py,sha256=FFWo3XAIlu754FILnBWxCGtLYqLga1vhiCm8i8uZ0pc,7868
20
20
  vec_inf/config/README.md,sha256=LrClRwcA-fR8XgmD9TyunuIzrSme4IAwwXmIf9O00zg,532
21
21
  vec_inf/config/environment.yaml,sha256=FspYtoQi5fACmb2ludx5WkDNlex2PtFmoHWMZiDWujI,1092
22
- vec_inf/config/models.yaml,sha256=qQP1GTHnKeGxEOlWqAWvpaBddM6jbR0YOu4X0CENpHE,21069
23
- vec_inf-0.8.0.dist-info/METADATA,sha256=As1VQZ4ULgxXI1mRGRwHYYs7_qxJriZAqO6n2ZAdYvg,10319
24
- vec_inf-0.8.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
25
- vec_inf-0.8.0.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
26
- vec_inf-0.8.0.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
27
- vec_inf-0.8.0.dist-info/RECORD,,
22
+ vec_inf/config/models.yaml,sha256=w-n8j9buBUvuffQpVYZkKPDFE3Fv_O4ZEbYCp-ginYc,22020
23
+ vec_inf-0.8.1.dist-info/METADATA,sha256=VNqMHUaKtFhICQzU9YoTa5iH14V7-GyCg3YUuowYeus,10777
24
+ vec_inf-0.8.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
25
+ vec_inf-0.8.1.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
26
+ vec_inf-0.8.1.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
27
+ vec_inf-0.8.1.dist-info/RECORD,,