vec-inf 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/client/_helper.py +17 -0
- vec_inf/client/_slurm_script_generator.py +2 -0
- vec_inf/client/_slurm_templates.py +2 -2
- vec_inf/config/models.yaml +49 -7
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/METADATA +8 -5
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/RECORD +9 -9
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/WHEEL +0 -0
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.8.0.dist-info → vec_inf-0.8.1.dist-info}/licenses/LICENSE +0 -0
vec_inf/client/_helper.py
CHANGED
|
@@ -358,6 +358,11 @@ class ModelLauncher:
|
|
|
358
358
|
# Check for required fields without default vals, will raise an error if missing
|
|
359
359
|
utils.check_required_fields(params)
|
|
360
360
|
|
|
361
|
+
if not params.get("work_dir"):
|
|
362
|
+
# This is last resort, work dir should always be a required field to avoid
|
|
363
|
+
# blowing up user home directory unless intended
|
|
364
|
+
params["work_dir"] = str(Path.home())
|
|
365
|
+
|
|
361
366
|
# Validate resource allocation and parallelization settings
|
|
362
367
|
self._validate_resource_allocation(params)
|
|
363
368
|
|
|
@@ -404,6 +409,10 @@ class ModelLauncher:
|
|
|
404
409
|
SlurmJobError
|
|
405
410
|
If SLURM job submission fails
|
|
406
411
|
"""
|
|
412
|
+
# Create cache directory if it doesn't exist
|
|
413
|
+
cache_dir = Path(self.params["work_dir"], ".vec-inf-cache").expanduser()
|
|
414
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
415
|
+
|
|
407
416
|
# Build and execute the launch command
|
|
408
417
|
command_output, stderr = utils.run_bash_command(self._build_launch_command())
|
|
409
418
|
|
|
@@ -661,6 +670,10 @@ class BatchModelLauncher:
|
|
|
661
670
|
else:
|
|
662
671
|
params["models"][model_name][arg] = value
|
|
663
672
|
|
|
673
|
+
if not params.get("work_dir"):
|
|
674
|
+
# This is last resort, work dir should always be a required field to avoid
|
|
675
|
+
# blowing up user home directory unless intended
|
|
676
|
+
params["work_dir"] = str(Path.home())
|
|
664
677
|
return params
|
|
665
678
|
|
|
666
679
|
def _build_launch_command(self) -> str:
|
|
@@ -689,6 +702,10 @@ class BatchModelLauncher:
|
|
|
689
702
|
SlurmJobError
|
|
690
703
|
If SLURM job submission fails
|
|
691
704
|
"""
|
|
705
|
+
# Create cache directory if it doesn't exist
|
|
706
|
+
cache_dir = Path(self.params["work_dir"], ".vec-inf-cache").expanduser()
|
|
707
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
708
|
+
|
|
692
709
|
# Build and execute the launch command
|
|
693
710
|
command_output, stderr = utils.run_bash_command(self._build_launch_command())
|
|
694
711
|
|
|
@@ -112,6 +112,7 @@ class SlurmScriptGenerator:
|
|
|
112
112
|
server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
|
|
113
113
|
server_script.append(
|
|
114
114
|
SLURM_SCRIPT_TEMPLATE["bind_path"].format(
|
|
115
|
+
work_dir=self.params.get("work_dir", str(Path.home())),
|
|
115
116
|
model_weights_path=self.model_weights_path,
|
|
116
117
|
additional_binds=self.additional_binds,
|
|
117
118
|
)
|
|
@@ -319,6 +320,7 @@ class BatchSlurmScriptGenerator:
|
|
|
319
320
|
script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
|
|
320
321
|
script_content.append(
|
|
321
322
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
|
|
323
|
+
work_dir=self.params.get("work_dir", str(Path.home())),
|
|
322
324
|
model_weights_path=model_params["model_weights_path"],
|
|
323
325
|
additional_binds=model_params["additional_binds"],
|
|
324
326
|
)
|
|
@@ -117,7 +117,7 @@ SLURM_SCRIPT_TEMPLATE: SlurmScriptTemplate = {
|
|
|
117
117
|
CONTAINER_LOAD_CMD,
|
|
118
118
|
],
|
|
119
119
|
"imports": "source {src_dir}/find_port.sh",
|
|
120
|
-
"bind_path": f"export {
|
|
120
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
|
|
121
121
|
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {{image_path}} \\",
|
|
122
122
|
"activate_venv": "source {venv}/bin/activate",
|
|
123
123
|
"server_setup": {
|
|
@@ -292,7 +292,7 @@ class BatchModelLaunchScriptTemplate(TypedDict):
|
|
|
292
292
|
BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE: BatchModelLaunchScriptTemplate = {
|
|
293
293
|
"shebang": "#!/bin/bash\n",
|
|
294
294
|
"container_setup": f"{CONTAINER_LOAD_CMD}\n",
|
|
295
|
-
"bind_path": f"export {
|
|
295
|
+
"bind_path": f"export {CONTAINER_MODULE_NAME_UPPER}_BINDPATH=${CONTAINER_MODULE_NAME_UPPER}_BINDPATH,/dev,/tmp,{{work_dir}}/.vec-inf-cache:$HOME/.cache,{{model_weights_path}}{{additional_binds}}",
|
|
296
296
|
"server_address_setup": [
|
|
297
297
|
"source {src_dir}/find_port.sh",
|
|
298
298
|
"head_node_ip=${{SLURMD_NODENAME}}",
|
vec_inf/config/models.yaml
CHANGED
|
@@ -813,6 +813,13 @@ models:
|
|
|
813
813
|
gpus_per_node: 1
|
|
814
814
|
num_nodes: 1
|
|
815
815
|
vocab_size: 129280
|
|
816
|
+
Qwen3-0.6B:
|
|
817
|
+
model_family: Qwen3
|
|
818
|
+
model_variant: 0.6B
|
|
819
|
+
model_type: LLM
|
|
820
|
+
gpus_per_node: 1
|
|
821
|
+
num_nodes: 1
|
|
822
|
+
vocab_size: 151936
|
|
816
823
|
Qwen3-8B:
|
|
817
824
|
model_family: Qwen3
|
|
818
825
|
model_variant: 8B
|
|
@@ -853,19 +860,19 @@ models:
|
|
|
853
860
|
model_family: Llama-4
|
|
854
861
|
model_variant: Maverick-17B-128E-Instruct
|
|
855
862
|
model_type: VLM
|
|
856
|
-
gpus_per_node:
|
|
857
|
-
num_nodes:
|
|
863
|
+
gpus_per_node: 8
|
|
864
|
+
num_nodes: 2
|
|
858
865
|
resource_type: h100
|
|
859
866
|
cpus_per_task: 6
|
|
860
867
|
mem-per-node: 60G
|
|
861
868
|
vocab_size: 202048
|
|
862
|
-
time:
|
|
869
|
+
time: 08:00:00
|
|
863
870
|
vllm_args:
|
|
864
|
-
--tensor-parallel-size:
|
|
865
|
-
--pipeline-parallel-size:
|
|
871
|
+
--tensor-parallel-size: 8
|
|
872
|
+
--pipeline-parallel-size: 2
|
|
866
873
|
sglang_args:
|
|
867
|
-
--tensor-parallel-size:
|
|
868
|
-
--pipeline-parallel-size:
|
|
874
|
+
--tensor-parallel-size: 8
|
|
875
|
+
--pipeline-parallel-size: 2
|
|
869
876
|
medgemma-4b-it:
|
|
870
877
|
model_family: medgemma
|
|
871
878
|
model_variant: 4b-it
|
|
@@ -884,3 +891,38 @@ models:
|
|
|
884
891
|
--tensor-parallel-size: 2
|
|
885
892
|
sglang_args:
|
|
886
893
|
--tensor-parallel-size: 2
|
|
894
|
+
Kimi-K2-Instruct:
|
|
895
|
+
model_family: Kimi-K2
|
|
896
|
+
model_variant: Instruct
|
|
897
|
+
model_type: LLM
|
|
898
|
+
gpus_per_node: 8
|
|
899
|
+
num_nodes: 2
|
|
900
|
+
resource_type: h100
|
|
901
|
+
cpus_per_task: 6
|
|
902
|
+
mem-per-node: 60G
|
|
903
|
+
vocab_size: 163840
|
|
904
|
+
vllm_args:
|
|
905
|
+
--tensor-parallel-size: 8
|
|
906
|
+
--pipeline-parallel-size: 2
|
|
907
|
+
sglang_args:
|
|
908
|
+
--tensor-parallel-size: 8
|
|
909
|
+
--pipeline-parallel-size: 2
|
|
910
|
+
Kimi-K2.5:
|
|
911
|
+
model_family: Kimi-K2.5
|
|
912
|
+
model_type: LLM
|
|
913
|
+
gpus_per_node: 8
|
|
914
|
+
num_nodes: 1
|
|
915
|
+
resource_type: h100
|
|
916
|
+
cpus_per_task: 6
|
|
917
|
+
mem-per-node: 60G
|
|
918
|
+
vocab_size: 163840
|
|
919
|
+
vllm_args:
|
|
920
|
+
--tensor-parallel-size: 8
|
|
921
|
+
sglang_args:
|
|
922
|
+
--tensor-parallel-size: 8
|
|
923
|
+
whisper-large-v3:
|
|
924
|
+
model_family: whisper-large-v3
|
|
925
|
+
model_type: Audio
|
|
926
|
+
gpus_per_node: 1
|
|
927
|
+
num_nodes: 1
|
|
928
|
+
vocab_size: 51866
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vec-inf
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Efficient LLM inference on Slurm clusters using vLLM.
|
|
5
5
|
Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,7 +18,10 @@ Requires-Dist: sglang>=0.5.5; extra == 'sglang'
|
|
|
18
18
|
Requires-Dist: torchao>=0.9.0; extra == 'sglang'
|
|
19
19
|
Provides-Extra: vllm
|
|
20
20
|
Requires-Dist: ray[default]>=2.51.0; extra == 'vllm'
|
|
21
|
+
Requires-Dist: torchcodec<0.10.0,>=0.9.0; extra == 'vllm'
|
|
21
22
|
Requires-Dist: vllm>=0.11.2; extra == 'vllm'
|
|
23
|
+
Requires-Dist: vllm[audio]; extra == 'vllm'
|
|
24
|
+
Requires-Dist: vllm[bench]; extra == 'vllm'
|
|
22
25
|
Description-Content-Type: text/markdown
|
|
23
26
|
|
|
24
27
|
# Vector Inference: Easy inference on Slurm clusters
|
|
@@ -30,11 +33,11 @@ Description-Content-Type: text/markdown
|
|
|
30
33
|
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
|
|
31
34
|
[](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
|
|
32
35
|
[](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
|
|
33
|
-
[](https://docs.vllm.ai/en/v0.15.0/)
|
|
37
|
+
[](https://docs.sglang.io/index.html)
|
|
35
38
|

|
|
36
39
|
|
|
37
|
-
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.
|
|
40
|
+
This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
|
|
38
41
|
|
|
39
42
|
**NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)
|
|
40
43
|
|
|
@@ -72,7 +75,7 @@ You should see an output like the following:
|
|
|
72
75
|
* `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
|
|
73
76
|
* `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by setting environment variable `VEC_INF_WORK_DIR`.
|
|
74
77
|
|
|
75
|
-
Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
|
|
78
|
+
Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts will be written to a log directory (default to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) will be created in your working directory (defaults to your home directory if not specified or required) for torch compile cache.
|
|
76
79
|
|
|
77
80
|
#### Other commands
|
|
78
81
|
|
|
@@ -9,9 +9,9 @@ vec_inf/cli/_vars.py,sha256=ujrBtczo6qgsIyJb9greaInFo1gGvxZ6pga9CaBosPg,1147
|
|
|
9
9
|
vec_inf/client/__init__.py,sha256=OLlUJ4kL1R-Kh-nXNbvKlAZ3mtHcnozHprVufkVCNWk,739
|
|
10
10
|
vec_inf/client/_client_vars.py,sha256=8TleM3nFsmwqOLX0V0y_vvdyz0SyTyd2m_aPt1SjR1Q,3396
|
|
11
11
|
vec_inf/client/_exceptions.py,sha256=94Nx_5k1SriJNXzbdnwyXFZolyMutydU08Gsikawzzo,749
|
|
12
|
-
vec_inf/client/_helper.py,sha256=
|
|
13
|
-
vec_inf/client/_slurm_script_generator.py,sha256=
|
|
14
|
-
vec_inf/client/_slurm_templates.py,sha256=
|
|
12
|
+
vec_inf/client/_helper.py,sha256=EJFLZt2pIjsxHA9ITplj16RnHQMHAm0GW8iv1GKd6Xk,42324
|
|
13
|
+
vec_inf/client/_slurm_script_generator.py,sha256=YuOHc9c-PMuM4MJJK09TQU4qJrH-E3qhRs75srp1_bs,16365
|
|
14
|
+
vec_inf/client/_slurm_templates.py,sha256=xbuuzY5xK8TYRl_NxWVQ9CmwYDwi7gKJEQ2yc8hXC8s,11931
|
|
15
15
|
vec_inf/client/_slurm_vars.py,sha256=nKVYIUPcCKVLBVXzzMqt6b3BGaGIAX_gIyG28wqb_40,3270
|
|
16
16
|
vec_inf/client/_utils.py,sha256=NU_MZeei_RrHXdVNuymEkd-LWtv4qz3yyfn18JBddoM,14513
|
|
17
17
|
vec_inf/client/api.py,sha256=-vazAWvZp0vsn4jB6R-WdUo5eZ5bR-XJqU6r6qOL16A,13596
|
|
@@ -19,9 +19,9 @@ vec_inf/client/config.py,sha256=dB1getOXYQk4U4ge-x5qglHJlYZ4PHEaKh7rWdwA1Jg,6206
|
|
|
19
19
|
vec_inf/client/models.py,sha256=FFWo3XAIlu754FILnBWxCGtLYqLga1vhiCm8i8uZ0pc,7868
|
|
20
20
|
vec_inf/config/README.md,sha256=LrClRwcA-fR8XgmD9TyunuIzrSme4IAwwXmIf9O00zg,532
|
|
21
21
|
vec_inf/config/environment.yaml,sha256=FspYtoQi5fACmb2ludx5WkDNlex2PtFmoHWMZiDWujI,1092
|
|
22
|
-
vec_inf/config/models.yaml,sha256=
|
|
23
|
-
vec_inf-0.8.
|
|
24
|
-
vec_inf-0.8.
|
|
25
|
-
vec_inf-0.8.
|
|
26
|
-
vec_inf-0.8.
|
|
27
|
-
vec_inf-0.8.
|
|
22
|
+
vec_inf/config/models.yaml,sha256=w-n8j9buBUvuffQpVYZkKPDFE3Fv_O4ZEbYCp-ginYc,22020
|
|
23
|
+
vec_inf-0.8.1.dist-info/METADATA,sha256=VNqMHUaKtFhICQzU9YoTa5iH14V7-GyCg3YUuowYeus,10777
|
|
24
|
+
vec_inf-0.8.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
25
|
+
vec_inf-0.8.1.dist-info/entry_points.txt,sha256=uNRXjCuJSR2nveEqD3IeMznI9oVI9YLZh5a24cZg6B0,49
|
|
26
|
+
vec_inf-0.8.1.dist-info/licenses/LICENSE,sha256=mq8zeqpvVSF1EsxmydeXcokt8XnEIfSofYn66S2-cJI,1073
|
|
27
|
+
vec_inf-0.8.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|