vec-inf 0.8.0.tar.gz → 0.8.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.pre-commit-config.yaml +1 -1
- {vec_inf-0.8.0 → vec_inf-0.8.1}/MODEL_TRACKING.md +4 -2
- {vec_inf-0.8.0 → vec_inf-0.8.1}/PKG-INFO +8 -5
- {vec_inf-0.8.0 → vec_inf-0.8.1}/README.md +4 -4
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/index.md +1 -1
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/user_guide.md +2 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/pyproject.toml +7 -1
- {vec_inf-0.8.0 → vec_inf-0.8.1}/sglang.Dockerfile +1 -1
- {vec_inf-0.8.0 → vec_inf-0.8.1}/uv.lock +4075 -1825
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_helper.py +17 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_slurm_script_generator.py +2 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_slurm_templates.py +2 -2
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/config/models.yaml +49 -7
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vllm.Dockerfile +1 -1
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/dependabot.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/pull_request_template.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/workflows/code_checks.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/workflows/docker.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/workflows/docs.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/workflows/publish.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.github/workflows/unit_tests.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.gitignore +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/.python-version +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/LICENSE +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/codecov.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/Makefile +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/api.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/assets/favicon-48x48.svg +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/assets/favicon.ico +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/assets/launch.png +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/assets/vector-logo.svg +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/contributing.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/make.bat +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/overrides/partials/copyright.html +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/overrides/partials/logo.html +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/docs/stylesheets/extra.css +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/README.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/api/basic_usage.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/inference/llm/chat_completions.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/inference/llm/completions.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/inference/llm/completions.sh +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/inference/text_embedding/embeddings.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/inference/vlm/vision_completions.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/logits/logits.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/slurm_dependency/README.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/slurm_dependency/downstream_job.sbatch +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/slurm_dependency/run_downstream.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/examples/slurm_dependency/run_workflow.sh +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/mkdocs.yml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/profile/avg_throughput.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/profile/gen.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/test_imports.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/cli/test_cli.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/cli/test_helper.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/cli/test_utils.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_api.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_engine_selection.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_examples.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_helper.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_models.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_slurm_script_generator.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_utils.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/tests/vec_inf/client/test_vars.env +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/README.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/cli/_cli.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/cli/_helper.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/cli/_utils.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/cli/_vars.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_client_vars.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_exceptions.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_slurm_vars.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/_utils.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/api.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/config.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/client/models.py +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/config/README.md +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/config/environment.yaml +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/vec_inf/find_port.sh +0 -0
- {vec_inf-0.8.0 → vec_inf-0.8.1}/venv.sh +0 -0

{vec_inf-0.8.0 → vec_inf-0.8.1}/MODEL_TRACKING.md

```diff
@@ -175,8 +175,9 @@ This document tracks all model weights available in the `/model-weights` directo
 ### Qwen: Qwen3
 | Model | Configuration |
 |:------|:-------------|
-| `Qwen3-
+| `Qwen3-0.6B` | ✅ |
 | `Qwen3-8B` | ✅ |
+| `Qwen3-14B` | ✅ |
 | `Qwen3-32B` | ✅ |
 | `Qwen3-235B-A22B` | ❌ |
 | `Qwen3-Embedding-8B` | ❌ |
@@ -233,7 +234,8 @@ This document tracks all model weights available in the `/model-weights` directo
 #### Moonshot AI: Kimi
 | Model | Configuration |
 |:------|:-------------|
-| `Kimi-K2-Instruct` |
+| `Kimi-K2-Instruct` | ✅ |
+| `Kimi-K2.5` | ✅ |
 
 #### Mistral AI: Ministral
 | Model | Configuration |
```

{vec_inf-0.8.0 → vec_inf-0.8.1}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.8.0
+Version: 0.8.1
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -18,7 +18,10 @@ Requires-Dist: sglang>=0.5.5; extra == 'sglang'
 Requires-Dist: torchao>=0.9.0; extra == 'sglang'
 Provides-Extra: vllm
 Requires-Dist: ray[default]>=2.51.0; extra == 'vllm'
+Requires-Dist: torchcodec<0.10.0,>=0.9.0; extra == 'vllm'
 Requires-Dist: vllm>=0.11.2; extra == 'vllm'
+Requires-Dist: vllm[audio]; extra == 'vllm'
+Requires-Dist: vllm[bench]; extra == 'vllm'
 Description-Content-Type: text/markdown
 
 # Vector Inference: Easy inference on Slurm clusters
@@ -30,11 +33,11 @@ Description-Content-Type: text/markdown
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[](https://docs.vllm.ai/en/v0.15.0/)
+[](https://docs.sglang.io/index.html)
 
 
-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
 
 **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)
 
@@ -72,7 +75,7 @@ You should see an output like the following:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.
 
-Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
+Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts will be written to a log directory (default to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) will be created in your working directory (defaults to your home directory if not specified or required) for torch compile cache.
 
 #### Other commands
 
```
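
The `vllm` extra in 0.8.1 now also pulls in `torchcodec` (>=0.9.0,<0.10.0), `vllm[audio]`, and `vllm[bench]`. A minimal install sketch, assuming the package is consumed from PyPI with pip (the project may recommend a different workflow, such as uv, in its own docs):

```bash
# Install vec-inf 0.8.1 with the vLLM backend extra; per the updated metadata this
# now also resolves torchcodec, vllm[audio], and vllm[bench].
pip install "vec-inf[vllm]==0.8.1"

# The SGLang backend is a separate extra; per the pyproject.toml comment further
# down, it conflicts with the vllm extra, so install one or the other.
pip install "vec-inf[sglang]==0.8.1"
```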

{vec_inf-0.8.0 → vec_inf-0.8.1}/README.md

```diff
@@ -7,11 +7,11 @@
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[](https://docs.vllm.ai/en/v0.15.0/)
+[](https://docs.sglang.io/index.html)
 
 
-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
 
 **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)
 
@@ -49,7 +49,7 @@ You should see an output like the following:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.
 
-Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
+Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts will be written to a log directory (default to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) will be created in your working directory (defaults to your home directory if not specified or required) for torch compile cache.
 
 #### Other commands
 
```
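
The launch paragraph above names the relevant knobs: `--account`/`-A` (or `VEC_INF_ACCOUNT`), `--work-dir`/`-D` (or `VEC_INF_WORK_DIR`), and `vec-inf launch --help` for the full list of overridable parameters. A minimal sketch of that flow, assuming the model name is passed as a positional argument; `Qwen3-14B` is used purely as an example taken from the model-tracking table above, and the account/work-dir values are placeholders:

```bash
# Defaults for the documented environment variables (placeholder values).
export VEC_INF_ACCOUNT=my-slurm-account
export VEC_INF_WORK_DIR=/scratch/$USER/vec-inf

# List every launch parameter that can be overridden.
vec-inf launch --help

# Launch a model that already has a cached/default configuration.
vec-inf launch Qwen3-14B
```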

{vec_inf-0.8.0 → vec_inf-0.8.1}/docs/index.md

```diff
@@ -1,6 +1,6 @@
 # Vector Inference: Easy inference on Slurm clusters
 
-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
 
 
 **NOTE**: Supported models on Killarney are tracked [here](https://github.com/VectorInstitute/vector-inference/blob/main/MODEL_TRACKING.md)
```

{vec_inf-0.8.0 → vec_inf-0.8.1}/docs/user_guide.md

```diff
@@ -110,6 +110,8 @@ export VEC_INF_MODEL_CONFIG=/h/<username>/my-model-config.yaml
 
 **NOTE**: There are other parameters that can also be added to the config but not shown in this example, check the [`ModelConfig`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/client/config.py) for details.
 
+During the launch process, relevant log files and scripts will be written to a log directory (default to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) will be created in your working directory (defaults to your home directory if not specified or required) for torch compile cache.
+
 ### `batch-launch` command
 
 The `batch-launch` command allows users to launch multiple inference servers at once, here is an example of launching 2 models:
```
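
The sentence added to the user guide names two on-disk locations. A quick way to check them after a launch, assuming the defaults it describes (no working-directory override, so both land under the home directory):

```bash
# Generated Slurm scripts and server log files (default log directory).
ls ~/.vec-inf-logs

# Torch compile cache, created in the working directory
# (which defaults to the home directory when none is specified).
ls ~/.vec-inf-cache
```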

{vec_inf-0.8.0 → vec_inf-0.8.1}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "vec-inf"
-version = "0.8.0"
+version = "0.8.1"
 description = "Efficient LLM inference on Slurm clusters using vLLM."
 readme = "README.md"
 authors = [{name = "Marshall Wang", email = "marshall.wang@vectorinstitute.ai"}]
@@ -42,6 +42,9 @@ inference = [
 "torch>=2.7.0",
 "cupy-cuda12x>=12.3.0",
 "flashinfer-python>=0.4.0",
+"ax-platform>=1.1.0",
+"py3nvml",
+"wandb>=0.17.0",
 ]
 
 [project.optional-dependencies]
@@ -50,6 +53,9 @@ inference = [
 vllm = [
 "vllm>=0.11.2",
 "ray[default]>=2.51.0",
+"vllm[audio]",
+"vllm[bench]",
+"torchcodec>=0.9.0,<0.10.0",
 ]
 
 # SGLang inference backend (conflicts with vllm due to dependency version conflicts)
 # Install with: uv sync --extra sglang --group inference
```
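
For development installs from the repository, the pyproject comment above documents the SGLang command; the vLLM line below is an assumed analogue following the same `--extra`/`--group` pattern:

```bash
# SGLang backend (command quoted from the pyproject.toml comment above).
uv sync --extra sglang --group inference

# vLLM backend (assumed analogue of the documented command); with 0.8.1 this also
# resolves vllm[audio], vllm[bench], torchcodec, and the new inference-group
# additions ax-platform, py3nvml, and wandb.
uv sync --extra vllm --group inference
```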

Dockerfile (sglang.Dockerfile and vllm.Dockerfile each change by +1 −1):

```diff
@@ -17,7 +17,7 @@ RUN apt-get update && apt-get install -y \
 wget build-essential libssl-dev zlib1g-dev libbz2-dev \
 libreadline-dev libsqlite3-dev libffi-dev libncursesw5-dev \
 xz-utils tk-dev libxml2-dev libxmlsec1-dev liblzma-dev libnuma1 \
-git vim \
+git vim ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libswresample-dev \
 && rm -rf /var/lib/apt/lists/*
 
 # Install Python
```