vec-inf 0.7.3.tar.gz → 0.8.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/code_checks.yml +1 -1
  2. vec_inf-0.8.1/.github/workflows/docker.yml +85 -0
  3. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/docs.yml +6 -6
  4. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/publish.yml +1 -1
  5. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/unit_tests.yml +12 -2
  6. {vec_inf-0.7.3 → vec_inf-0.8.1}/.pre-commit-config.yaml +2 -2
  7. {vec_inf-0.7.3 → vec_inf-0.8.1}/MODEL_TRACKING.md +84 -12
  8. {vec_inf-0.7.3 → vec_inf-0.8.1}/PKG-INFO +19 -15
  9. {vec_inf-0.7.3 → vec_inf-0.8.1}/README.md +7 -6
  10. vec_inf-0.8.1/docs/assets/launch.png +0 -0
  11. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/index.md +5 -3
  12. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/user_guide.md +42 -31
  13. {vec_inf-0.7.3 → vec_inf-0.8.1}/pyproject.toml +36 -8
  14. vec_inf-0.8.1/sglang.Dockerfile +70 -0
  15. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/cli/test_cli.py +181 -2
  16. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/cli/test_helper.py +7 -2
  17. vec_inf-0.8.1/tests/vec_inf/client/test_engine_selection.py +348 -0
  18. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_helper.py +233 -5
  19. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_models.py +39 -0
  20. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_slurm_script_generator.py +192 -9
  21. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_utils.py +27 -8
  22. vec_inf-0.8.1/uv.lock +10492 -0
  23. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/cli/_cli.py +19 -3
  24. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/cli/_helper.py +23 -12
  25. vec_inf-0.8.1/vec_inf/cli/_vars.py +47 -0
  26. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_client_vars.py +31 -1
  27. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_helper.py +157 -44
  28. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_slurm_script_generator.py +87 -30
  29. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_slurm_templates.py +104 -40
  30. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_slurm_vars.py +13 -4
  31. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_utils.py +10 -7
  32. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/config.py +17 -7
  33. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/models.py +25 -19
  34. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/config/README.md +1 -1
  35. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/config/environment.yaml +9 -2
  36. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/config/models.yaml +223 -364
  37. vec_inf-0.7.3/Dockerfile → vec_inf-0.8.1/vllm.Dockerfile +9 -6
  38. vec_inf-0.7.3/.github/workflows/docker.yml +0 -61
  39. vec_inf-0.7.3/uv.lock +0 -6357
  40. vec_inf-0.7.3/vec_inf/cli/_vars.py +0 -32
  41. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  42. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  43. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  44. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
  45. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/dependabot.yml +0 -0
  46. {vec_inf-0.7.3 → vec_inf-0.8.1}/.github/pull_request_template.md +0 -0
  47. {vec_inf-0.7.3 → vec_inf-0.8.1}/.gitignore +0 -0
  48. {vec_inf-0.7.3 → vec_inf-0.8.1}/.python-version +0 -0
  49. {vec_inf-0.7.3 → vec_inf-0.8.1}/LICENSE +0 -0
  50. {vec_inf-0.7.3 → vec_inf-0.8.1}/codecov.yml +0 -0
  51. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/Makefile +0 -0
  52. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/api.md +0 -0
  53. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/assets/favicon-48x48.svg +0 -0
  54. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/assets/favicon.ico +0 -0
  55. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/assets/vector-logo.svg +0 -0
  56. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/contributing.md +0 -0
  57. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/make.bat +0 -0
  58. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/overrides/partials/copyright.html +0 -0
  59. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/overrides/partials/logo.html +0 -0
  60. {vec_inf-0.7.3 → vec_inf-0.8.1}/docs/stylesheets/extra.css +0 -0
  61. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/README.md +0 -0
  62. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/api/basic_usage.py +0 -0
  63. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/inference/llm/chat_completions.py +0 -0
  64. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/inference/llm/completions.py +0 -0
  65. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/inference/llm/completions.sh +0 -0
  66. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/inference/text_embedding/embeddings.py +0 -0
  67. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/inference/vlm/vision_completions.py +0 -0
  68. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/logits/logits.py +0 -0
  69. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/slurm_dependency/README.md +0 -0
  70. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/slurm_dependency/downstream_job.sbatch +0 -0
  71. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/slurm_dependency/run_downstream.py +0 -0
  72. {vec_inf-0.7.3 → vec_inf-0.8.1}/examples/slurm_dependency/run_workflow.sh +0 -0
  73. {vec_inf-0.7.3 → vec_inf-0.8.1}/mkdocs.yml +0 -0
  74. {vec_inf-0.7.3 → vec_inf-0.8.1}/profile/avg_throughput.py +0 -0
  75. {vec_inf-0.7.3 → vec_inf-0.8.1}/profile/gen.py +0 -0
  76. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/__init__.py +0 -0
  77. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/test_imports.py +0 -0
  78. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/__init__.py +0 -0
  79. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/cli/__init__.py +0 -0
  80. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/cli/test_utils.py +0 -0
  81. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/__init__.py +0 -0
  82. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_api.py +0 -0
  83. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_examples.py +0 -0
  84. {vec_inf-0.7.3 → vec_inf-0.8.1}/tests/vec_inf/client/test_vars.env +0 -0
  85. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/README.md +0 -0
  86. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/__init__.py +0 -0
  87. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/cli/__init__.py +0 -0
  88. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/cli/_utils.py +0 -0
  89. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/__init__.py +0 -0
  90. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/_exceptions.py +0 -0
  91. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/client/api.py +0 -0
  92. {vec_inf-0.7.3 → vec_inf-0.8.1}/vec_inf/find_port.sh +0 -0
  93. {vec_inf-0.7.3 → vec_inf-0.8.1}/venv.sh +0 -0
{vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/code_checks.yml
@@ -28,7 +28,7 @@ jobs:
   run-code-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1
       - name: Install uv
         uses: astral-sh/setup-uv@v7
         with:
vec_inf-0.8.1/.github/workflows/docker.yml (new file)
@@ -0,0 +1,85 @@
+name: docker
+
+on:
+  release:
+    types: [published]
+  push:
+    branches:
+      - main
+    paths:
+      - vllm.Dockerfile
+      - sglang.Dockerfile
+      - .github/workflows/docker.yml
+      - uv.lock
+  pull_request:
+    branches:
+      - main
+      - f/sglang-support
+    paths:
+      - vllm.Dockerfile
+      - sglang.Dockerfile
+      - .github/workflows/docker.yml
+      - uv.lock
+
+jobs:
+  push_to_registry:
+    name: Build and push Docker images
+    runs-on:
+      - ubuntu-latest
+    strategy:
+      matrix:
+        backend: [vllm, sglang]
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6.0.1
+
+      - name: Extract backend version
+        id: backend-version
+        run: |
+          VERSION=$(grep -A 1 "name = \"${{ matrix.backend }}\"" uv.lock | grep version | cut -d '"' -f 2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Maximize build space
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          # Remove unnecessary pre-installed software
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          # Clean apt cache
+          sudo apt-get clean
+          # Remove docker images
+          docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+          echo "Disk space after cleanup:"
+          df -h
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051
+        with:
+          images: vectorinstitute/vector-inference-${{ matrix.backend }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
+        with:
+          context: .
+          file: ./${{ matrix.backend }}.Dockerfile
+          push: true
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+            vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }}
+            vectorinstitute/vector-inference-${{ matrix.backend }}:latest
+          labels: ${{ steps.meta.outputs.labels }}
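The "Extract backend version" step relies on `uv.lock`'s standard `[[package]]` layout, where a package's `version` line immediately follows its `name` line. A minimal local sketch of the same pipeline, assuming a lock entry like the one in the comment (the `0.11.2` value is illustrative, not taken from this diff):

```bash
# Assumed uv.lock fragment (uv's standard layout):
#   [[package]]
#   name = "vllm"
#   version = "0.11.2"
backend=vllm
# -A 1 keeps the line after the name match; cut pulls the quoted version string.
VERSION=$(grep -A 1 "name = \"${backend}\"" uv.lock | grep version | cut -d '"' -f 2)
echo "$VERSION"  # -> 0.11.2
```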
{vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/docs.yml
@@ -51,7 +51,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
         with:
           fetch-depth: 0 # Fetch all history for proper versioning

@@ -67,7 +67,7 @@ jobs:
         python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --group docs --prerelease=allow
+        run: uv sync --group docs --prerelease=allow

       - name: Build docs
         run: uv run --frozen mkdocs build
@@ -76,7 +76,7 @@ jobs:
         run: touch site/.nojekyll

       - name: Upload artifact
-        uses: actions/upload-artifact@v5
+        uses: actions/upload-artifact@v6
         with:
           name: docs-site
           path: site/
@@ -88,7 +88,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
         with:
           fetch-depth: 0 # Fetch all history for proper versioning

@@ -104,7 +104,7 @@ jobs:
         python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --group docs --frozen
+        run: uv sync --group docs --frozen

       - name: Configure Git Credentials
         run: |
@@ -112,7 +112,7 @@ jobs:
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com

       - name: Download artifact
-        uses: actions/download-artifact@v6
+        uses: actions/download-artifact@v7
         with:
           name: docs-site
           path: site
{vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/publish.yml
@@ -13,7 +13,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install libcurl4-openssl-dev libssl-dev

-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1

       - name: Install uv
         uses: astral-sh/setup-uv@v7
{vec_inf-0.7.3 → vec_inf-0.8.1}/.github/workflows/unit_tests.yml
@@ -43,7 +43,7 @@ jobs:
       matrix:
         python-version: ["3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1

       - name: Install uv
         uses: astral-sh/setup-uv@v7
@@ -58,16 +58,26 @@ jobs:
           python-version: ${{ matrix.python-version }}

       - name: Install the project
+        env:
+          # Ensure uv uses the matrix interpreter instead of `.python-version` (3.10),
+          # otherwise the "3.11"/"3.12" jobs silently run on 3.10.
+          UV_PYTHON: ${{ matrix.python-version }}
         run: uv sync --dev --prerelease=allow

       - name: Install dependencies and check code
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: |
           uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests

       - name: Install the core package only
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: uv sync --no-dev

       - name: Run package import tests
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: |
           uv run --frozen pytest tests/test_imports.py

@@ -76,7 +86,7 @@ jobs:
           gpg --keyserver keyserver.ubuntu.com --recv-keys 806BB28AED779869

       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.5.1
+        uses: codecov/codecov-action@v5.5.2
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           files: ./coverage.xml
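The `UV_PYTHON` additions above pin uv's interpreter selection to the matrix version, which a repo-level `.python-version` file would otherwise override. A quick local sanity check of the same mechanism (a sketch; the `3.12` value is just an example):

```bash
# With a `.python-version` of 3.10 checked in, UV_PYTHON takes precedence:
export UV_PYTHON=3.12
uv sync --dev --prerelease=allow
uv run python -V  # should report Python 3.12.x, not 3.10
```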
{vec_inf-0.7.3 → vec_inf-0.8.1}/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.14.5'
+    rev: 'v0.14.14'
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
@@ -26,7 +26,7 @@ repos:
        types_or: [python, jupyter]

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.18.2
+    rev: v1.19.1
    hooks:
      - id: mypy
        entry: python3 -m mypy --config-file pyproject.toml
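After rev bumps like these, it is worth rebuilding the hook environments and re-running the hooks against the whole tree rather than only changed files; a minimal sketch using pre-commit's standard commands:

```bash
pre-commit clean            # drop cached hook environments built from the old revs
pre-commit run --all-files  # re-lint and re-type-check the entire repository
```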
{vec_inf-0.7.3 → vec_inf-0.8.1}/MODEL_TRACKING.md
@@ -94,6 +94,7 @@ This document tracks all model weights available in the `/model-weights` directory
 | Model | Configuration |
 |:------|:-------------|
 | `Llama-4-Scout-17B-16E-Instruct` | ❌ |
+| `Llama-4-Maverick-17B-128E-Instruct` | ❌ |

 ### Mistral AI: Mistral
 | Model | Configuration |
@@ -128,6 +129,7 @@ This document tracks all model weights available in the `/model-weights` directory
 |:------|:-------------|
 | `Qwen2.5-0.5B-Instruct` | ✅ |
 | `Qwen2.5-1.5B-Instruct` | ✅ |
+| `Qwen2.5-3B` | ❌ |
 | `Qwen2.5-3B-Instruct` | ✅ |
 | `Qwen2.5-7B-Instruct` | ✅ |
 | `Qwen2.5-14B-Instruct` | ✅ |
@@ -138,12 +140,14 @@ This document tracks all model weights available in the `/model-weights` directory
 | Model | Configuration |
 |:------|:-------------|
 | `Qwen2.5-Math-1.5B-Instruct` | ✅ |
+| `Qwen2.5-Math-7B` | ❌ |
 | `Qwen2.5-Math-7B-Instruct` | ✅ |
 | `Qwen2.5-Math-72B-Instruct` | ✅ |

 ### Qwen: Qwen2.5-Coder
 | Model | Configuration |
 |:------|:-------------|
+| `Qwen2.5-Coder-3B-Instruct` | ✅ |
 | `Qwen2.5-Coder-7B-Instruct` | ✅ |

 ### Qwen: QwQ
@@ -162,11 +166,18 @@ This document tracks all model weights available in the `/model-weights` directory
 | `Qwen2-Math-72B-Instruct` | ❌ |
 | `Qwen2-VL-7B-Instruct` | ❌ |

+### Qwen: Qwen2.5-VL
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-VL-3B-Instruct` | ❌ |
+| `Qwen2.5-VL-7B-Instruct` | ✅ |
+
 ### Qwen: Qwen3
 | Model | Configuration |
 |:------|:-------------|
-| `Qwen3-14B` | ✅ |
+| `Qwen3-0.6B` | ✅ |
 | `Qwen3-8B` | ✅ |
+| `Qwen3-14B` | ✅ |
 | `Qwen3-32B` | ✅ |
 | `Qwen3-235B-A22B` | ❌ |
 | `Qwen3-Embedding-8B` | ❌ |
@@ -191,27 +202,77 @@ This document tracks all model weights available in the `/model-weights` directory
 | Model | Configuration |
 |:------|:-------------|
 | `gpt-oss-120b` | ✅ |
+| `gpt-oss-20b` | ✅ |
+

-### Other LLM Models
+#### AI21: Jamba
 | Model | Configuration |
 |:------|:-------------|
 | `AI21-Jamba-1.5-Mini` | ❌ |
-| `aya-expanse-32b` | ✅ (as Aya-Expanse-32B) |
+
+#### Cohere for AI: Aya
+| Model | Configuration |
+|:------|:-------------|
+| `aya-expanse-32b` | ✅ |
+
+#### OpenAI: GPT-2
+| Model | Configuration |
+|:------|:-------------|
 | `gpt2-large` | ❌ |
 | `gpt2-xl` | ❌ |
-| `gpt-oss-120b` | ❌ |
-| `instructblip-vicuna-7b` | ❌ |
+
+#### InternLM: InternLM2
+| Model | Configuration |
+|:------|:-------------|
 | `internlm2-math-plus-7b` | ❌ |
+
+#### Janus
+| Model | Configuration |
+|:------|:-------------|
 | `Janus-Pro-7B` | ❌ |
-| `Kimi-K2-Instruct` | ❌ |
+
+#### Moonshot AI: Kimi
+| Model | Configuration |
+|:------|:-------------|
+| `Kimi-K2-Instruct` | ✅ |
+| `Kimi-K2.5` | ✅ |
+
+#### Mistral AI: Ministral
+| Model | Configuration |
+|:------|:-------------|
 | `Ministral-8B-Instruct-2410` | ❌ |
-| `Molmo-7B-D-0924` | ✅ |
+
+#### AI2: OLMo
+| Model | Configuration |
+|:------|:-------------|
 | `OLMo-1B-hf` | ❌ |
 | `OLMo-7B-hf` | ❌ |
 | `OLMo-7B-SFT` | ❌ |
+
+#### EleutherAI: Pythia
+| Model | Configuration |
+|:------|:-------------|
 | `pythia` | ❌ |
+
+#### Qwen: Qwen1.5
+| Model | Configuration |
+|:------|:-------------|
 | `Qwen1.5-72B-Chat` | ❌ |
+
+#### ReasonFlux
+| Model | Configuration |
+|:------|:-------------|
 | `ReasonFlux-PRM-7B` | ❌ |
+
+#### LMSYS: Vicuna
+| Model | Configuration |
+|:------|:-------------|
+| `vicuna-13b-v1.5` | ❌ |
+
+#### Google: T5 (Encoder-Decoder Models)
+**Note**: These are encoder-decoder (T5) models, not decoder-only LLMs.
+| Model | Configuration |
+|:------|:-------------|
 | `t5-large-lm-adapt` | ❌ |
 | `t5-xl-lm-adapt` | ❌ |
 | `mt5-xl-lm-adapt` | ❌ |
@@ -238,10 +299,10 @@ This document tracks all model weights available in the `/model-weights` directory
 ### Meta: Llama 3.2 Vision
 | Model | Configuration |
 |:------|:-------------|
-| `Llama-3.2-11B-Vision` | |
-| `Llama-3.2-11B-Vision-Instruct` | ✅ |
-| `Llama-3.2-90B-Vision` | |
-| `Llama-3.2-90B-Vision-Instruct` | ✅ |
+| `Llama-3.2-11B-Vision` | |
+| `Llama-3.2-11B-Vision-Instruct` | ✅ (SGLang only) |
+| `Llama-3.2-90B-Vision` | |
+| `Llama-3.2-90B-Vision-Instruct` | ✅ (SGLang only) |

 ### Mistral: Pixtral
 | Model | Configuration |
@@ -266,10 +327,19 @@ This document tracks all model weights available in the `/model-weights` directory
 | `deepseek-vl2` | ✅ |
 | `deepseek-vl2-small` | ✅ |

+### Google: MedGemma
+| Model | Configuration |
+|:------|:-------------|
+| `medgemma-4b-it` | ✅ |
+| `medgemma-27b-it` | ✅ |
+| `medgemma-27b-text-it` | ❌ |
+
 ### Other VLM Models
 | Model | Configuration |
 |:------|:-------------|
+| `instructblip-vicuna-7b` | ❌ |
 | `MiniCPM-Llama3-V-2_5` | ❌ |
+| `Molmo-7B-D-0924` | ✅ |

 ---

@@ -298,6 +368,8 @@ This document tracks all model weights available in the `/model-weights` directory
 | `data2vec` | ❌ |
 | `gte-modernbert-base` | ❌ |
 | `gte-Qwen2-7B-instruct` | ❌ |
+| `KaLM-Embedding-Gemma3-12B-2511` | ❌ |
+| `llama-embed-nemotron-8b` | ❌ |
 | `m2-bert-80M-32k-retrieval` | ❌ |
 | `m2-bert-80M-8k-retrieval` | ❌ |

@@ -313,7 +385,7 @@ This document tracks all model weights available in the `/model-weights` directory

 ---

-## Multimodal Models
+## Vision Models

 ### CLIP
 | Model | Configuration |
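The ✅/❌ column in these tables records whether a ready-made launch configuration ships in `models.yaml`. To check what your installed copy actually knows about, the package's CLI exposes a listing command (a hedged sketch: `vec-inf list` is part of this package's CLI but does not itself appear in this diff; the model name is just an example from the tables above):

```bash
vec-inf list            # enumerate models with available configurations
vec-inf list Qwen3-14B  # inspect the full launch configuration for one model
```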
{vec_inf-0.7.3 → vec_inf-0.8.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.7.3
+Version: 0.8.1
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -11,14 +11,17 @@ Requires-Dist: pydantic>=2.10.6
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: requests>=2.31.0
 Requires-Dist: rich>=13.7.0
-Provides-Extra: dev
-Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
-Requires-Dist: flashinfer-python>=0.4.0; extra == 'dev'
-Requires-Dist: ray[default]>=2.50.0; extra == 'dev'
-Requires-Dist: sglang>=0.5.0; extra == 'dev'
-Requires-Dist: torch>=2.7.0; extra == 'dev'
-Requires-Dist: vllm>=0.10.0; extra == 'dev'
-Requires-Dist: xgrammar>=0.1.11; extra == 'dev'
+Provides-Extra: sglang
+Requires-Dist: orjson>=3.11.0; extra == 'sglang'
+Requires-Dist: sgl-kernel>=0.3.0; extra == 'sglang'
+Requires-Dist: sglang>=0.5.5; extra == 'sglang'
+Requires-Dist: torchao>=0.9.0; extra == 'sglang'
+Provides-Extra: vllm
+Requires-Dist: ray[default]>=2.51.0; extra == 'vllm'
+Requires-Dist: torchcodec<0.10.0,>=0.9.0; extra == 'vllm'
+Requires-Dist: vllm>=0.11.2; extra == 'vllm'
+Requires-Dist: vllm[audio]; extra == 'vllm'
+Requires-Dist: vllm[bench]; extra == 'vllm'
 Description-Content-Type: text/markdown

 # Vector Inference: Easy inference on Slurm clusters
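The metadata change above replaces the old catch-all `dev` extra with per-engine extras, so an install now selects its inference engine explicitly (standard pip extras syntax; version pins resolve per the `Requires-Dist` lines above):

```bash
pip install vec-inf                # core CLI/client only, no engine
pip install "vec-inf[vllm]"        # adds vllm>=0.11.2, ray[default], torchcodec
pip install "vec-inf[sglang]"      # adds sglang>=0.5.5, sgl-kernel, orjson, torchao
```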
@@ -30,10 +33,11 @@ Description-Content-Type: text/markdown
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.15.0-blue)](https://docs.vllm.ai/en/v0.15.0/)
+[![SGLang](https://img.shields.io/badge/SGLang-0.5.8-blue)](https://docs.sglang.io/index.html)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).

 **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md).

@@ -43,12 +47,12 @@ If you are using the Vector cluster environment, and you don't need any customization
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
+Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories).

 If you'd like to use `vec-inf` on your own Slurm cluster, you need to update the configuration files; there are three ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
 * The package looks for cached configuration files in your environment before using the default configuration. The default cached configuration directory path points to `/model-weights/vec-inf-shared`; you need to create an `environment.yaml` and a `models.yaml` following the format of these files in [`vec_inf/config`](vec_inf/config/).
-* The package also looks for an environment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the environment variable `VEC_INF_CONFIG_DIR` to point to that location.
+* [OPTIONAL] The package can also look for an environment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the environment variable `VEC_INF_CONFIG_DIR` to point to that location.

 ## Usage

@@ -65,13 +69,13 @@ vec-inf launch Meta-Llama-3.1-8B-Instruct
 ```
 You should see an output like the following:

-<img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />
+<img width="720" alt="launch_image" src="./docs/assets/launch.png" />

 **NOTE**: You can set the required fields in the environment configuration (`environment.yaml`); it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account; this argument can be given a default by setting the environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory; this argument can be given a default by setting the environment variable `VEC_INF_WORK_DIR`.

-Models that are already supported by `vec-inf` are launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or the [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html). For detailed instructions on how to customize your model launch, check out the [`launch` command section in the User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command).
+Models that are already supported by `vec-inf` are launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or the [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in the User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts are written to a log directory (defaults to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) is created in your working directory (which defaults to your home directory if not specified or required) for the torch compile cache.

 #### Other commands

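To make the required-field notes above concrete, here is a hypothetical Killarney session; the account name and scratch path are placeholders, not values from this diff:

```bash
# Set defaults for the required launch arguments (--account / --work-dir):
export VEC_INF_ACCOUNT=my-slurm-account
export VEC_INF_WORK_DIR=/scratch/$USER/vec-inf
vec-inf launch Meta-Llama-3.1-8B-Instruct
# Logs land under ~/.vec-inf-logs; the torch compile cache under $VEC_INF_WORK_DIR/.vec-inf-cache
```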
{vec_inf-0.7.3 → vec_inf-0.8.1}/README.md
@@ -7,10 +7,11 @@
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.15.0-blue)](https://docs.vllm.ai/en/v0.15.0/)
+[![SGLang](https://img.shields.io/badge/SGLang-0.5.8-blue)](https://docs.sglang.io/index.html)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).

 **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md).

@@ -20,12 +21,12 @@ If you are using the Vector cluster environment, and you don't need any customization
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
+Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories).

 If you'd like to use `vec-inf` on your own Slurm cluster, you need to update the configuration files; there are three ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
 * The package looks for cached configuration files in your environment before using the default configuration. The default cached configuration directory path points to `/model-weights/vec-inf-shared`; you need to create an `environment.yaml` and a `models.yaml` following the format of these files in [`vec_inf/config`](vec_inf/config/).
-* The package also looks for an environment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the environment variable `VEC_INF_CONFIG_DIR` to point to that location.
+* [OPTIONAL] The package can also look for an environment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the environment variable `VEC_INF_CONFIG_DIR` to point to that location.

 ## Usage

@@ -42,13 +43,13 @@ vec-inf launch Meta-Llama-3.1-8B-Instruct
 ```
 You should see an output like the following:

-<img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />
+<img width="720" alt="launch_image" src="./docs/assets/launch.png" />

 **NOTE**: You can set the required fields in the environment configuration (`environment.yaml`); it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account; this argument can be given a default by setting the environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory; this argument can be given a default by setting the environment variable `VEC_INF_WORK_DIR`.

-Models that are already supported by `vec-inf` are launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or the [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html). For detailed instructions on how to customize your model launch, check out the [`launch` command section in the User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command).
+Models that are already supported by `vec-inf` are launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or the [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overridden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in the User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command). During the launch process, relevant log files and scripts are written to a log directory (defaults to `.vec-inf-logs` in your home directory), and a cache directory (`.vec-inf-cache`) is created in your working directory (which defaults to your home directory if not specified or required) for the torch compile cache.

 #### Other commands

vec_inf-0.8.1/docs/assets/launch.png (new file): Binary file, contents not shown
{vec_inf-0.7.3 → vec_inf-0.8.1}/docs/index.md
@@ -1,6 +1,7 @@
 # Vector Inference: Easy inference on Slurm clusters

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/stable/). **This package runs natively on the Vector Institute cluster environment**. To adapt to other environments, follow the instructions in [Installation](#installation).
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.15.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
+

 **NOTE**: Supported models on Killarney are tracked [here](https://github.com/VectorInstitute/vector-inference/blob/main/MODEL_TRACKING.md).

@@ -12,9 +13,10 @@ If you are using the Vector cluster environment, and you don't need any customization
 pip install vec-inf
 ```

-Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
+Otherwise, we recommend using the provided [`vllm.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/vllm.Dockerfile) and [`sglang.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories).

 If you'd like to use `vec-inf` on your own Slurm cluster, you need to update the configuration files; there are three ways to do it:
+
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config), then install from source by running `pip install .`.
 * The package looks for cached configuration files in your environment before using the default configuration. The default cached configuration directory path points to `/model-weights/vec-inf-shared`; you need to create an `environment.yaml` and a `models.yaml` following the format of these files in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config).
-* The package also looks for an environment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the environment variable `VEC_INF_CONFIG_DIR` to point to that location.
+* [OPTIONAL] The package also looks for an environment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the environment variable `VEC_INF_CONFIG_DIR` to point to that location.
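To round off the configuration options above, a minimal sketch of the `VEC_INF_CONFIG_DIR` route; the directory path is a placeholder, and the YAML files are your cluster-specific copies in the format of those in `vec_inf/config`:

```bash
mkdir -p ~/vec-inf-config
cp environment.yaml models.yaml ~/vec-inf-config/   # your customized configs
export VEC_INF_CONFIG_DIR=~/vec-inf-config
vec-inf launch Meta-Llama-3.1-8B-Instruct           # now resolves configs from $VEC_INF_CONFIG_DIR
```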