vec-inf 0.7.3__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/workflows/code_checks.yml +1 -1
  2. vec_inf-0.8.0/.github/workflows/docker.yml +85 -0
  3. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/workflows/docs.yml +6 -6
  4. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/workflows/publish.yml +1 -1
  5. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/workflows/unit_tests.yml +12 -2
  6. {vec_inf-0.7.3 → vec_inf-0.8.0}/.pre-commit-config.yaml +2 -2
  7. {vec_inf-0.7.3 → vec_inf-0.8.0}/MODEL_TRACKING.md +80 -10
  8. {vec_inf-0.7.3 → vec_inf-0.8.0}/PKG-INFO +16 -15
  9. {vec_inf-0.7.3 → vec_inf-0.8.0}/README.md +7 -6
  10. vec_inf-0.8.0/docs/assets/launch.png +0 -0
  11. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/index.md +5 -3
  12. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/user_guide.md +40 -31
  13. {vec_inf-0.7.3 → vec_inf-0.8.0}/pyproject.toml +30 -8
  14. vec_inf-0.8.0/sglang.Dockerfile +70 -0
  15. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/cli/test_cli.py +181 -2
  16. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/cli/test_helper.py +7 -2
  17. vec_inf-0.8.0/tests/vec_inf/client/test_engine_selection.py +348 -0
  18. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_helper.py +233 -5
  19. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_models.py +39 -0
  20. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_slurm_script_generator.py +192 -9
  21. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_utils.py +27 -8
  22. {vec_inf-0.7.3 → vec_inf-0.8.0}/uv.lock +3442 -1557
  23. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/cli/_cli.py +19 -3
  24. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/cli/_helper.py +23 -12
  25. vec_inf-0.8.0/vec_inf/cli/_vars.py +47 -0
  26. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_client_vars.py +31 -1
  27. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_helper.py +140 -44
  28. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_slurm_script_generator.py +85 -30
  29. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_slurm_templates.py +102 -38
  30. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_slurm_vars.py +13 -4
  31. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_utils.py +10 -7
  32. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/config.py +17 -7
  33. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/models.py +25 -19
  34. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/config/README.md +1 -1
  35. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/config/environment.yaml +9 -2
  36. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/config/models.yaml +182 -365
  37. vec_inf-0.7.3/Dockerfile → vec_inf-0.8.0/vllm.Dockerfile +9 -6
  38. vec_inf-0.7.3/.github/workflows/docker.yml +0 -61
  39. vec_inf-0.7.3/vec_inf/cli/_vars.py +0 -32
  40. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  41. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  42. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  43. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/ISSUE_TEMPLATE/model-request.md +0 -0
  44. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/dependabot.yml +0 -0
  45. {vec_inf-0.7.3 → vec_inf-0.8.0}/.github/pull_request_template.md +0 -0
  46. {vec_inf-0.7.3 → vec_inf-0.8.0}/.gitignore +0 -0
  47. {vec_inf-0.7.3 → vec_inf-0.8.0}/.python-version +0 -0
  48. {vec_inf-0.7.3 → vec_inf-0.8.0}/LICENSE +0 -0
  49. {vec_inf-0.7.3 → vec_inf-0.8.0}/codecov.yml +0 -0
  50. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/Makefile +0 -0
  51. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/api.md +0 -0
  52. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/assets/favicon-48x48.svg +0 -0
  53. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/assets/favicon.ico +0 -0
  54. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/assets/vector-logo.svg +0 -0
  55. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/contributing.md +0 -0
  56. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/make.bat +0 -0
  57. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/overrides/partials/copyright.html +0 -0
  58. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/overrides/partials/logo.html +0 -0
  59. {vec_inf-0.7.3 → vec_inf-0.8.0}/docs/stylesheets/extra.css +0 -0
  60. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/README.md +0 -0
  61. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/api/basic_usage.py +0 -0
  62. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/inference/llm/chat_completions.py +0 -0
  63. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/inference/llm/completions.py +0 -0
  64. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/inference/llm/completions.sh +0 -0
  65. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/inference/text_embedding/embeddings.py +0 -0
  66. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/inference/vlm/vision_completions.py +0 -0
  67. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/logits/logits.py +0 -0
  68. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/slurm_dependency/README.md +0 -0
  69. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/slurm_dependency/downstream_job.sbatch +0 -0
  70. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/slurm_dependency/run_downstream.py +0 -0
  71. {vec_inf-0.7.3 → vec_inf-0.8.0}/examples/slurm_dependency/run_workflow.sh +0 -0
  72. {vec_inf-0.7.3 → vec_inf-0.8.0}/mkdocs.yml +0 -0
  73. {vec_inf-0.7.3 → vec_inf-0.8.0}/profile/avg_throughput.py +0 -0
  74. {vec_inf-0.7.3 → vec_inf-0.8.0}/profile/gen.py +0 -0
  75. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/__init__.py +0 -0
  76. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/test_imports.py +0 -0
  77. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/__init__.py +0 -0
  78. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/cli/__init__.py +0 -0
  79. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/cli/test_utils.py +0 -0
  80. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/__init__.py +0 -0
  81. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_api.py +0 -0
  82. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_examples.py +0 -0
  83. {vec_inf-0.7.3 → vec_inf-0.8.0}/tests/vec_inf/client/test_vars.env +0 -0
  84. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/README.md +0 -0
  85. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/__init__.py +0 -0
  86. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/cli/__init__.py +0 -0
  87. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/cli/_utils.py +0 -0
  88. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/__init__.py +0 -0
  89. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/_exceptions.py +0 -0
  90. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/client/api.py +0 -0
  91. {vec_inf-0.7.3 → vec_inf-0.8.0}/vec_inf/find_port.sh +0 -0
  92. {vec_inf-0.7.3 → vec_inf-0.8.0}/venv.sh +0 -0
--- vec_inf-0.7.3/.github/workflows/code_checks.yml
+++ vec_inf-0.8.0/.github/workflows/code_checks.yml
@@ -28,7 +28,7 @@ jobs:
   run-code-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1
       - name: Install uv
         uses: astral-sh/setup-uv@v7
         with:
--- /dev/null
+++ vec_inf-0.8.0/.github/workflows/docker.yml
@@ -0,0 +1,85 @@
+name: docker
+
+on:
+  release:
+    types: [published]
+  push:
+    branches:
+      - main
+    paths:
+      - vllm.Dockerfile
+      - sglang.Dockerfile
+      - .github/workflows/docker.yml
+      - uv.lock
+  pull_request:
+    branches:
+      - main
+      - f/sglang-support
+    paths:
+      - vllm.Dockerfile
+      - sglang.Dockerfile
+      - .github/workflows/docker.yml
+      - uv.lock
+
+jobs:
+  push_to_registry:
+    name: Build and push Docker images
+    runs-on:
+      - ubuntu-latest
+    strategy:
+      matrix:
+        backend: [vllm, sglang]
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6.0.1
+
+      - name: Extract backend version
+        id: backend-version
+        run: |
+          VERSION=$(grep -A 1 "name = \"${{ matrix.backend }}\"" uv.lock | grep version | cut -d '"' -f 2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Maximize build space
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          # Remove unnecessary pre-installed software
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          # Clean apt cache
+          sudo apt-get clean
+          # Remove docker images
+          docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+          echo "Disk space after cleanup:"
+          df -h
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051
+        with:
+          images: vectorinstitute/vector-inference-${{ matrix.backend }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
+        with:
+          context: .
+          file: ./${{ matrix.backend }}.Dockerfile
+          push: true
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+            vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }}
+            vectorinstitute/vector-inference-${{ matrix.backend }}:latest
+          labels: ${{ steps.meta.outputs.labels }}
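The image tags above are derived from `uv.lock` rather than hardcoded. A minimal local sanity check of that grep/cut pipeline, assuming a repo checkout whose `uv.lock` uses uv's current TOML layout (a `name = "vllm"` line immediately followed by a `version = "..."` line):

```bash
# Sketch: mirror the "Extract backend version" step for both matrix values.
# Assumes uv.lock sits in the current directory.
for backend in vllm sglang; do
  version=$(grep -A 1 "name = \"$backend\"" uv.lock | grep version | cut -d '"' -f 2)
  echo "$backend -> vectorinstitute/vector-inference-$backend:$version"
done
```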
--- vec_inf-0.7.3/.github/workflows/docs.yml
+++ vec_inf-0.8.0/.github/workflows/docs.yml
@@ -51,7 +51,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
         with:
          fetch-depth: 0 # Fetch all history for proper versioning

@@ -67,7 +67,7 @@ jobs:
           python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --group docs --prerelease=allow
+        run: uv sync --group docs --prerelease=allow

       - name: Build docs
         run: uv run --frozen mkdocs build
@@ -76,7 +76,7 @@ jobs:
         run: touch site/.nojekyll

       - name: Upload artifact
-        uses: actions/upload-artifact@v5
+        uses: actions/upload-artifact@v6
         with:
           name: docs-site
           path: site/
@@ -88,7 +88,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5.0.0
+        uses: actions/checkout@v6.0.1
         with:
           fetch-depth: 0 # Fetch all history for proper versioning

@@ -104,7 +104,7 @@ jobs:
           python-version-file: ".python-version"

       - name: Install the project
-        run: uv sync --all-extras --group docs --frozen
+        run: uv sync --group docs --frozen

       - name: Configure Git Credentials
         run: |
@@ -112,7 +112,7 @@ jobs:
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com

       - name: Download artifact
-        uses: actions/download-artifact@v6
+        uses: actions/download-artifact@v7
         with:
           name: docs-site
           path: site
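Both docs jobs now sync only the `docs` dependency group instead of `--all-extras`, keeping the heavyweight engine extras out of the docs build. A sketch of reproducing the build locally under the same assumptions (uv installed, repo checked out):

```bash
# Sketch: run the same two commands the workflow runs.
uv sync --group docs --prerelease=allow
uv run --frozen mkdocs build   # output lands in site/
```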
--- vec_inf-0.7.3/.github/workflows/publish.yml
+++ vec_inf-0.8.0/.github/workflows/publish.yml
@@ -13,7 +13,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install libcurl4-openssl-dev libssl-dev

-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1

       - name: Install uv
         uses: astral-sh/setup-uv@v7
--- vec_inf-0.7.3/.github/workflows/unit_tests.yml
+++ vec_inf-0.8.0/.github/workflows/unit_tests.yml
@@ -43,7 +43,7 @@ jobs:
       matrix:
         python-version: ["3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1

       - name: Install uv
         uses: astral-sh/setup-uv@v7
@@ -58,16 +58,26 @@ jobs:
           python-version: ${{ matrix.python-version }}

       - name: Install the project
+        env:
+          # Ensure uv uses the matrix interpreter instead of `.python-version` (3.10),
+          # otherwise the "3.11"/"3.12" jobs silently run on 3.10.
+          UV_PYTHON: ${{ matrix.python-version }}
         run: uv sync --dev --prerelease=allow

       - name: Install dependencies and check code
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: |
           uv run --frozen pytest -m "not integration_test" --cov vec_inf --cov-report=xml tests

       - name: Install the core package only
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: uv sync --no-dev

       - name: Run package import tests
+        env:
+          UV_PYTHON: ${{ matrix.python-version }}
         run: |
           uv run --frozen pytest tests/test_imports.py

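The inline comments above document the motivation: without `UV_PYTHON`, uv resolves the interpreter from `.python-version` (3.10), so the 3.11 and 3.12 matrix jobs would silently test 3.10. A small sketch to verify the pin locally, assuming a uv version that honors the `UV_PYTHON` environment variable:

```bash
# Sketch: confirm the pinned interpreter wins over .python-version.
export UV_PYTHON=3.12
uv sync --dev --prerelease=allow
uv run --frozen python -c 'import sys; print(sys.version_info[:2])'  # expect (3, 12)
```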
@@ -76,7 +86,7 @@ jobs:
           gpg --keyserver keyserver.ubuntu.com --recv-keys 806BB28AED779869

       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.5.1
+        uses: codecov/codecov-action@v5.5.2
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           files: ./coverage.xml
--- vec_inf-0.7.3/.pre-commit-config.yaml
+++ vec_inf-0.8.0/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.14.5'
+    rev: 'v0.14.10'
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
@@ -26,7 +26,7 @@ repos:
         types_or: [python, jupyter]

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.18.2
+    rev: v1.19.1
     hooks:
       - id: mypy
         entry: python3 -m mypy --config-file pyproject.toml
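After revision bumps like these, locally cached hook environments may still be built against the old versions. A sketch of refreshing them, assuming `pre-commit` is installed:

```bash
# Sketch: rebuild hook environments against the new revs.
pre-commit clean            # drop environments cached for the old revisions
pre-commit run --all-files  # re-run ruff and mypy at v0.14.10 / v1.19.1
```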
--- vec_inf-0.7.3/MODEL_TRACKING.md
+++ vec_inf-0.8.0/MODEL_TRACKING.md
@@ -94,6 +94,7 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `Llama-4-Scout-17B-16E-Instruct` | ❌ |
+| `Llama-4-Maverick-17B-128E-Instruct` | ❌ |

 ### Mistral AI: Mistral
 | Model | Configuration |
@@ -128,6 +129,7 @@ This document tracks all model weights available in the `/model-weights` directo
 |:------|:-------------|
 | `Qwen2.5-0.5B-Instruct` | ✅ |
 | `Qwen2.5-1.5B-Instruct` | ✅ |
+| `Qwen2.5-3B` | ❌ |
 | `Qwen2.5-3B-Instruct` | ✅ |
 | `Qwen2.5-7B-Instruct` | ✅ |
 | `Qwen2.5-14B-Instruct` | ✅ |
@@ -138,12 +140,14 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `Qwen2.5-Math-1.5B-Instruct` | ✅ |
+| `Qwen2.5-Math-7B` | ❌ |
 | `Qwen2.5-Math-7B-Instruct` | ✅ |
 | `Qwen2.5-Math-72B-Instruct` | ✅ |

 ### Qwen: Qwen2.5-Coder
 | Model | Configuration |
 |:------|:-------------|
+| `Qwen2.5-Coder-3B-Instruct` | ✅ |
 | `Qwen2.5-Coder-7B-Instruct` | ✅ |

 ### Qwen: QwQ
@@ -162,6 +166,12 @@ This document tracks all model weights available in the `/model-weights` directo
 | `Qwen2-Math-72B-Instruct` | ❌ |
 | `Qwen2-VL-7B-Instruct` | ❌ |

+### Qwen: Qwen2.5-VL
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-VL-3B-Instruct` | ❌ |
+| `Qwen2.5-VL-7B-Instruct` | ✅ |
+
 ### Qwen: Qwen3
 | Model | Configuration |
 |:------|:-------------|
@@ -191,27 +201,76 @@ This document tracks all model weights available in the `/model-weights` directo
 | Model | Configuration |
 |:------|:-------------|
 | `gpt-oss-120b` | ✅ |
+| `gpt-oss-20b` | ✅ |

-### Other LLM Models
+
+#### AI21: Jamba
 | Model | Configuration |
 |:------|:-------------|
 | `AI21-Jamba-1.5-Mini` | ❌ |
-| `aya-expanse-32b` | ✅ (as Aya-Expanse-32B) |
+
+#### Cohere for AI: Aya
+| Model | Configuration |
+|:------|:-------------|
+| `aya-expanse-32b` | ✅ |
+
+#### OpenAI: GPT-2
+| Model | Configuration |
+|:------|:-------------|
 | `gpt2-large` | ❌ |
 | `gpt2-xl` | ❌ |
-| `gpt-oss-120b` | ❌ |
-| `instructblip-vicuna-7b` | ❌ |
+
+#### InternLM: InternLM2
+| Model | Configuration |
+|:------|:-------------|
 | `internlm2-math-plus-7b` | ❌ |
+
+#### Janus
+| Model | Configuration |
+|:------|:-------------|
 | `Janus-Pro-7B` | ❌ |
+
+#### Moonshot AI: Kimi
+| Model | Configuration |
+|:------|:-------------|
 | `Kimi-K2-Instruct` | ❌ |
+
+#### Mistral AI: Ministral
+| Model | Configuration |
+|:------|:-------------|
 | `Ministral-8B-Instruct-2410` | ❌ |
-| `Molmo-7B-D-0924` | ✅ |
+
+#### AI2: OLMo
+| Model | Configuration |
+|:------|:-------------|
 | `OLMo-1B-hf` | ❌ |
 | `OLMo-7B-hf` | ❌ |
 | `OLMo-7B-SFT` | ❌ |
+
+#### EleutherAI: Pythia
+| Model | Configuration |
+|:------|:-------------|
 | `pythia` | ❌ |
+
+#### Qwen: Qwen1.5
+| Model | Configuration |
+|:------|:-------------|
 | `Qwen1.5-72B-Chat` | ❌ |
+
+#### ReasonFlux
+| Model | Configuration |
+|:------|:-------------|
 | `ReasonFlux-PRM-7B` | ❌ |
+
+#### LMSYS: Vicuna
+| Model | Configuration |
+|:------|:-------------|
+| `vicuna-13b-v1.5` | ❌ |
+
+#### Google: T5 (Encoder-Decoder Models)
+**Note**: These are encoder-decoder (T5) models, not decoder-only LLMs.
+| Model | Configuration |
+|:------|:-------------|
 | `t5-large-lm-adapt` | ❌ |
 | `t5-xl-lm-adapt` | ❌ |
 | `mt5-xl-lm-adapt` | ❌ |
@@ -238,10 +297,10 @@ This document tracks all model weights available in the `/model-weights` directo
 ### Meta: Llama 3.2 Vision
 | Model | Configuration |
 |:------|:-------------|
-| `Llama-3.2-11B-Vision` | |
-| `Llama-3.2-11B-Vision-Instruct` | ✅ |
-| `Llama-3.2-90B-Vision` | |
-| `Llama-3.2-90B-Vision-Instruct` | ✅ |
+| `Llama-3.2-11B-Vision` | |
+| `Llama-3.2-11B-Vision-Instruct` | ✅ | (SGLang only)
+| `Llama-3.2-90B-Vision` | |
+| `Llama-3.2-90B-Vision-Instruct` | ✅ | (SGLang only)

 ### Mistral: Pixtral
 | Model | Configuration |
@@ -266,10 +325,19 @@ This document tracks all model weights available in the `/model-weights` directo
 | `deepseek-vl2` | ✅ |
 | `deepseek-vl2-small` | ✅ |

+### Google: MedGemma
+| Model | Configuration |
+|:------|:-------------|
+| `medgemma-4b-it` | ✅ |
+| `medgemma-27b-it` | ✅ |
+| `medgemma-27b-text-it` | ❌ |
+
 ### Other VLM Models
 | Model | Configuration |
 |:------|:-------------|
+| `instructblip-vicuna-7b` | ❌ |
 | `MiniCPM-Llama3-V-2_5` | ❌ |
+| `Molmo-7B-D-0924` | ✅ |

 ---

@@ -298,6 +366,8 @@ This document tracks all model weights available in the `/model-weights` directo
 | `data2vec` | ❌ |
 | `gte-modernbert-base` | ❌ |
 | `gte-Qwen2-7B-instruct` | ❌ |
+| `KaLM-Embedding-Gemma3-12B-2511` | ❌ |
+| `llama-embed-nemotron-8b` | ❌ |
 | `m2-bert-80M-32k-retrieval` | ❌ |
 | `m2-bert-80M-8k-retrieval` | ❌ |

@@ -313,7 +383,7 @@ This document tracks all model weights available in the `/model-weights` directo

 ---

-## Multimodal Models
+## Vision Models

 ### CLIP
 | Model | Configuration |
--- vec_inf-0.7.3/PKG-INFO
+++ vec_inf-0.8.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vec-inf
-Version: 0.7.3
+Version: 0.8.0
 Summary: Efficient LLM inference on Slurm clusters using vLLM.
 Author-email: Marshall Wang <marshall.wang@vectorinstitute.ai>
 License-Expression: MIT
@@ -11,14 +11,14 @@ Requires-Dist: pydantic>=2.10.6
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: requests>=2.31.0
 Requires-Dist: rich>=13.7.0
-Provides-Extra: dev
-Requires-Dist: cupy-cuda12x==12.1.0; extra == 'dev'
-Requires-Dist: flashinfer-python>=0.4.0; extra == 'dev'
-Requires-Dist: ray[default]>=2.50.0; extra == 'dev'
-Requires-Dist: sglang>=0.5.0; extra == 'dev'
-Requires-Dist: torch>=2.7.0; extra == 'dev'
-Requires-Dist: vllm>=0.10.0; extra == 'dev'
-Requires-Dist: xgrammar>=0.1.11; extra == 'dev'
+Provides-Extra: sglang
+Requires-Dist: orjson>=3.11.0; extra == 'sglang'
+Requires-Dist: sgl-kernel>=0.3.0; extra == 'sglang'
+Requires-Dist: sglang>=0.5.5; extra == 'sglang'
+Requires-Dist: torchao>=0.9.0; extra == 'sglang'
+Provides-Extra: vllm
+Requires-Dist: ray[default]>=2.51.0; extra == 'vllm'
+Requires-Dist: vllm>=0.11.2; extra == 'vllm'
 Description-Content-Type: text/markdown

 # Vector Inference: Easy inference on Slurm clusters
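The metadata above replaces the single `dev` extra with engine-specific `sglang` and `vllm` extras. A sketch of what installs look like under the new layout (extra names taken from the `Provides-Extra` lines; exact resolved versions depend on your environment):

```bash
# Sketch: engine-specific installs with the 0.8.0 extras.
pip install 'vec-inf[vllm]'    # pulls vllm>=0.11.2 and ray[default]>=2.51.0
pip install 'vec-inf[sglang]'  # pulls sglang>=0.5.5, sgl-kernel, torchao, orjson
pip install vec-inf            # client and CLI only, no inference engine
```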
@@ -30,10 +30,11 @@ Description-Content-Type: text/markdown
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.12.0-blue)](https://docs.vllm.ai/en/v0.12.0/)
+[![SGLang](https://img.shields.io/badge/SGLang-0.5.5.post3-blue)](https://docs.sglang.io/index.html)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.12.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).

 **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)

@@ -43,12 +44,12 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
+Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories)

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
 * The package would try to look for cached configuration files in your environment before using the default configuration. The default cached configuration directory path points to `/model-weights/vec-inf-shared`, you would need to create an `environment.yaml` and a `models.yaml` following the format of these files in [`vec_inf/config`](vec_inf/config/).
-* The package would also look for an enviroment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the enviroment variable `VEC_INF_CONFIG_DIR` to point to that location.
+* [OPTIONAL] The package could also look for an enviroment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the enviroment variable `VEC_INF_CONFIG_DIR` to point to that location.

 ## Usage

@@ -65,13 +66,13 @@ vec-inf launch Meta-Llama-3.1-8B-Instruct
 ```
 You should see an output like the following:

-<img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />
+<img width="720" alt="launch_image" src="./docs/assets/launch.png" />

 **NOTE**: You can set the required fields in the environment configuration (`environment.yaml`), it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.

-Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html). For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
+Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)

 #### Other commands

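Given the required Killarney fields described above, a minimal launch sketch using the documented environment-variable defaults; the account name and path below are placeholders, not real values:

```bash
# Placeholders: substitute your own Slurm account and working directory.
export VEC_INF_ACCOUNT=my-slurm-account
export VEC_INF_WORK_DIR=/scratch/$USER/vec-inf
vec-inf launch Meta-Llama-3.1-8B-Instruct
```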
--- vec_inf-0.7.3/README.md
+++ vec_inf-0.8.0/README.md
@@ -7,10 +7,11 @@
 [![code checks](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/code_checks.yml)
 [![docs](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/vector-inference/actions/workflows/docs.yml)
 [![codecov](https://codecov.io/github/VectorInstitute/vector-inference/branch/main/graph/badge.svg?token=NI88QSIGAC)](https://app.codecov.io/github/VectorInstitute/vector-inference/tree/main)
-[![vLLM](https://img.shields.io/badge/vLLM-0.11.0-blue)](https://docs.vllm.ai/en/v0.11.0/)
+[![vLLM](https://img.shields.io/badge/vLLM-0.12.0-blue)](https://docs.vllm.ai/en/v0.12.0/)
+[![SGLang](https://img.shields.io/badge/SGLang-0.5.5.post3-blue)](https://docs.sglang.io/index.html)
 ![GitHub License](https://img.shields.io/github/license/VectorInstitute/vector-inference)

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/latest/). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.12.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).

 **NOTE**: Supported models on Killarney are tracked [here](./MODEL_TRACKING.md)

@@ -20,12 +21,12 @@ If you are using the Vector cluster environment, and you don't need any customiz
 ```bash
 pip install vec-inf
 ```
-Otherwise, we recommend using the provided [`Dockerfile`](Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
+Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories)

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`.
 * The package would try to look for cached configuration files in your environment before using the default configuration. The default cached configuration directory path points to `/model-weights/vec-inf-shared`, you would need to create an `environment.yaml` and a `models.yaml` following the format of these files in [`vec_inf/config`](vec_inf/config/).
-* The package would also look for an enviroment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the enviroment variable `VEC_INF_CONFIG_DIR` to point to that location.
+* [OPTIONAL] The package could also look for an enviroment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the enviroment variable `VEC_INF_CONFIG_DIR` to point to that location.

 ## Usage

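Since the install text now points at Docker Hub, pulling a prebuilt image would look like the following; the tag names are inferred from the `docker.yml` build matrix above, so verify them on Docker Hub before relying on them:

```bash
# Assumed tags, per the docker.yml workflow's naming scheme.
docker pull vectorinstitute/vector-inference-vllm:latest
docker pull vectorinstitute/vector-inference-sglang:latest
```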
@@ -42,13 +43,13 @@ vec-inf launch Meta-Llama-3.1-8B-Instruct
 ```
 You should see an output like the following:

-<img width="720" alt="launch_image" src="https://github.com/user-attachments/assets/c1e0c60c-cf7a-49ed-a426-fdb38ebf88ee" />
+<img width="720" alt="launch_image" src="./docs/assets/launch.png" />

 **NOTE**: You can set the required fields in the environment configuration (`environment.yaml`), it's a mapping between required arguments and their corresponding environment variables. On the Vector **Killarney** Cluster environment, the required fields are:
 * `--account`, `-A`: The Slurm account, this argument can be set to default by setting environment variable `VEC_INF_ACCOUNT`.
 * `--work-dir`, `-D`: A working directory other than your home directory, this argument can be set to default by seeting environment variable `VEC_INF_WORK_DIR`.

-Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is [supported by vLLM](https://docs.vllm.ai/en/stable/models/supported_models.html). For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)
+Models that are already supported by `vec-inf` would be launched using the cached configuration (set in [slurm_vars.py](vec_inf/client/slurm_vars.py)) or [default configuration](vec_inf/config/models.yaml). You can override these values by providing additional parameters. Use `vec-inf launch --help` to see the full list of parameters that can be overriden. You can also launch your own custom model as long as the model architecture is supported by the underlying inference engine. For detailed instructions on how to customize your model launch, check out the [`launch` command section in User Guide](https://vectorinstitute.github.io/vector-inference/latest/user_guide/#launch-command)

 #### Other commands

vec_inf-0.8.0/docs/assets/launch.png: Binary file (added)
--- vec_inf-0.7.3/docs/index.md
+++ vec_inf-0.8.0/docs/index.md
@@ -1,6 +1,7 @@
 # Vector Inference: Easy inference on Slurm clusters

-This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using [vLLM](https://docs.vllm.ai/en/stable/). **This package runs natively on the Vector Institute cluster environment**. To adapt to other environments, follow the instructions in [Installation](#installation).
+This repository provides an easy-to-use solution to run inference servers on [Slurm](https://slurm.schedmd.com/overview.html)-managed computing clusters using open-source inference engines ([vLLM](https://docs.vllm.ai/en/v0.12.0/), [SGLang](https://docs.sglang.io/index.html)). **This package runs natively on the Vector Institute cluster environments**. To adapt to other environments, follow the instructions in [Installation](#installation).
+

 **NOTE**: Supported models on Killarney are tracked [here](https://github.com/VectorInstitute/vector-inference/blob/main/MODEL_TRACKING.md)

@@ -12,9 +13,10 @@ If you are using the Vector cluster environment, and you don't need any customiz
 pip install vec-inf
 ```

-Otherwise, we recommend using the provided [`Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/Dockerfile) to set up your own environment with the package. The latest image has `vLLM` version `0.11.0`.
+Otherwise, we recommend using the provided [`vllm.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/vllm.Dockerfile) and [`sglang.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories)

 If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it:
+
 * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config), then install from source by running `pip install .`.
 * The package would try to look for cached configuration files in your environment before using the default configuration. The default cached configuration directory path points to `/model-weights/vec-inf-shared`, you would need to create an `environment.yaml` and a `models.yaml` following the format of these files in [`vec_inf/config`](https://github.com/VectorInstitute/vector-inference/blob/main/vec_inf/config).
-* The package would also look for an enviroment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the enviroment variable `VEC_INF_CONFIG_DIR` to point to that location.
+* [OPTIONAL] The package would also look for an enviroment variable `VEC_INF_CONFIG_DIR`. You can put your `environment.yaml` and `models.yaml` in a directory of your choice and set the enviroment variable `VEC_INF_CONFIG_DIR` to point to that location.
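A short sketch of the optional `VEC_INF_CONFIG_DIR` route described in the last bullet; the directory path is illustrative:

```bash
# Sketch: point vec-inf at a custom configuration directory.
mkdir -p ~/.vec-inf-config
cp environment.yaml models.yaml ~/.vec-inf-config/   # your customized copies
export VEC_INF_CONFIG_DIR=~/.vec-inf-config
```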