vec-inf 0.6.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/bug_report.md +1 -1
- vec_inf-0.7.0/.github/ISSUE_TEMPLATE/model-request.md +14 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/code_checks.yml +1 -1
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/docker.yml +4 -4
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/docs.yml +3 -3
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/publish.yml +1 -1
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/unit_tests.yml +6 -2
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.gitignore +4 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.pre-commit-config.yaml +3 -3
- {vec_inf-0.6.0 → vec_inf-0.7.0}/Dockerfile +17 -8
- vec_inf-0.7.0/MODEL_TRACKING.md +324 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/PKG-INFO +25 -67
- {vec_inf-0.6.0 → vec_inf-0.7.0}/README.md +22 -64
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/api.md +9 -0
- vec_inf-0.7.0/docs/index.md +20 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/user_guide.md +110 -37
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/README.md +1 -0
- vec_inf-0.7.0/examples/slurm_dependency/README.md +33 -0
- vec_inf-0.7.0/examples/slurm_dependency/downstream_job.sbatch +18 -0
- vec_inf-0.7.0/examples/slurm_dependency/run_downstream.py +26 -0
- vec_inf-0.7.0/examples/slurm_dependency/run_workflow.sh +14 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/pyproject.toml +3 -3
- vec_inf-0.7.0/tests/test_imports.py +33 -0
- vec_inf-0.7.0/tests/vec_inf/cli/test_cli.py +406 -0
- vec_inf-0.7.0/tests/vec_inf/cli/test_helper.py +521 -0
- vec_inf-0.7.0/tests/vec_inf/client/test_api.py +520 -0
- vec_inf-0.7.0/tests/vec_inf/client/test_helper.py +997 -0
- vec_inf-0.7.0/tests/vec_inf/client/test_slurm_script_generator.py +498 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/test_utils.py +162 -26
- vec_inf-0.7.0/tests/vec_inf/client/test_vars.env +2 -0
- vec_inf-0.7.0/uv.lock +5260 -0
- vec_inf-0.7.0/vec_inf/README.md +23 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_cli.py +212 -30
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_helper.py +95 -14
- vec_inf-0.7.0/vec_inf/client/_client_vars.py +80 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/_helper.py +386 -53
- vec_inf-0.7.0/vec_inf/client/_slurm_script_generator.py +346 -0
- vec_inf-0.7.0/vec_inf/client/_slurm_templates.py +248 -0
- vec_inf-0.7.0/vec_inf/client/_slurm_vars.py +82 -0
- vec_inf-0.7.0/vec_inf/client/_utils.py +406 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/api.py +96 -25
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/config.py +46 -15
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/models.py +51 -2
- vec_inf-0.7.0/vec_inf/config/README.md +6 -0
- vec_inf-0.7.0/vec_inf/config/environment.yaml +31 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/config/models.yaml +102 -281
- {vec_inf-0.6.0 → vec_inf-0.7.0}/venv.sh +14 -13
- vec_inf-0.6.0/docs/index.md +0 -13
- vec_inf-0.6.0/tests/test_imports.py +0 -32
- vec_inf-0.6.0/tests/vec_inf/cli/test_cli.py +0 -533
- vec_inf-0.6.0/tests/vec_inf/client/test_api.py +0 -130
- vec_inf-0.6.0/uv.lock +0 -4701
- vec_inf-0.6.0/vec_inf/README.md +0 -9
- vec_inf-0.6.0/vec_inf/client/_client_vars.py +0 -213
- vec_inf-0.6.0/vec_inf/client/_slurm_script_generator.py +0 -179
- vec_inf-0.6.0/vec_inf/client/_utils.py +0 -287
- vec_inf-0.6.0/vec_inf/client/slurm_vars.py +0 -49
- vec_inf-0.6.0/vec_inf/config/README.md +0 -245
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/dependabot.yml +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/pull_request_template.md +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/.python-version +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/LICENSE +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/codecov.yml +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/Makefile +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/assets/favicon-48x48.svg +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/assets/favicon.ico +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/assets/vector-logo.svg +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/contributing.md +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/make.bat +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/overrides/partials/copyright.html +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/overrides/partials/logo.html +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/stylesheets/extra.css +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/api/basic_usage.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/llm/chat_completions.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/llm/completions.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/llm/completions.sh +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/text_embedding/embeddings.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/vlm/vision_completions.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/logits/logits.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/mkdocs.yml +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/profile/avg_throughput.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/profile/gen.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/cli/test_utils.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/test_examples.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/test_models.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_utils.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_vars.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/__init__.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/_exceptions.py +0 -0
- {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/find_port.sh +0 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Model request
|
|
3
|
+
about: Request for new model weights or model config
|
|
4
|
+
title: New model request for [MODEL_NAME]
|
|
5
|
+
labels: new model
|
|
6
|
+
assignees: XkunW
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
### Request Type
|
|
11
|
+
Model weights | Model config | Both
|
|
12
|
+
|
|
13
|
+
### Model Name
|
|
14
|
+
Name of the model requested
|
|
@@ -24,7 +24,7 @@ jobs:
|
|
|
24
24
|
runs-on: ubuntu-latest
|
|
25
25
|
steps:
|
|
26
26
|
- name: Checkout repository
|
|
27
|
-
uses: actions/checkout@
|
|
27
|
+
uses: actions/checkout@v5.0.0
|
|
28
28
|
|
|
29
29
|
- name: Extract vLLM version
|
|
30
30
|
id: vllm-version
|
|
@@ -33,19 +33,19 @@ jobs:
|
|
|
33
33
|
echo "version=$VERSION" >> $GITHUB_OUTPUT
|
|
34
34
|
|
|
35
35
|
- name: Log in to Docker Hub
|
|
36
|
-
uses: docker/login-action@
|
|
36
|
+
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1
|
|
37
37
|
with:
|
|
38
38
|
username: ${{ secrets.DOCKER_USERNAME }}
|
|
39
39
|
password: ${{ secrets.DOCKER_PASSWORD }}
|
|
40
40
|
|
|
41
41
|
- name: Extract metadata (tags, labels) for Docker
|
|
42
42
|
id: meta
|
|
43
|
-
uses: docker/metadata-action@
|
|
43
|
+
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f
|
|
44
44
|
with:
|
|
45
45
|
images: vectorinstitute/vector-inference
|
|
46
46
|
|
|
47
47
|
- name: Build and push Docker image
|
|
48
|
-
uses: docker/build-push-action@
|
|
48
|
+
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
|
|
49
49
|
with:
|
|
50
50
|
context: .
|
|
51
51
|
file: ./Dockerfile
|
|
@@ -51,7 +51,7 @@ jobs:
|
|
|
51
51
|
runs-on: ubuntu-latest
|
|
52
52
|
steps:
|
|
53
53
|
- name: Checkout code
|
|
54
|
-
uses: actions/checkout@
|
|
54
|
+
uses: actions/checkout@v5.0.0
|
|
55
55
|
with:
|
|
56
56
|
fetch-depth: 0 # Fetch all history for proper versioning
|
|
57
57
|
|
|
@@ -88,7 +88,7 @@ jobs:
|
|
|
88
88
|
runs-on: ubuntu-latest
|
|
89
89
|
steps:
|
|
90
90
|
- name: Checkout code
|
|
91
|
-
uses: actions/checkout@
|
|
91
|
+
uses: actions/checkout@v5.0.0
|
|
92
92
|
with:
|
|
93
93
|
fetch-depth: 0 # Fetch all history for proper versioning
|
|
94
94
|
|
|
@@ -112,7 +112,7 @@ jobs:
|
|
|
112
112
|
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
|
|
113
113
|
|
|
114
114
|
- name: Download artifact
|
|
115
|
-
uses: actions/download-artifact@
|
|
115
|
+
uses: actions/download-artifact@v5
|
|
116
116
|
with:
|
|
117
117
|
name: docs-site
|
|
118
118
|
path: site
|
|
@@ -43,7 +43,7 @@ jobs:
|
|
|
43
43
|
matrix:
|
|
44
44
|
python-version: ["3.10", "3.11", "3.12"]
|
|
45
45
|
steps:
|
|
46
|
-
- uses: actions/checkout@
|
|
46
|
+
- uses: actions/checkout@v5.0.0
|
|
47
47
|
|
|
48
48
|
- name: Install uv
|
|
49
49
|
uses: astral-sh/setup-uv@v6
|
|
@@ -71,8 +71,12 @@ jobs:
|
|
|
71
71
|
run: |
|
|
72
72
|
uv run pytest tests/test_imports.py
|
|
73
73
|
|
|
74
|
+
- name: Import Codecov GPG public key
|
|
75
|
+
run: |
|
|
76
|
+
gpg --keyserver keyserver.ubuntu.com --recv-keys 806BB28AED779869
|
|
77
|
+
|
|
74
78
|
- name: Upload coverage to Codecov
|
|
75
|
-
uses: codecov/codecov-action@v5.
|
|
79
|
+
uses: codecov/codecov-action@v5.5.0
|
|
76
80
|
with:
|
|
77
81
|
token: ${{ secrets.CODECOV_TOKEN }}
|
|
78
82
|
file: ./coverage.xml
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
repos:
|
|
2
2
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
-
rev:
|
|
3
|
+
rev: v6.0.0 # Use the ref you want to point at
|
|
4
4
|
hooks:
|
|
5
5
|
- id: trailing-whitespace
|
|
6
6
|
- id: check-ast
|
|
@@ -17,7 +17,7 @@ repos:
|
|
|
17
17
|
- id: check-toml
|
|
18
18
|
|
|
19
19
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
20
|
-
rev: 'v0.
|
|
20
|
+
rev: 'v0.12.10'
|
|
21
21
|
hooks:
|
|
22
22
|
- id: ruff
|
|
23
23
|
args: [--fix, --exit-non-zero-on-fix]
|
|
@@ -26,7 +26,7 @@ repos:
|
|
|
26
26
|
types_or: [python, jupyter]
|
|
27
27
|
|
|
28
28
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
29
|
-
rev: v1.
|
|
29
|
+
rev: v1.17.1
|
|
30
30
|
hooks:
|
|
31
31
|
- id: mypy
|
|
32
32
|
entry: python3 -m mypy --config-file pyproject.toml
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
FROM nvidia/cuda:12.
|
|
1
|
+
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
|
|
2
2
|
|
|
3
3
|
# Non-interactive apt-get commands
|
|
4
4
|
ARG DEBIAN_FRONTEND=noninteractive
|
|
@@ -6,8 +6,8 @@ ARG DEBIAN_FRONTEND=noninteractive
|
|
|
6
6
|
# No GPUs visible during build
|
|
7
7
|
ARG CUDA_VISIBLE_DEVICES=none
|
|
8
8
|
|
|
9
|
-
# Specify CUDA architectures -> 7.5: RTX 6000 & T4, 8.0: A100, 8.6
|
|
10
|
-
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
|
|
9
|
+
# Specify CUDA architectures -> 7.5: Quadro RTX 6000 & T4, 8.0: A100, 8.6: A40, 8.9: L40S, 9.0: H100
|
|
10
|
+
ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"
|
|
11
11
|
|
|
12
12
|
# Set the Python version
|
|
13
13
|
ARG PYTHON_VERSION=3.10.12
|
|
@@ -35,20 +35,29 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \
|
|
|
35
35
|
rm get-pip.py && \
|
|
36
36
|
python3.10 -m pip install --upgrade pip setuptools wheel uv
|
|
37
37
|
|
|
38
|
+
# Install Infiniband/RDMA support
|
|
39
|
+
RUN apt-get update && apt-get install -y \
|
|
40
|
+
libibverbs1 libibverbs-dev ibverbs-utils \
|
|
41
|
+
librdmacm1 librdmacm-dev rdmacm-utils \
|
|
42
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
43
|
+
|
|
44
|
+
# Set up RDMA environment (these will persist in the final container)
|
|
45
|
+
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
|
|
46
|
+
ENV UCX_NET_DEVICES=all
|
|
47
|
+
ENV NCCL_IB_DISABLE=0
|
|
48
|
+
|
|
38
49
|
# Set up project
|
|
39
50
|
WORKDIR /vec-inf
|
|
40
51
|
COPY . /vec-inf
|
|
41
52
|
|
|
42
53
|
# Install project dependencies with build requirements
|
|
43
|
-
RUN PIP_INDEX_URL="https://download.pytorch.org/whl/
|
|
44
|
-
# Install FlashAttention
|
|
45
|
-
RUN python3.10 -m pip install flash-attn --no-build-isolation
|
|
46
|
-
# Install FlashInfer
|
|
47
|
-
RUN python3.10 -m pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6/
|
|
54
|
+
RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu128" uv pip install --system -e .[dev]
|
|
48
55
|
|
|
49
56
|
# Final configuration
|
|
50
57
|
RUN mkdir -p /vec-inf/nccl && \
|
|
51
58
|
mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
|
|
59
|
+
ENV VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
|
|
60
|
+
ENV NCCL_DEBUG=INFO
|
|
52
61
|
|
|
53
62
|
# Set the default command to start an interactive shell
|
|
54
63
|
CMD ["bash"]
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
# Model Weights Tracking
|
|
2
|
+
|
|
3
|
+
This document tracks all model weights available in the `/model-weights` directory on the Killarney cluster and indicates which ones have existing configurations in the cached model config (`/model-weights/vec-inf-shared/models.yaml`). By default, `vec-inf` uses the cached model config. To request that new model weights be downloaded or a model configuration be added, please open a "Model request" issue.
|
|
4
|
+
|
|
5
|
+
**NOTE**: The [`models.yaml`](./vec_inf/config/models.yaml) file in the package is not always up to date with the latest cached model config on the Killarney cluster, because new model configs are added directly to the cached model config. `models.yaml` is updated to reflect the cached model config when a new version of the package is released.
|
|
6
|
+
|
|
7
|
+
## Legend
|
|
8
|
+
- ✅ **Configured**: Model has a complete configuration in `models.yaml`
|
|
9
|
+
- ❌ **Not Configured**: Model exists in `/model-weights` but lacks configuration
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Text Generation Models (LLM)
|
|
14
|
+
|
|
15
|
+
### Cohere for AI: Command R
|
|
16
|
+
| Model | Configuration |
|
|
17
|
+
|:------|:-------------|
|
|
18
|
+
| `c4ai-command-r-plus-08-2024` | ✅ |
|
|
19
|
+
| `c4ai-command-r-08-2024` | ✅ |
|
|
20
|
+
|
|
21
|
+
### Code Llama
|
|
22
|
+
| Model | Configuration |
|
|
23
|
+
|:------|:-------------|
|
|
24
|
+
| `CodeLlama-7b-hf` | ✅ |
|
|
25
|
+
| `CodeLlama-7b-Instruct-hf` | ✅ |
|
|
26
|
+
| `CodeLlama-13b-hf` | ✅ |
|
|
27
|
+
| `CodeLlama-13b-Instruct-hf` | ✅ |
|
|
28
|
+
| `CodeLlama-34b-hf` | ✅ |
|
|
29
|
+
| `CodeLlama-34b-Instruct-hf` | ✅ |
|
|
30
|
+
| `CodeLlama-70b-hf` | ✅ |
|
|
31
|
+
| `CodeLlama-70b-Instruct-hf` | ✅ |
|
|
32
|
+
| `CodeLlama-7b-Python-hf` | ❌ |
|
|
33
|
+
| `CodeLlama-13b-Python-hf` | ❌ |
|
|
34
|
+
| `CodeLlama-70b-Python-hf` | ❌ |
|
|
35
|
+
|
|
36
|
+
### Google: Gemma
|
|
37
|
+
| Model | Configuration |
|
|
38
|
+
|:------|:-------------|
|
|
39
|
+
| `gemma-2b` | ❌ |
|
|
40
|
+
| `gemma-2b-it` | ❌ |
|
|
41
|
+
| `gemma-7b` | ❌ |
|
|
42
|
+
| `gemma-7b-it` | ❌ |
|
|
43
|
+
| `gemma-2-9b` | ✅ |
|
|
44
|
+
| `gemma-2-9b-it` | ✅ |
|
|
45
|
+
| `gemma-2-27b` | ✅ |
|
|
46
|
+
| `gemma-2-27b-it` | ✅ |
|
|
47
|
+
| `gemma-3-1b-it` | ❌ |
|
|
48
|
+
| `gemma-3-4b-it` | ❌ |
|
|
49
|
+
| `gemma-3-12b-it` | ❌ |
|
|
50
|
+
| `gemma-3-27b-it` | ❌ |
|
|
51
|
+
|
|
52
|
+
### Meta: Llama 2
|
|
53
|
+
| Model | Configuration |
|
|
54
|
+
|:------|:-------------|
|
|
55
|
+
| `Llama-2-7b-hf` | ✅ |
|
|
56
|
+
| `Llama-2-7b-chat-hf` | ✅ |
|
|
57
|
+
| `Llama-2-13b-hf` | ✅ |
|
|
58
|
+
| `Llama-2-13b-chat-hf` | ✅ |
|
|
59
|
+
| `Llama-2-70b-hf` | ✅ |
|
|
60
|
+
| `Llama-2-70b-chat-hf` | ✅ |
|
|
61
|
+
|
|
62
|
+
### Meta: Llama 3
|
|
63
|
+
| Model | Configuration |
|
|
64
|
+
|:------|:-------------|
|
|
65
|
+
| `Meta-Llama-3-8B` | ✅ |
|
|
66
|
+
| `Meta-Llama-3-8B-Instruct` | ✅ |
|
|
67
|
+
| `Meta-Llama-3-70B` | ✅ |
|
|
68
|
+
| `Meta-Llama-3-70B-Instruct` | ✅ |
|
|
69
|
+
|
|
70
|
+
### Meta: Llama 3.1
|
|
71
|
+
| Model | Configuration |
|
|
72
|
+
|:------|:-------------|
|
|
73
|
+
| `Meta-Llama-3.1-8B` | ✅ |
|
|
74
|
+
| `Meta-Llama-3.1-8B-Instruct` | ✅ |
|
|
75
|
+
| `Meta-Llama-3.1-70B` | ✅ |
|
|
76
|
+
| `Meta-Llama-3.1-70B-Instruct` | ✅ |
|
|
77
|
+
| `Meta-Llama-3.1-405B-Instruct` | ✅ |
|
|
78
|
+
|
|
79
|
+
### Meta: Llama 3.2
|
|
80
|
+
| Model | Configuration |
|
|
81
|
+
|:------|:-------------|
|
|
82
|
+
| `Llama-3.2-1B` | ✅ |
|
|
83
|
+
| `Llama-3.2-1B-Instruct` | ✅ |
|
|
84
|
+
| `Llama-3.2-3B` | ✅ |
|
|
85
|
+
| `Llama-3.2-3B-Instruct` | ✅ |
|
|
86
|
+
|
|
87
|
+
### Meta: Llama 3.3
|
|
88
|
+
| Model | Configuration |
|
|
89
|
+
|:------|:-------------|
|
|
90
|
+
| `Llama-3.3-70B-Instruct` | ✅ |
|
|
91
|
+
|
|
92
|
+
### Meta: Llama 4
|
|
93
|
+
| Model | Configuration |
|
|
94
|
+
|:------|:-------------|
|
|
95
|
+
| `Llama-4-Scout-17B-16E-Instruct` | ❌ |
|
|
96
|
+
|
|
97
|
+
### Mistral AI: Mistral
|
|
98
|
+
| Model | Configuration |
|
|
99
|
+
|:------|:-------------|
|
|
100
|
+
| `Mistral-7B-v0.3` | ✅ |
|
|
101
|
+
| `Mistral-7B-Instruct-v0.1` | ✅ |
|
|
102
|
+
| `Mistral-7B-Instruct-v0.2` | ✅ |
|
|
103
|
+
| `Mistral-7B-Instruct-v0.3` | ✅ |
|
|
104
|
+
| `Mistral-Large-Instruct-2407` | ✅ |
|
|
105
|
+
| `Mistral-Large-Instruct-2411` | ✅ |
|
|
106
|
+
|
|
107
|
+
### Mistral AI: Mixtral
|
|
108
|
+
| Model | Configuration |
|
|
109
|
+
|:------|:-------------|
|
|
110
|
+
| `Mixtral-8x7B-Instruct-v0.1` | ✅ |
|
|
111
|
+
| `Mixtral-8x22B-v0.1` | ✅ |
|
|
112
|
+
| `Mixtral-8x22B-Instruct-v0.1` | ✅ |
|
|
113
|
+
|
|
114
|
+
### Microsoft: Phi
|
|
115
|
+
| Model | Configuration |
|
|
116
|
+
|:------|:-------------|
|
|
117
|
+
| `Phi-3-medium-128k-instruct` | ✅ |
|
|
118
|
+
| `phi-4` | ❌ |
|
|
119
|
+
|
|
120
|
+
### Nvidia: Llama-3.1-Nemotron
|
|
121
|
+
| Model | Configuration |
|
|
122
|
+
|:------|:-------------|
|
|
123
|
+
| `Llama-3.1-Nemotron-70B-Instruct-HF` | ✅ |
|
|
124
|
+
|
|
125
|
+
### Qwen: Qwen2.5
|
|
126
|
+
| Model | Configuration |
|
|
127
|
+
|:------|:-------------|
|
|
128
|
+
| `Qwen2.5-0.5B-Instruct` | ✅ |
|
|
129
|
+
| `Qwen2.5-1.5B-Instruct` | ✅ |
|
|
130
|
+
| `Qwen2.5-3B-Instruct` | ✅ |
|
|
131
|
+
| `Qwen2.5-7B-Instruct` | ✅ |
|
|
132
|
+
| `Qwen2.5-14B-Instruct` | ✅ |
|
|
133
|
+
| `Qwen2.5-32B-Instruct` | ✅ |
|
|
134
|
+
| `Qwen2.5-72B-Instruct` | ✅ |
|
|
135
|
+
|
|
136
|
+
### Qwen: Qwen2.5-Math
|
|
137
|
+
| Model | Configuration |
|
|
138
|
+
|:------|:-------------|
|
|
139
|
+
| `Qwen2.5-Math-1.5B-Instruct` | ✅ |
|
|
140
|
+
| `Qwen2.5-Math-7B-Instruct` | ✅ |
|
|
141
|
+
| `Qwen2.5-Math-72B-Instruct` | ✅ |
|
|
142
|
+
|
|
143
|
+
### Qwen: Qwen2.5-Coder
|
|
144
|
+
| Model | Configuration |
|
|
145
|
+
|:------|:-------------|
|
|
146
|
+
| `Qwen2.5-Coder-7B-Instruct` | ✅ |
|
|
147
|
+
|
|
148
|
+
### Qwen: QwQ
|
|
149
|
+
| Model | Configuration |
|
|
150
|
+
|:------|:-------------|
|
|
151
|
+
| `QwQ-32B` | ✅ |
|
|
152
|
+
|
|
153
|
+
### Qwen: Qwen2
|
|
154
|
+
| Model | Configuration |
|
|
155
|
+
|:------|:-------------|
|
|
156
|
+
| `Qwen2-1.5B-Instruct` | ❌ |
|
|
157
|
+
| `Qwen2-7B-Instruct` | ❌ |
|
|
158
|
+
| `Qwen2-Math-1.5B-Instruct` | ❌ |
|
|
159
|
+
| `Qwen2-Math-7B-Instruct` | ❌ |
|
|
160
|
+
| `Qwen2-Math-72B` | ❌ |
|
|
161
|
+
| `Qwen2-Math-72B-Instruct` | ❌ |
|
|
162
|
+
| `Qwen2-VL-7B-Instruct` | ❌ |
|
|
163
|
+
|
|
164
|
+
### Qwen: Qwen3
|
|
165
|
+
| Model | Configuration |
|
|
166
|
+
|:------|:-------------|
|
|
167
|
+
| `Qwen3-14B` | ✅ |
|
|
168
|
+
| `Qwen3-8B` | ❌ |
|
|
169
|
+
| `Qwen3-32B` | ❌ |
|
|
170
|
+
| `Qwen3-235B-A22B` | ❌ |
|
|
171
|
+
| `Qwen3-Embedding-8B` | ❌ |
|
|
172
|
+
|
|
173
|
+
### DeepSeek: DeepSeek-R1
|
|
174
|
+
| Model | Configuration |
|
|
175
|
+
|:------|:-------------|
|
|
176
|
+
| `DeepSeek-R1-Distill-Llama-8B` | ✅ |
|
|
177
|
+
| `DeepSeek-R1-Distill-Llama-70B` | ✅ |
|
|
178
|
+
| `DeepSeek-R1-Distill-Qwen-1.5B` | ✅ |
|
|
179
|
+
| `DeepSeek-R1-Distill-Qwen-7B` | ✅ |
|
|
180
|
+
| `DeepSeek-R1-Distill-Qwen-14B` | ✅ |
|
|
181
|
+
| `DeepSeek-R1-Distill-Qwen-32B` | ✅ |
|
|
182
|
+
|
|
183
|
+
### DeepSeek: Other Models
|
|
184
|
+
| Model | Configuration |
|
|
185
|
+
|:------|:-------------|
|
|
186
|
+
| `DeepSeek-Coder-V2-Lite-Instruct` | ❌ |
|
|
187
|
+
| `deepseek-math-7b-instruct` | ❌ |
|
|
188
|
+
|
|
189
|
+
### Other LLM Models
|
|
190
|
+
| Model | Configuration |
|
|
191
|
+
|:------|:-------------|
|
|
192
|
+
| `AI21-Jamba-1.5-Mini` | ❌ |
|
|
193
|
+
| `aya-expanse-32b` | ✅ (as Aya-Expanse-32B) |
|
|
194
|
+
| `gpt2-large` | ❌ |
|
|
195
|
+
| `gpt2-xl` | ❌ |
|
|
196
|
+
| `gpt-oss-120b` | ❌ |
|
|
197
|
+
| `instructblip-vicuna-7b` | ❌ |
|
|
198
|
+
| `internlm2-math-plus-7b` | ❌ |
|
|
199
|
+
| `Janus-Pro-7B` | ❌ |
|
|
200
|
+
| `Kimi-K2-Instruct` | ❌ |
|
|
201
|
+
| `Ministral-8B-Instruct-2410` | ❌ |
|
|
202
|
+
| `Molmo-7B-D-0924` | ✅ |
|
|
203
|
+
| `OLMo-1B-hf` | ❌ |
|
|
204
|
+
| `OLMo-7B-hf` | ❌ |
|
|
205
|
+
| `OLMo-7B-SFT` | ❌ |
|
|
206
|
+
| `pythia` | ❌ |
|
|
207
|
+
| `Qwen1.5-72B-Chat` | ❌ |
|
|
208
|
+
| `ReasonFlux-PRM-7B` | ❌ |
|
|
209
|
+
| `t5-large-lm-adapt` | ❌ |
|
|
210
|
+
| `t5-xl-lm-adapt` | ❌ |
|
|
211
|
+
| `mt5-xl-lm-adapt` | ❌ |
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Vision Language Models (VLM)
|
|
216
|
+
|
|
217
|
+
### LLaVa
|
|
218
|
+
| Model | Configuration |
|
|
219
|
+
|:------|:-------------|
|
|
220
|
+
| `llava-1.5-7b-hf` | ✅ |
|
|
221
|
+
| `llava-1.5-13b-hf` | ✅ |
|
|
222
|
+
| `llava-v1.6-mistral-7b-hf` | ✅ |
|
|
223
|
+
| `llava-v1.6-34b-hf` | ✅ |
|
|
224
|
+
| `llava-med-v1.5-mistral-7b` | ❌ |
|
|
225
|
+
|
|
226
|
+
### Microsoft: Phi 3 Vision
|
|
227
|
+
| Model | Configuration |
|
|
228
|
+
|:------|:-------------|
|
|
229
|
+
| `Phi-3-vision-128k-instruct` | ✅ |
|
|
230
|
+
| `Phi-3.5-vision-instruct` | ✅ |
|
|
231
|
+
|
|
232
|
+
### Meta: Llama 3.2 Vision
|
|
233
|
+
| Model | Configuration |
|
|
234
|
+
|:------|:-------------|
|
|
235
|
+
| `Llama-3.2-11B-Vision` | ✅ |
|
|
236
|
+
| `Llama-3.2-11B-Vision-Instruct` | ✅ |
|
|
237
|
+
| `Llama-3.2-90B-Vision` | ✅ |
|
|
238
|
+
| `Llama-3.2-90B-Vision-Instruct` | ✅ |
|
|
239
|
+
|
|
240
|
+
### Mistral: Pixtral
|
|
241
|
+
| Model | Configuration |
|
|
242
|
+
|:------|:-------------|
|
|
243
|
+
| `Pixtral-12B-2409` | ✅ |
|
|
244
|
+
|
|
245
|
+
### OpenGVLab: InternVL2.5
|
|
246
|
+
| Model | Configuration |
|
|
247
|
+
|:------|:-------------|
|
|
248
|
+
| `InternVL2_5-8B` | ✅ |
|
|
249
|
+
| `InternVL2_5-26B` | ✅ |
|
|
250
|
+
| `InternVL2_5-38B` | ✅ |
|
|
251
|
+
|
|
252
|
+
### THUDM: GLM-4
|
|
253
|
+
| Model | Configuration |
|
|
254
|
+
|:------|:-------------|
|
|
255
|
+
| `glm-4v-9b` | ✅ |
|
|
256
|
+
|
|
257
|
+
### DeepSeek: DeepSeek-VL2
|
|
258
|
+
| Model | Configuration |
|
|
259
|
+
|:------|:-------------|
|
|
260
|
+
| `deepseek-vl2` | ✅ |
|
|
261
|
+
| `deepseek-vl2-small` | ✅ |
|
|
262
|
+
|
|
263
|
+
### Other VLM Models
|
|
264
|
+
| Model | Configuration |
|
|
265
|
+
|:------|:-------------|
|
|
266
|
+
| `MiniCPM-Llama3-V-2_5` | ❌ |
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Text Embedding Models
|
|
271
|
+
|
|
272
|
+
### Liang Wang: e5
|
|
273
|
+
| Model | Configuration |
|
|
274
|
+
|:------|:-------------|
|
|
275
|
+
| `e5-mistral-7b-instruct` | ✅ |
|
|
276
|
+
|
|
277
|
+
### BAAI: bge
|
|
278
|
+
| Model | Configuration |
|
|
279
|
+
|:------|:-------------|
|
|
280
|
+
| `bge-base-en-v1.5` | ✅ |
|
|
281
|
+
| `bge-m3` | ❌ |
|
|
282
|
+
| `bge-multilingual-gemma2` | ❌ |
|
|
283
|
+
|
|
284
|
+
### Sentence Transformers: MiniLM
|
|
285
|
+
| Model | Configuration |
|
|
286
|
+
|:------|:-------------|
|
|
287
|
+
| `all-MiniLM-L6-v2` | ✅ |
|
|
288
|
+
|
|
289
|
+
### Other Embedding Models
|
|
290
|
+
| Model | Configuration |
|
|
291
|
+
|:------|:-------------|
|
|
292
|
+
| `data2vec` | ❌ |
|
|
293
|
+
| `gte-modernbert-base` | ❌ |
|
|
294
|
+
| `gte-Qwen2-7B-instruct` | ❌ |
|
|
295
|
+
| `m2-bert-80M-32k-retrieval` | ❌ |
|
|
296
|
+
| `m2-bert-80M-8k-retrieval` | ❌ |
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## Reward Modeling Models
|
|
301
|
+
|
|
302
|
+
### Qwen: Qwen2.5-Math
|
|
303
|
+
| Model | Configuration |
|
|
304
|
+
|:------|:-------------|
|
|
305
|
+
| `Qwen2.5-Math-RM-72B` | ✅ |
|
|
306
|
+
| `Qwen2.5-Math-PRM-7B` | ✅ |
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## Multimodal Models
|
|
311
|
+
|
|
312
|
+
### CLIP
|
|
313
|
+
| Model | Configuration |
|
|
314
|
+
|:------|:-------------|
|
|
315
|
+
| `clip-vit-base-patch16` | ❌ |
|
|
316
|
+
| `clip-vit-large-patch14-336` | ❌ |
|
|
317
|
+
|
|
318
|
+
### Stable Diffusion
|
|
319
|
+
| Model | Configuration |
|
|
320
|
+
|:------|:-------------|
|
|
321
|
+
| `sd-v1-4-full-ema` | ❌ |
|
|
322
|
+
| `stable-diffusion-v1-4` | ❌ |
|
|
323
|
+
|
|
324
|
+
---
|