vec-inf 0.6.0.tar.gz → 0.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/bug_report.md +1 -1
  2. vec_inf-0.7.0/.github/ISSUE_TEMPLATE/model-request.md +14 -0
  3. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/code_checks.yml +1 -1
  4. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/docker.yml +4 -4
  5. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/docs.yml +3 -3
  6. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/publish.yml +1 -1
  7. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/unit_tests.yml +6 -2
  8. {vec_inf-0.6.0 → vec_inf-0.7.0}/.gitignore +4 -0
  9. {vec_inf-0.6.0 → vec_inf-0.7.0}/.pre-commit-config.yaml +3 -3
  10. {vec_inf-0.6.0 → vec_inf-0.7.0}/Dockerfile +17 -8
  11. vec_inf-0.7.0/MODEL_TRACKING.md +324 -0
  12. {vec_inf-0.6.0 → vec_inf-0.7.0}/PKG-INFO +25 -67
  13. {vec_inf-0.6.0 → vec_inf-0.7.0}/README.md +22 -64
  14. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/api.md +9 -0
  15. vec_inf-0.7.0/docs/index.md +20 -0
  16. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/user_guide.md +110 -37
  17. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/README.md +1 -0
  18. vec_inf-0.7.0/examples/slurm_dependency/README.md +33 -0
  19. vec_inf-0.7.0/examples/slurm_dependency/downstream_job.sbatch +18 -0
  20. vec_inf-0.7.0/examples/slurm_dependency/run_downstream.py +26 -0
  21. vec_inf-0.7.0/examples/slurm_dependency/run_workflow.sh +14 -0
  22. {vec_inf-0.6.0 → vec_inf-0.7.0}/pyproject.toml +3 -3
  23. vec_inf-0.7.0/tests/test_imports.py +33 -0
  24. vec_inf-0.7.0/tests/vec_inf/cli/test_cli.py +406 -0
  25. vec_inf-0.7.0/tests/vec_inf/cli/test_helper.py +521 -0
  26. vec_inf-0.7.0/tests/vec_inf/client/test_api.py +520 -0
  27. vec_inf-0.7.0/tests/vec_inf/client/test_helper.py +997 -0
  28. vec_inf-0.7.0/tests/vec_inf/client/test_slurm_script_generator.py +498 -0
  29. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/test_utils.py +162 -26
  30. vec_inf-0.7.0/tests/vec_inf/client/test_vars.env +2 -0
  31. vec_inf-0.7.0/uv.lock +5260 -0
  32. vec_inf-0.7.0/vec_inf/README.md +23 -0
  33. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_cli.py +212 -30
  34. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_helper.py +95 -14
  35. vec_inf-0.7.0/vec_inf/client/_client_vars.py +80 -0
  36. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/_helper.py +386 -53
  37. vec_inf-0.7.0/vec_inf/client/_slurm_script_generator.py +346 -0
  38. vec_inf-0.7.0/vec_inf/client/_slurm_templates.py +248 -0
  39. vec_inf-0.7.0/vec_inf/client/_slurm_vars.py +82 -0
  40. vec_inf-0.7.0/vec_inf/client/_utils.py +406 -0
  41. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/api.py +96 -25
  42. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/config.py +46 -15
  43. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/models.py +51 -2
  44. vec_inf-0.7.0/vec_inf/config/README.md +6 -0
  45. vec_inf-0.7.0/vec_inf/config/environment.yaml +31 -0
  46. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/config/models.yaml +102 -281
  47. {vec_inf-0.6.0 → vec_inf-0.7.0}/venv.sh +14 -13
  48. vec_inf-0.6.0/docs/index.md +0 -13
  49. vec_inf-0.6.0/tests/test_imports.py +0 -32
  50. vec_inf-0.6.0/tests/vec_inf/cli/test_cli.py +0 -533
  51. vec_inf-0.6.0/tests/vec_inf/client/test_api.py +0 -130
  52. vec_inf-0.6.0/uv.lock +0 -4701
  53. vec_inf-0.6.0/vec_inf/README.md +0 -9
  54. vec_inf-0.6.0/vec_inf/client/_client_vars.py +0 -213
  55. vec_inf-0.6.0/vec_inf/client/_slurm_script_generator.py +0 -179
  56. vec_inf-0.6.0/vec_inf/client/_utils.py +0 -287
  57. vec_inf-0.6.0/vec_inf/client/slurm_vars.py +0 -49
  58. vec_inf-0.6.0/vec_inf/config/README.md +0 -245
  59. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  60. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  61. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/dependabot.yml +0 -0
  62. {vec_inf-0.6.0 → vec_inf-0.7.0}/.github/pull_request_template.md +0 -0
  63. {vec_inf-0.6.0 → vec_inf-0.7.0}/.python-version +0 -0
  64. {vec_inf-0.6.0 → vec_inf-0.7.0}/LICENSE +0 -0
  65. {vec_inf-0.6.0 → vec_inf-0.7.0}/codecov.yml +0 -0
  66. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/Makefile +0 -0
  67. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/assets/favicon-48x48.svg +0 -0
  68. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/assets/favicon.ico +0 -0
  69. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/assets/vector-logo.svg +0 -0
  70. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/contributing.md +0 -0
  71. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/make.bat +0 -0
  72. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/overrides/partials/copyright.html +0 -0
  73. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/overrides/partials/logo.html +0 -0
  74. {vec_inf-0.6.0 → vec_inf-0.7.0}/docs/stylesheets/extra.css +0 -0
  75. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/api/basic_usage.py +0 -0
  76. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/llm/chat_completions.py +0 -0
  77. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/llm/completions.py +0 -0
  78. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/llm/completions.sh +0 -0
  79. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/text_embedding/embeddings.py +0 -0
  80. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/inference/vlm/vision_completions.py +0 -0
  81. {vec_inf-0.6.0 → vec_inf-0.7.0}/examples/logits/logits.py +0 -0
  82. {vec_inf-0.6.0 → vec_inf-0.7.0}/mkdocs.yml +0 -0
  83. {vec_inf-0.6.0 → vec_inf-0.7.0}/profile/avg_throughput.py +0 -0
  84. {vec_inf-0.6.0 → vec_inf-0.7.0}/profile/gen.py +0 -0
  85. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/__init__.py +0 -0
  86. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/__init__.py +0 -0
  87. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/cli/__init__.py +0 -0
  88. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/cli/test_utils.py +0 -0
  89. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/__init__.py +0 -0
  90. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/test_examples.py +0 -0
  91. {vec_inf-0.6.0 → vec_inf-0.7.0}/tests/vec_inf/client/test_models.py +0 -0
  92. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/__init__.py +0 -0
  93. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/__init__.py +0 -0
  94. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_utils.py +0 -0
  95. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/cli/_vars.py +0 -0
  96. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/__init__.py +0 -0
  97. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/client/_exceptions.py +0 -0
  98. {vec_inf-0.6.0 → vec_inf-0.7.0}/vec_inf/find_port.sh +0 -0

{vec_inf-0.6.0 → vec_inf-0.7.0}/.github/ISSUE_TEMPLATE/bug_report.md
@@ -3,7 +3,7 @@ name: Bug report
 about: Create a report to help us improve
 title: ''
 labels: ''
-assignees: ''
+assignees: XkunW

 ---


vec_inf-0.7.0/.github/ISSUE_TEMPLATE/model-request.md
@@ -0,0 +1,14 @@
+---
+name: Model request
+about: Request for new model weights or model config
+title: New model request for [MODEL_NAME]
+labels: new model
+assignees: XkunW
+
+---
+
+### Request Type
+Model weights | Model config | Both
+
+### Model Name
+Name of the model requested

{vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/code_checks.yml
@@ -28,7 +28,7 @@ jobs:
   run-code-check:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4.2.2
+      - uses: actions/checkout@v5.0.0
       - name: Install uv
         uses: astral-sh/setup-uv@v6
         with:

{vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/docker.yml
@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4.2.2
+        uses: actions/checkout@v5.0.0

       - name: Extract vLLM version
         id: vllm-version

@@ -33,19 +33,19 @@ jobs:
           echo "version=$VERSION" >> $GITHUB_OUTPUT

       - name: Log in to Docker Hub
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772
+        uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_PASSWORD }}

       - name: Extract metadata (tags, labels) for Docker
         id: meta
-        uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804
+        uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f
         with:
           images: vectorinstitute/vector-inference

       - name: Build and push Docker image
-        uses: docker/build-push-action@14487ce63c7a62a4a324b0bfb37086795e31c6c1
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
         with:
           context: .
           file: ./Dockerfile

{vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/docs.yml
@@ -51,7 +51,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4.2.2
+        uses: actions/checkout@v5.0.0
         with:
           fetch-depth: 0 # Fetch all history for proper versioning


@@ -88,7 +88,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4.2.2
+        uses: actions/checkout@v5.0.0
         with:
           fetch-depth: 0 # Fetch all history for proper versioning


@@ -112,7 +112,7 @@ jobs:
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com

       - name: Download artifact
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v5
         with:
           name: docs-site
           path: site

{vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/publish.yml
@@ -13,7 +13,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install libcurl4-openssl-dev libssl-dev

-      - uses: actions/checkout@v4.2.2
+      - uses: actions/checkout@v5.0.0

       - name: Install uv
         uses: astral-sh/setup-uv@v6

{vec_inf-0.6.0 → vec_inf-0.7.0}/.github/workflows/unit_tests.yml
@@ -43,7 +43,7 @@ jobs:
       matrix:
         python-version: ["3.10", "3.11", "3.12"]
     steps:
-      - uses: actions/checkout@v4.2.2
+      - uses: actions/checkout@v5.0.0

       - name: Install uv
         uses: astral-sh/setup-uv@v6

@@ -71,8 +71,12 @@ jobs:
         run: |
           uv run pytest tests/test_imports.py

+      - name: Import Codecov GPG public key
+        run: |
+          gpg --keyserver keyserver.ubuntu.com --recv-keys 806BB28AED779869
+
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5.4.2
+        uses: codecov/codecov-action@v5.5.0
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           file: ./coverage.xml

{vec_inf-0.6.0 → vec_inf-0.7.0}/.gitignore
@@ -152,3 +152,7 @@ collect_env.py

 # build files
 dist/
+
+# type stubs
+stubs/
+mypy.ini

{vec_inf-0.6.0 → vec_inf-0.7.0}/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0 # Use the ref you want to point at
+    rev: v6.0.0 # Use the ref you want to point at
     hooks:
       - id: trailing-whitespace
       - id: check-ast

@@ -17,7 +17,7 @@ repos:
       - id: check-toml

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.11.8'
+    rev: 'v0.12.10'
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]

@@ -26,7 +26,7 @@ repos:
         types_or: [python, jupyter]

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
+    rev: v1.17.1
     hooks:
       - id: mypy
         entry: python3 -m mypy --config-file pyproject.toml

{vec_inf-0.6.0 → vec_inf-0.7.0}/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.4.1-devel-ubuntu20.04
+FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04

 # Non-interactive apt-get commands
 ARG DEBIAN_FRONTEND=noninteractive
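
The base image jumps from CUDA 12.4.1 on Ubuntu 20.04 to 12.8.1 with cuDNN on Ubuntu 24.04, and the PyTorch wheel index moves to cu128 in the third Dockerfile hunk below. A minimal sketch (not part of the diff, assuming PyTorch is installed in the image) to confirm the two stay in sync:

```python
# Hedged sketch: confirm the installed PyTorch build targets the same CUDA
# major/minor as the base image (12.8). Assumes a CUDA-enabled PyTorch install.
import torch

print("PyTorch:", torch.__version__)
print("Built against CUDA:", torch.version.cuda)  # expect "12.8" for cu128 wheels
```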

@@ -6,8 +6,8 @@ ARG DEBIAN_FRONTEND=noninteractive
 # No GPUs visible during build
 ARG CUDA_VISIBLE_DEVICES=none

-# Specify CUDA architectures -> 7.5: RTX 6000 & T4, 8.0: A100, 8.6+PTX
-ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX"
+# Specify CUDA architectures -> 7.5: Quadro RTX 6000 & T4, 8.0: A100, 8.6: A40, 8.9: L40S, 9.0: H100
+ARG TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX"

 # Set the Python version
 ARG PYTHON_VERSION=3.10.12
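
The expanded `TORCH_CUDA_ARCH_LIST` pins the compute capabilities the image's CUDA kernels are compiled for. As a quick sanity check (a sketch, not part of this diff), a short PyTorch snippet can report what the visible GPUs actually support, so a mismatch with the arch list is easy to spot:

```python
# Hedged sketch: report each visible GPU's compute capability so it can be
# checked against the TORCH_CUDA_ARCH_LIST entries (7.5, 8.0, 8.6, 8.9, 9.0).
# Assumes a CUDA-enabled PyTorch build; during the image build itself,
# CUDA_VISIBLE_DEVICES=none hides all GPUs, so this is a runtime check.
import torch

if torch.cuda.is_available():
    for idx in range(torch.cuda.device_count()):
        major, minor = torch.cuda.get_device_capability(idx)
        name = torch.cuda.get_device_name(idx)
        print(f"GPU {idx}: {name} -> compute capability {major}.{minor}")
else:
    print("No CUDA devices visible")
```

An A100 reports 8.0 and an H100 reports 9.0, matching the entries the updated comment attributes to them.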

@@ -35,20 +35,29 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \
     rm get-pip.py && \
     python3.10 -m pip install --upgrade pip setuptools wheel uv

+# Install Infiniband/RDMA support
+RUN apt-get update && apt-get install -y \
+    libibverbs1 libibverbs-dev ibverbs-utils \
+    librdmacm1 librdmacm-dev rdmacm-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set up RDMA environment (these will persist in the final container)
+ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
+ENV UCX_NET_DEVICES=all
+ENV NCCL_IB_DISABLE=0
+
 # Set up project
 WORKDIR /vec-inf
 COPY . /vec-inf

 # Install project dependencies with build requirements
-RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu121" uv pip install --system -e .[dev]
-# Install FlashAttention
-RUN python3.10 -m pip install flash-attn --no-build-isolation
-# Install FlashInfer
-RUN python3.10 -m pip install flashinfer-python -i https://flashinfer.ai/whl/cu124/torch2.6/
+RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu128" uv pip install --system -e .[dev]

 # Final configuration
 RUN mkdir -p /vec-inf/nccl && \
     mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
+ENV VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
+ENV NCCL_DEBUG=INFO

 # Set the default command to start an interactive shell
 CMD ["bash"]
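
The new `ENV` lines bake the RDMA/NCCL settings into the final image. A minimal runtime check (again a sketch, not part of the diff) can confirm the variables survived into the container and that PyTorch sees an NCCL backend; it assumes only that PyTorch is installed, as the Dockerfile arranges:

```python
# Hedged sketch: verify the RDMA/NCCL environment baked into the image above.
import os

import torch
import torch.distributed as dist

print("NCCL backend available:", dist.is_nccl_available())
if torch.cuda.is_available():
    print("NCCL version:", torch.cuda.nccl.version())

# Set via ENV in the Dockerfile; should survive into any container started
# from the image.
for var in ("VLLM_NCCL_SO_PATH", "NCCL_IB_DISABLE", "NCCL_DEBUG", "UCX_NET_DEVICES"):
    print(f"{var}={os.environ.get(var, '<unset>')}")
```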

vec_inf-0.7.0/MODEL_TRACKING.md
@@ -0,0 +1,324 @@
+# Model Weights Tracking
+
+This document tracks all model weights available in the `/model-weights` directory on the Killarney cluster and indicates which ones have existing configurations in the cached model config (`/model-weights/vec-inf-shared/models.yaml`). By default, `vec-inf` uses the cached model config. To request new model weights to be downloaded or a model configuration to be added, please open a "Model request" issue.
+
+**NOTE**: The [`models.yaml`](./vec_inf/config/models.yaml) file in the package is not always up to date with the latest cached model config on the Killarney cluster: new model configs land in the cached config first, and `models.yaml` is updated to match it when a new version of the package is released.
+
+## Legend
+- ✅ **Configured**: Model has a complete configuration in `models.yaml`
+- ❌ **Not Configured**: Model exists in `/model-weights` but lacks configuration
+
+---
+
+## Text Generation Models (LLM)
+
+### Cohere for AI: Command R
+| Model | Configuration |
+|:------|:-------------|
+| `c4ai-command-r-plus-08-2024` | ✅ |
+| `c4ai-command-r-08-2024` | ✅ |
+
+### Code Llama
+| Model | Configuration |
+|:------|:-------------|
+| `CodeLlama-7b-hf` | ✅ |
+| `CodeLlama-7b-Instruct-hf` | ✅ |
+| `CodeLlama-13b-hf` | ✅ |
+| `CodeLlama-13b-Instruct-hf` | ✅ |
+| `CodeLlama-34b-hf` | ✅ |
+| `CodeLlama-34b-Instruct-hf` | ✅ |
+| `CodeLlama-70b-hf` | ✅ |
+| `CodeLlama-70b-Instruct-hf` | ✅ |
+| `CodeLlama-7b-Python-hf` | ❌ |
+| `CodeLlama-13b-Python-hf` | ❌ |
+| `CodeLlama-70b-Python-hf` | ❌ |
+
+### Google: Gemma
+| Model | Configuration |
+|:------|:-------------|
+| `gemma-2b` | ❌ |
+| `gemma-2b-it` | ❌ |
+| `gemma-7b` | ❌ |
+| `gemma-7b-it` | ❌ |
+| `gemma-2-9b` | ✅ |
+| `gemma-2-9b-it` | ✅ |
+| `gemma-2-27b` | ✅ |
+| `gemma-2-27b-it` | ✅ |
+| `gemma-3-1b-it` | ❌ |
+| `gemma-3-4b-it` | ❌ |
+| `gemma-3-12b-it` | ❌ |
+| `gemma-3-27b-it` | ❌ |
+
+### Meta: Llama 2
+| Model | Configuration |
+|:------|:-------------|
+| `Llama-2-7b-hf` | ✅ |
+| `Llama-2-7b-chat-hf` | ✅ |
+| `Llama-2-13b-hf` | ✅ |
+| `Llama-2-13b-chat-hf` | ✅ |
+| `Llama-2-70b-hf` | ✅ |
+| `Llama-2-70b-chat-hf` | ✅ |
+
+### Meta: Llama 3
+| Model | Configuration |
+|:------|:-------------|
+| `Meta-Llama-3-8B` | ✅ |
+| `Meta-Llama-3-8B-Instruct` | ✅ |
+| `Meta-Llama-3-70B` | ✅ |
+| `Meta-Llama-3-70B-Instruct` | ✅ |
+
+### Meta: Llama 3.1
+| Model | Configuration |
+|:------|:-------------|
+| `Meta-Llama-3.1-8B` | ✅ |
+| `Meta-Llama-3.1-8B-Instruct` | ✅ |
+| `Meta-Llama-3.1-70B` | ✅ |
+| `Meta-Llama-3.1-70B-Instruct` | ✅ |
+| `Meta-Llama-3.1-405B-Instruct` | ✅ |
+
+### Meta: Llama 3.2
+| Model | Configuration |
+|:------|:-------------|
+| `Llama-3.2-1B` | ✅ |
+| `Llama-3.2-1B-Instruct` | ✅ |
+| `Llama-3.2-3B` | ✅ |
+| `Llama-3.2-3B-Instruct` | ✅ |
+
+### Meta: Llama 3.3
+| Model | Configuration |
+|:------|:-------------|
+| `Llama-3.3-70B-Instruct` | ✅ |
+
+### Meta: Llama 4
+| Model | Configuration |
+|:------|:-------------|
+| `Llama-4-Scout-17B-16E-Instruct` | ❌ |
+
+### Mistral AI: Mistral
+| Model | Configuration |
+|:------|:-------------|
+| `Mistral-7B-v0.3` | ✅ |
+| `Mistral-7B-Instruct-v0.1` | ✅ |
+| `Mistral-7B-Instruct-v0.2` | ✅ |
+| `Mistral-7B-Instruct-v0.3` | ✅ |
+| `Mistral-Large-Instruct-2407` | ✅ |
+| `Mistral-Large-Instruct-2411` | ✅ |
+
+### Mistral AI: Mixtral
+| Model | Configuration |
+|:------|:-------------|
+| `Mixtral-8x7B-Instruct-v0.1` | ✅ |
+| `Mixtral-8x22B-v0.1` | ✅ |
+| `Mixtral-8x22B-Instruct-v0.1` | ✅ |
+
+### Microsoft: Phi
+| Model | Configuration |
+|:------|:-------------|
+| `Phi-3-medium-128k-instruct` | ✅ |
+| `phi-4` | ❌ |
+
+### Nvidia: Llama-3.1-Nemotron
+| Model | Configuration |
+|:------|:-------------|
+| `Llama-3.1-Nemotron-70B-Instruct-HF` | ✅ |
+
+### Qwen: Qwen2.5
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-0.5B-Instruct` | ✅ |
+| `Qwen2.5-1.5B-Instruct` | ✅ |
+| `Qwen2.5-3B-Instruct` | ✅ |
+| `Qwen2.5-7B-Instruct` | ✅ |
+| `Qwen2.5-14B-Instruct` | ✅ |
+| `Qwen2.5-32B-Instruct` | ✅ |
+| `Qwen2.5-72B-Instruct` | ✅ |
+
+### Qwen: Qwen2.5-Math
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-Math-1.5B-Instruct` | ✅ |
+| `Qwen2.5-Math-7B-Instruct` | ✅ |
+| `Qwen2.5-Math-72B-Instruct` | ✅ |
+
+### Qwen: Qwen2.5-Coder
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-Coder-7B-Instruct` | ✅ |
+
+### Qwen: QwQ
+| Model | Configuration |
+|:------|:-------------|
+| `QwQ-32B` | ✅ |
+
+### Qwen: Qwen2
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2-1.5B-Instruct` | ❌ |
+| `Qwen2-7B-Instruct` | ❌ |
+| `Qwen2-Math-1.5B-Instruct` | ❌ |
+| `Qwen2-Math-7B-Instruct` | ❌ |
+| `Qwen2-Math-72B` | ❌ |
+| `Qwen2-Math-72B-Instruct` | ❌ |
+| `Qwen2-VL-7B-Instruct` | ❌ |
+
+### Qwen: Qwen3
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen3-14B` | ✅ |
+| `Qwen3-8B` | ❌ |
+| `Qwen3-32B` | ❌ |
+| `Qwen3-235B-A22B` | ❌ |
+| `Qwen3-Embedding-8B` | ❌ |
+
+### DeepSeek: DeepSeek-R1
+| Model | Configuration |
+|:------|:-------------|
+| `DeepSeek-R1-Distill-Llama-8B` | ✅ |
+| `DeepSeek-R1-Distill-Llama-70B` | ✅ |
+| `DeepSeek-R1-Distill-Qwen-1.5B` | ✅ |
+| `DeepSeek-R1-Distill-Qwen-7B` | ✅ |
+| `DeepSeek-R1-Distill-Qwen-14B` | ✅ |
+| `DeepSeek-R1-Distill-Qwen-32B` | ✅ |
+
+### DeepSeek: Other Models
+| Model | Configuration |
+|:------|:-------------|
+| `DeepSeek-Coder-V2-Lite-Instruct` | ❌ |
+| `deepseek-math-7b-instruct` | ❌ |
+
+### Other LLM Models
+| Model | Configuration |
+|:------|:-------------|
+| `AI21-Jamba-1.5-Mini` | ❌ |
+| `aya-expanse-32b` | ✅ (as Aya-Expanse-32B) |
+| `gpt2-large` | ❌ |
+| `gpt2-xl` | ❌ |
+| `gpt-oss-120b` | ❌ |
+| `instructblip-vicuna-7b` | ❌ |
+| `internlm2-math-plus-7b` | ❌ |
+| `Janus-Pro-7B` | ❌ |
+| `Kimi-K2-Instruct` | ❌ |
+| `Ministral-8B-Instruct-2410` | ❌ |
+| `Molmo-7B-D-0924` | ✅ |
+| `OLMo-1B-hf` | ❌ |
+| `OLMo-7B-hf` | ❌ |
+| `OLMo-7B-SFT` | ❌ |
+| `pythia` | ❌ |
+| `Qwen1.5-72B-Chat` | ❌ |
+| `ReasonFlux-PRM-7B` | ❌ |
+| `t5-large-lm-adapt` | ❌ |
+| `t5-xl-lm-adapt` | ❌ |
+| `mt5-xl-lm-adapt` | ❌ |
+
+---
+
+## Vision Language Models (VLM)
+
+### LLaVa
+| Model | Configuration |
+|:------|:-------------|
+| `llava-1.5-7b-hf` | ✅ |
+| `llava-1.5-13b-hf` | ✅ |
+| `llava-v1.6-mistral-7b-hf` | ✅ |
+| `llava-v1.6-34b-hf` | ✅ |
+| `llava-med-v1.5-mistral-7b` | ❌ |
+
+### Microsoft: Phi 3 Vision
+| Model | Configuration |
+|:------|:-------------|
+| `Phi-3-vision-128k-instruct` | ✅ |
+| `Phi-3.5-vision-instruct` | ✅ |
+
+### Meta: Llama 3.2 Vision
+| Model | Configuration |
+|:------|:-------------|
+| `Llama-3.2-11B-Vision` | ✅ |
+| `Llama-3.2-11B-Vision-Instruct` | ✅ |
+| `Llama-3.2-90B-Vision` | ✅ |
+| `Llama-3.2-90B-Vision-Instruct` | ✅ |
+
+### Mistral: Pixtral
+| Model | Configuration |
+|:------|:-------------|
+| `Pixtral-12B-2409` | ✅ |
+
+### OpenGVLab: InternVL2.5
+| Model | Configuration |
+|:------|:-------------|
+| `InternVL2_5-8B` | ✅ |
+| `InternVL2_5-26B` | ✅ |
+| `InternVL2_5-38B` | ✅ |
+
+### THUDM: GLM-4
+| Model | Configuration |
+|:------|:-------------|
+| `glm-4v-9b` | ✅ |
+
+### DeepSeek: DeepSeek-VL2
+| Model | Configuration |
+|:------|:-------------|
+| `deepseek-vl2` | ✅ |
+| `deepseek-vl2-small` | ✅ |
+
+### Other VLM Models
+| Model | Configuration |
+|:------|:-------------|
+| `MiniCPM-Llama3-V-2_5` | ❌ |
+
+---
+
+## Text Embedding Models
+
+### Liang Wang: e5
+| Model | Configuration |
+|:------|:-------------|
+| `e5-mistral-7b-instruct` | ✅ |
+
+### BAAI: bge
+| Model | Configuration |
+|:------|:-------------|
+| `bge-base-en-v1.5` | ✅ |
+| `bge-m3` | ❌ |
+| `bge-multilingual-gemma2` | ❌ |
+
+### Sentence Transformers: MiniLM
+| Model | Configuration |
+|:------|:-------------|
+| `all-MiniLM-L6-v2` | ✅ |
+
+### Other Embedding Models
+| Model | Configuration |
+|:------|:-------------|
+| `data2vec` | ❌ |
+| `gte-modernbert-base` | ❌ |
+| `gte-Qwen2-7B-instruct` | ❌ |
+| `m2-bert-80M-32k-retrieval` | ❌ |
+| `m2-bert-80M-8k-retrieval` | ❌ |
+
+---
+
+## Reward Modeling Models
+
+### Qwen: Qwen2.5-Math
+| Model | Configuration |
+|:------|:-------------|
+| `Qwen2.5-Math-RM-72B` | ✅ |
+| `Qwen2.5-Math-PRM-7B` | ✅ |
+
+---
+
+## Multimodal Models
+
+### CLIP
+| Model | Configuration |
+|:------|:-------------|
+| `clip-vit-base-patch16` | ❌ |
+| `clip-vit-large-patch14-336` | ❌ |
+
+### Stable Diffusion
+| Model | Configuration |
+|:------|:-------------|
+| `sd-v1-4-full-ema` | ❌ |
+| `stable-diffusion-v1-4` | ❌ |
+
+---
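
Since the ✅/❌ status in the tables above simply reflects whether a model name has an entry in the cached `models.yaml`, the same check can be scripted. The sketch below is illustrative only: it assumes the cached config at the path quoted in the file's introduction is readable and uses a top-level `models` mapping keyed by model name, which may differ between releases.

```python
# Hedged sketch: report whether model names have entries in the cached model
# config. Requires PyYAML; the path and the top-level "models" mapping are
# assumptions based on the note at the top of MODEL_TRACKING.md.
from pathlib import Path

import yaml

CACHED_CONFIG = Path("/model-weights/vec-inf-shared/models.yaml")

configured: set = set()
if CACHED_CONFIG.exists():
    data = yaml.safe_load(CACHED_CONFIG.read_text())
    configured = set((data or {}).get("models", {}) or {})

for name in ["Meta-Llama-3.1-8B-Instruct", "gemma-3-27b-it", "phi-4"]:
    status = "configured" if name in configured else "not configured"
    print(f"{name}: {status}")
```

Given the tables above, the expected output marks the Llama model as configured and the other two as not configured.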