tuft 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tuft-0.1.2/.github/workflows/docker/docker-compose.yml +56 -0
- {tuft-0.1.1 → tuft-0.1.2}/.github/workflows/install-script.yml +51 -46
- {tuft-0.1.1 → tuft-0.1.2}/.github/workflows/unittest.yml +2 -3
- {tuft-0.1.1 → tuft-0.1.2}/PKG-INFO +2 -2
- {tuft-0.1.1 → tuft-0.1.2}/docker/Dockerfile +8 -10
- {tuft-0.1.1 → tuft-0.1.2}/pyproject.toml +2 -2
- {tuft-0.1.1 → tuft-0.1.2}/scripts/install.sh +63 -148
- tuft-0.1.2/src/tuft/__main__.py +7 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/cli.py +41 -8
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/config.py +6 -4
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/sampling_controller.py +3 -1
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/training_controller.py +6 -1
- {tuft-0.1.1 → tuft-0.1.2}/tests/helpers.py +47 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_cli.py +1 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_integration.py +3 -7
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_integration_persistence.py +4 -7
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_sampling_backend.py +3 -1
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_server.py +3 -1
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_state_controllers.py +3 -3
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_telemetry.py +3 -6
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_training_backend.py +15 -0
- tuft-0.1.1/.github/workflows/docker/docker-compose.yml +0 -72
- {tuft-0.1.1 → tuft-0.1.2}/.gitattributes +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.github/workflows/checks.yml +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.github/workflows/docker.yml +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.github/workflows/publish.yml +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.gitignore +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.gitmodules +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.pre-commit-config.yaml +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.python-version +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/.secrets.baseline +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/LICENSE +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/README.md +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/assets/countdown_rl.png +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/assets/test_nll_sft.png +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/assets/train_mean_nll_sft.png +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/assets/tuft-logo-colorful.svg +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/config/tuft_config.example.yaml +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/docs/chat_sft.md +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/docs/countdown_rl.md +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/docs/how_to_write_tests.md +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/examples/chat_sft.ipynb +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/examples/countdown_rl.ipynb +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/scripts/install_flash_attn.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/__init__.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/auth.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/backend.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/backends/__init__.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/backends/base_backend.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/backends/hf_training_model.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/backends/sampling_backend.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/backends/training_backend.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/checkpoints.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/exceptions.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/futures.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/loss_fn/__init__.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/loss_fn/cispo.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/loss_fn/cross_entropy.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/loss_fn/dro.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/loss_fn/importance_sampling.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/loss_fn/ppo.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/persistence/__init__.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/persistence/file_redis.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/persistence/redis_store.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/server.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/state.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/telemetry/__init__.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/telemetry/metrics.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/telemetry/provider.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/src/tuft/telemetry/tracing.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/__init__.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/conftest.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/data/models.yaml +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_checkpoints.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_file_redis.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_futures.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_loss_fn.py +0 -0
- {tuft-0.1.1 → tuft-0.1.2}/tests/test_persistence.py +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
services:
|
|
2
|
+
# use 2 nodes to simulate a cluster environment
|
|
3
|
+
tuft-node-1:
|
|
4
|
+
image: nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
|
|
5
|
+
pull_policy: never
|
|
6
|
+
command: bash -c "
|
|
7
|
+
chmod 1777 /tmp && apt update && apt install -y --no-install-recommends \
|
|
8
|
+
build-essential \
|
|
9
|
+
curl git wget vim tmux net-tools \
|
|
10
|
+
python3 python3-pip python3-dev python3-packaging python3-venv \
|
|
11
|
+
libomp-dev infiniband-diags libibverbs-dev librdmacm-dev rdma-core perftest \
|
|
12
|
+
&& rm -rf /var/lib/apt/lists/* \
|
|
13
|
+
&& ln -sf /usr/bin/python3 /usr/bin/python \
|
|
14
|
+
&& ln -sf /usr/bin/pip3 /usr/bin/pip \
|
|
15
|
+
&& bash /workspace/scripts/install.sh --local-source /workspace \
|
|
16
|
+
&& source $HOME/.local/bin/env \
|
|
17
|
+
&& source /root/.tuft/venv/bin/activate \
|
|
18
|
+
&& uv pip install .[dev] \
|
|
19
|
+
&& ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
|
|
20
|
+
environment:
|
|
21
|
+
- HF_ENDPOINT=https://hf-mirror.com
|
|
22
|
+
- RAY_ADDRESS=auto
|
|
23
|
+
- TUFT_CHECKPOINT_DIR=/mnt/checkpoints
|
|
24
|
+
- TUFT_TEST_MODEL=/mnt/models/Qwen3-0.6B
|
|
25
|
+
- TUFT_TEST_MODEL_1=/mnt/models/Qwen3-0.6B
|
|
26
|
+
- TUFT_TEST_MODEL_2=/mnt/models/Qwen3-1.7B
|
|
27
|
+
- TEST_REDIS_URL=redis://tuft-redis:6379
|
|
28
|
+
- VIRTUAL_ENV=/root/.tuft/venv
|
|
29
|
+
working_dir: /workspace
|
|
30
|
+
networks:
|
|
31
|
+
- tuft-network
|
|
32
|
+
volumes:
|
|
33
|
+
- tuft-volume:/mnt
|
|
34
|
+
- ../../..:/workspace
|
|
35
|
+
shm_size: "64G"
|
|
36
|
+
deploy:
|
|
37
|
+
resources:
|
|
38
|
+
reservations:
|
|
39
|
+
devices:
|
|
40
|
+
- driver: nvidia
|
|
41
|
+
device_ids: ['0', '1', '2', '3']
|
|
42
|
+
capabilities: [gpu]
|
|
43
|
+
|
|
44
|
+
tuft-redis:
|
|
45
|
+
image: redis:7.0
|
|
46
|
+
command: ["redis-server", "--save", "60", "1", "--loglevel", "warning"]
|
|
47
|
+
networks:
|
|
48
|
+
- tuft-network
|
|
49
|
+
|
|
50
|
+
networks:
|
|
51
|
+
tuft-network:
|
|
52
|
+
driver: bridge
|
|
53
|
+
|
|
54
|
+
volumes:
|
|
55
|
+
tuft-volume:
|
|
56
|
+
external: true
|
|
@@ -59,36 +59,42 @@ jobs:
|
|
|
59
59
|
env:
|
|
60
60
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
61
61
|
|
|
62
|
-
- name: Test tuft
|
|
62
|
+
- name: Test tuft launch --help
|
|
63
63
|
run: |
|
|
64
64
|
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
65
|
-
|
|
66
|
-
tuft 2>&1 | grep -q "\-\-config" || tuft 2>&1 | grep -q "config"
|
|
65
|
+
tuft launch --help
|
|
67
66
|
env:
|
|
68
67
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
69
68
|
|
|
70
|
-
- name:
|
|
71
|
-
run: rm -rf "${TUFT_HOME}"
|
|
72
|
-
env:
|
|
73
|
-
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
74
|
-
|
|
75
|
-
test-install-default-with-backend:
|
|
76
|
-
runs-on: ubuntu-latest
|
|
77
|
-
|
|
78
|
-
steps:
|
|
79
|
-
- name: Checkout code
|
|
80
|
-
uses: actions/checkout@v4
|
|
81
|
-
|
|
82
|
-
- name: Run install script (default includes backend)
|
|
69
|
+
- name: Test tuft launch requires config
|
|
83
70
|
run: |
|
|
84
|
-
|
|
71
|
+
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
72
|
+
# Should fail with config error when no config provided
|
|
73
|
+
if tuft launch 2>&1; then
|
|
74
|
+
echo "Expected tuft launch to fail without config"
|
|
75
|
+
exit 1
|
|
76
|
+
fi
|
|
77
|
+
# Verify error message mentions config
|
|
78
|
+
tuft launch 2>&1 | grep -qi "config"
|
|
85
79
|
env:
|
|
86
80
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
87
81
|
|
|
88
|
-
- name:
|
|
82
|
+
- name: Test tuft launch with config file
|
|
89
83
|
run: |
|
|
90
|
-
"${TUFT_HOME}/
|
|
91
|
-
|
|
84
|
+
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
85
|
+
# Create a minimal config file
|
|
86
|
+
cat > "${TUFT_HOME}/configs/tuft_config.yaml" << 'EOF'
|
|
87
|
+
model_owner: test
|
|
88
|
+
supported_models:
|
|
89
|
+
- model_name: test-model
|
|
90
|
+
model_path: /nonexistent/path
|
|
91
|
+
max_model_len: 1024
|
|
92
|
+
authorized_users:
|
|
93
|
+
test-key: test-user
|
|
94
|
+
EOF
|
|
95
|
+
# Launch should fail due to missing model, but get past config validation
|
|
96
|
+
# We just verify it doesn't fail on config parsing
|
|
97
|
+
tuft launch 2>&1 | grep -v "Configuration file must be provided" || true
|
|
92
98
|
env:
|
|
93
99
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
94
100
|
|
|
@@ -97,25 +103,23 @@ jobs:
|
|
|
97
103
|
env:
|
|
98
104
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
99
105
|
|
|
100
|
-
test-
|
|
106
|
+
test-backend-dependencies:
|
|
101
107
|
runs-on: ubuntu-latest
|
|
102
108
|
|
|
103
109
|
steps:
|
|
104
110
|
- name: Checkout code
|
|
105
111
|
uses: actions/checkout@v4
|
|
106
112
|
|
|
107
|
-
- name: Run install script
|
|
113
|
+
- name: Run install script
|
|
108
114
|
run: |
|
|
109
|
-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
|
|
115
|
+
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
|
|
110
116
|
env:
|
|
111
117
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
112
118
|
|
|
113
|
-
- name: Verify
|
|
119
|
+
- name: Verify backend dependencies installed
|
|
114
120
|
run: |
|
|
115
|
-
|
|
116
|
-
"${TUFT_HOME}/venv/bin/python" -c "import
|
|
117
|
-
# tuft should still be importable
|
|
118
|
-
"${TUFT_HOME}/venv/bin/python" -c "import tuft; print('tuft imported successfully')"
|
|
121
|
+
"${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
|
|
122
|
+
"${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
|
|
119
123
|
env:
|
|
120
124
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
121
125
|
|
|
@@ -150,13 +154,20 @@ jobs:
|
|
|
150
154
|
env:
|
|
151
155
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
152
156
|
|
|
153
|
-
- name: Test upgrade command
|
|
157
|
+
- name: Test upgrade command (from PyPI)
|
|
154
158
|
run: |
|
|
155
159
|
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
156
160
|
tuft upgrade
|
|
157
161
|
env:
|
|
158
162
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
159
163
|
|
|
164
|
+
- name: Test upgrade command (from local source)
|
|
165
|
+
run: |
|
|
166
|
+
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
167
|
+
tuft upgrade --local-source "$GITHUB_WORKSPACE"
|
|
168
|
+
env:
|
|
169
|
+
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
170
|
+
|
|
160
171
|
- name: Clean up installation
|
|
161
172
|
run: rm -rf "${TUFT_HOME}"
|
|
162
173
|
env:
|
|
@@ -171,7 +182,7 @@ jobs:
|
|
|
171
182
|
|
|
172
183
|
- name: Initial install
|
|
173
184
|
run: |
|
|
174
|
-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
|
|
185
|
+
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
|
|
175
186
|
env:
|
|
176
187
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
177
188
|
|
|
@@ -184,7 +195,7 @@ jobs:
|
|
|
184
195
|
|
|
185
196
|
- name: Reinstall with --clean
|
|
186
197
|
run: |
|
|
187
|
-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --
|
|
198
|
+
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --clean
|
|
188
199
|
env:
|
|
189
200
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
190
201
|
|
|
@@ -202,36 +213,30 @@ jobs:
|
|
|
202
213
|
env:
|
|
203
214
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
204
215
|
|
|
205
|
-
test-
|
|
216
|
+
test-upgrade-from-source:
|
|
206
217
|
runs-on: ubuntu-latest
|
|
207
218
|
|
|
208
219
|
steps:
|
|
209
220
|
- name: Checkout code
|
|
210
221
|
uses: actions/checkout@v4
|
|
211
222
|
|
|
212
|
-
- name: Install
|
|
213
|
-
run:
|
|
214
|
-
bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
|
|
215
|
-
env:
|
|
216
|
-
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
217
|
-
|
|
218
|
-
- name: Verify peft is NOT installed
|
|
219
|
-
run: |
|
|
220
|
-
"${TUFT_HOME}/venv/bin/python" -c "import peft" 2>&1 && exit 1 || echo "peft not installed (expected)"
|
|
223
|
+
- name: Install tuft
|
|
224
|
+
run: bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
|
|
221
225
|
env:
|
|
222
226
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
223
227
|
|
|
224
|
-
- name:
|
|
228
|
+
- name: Test upgrade --from-source
|
|
225
229
|
run: |
|
|
226
230
|
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
227
|
-
tuft
|
|
231
|
+
tuft upgrade --from-source
|
|
228
232
|
env:
|
|
229
233
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
230
234
|
|
|
231
|
-
- name: Verify
|
|
235
|
+
- name: Verify tuft still works after upgrade
|
|
232
236
|
run: |
|
|
233
|
-
"${TUFT_HOME}/
|
|
234
|
-
|
|
237
|
+
export PATH="${TUFT_HOME}/bin:$PATH"
|
|
238
|
+
tuft version
|
|
239
|
+
tuft launch --help
|
|
235
240
|
env:
|
|
236
241
|
TUFT_HOME: ${{ runner.temp }}/tuft
|
|
237
242
|
|
|
@@ -34,8 +34,7 @@ jobs:
|
|
|
34
34
|
MAX_RETRIES=20
|
|
35
35
|
RETRY_INTERVAL=5
|
|
36
36
|
for i in $(seq 1 $MAX_RETRIES); do
|
|
37
|
-
if docker compose exec tuft-node-1 bash -c "source /
|
|
38
|
-
&& docker compose exec tuft-node-2 bash -c "source /opt/venv/bin/activate && ray status"; then
|
|
37
|
+
if docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && ray status"; then
|
|
39
38
|
break
|
|
40
39
|
fi
|
|
41
40
|
echo "Waiting for ray cluster to be ready... ($i/$MAX_RETRIES)"
|
|
@@ -51,7 +50,7 @@ jobs:
|
|
|
51
50
|
# set a github env variable to indicate tests were run, so that subsequent steps can check it
|
|
52
51
|
run: |
|
|
53
52
|
echo "tests_run=true" >> $GITHUB_ENV
|
|
54
|
-
docker compose exec tuft-node-1 bash -c "source /
|
|
53
|
+
docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && pytest tests -v -s --gpu --basetemp /mnt/checkpoints --ctrf report.json"
|
|
55
54
|
|
|
56
55
|
- name: Convert report.json time to ms
|
|
57
56
|
working-directory: tuft-${{ github.run_id }}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tuft
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: A multi-tenant fine-tuning platform for LLMs with Tinker-compatible API
|
|
5
5
|
Author-email: TuFT Developers <tuft@list.alibaba-inc.com>
|
|
6
6
|
License: MIT License
|
|
@@ -29,6 +29,7 @@ Requires-Python: >=3.11
|
|
|
29
29
|
Requires-Dist: fastapi>=0.125.0
|
|
30
30
|
Requires-Dist: httpx>=0.28.1
|
|
31
31
|
Requires-Dist: numpy<2.0.0
|
|
32
|
+
Requires-Dist: nvidia-ml-py>=13.0.0
|
|
32
33
|
Requires-Dist: omegaconf>=2.3.0
|
|
33
34
|
Requires-Dist: opentelemetry-api>=1.20.0
|
|
34
35
|
Requires-Dist: opentelemetry-exporter-otlp>=1.20.0
|
|
@@ -36,7 +37,6 @@ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
|
|
|
36
37
|
Requires-Dist: opentelemetry-instrumentation-logging>=0.41b0
|
|
37
38
|
Requires-Dist: opentelemetry-sdk>=1.20.0
|
|
38
39
|
Requires-Dist: psutil>=5.9.0
|
|
39
|
-
Requires-Dist: pynvml>=11.5.0
|
|
40
40
|
Requires-Dist: ray>=2.50.0
|
|
41
41
|
Requires-Dist: tinker>=0.7.0
|
|
42
42
|
Requires-Dist: transformers<5.0.0,>=4.57.3
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
#
|
|
8
8
|
# Note:
|
|
9
9
|
# This Dockerfile uses 'uv' to create a virtual environment for better package management.
|
|
10
|
-
# The uv virtual environment is created at `/
|
|
10
|
+
# The uv virtual environment is created at `/root/.tuft/venv`, use `source /root/.tuft/venv/bin/activate` to activate it.
|
|
11
11
|
# Make sure to use `uv pip` to install packages within the virtual environment.
|
|
12
12
|
|
|
13
13
|
FROM nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
|
|
@@ -23,13 +23,14 @@ RUN chmod 1777 /tmp && apt update && apt install -y --no-install-recommends \
|
|
|
23
23
|
&& ln -sf /usr/bin/python3 /usr/bin/python \
|
|
24
24
|
&& ln -sf /usr/bin/pip3 /usr/bin/pip
|
|
25
25
|
|
|
26
|
-
ENV VIRTUAL_ENV=/
|
|
26
|
+
ENV VIRTUAL_ENV=/root/.tuft/venv
|
|
27
27
|
|
|
28
28
|
# copy the TuFT dir into the workspace
|
|
29
29
|
COPY ./pyproject.toml .
|
|
30
30
|
COPY ./LICENSE .
|
|
31
31
|
COPY ./README.md .
|
|
32
32
|
COPY ./src ./src
|
|
33
|
+
COPY ./scripts ./scripts
|
|
33
34
|
|
|
34
35
|
# Uncomment the following line if you want to use AliCloud Mirror to speed up pip install
|
|
35
36
|
# ENV UV_DEFAULT_INDEX=http://mirrors.cloud.aliyuncs.com/pypi/simple/
|
|
@@ -37,14 +38,11 @@ COPY ./src ./src
|
|
|
37
38
|
# Uncomment the following line to use a Hugging Face mirror if you have network connection problem with Hugging Face
|
|
38
39
|
# ENV HF_ENDPOINT=https://hf-mirror.com
|
|
39
40
|
|
|
40
|
-
# Install
|
|
41
|
-
RUN
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# Install flash_attn
|
|
47
|
-
RUN . ${VIRTUAL_ENV}/bin/activate && uv pip install flash_attn==2.8.1 --no-build-isolation
|
|
41
|
+
# Install
|
|
42
|
+
RUN bash ./scripts/install.sh --local-source /workspace \
|
|
43
|
+
&& . $HOME/.local/bin/env \
|
|
44
|
+
&& . /root/.tuft/venv/bin/activate \
|
|
45
|
+
&& uv pip install .[dev]
|
|
48
46
|
|
|
49
47
|
ENTRYPOINT ["/bin/bash", "-c", "source ${VIRTUAL_ENV}/bin/activate && exec \"$@\"", "--"]
|
|
50
48
|
CMD ["bash"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "tuft"
|
|
3
|
-
version = "0.1.
|
|
3
|
+
version = "0.1.2"
|
|
4
4
|
description = "A multi-tenant fine-tuning platform for LLMs with Tinker-compatible API"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "TuFT Developers", email = "tuft@list.alibaba-inc.com" }
|
|
@@ -24,7 +24,7 @@ dependencies = [
|
|
|
24
24
|
"opentelemetry-instrumentation-fastapi>=0.41b0",
|
|
25
25
|
"opentelemetry-instrumentation-logging>=0.41b0",
|
|
26
26
|
"psutil>=5.9.0",
|
|
27
|
-
"
|
|
27
|
+
"nvidia-ml-py>=13.0.0",
|
|
28
28
|
]
|
|
29
29
|
|
|
30
30
|
[project.scripts]
|