PyPI - tuft - Versions diffs - 0.1.1__tar.gz → 0.1.3__tar.gz - Mend

tuft 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

tuft-0.1.3/.github/workflows/docker/docker-compose.yml ADDED Viewed

@@ -0,0 +1,57 @@
+services:
+  # use 2 nodes to simulate a cluster environment
+  tuft-node-1:
+    image: nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
+    pull_policy: never
+    command: bash -c "
+      chmod 1777 /tmp && apt update && apt install -y --no-install-recommends \
+      build-essential \
+      curl git wget vim tmux net-tools \
+      python3 python3-pip python3-dev python3-packaging python3-venv \
+      libomp-dev infiniband-diags libibverbs-dev librdmacm-dev rdma-core perftest \
+      && rm -rf /var/lib/apt/lists/* \
+      && ln -sf /usr/bin/python3 /usr/bin/python \
+      && ln -sf /usr/bin/pip3 /usr/bin/pip \
+      && bash /workspace/scripts/install.sh --local-source /workspace \
+      && source /root/.local/bin/env \
+      && source /root/.tuft/venv/bin/activate \
+      && uv pip install .[dev] \
+      && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
+    environment:
+      - HF_ENDPOINT=https://hf-mirror.com
+      - RAY_ADDRESS=auto
+      - TUFT_CHECKPOINT_DIR=/mnt/checkpoints
+      - TUFT_TEST_MODEL=/mnt/models/Qwen3-0.6B
+      - TUFT_TEST_MODEL_1=/mnt/models/Qwen3-0.6B
+      - TUFT_TEST_MODEL_2=/mnt/models/Qwen3-1.7B
+      - TUFT_DOCKER_UNITTEST=1
+      - TEST_REDIS_URL=redis://tuft-redis:6379
+      - VIRTUAL_ENV=/root/.tuft/venv
+    working_dir: /workspace
+    networks:
+      - tuft-network
+    volumes:
+      - tuft-volume:/mnt
+      - ../../..:/workspace
+    shm_size: "64G"
+    deploy:
+      resources:
+        reservations:
+          devices:
+          - driver: nvidia
+            device_ids: ['0', '1', '2', '3']
+            capabilities: [gpu]
+  tuft-redis:
+    image: redis:7.0
+    command: ["redis-server", "--save", "60", "1", "--loglevel", "warning"]
+    networks:
+      - tuft-network
+networks:
+  tuft-network:
+    driver: bridge
+volumes:
+  tuft-volume:
+    external: true

{tuft-0.1.1 → tuft-0.1.3}/.github/workflows/install-script.yml RENAMED Viewed

@@ -59,36 +59,42 @@ jobs:
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Test tuft (dry run - check config error)
+      - name: Test tuft launch --help
         run: |
           export PATH="${TUFT_HOME}/bin:$PATH"
-          # Should fail with config error, not import error
-          tuft 2>&1 | grep -q "\-\-config" || tuft 2>&1 | grep -q "config"
+          tuft launch --help
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Clean up installation
-        run: rm -rf "${TUFT_HOME}"
-        env:
-          TUFT_HOME: ${{ runner.temp }}/tuft
-  test-install-default-with-backend:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-      - name: Run install script (default includes backend)
+      - name: Test tuft launch requires config
         run: |
-          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
+          export PATH="${TUFT_HOME}/bin:$PATH"
+          # Should fail with config error when no config provided
+          if tuft launch 2>&1; then
+            echo "Expected tuft launch to fail without config"
+            exit 1
+          fi
+          # Verify error message mentions config
+          tuft launch 2>&1 | grep -qi "config"
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Verify backend dependencies installed
+      - name: Test tuft launch with config file
         run: |
-          "${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
-          "${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
+          export PATH="${TUFT_HOME}/bin:$PATH"
+          # Create a minimal config file
+          cat > "${TUFT_HOME}/configs/tuft_config.yaml" << 'EOF'
+          model_owner: test
+          supported_models:
+            - model_name: test-model
+              model_path: /nonexistent/path
+              max_model_len: 1024
+          authorized_users:
+            test-key: test-user
+          EOF
+          # Launch should fail due to missing model, but get past config validation
+          # We just verify it doesn't fail on config parsing
+          tuft launch 2>&1 | grep -v "Configuration file must be provided" || true
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
@@ -97,25 +103,23 @@ jobs:
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-  test-install-without-backend:
+  test-backend-dependencies:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-      - name: Run install script without backend
+      - name: Run install script
         run: |
-          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
+          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Verify minimal install (no peft)
+      - name: Verify backend dependencies installed
         run: |
-          # peft should NOT be installed in minimal mode
-          "${TUFT_HOME}/venv/bin/python" -c "import peft" 2>&1 && exit 1 || echo "peft not installed (expected)"
-          # tuft should still be importable
-          "${TUFT_HOME}/venv/bin/python" -c "import tuft; print('tuft imported successfully')"
+          "${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
+          "${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
@@ -150,13 +154,20 @@ jobs:
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Test upgrade command
+      - name: Test upgrade command (from PyPI)
         run: |
           export PATH="${TUFT_HOME}/bin:$PATH"
           tuft upgrade
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
+      - name: Test upgrade command (from local source)
+        run: |
+          export PATH="${TUFT_HOME}/bin:$PATH"
+          tuft upgrade --local-source "$GITHUB_WORKSPACE"
+        env:
+          TUFT_HOME: ${{ runner.temp }}/tuft
       - name: Clean up installation
         run: rm -rf "${TUFT_HOME}"
         env:
@@ -171,7 +182,7 @@ jobs:
       - name: Initial install
         run: |
-          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
+          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
@@ -184,7 +195,7 @@ jobs:
       - name: Reinstall with --clean
         run: |
-          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend --clean
+          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --clean
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
@@ -202,36 +213,30 @@ jobs:
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-  test-install-backend-command:
+  test-upgrade-from-source:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-      - name: Install without backend first
-        run: |
-          bash scripts/install.sh --local-source "$GITHUB_WORKSPACE" --without-backend
-        env:
-          TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Verify peft is NOT installed
-        run: |
-          "${TUFT_HOME}/venv/bin/python" -c "import peft" 2>&1 && exit 1 || echo "peft not installed (expected)"
+      - name: Install tuft
+        run: bash scripts/install.sh --local-source "$GITHUB_WORKSPACE"
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Run install-backend command
+      - name: Test upgrade --from-source
         run: |
           export PATH="${TUFT_HOME}/bin:$PATH"
-          tuft install-backend
+          tuft upgrade --from-source
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft
-      - name: Verify backend dependencies now installed
+      - name: Verify tuft still works after upgrade
         run: |
-          "${TUFT_HOME}/venv/bin/python" -c "import peft; print('peft imported successfully')"
-          "${TUFT_HOME}/venv/bin/python" -c "import redis; print('redis imported successfully')"
+          export PATH="${TUFT_HOME}/bin:$PATH"
+          tuft version
+          tuft launch --help
         env:
           TUFT_HOME: ${{ runner.temp }}/tuft

{tuft-0.1.1 → tuft-0.1.3}/.github/workflows/unittest.yml RENAMED Viewed

@@ -31,11 +31,10 @@ jobs:
     - name: Check ray status
       working-directory: tuft-${{ github.run_id }}/.github/workflows/docker
       run: |
-        MAX_RETRIES=20
-        RETRY_INTERVAL=5
+        MAX_RETRIES=90
+        RETRY_INTERVAL=30
         for i in $(seq 1 $MAX_RETRIES); do
-          if docker compose exec tuft-node-1 bash -c "source /opt/venv/bin/activate && ray status" \
-            && docker compose exec tuft-node-2 bash -c "source /opt/venv/bin/activate && ray status"; then
+          if docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && ray status"; then
             break
           fi
           echo "Waiting for ray cluster to be ready... ($i/$MAX_RETRIES)"
@@ -51,7 +50,7 @@ jobs:
       # set a github env variable to indicate tests were run, so that subsequent steps can check it
       run: |
         echo "tests_run=true" >> $GITHUB_ENV
-        docker compose exec tuft-node-1 bash -c "source /opt/venv/bin/activate && pytest tests -v -s --gpu --basetemp /mnt/checkpoints --ctrf report.json"
+        docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && pytest tests -v -s --gpu --basetemp /mnt/checkpoints --ctrf report.json"
     - name: Convert report.json time to ms
       working-directory: tuft-${{ github.run_id }}

{tuft-0.1.1 → tuft-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tuft
-Version: 0.1.1
+Version: 0.1.3
 Summary: A multi-tenant fine-tuning platform for LLMs with Tinker-compatible API
 Author-email: TuFT Developers <tuft@list.alibaba-inc.com>
 License: MIT License
@@ -29,6 +29,7 @@ Requires-Python: >=3.11
 Requires-Dist: fastapi>=0.125.0
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: numpy<2.0.0
+Requires-Dist: nvidia-ml-py>=13.0.0
 Requires-Dist: omegaconf>=2.3.0
 Requires-Dist: opentelemetry-api>=1.20.0
 Requires-Dist: opentelemetry-exporter-otlp>=1.20.0
@@ -36,7 +37,6 @@ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
 Requires-Dist: opentelemetry-instrumentation-logging>=0.41b0
 Requires-Dist: opentelemetry-sdk>=1.20.0
 Requires-Dist: psutil>=5.9.0
-Requires-Dist: pynvml>=11.5.0
 Requires-Dist: ray>=2.50.0
 Requires-Dist: tinker>=0.7.0
 Requires-Dist: transformers<5.0.0,>=4.57.3
@@ -300,7 +300,7 @@ uv pip install "tuft[dev,backend,persistence]"
 The CLI starts a FastAPI server:
 ```bash
-tuft --port 10610 --config /path/to/tuft_config.yaml
+tuft launch --port 10610 --config /path/to/tuft_config.yaml
 ```
 The config file `tuft_config.yaml` specifies server settings including available base models, authentication, persistence, and telemetry. Below is a minimal example.
@@ -340,7 +340,7 @@ you can use the pre-built Docker image.
         -p 10610:10610 \
         -v <host_dir>:/data \
         ghcr.io/agentscope-ai/tuft:latest \
-        tuft --port 10610 --config /data/tuft_config.yaml
+        tuft launch --port 10610 --config /data/tuft_config.yaml
     ```
     Please replace `<host_dir>` with a directory on your host machine where you want to store model checkpoints and other data.
@@ -378,77 +378,25 @@ We provide practical examples to demonstrate how to use TuFT for training and sa
 ## Persistence
-TuFT supports optional Redis-based persistence for server state. When enabled,
-the server can recover sessions, training runs, and pending futures after a restart.
+TuFT supports optional persistence for server state. When enabled, the server can recover sessions, training runs, sampling sessions, and futures after a restart (and then restore runtime model state from checkpoints).
-To use persistence, install the optional dependency:
+See [docs/persistence.md](docs/persistence.md) for full details (key layout, restore semantics, and safety checks).
 ```bash
-uv pip install tuft[persistence]
+uv pip install "tuft[persistence]"
 ```
-### Persistence Modes
-TuFT provides three persistence modes:
-| Mode | Description | Use Case |
-|------|-------------|----------|
-| `disabled` | No persistence, data in-memory only | Development, testing without state recovery |
-| `redis_url` | External Redis server | Production, multi-instance deployments |
-| `file_redis` | File-backed store | Demos, small-scale testing |
-### Configuration
-Add a `persistence` section to your `tuft_config.yaml` configuration file and choose one of the following modes.
-#### Mode 1: Disabled (Default)
-No configuration needed. All data is stored in memory and lost on restart.
-```yaml
-# tuft_config.yaml
-persistence:
-  mode: disabled
-```
-#### Mode 2: External Redis Server
-Use an external Redis server for production deployments:
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: redis_url
+  mode: REDIS
   redis_url: "redis://localhost:6379/0"
-  namespace: "tuft"
-```
-You can start a local Redis instance using Docker:
-```bash
-docker run -d --name TuFT-redis -p 6379:6379 redis:7-alpine
-```
-#### Mode 3: File-backed Store
-Use the file-backed store for demos or small-scale testing:
-```yaml
-# tuft_config.yaml
-persistence:
-  mode: file_redis
-  file_path: "~/.cache/tuft/file_redis.json"
-  namespace: "tuft"
+  namespace: "persistence-tuft-server"
 ```
 ## Observability (OpenTelemetry)
-TuFT supports optional OpenTelemetry integration for distributed tracing, metrics, and logging.
-This allows you to monitor your TuFT server using observability tools like SigNoz, Jaeger, or Grafana.
-### Configuration
-Add the following `telemetry` section to your `tuft_config.yaml` configuration file:
+TuFT supports optional OpenTelemetry integration for tracing, metrics, and logs. See [docs/telemetry.md](docs/telemetry.md) for details (what TuFT records, correlation keys, Ray context propagation, and collector setup).
 ```yaml
 # tuft_config.yaml
@@ -457,10 +405,6 @@ telemetry:
   service_name: tuft
   otlp_endpoint: http://localhost:4317  # Your OTLP collector endpoint
   resource_attributes: {}
-    # example:
-    # deployment.environment: production
-    # service.version: 1.0.0
-    # service.namespace: my-namespace
 ```
 Alternatively, use environment variables:

{tuft-0.1.1 → tuft-0.1.3}/README.md RENAMED Viewed

@@ -238,7 +238,7 @@ uv pip install "tuft[dev,backend,persistence]"
 The CLI starts a FastAPI server:
 ```bash
-tuft --port 10610 --config /path/to/tuft_config.yaml
+tuft launch --port 10610 --config /path/to/tuft_config.yaml
 ```
 The config file `tuft_config.yaml` specifies server settings including available base models, authentication, persistence, and telemetry. Below is a minimal example.
@@ -278,7 +278,7 @@ you can use the pre-built Docker image.
         -p 10610:10610 \
         -v <host_dir>:/data \
         ghcr.io/agentscope-ai/tuft:latest \
-        tuft --port 10610 --config /data/tuft_config.yaml
+        tuft launch --port 10610 --config /data/tuft_config.yaml
     ```
     Please replace `<host_dir>` with a directory on your host machine where you want to store model checkpoints and other data.
@@ -316,77 +316,25 @@ We provide practical examples to demonstrate how to use TuFT for training and sa
 ## Persistence
-TuFT supports optional Redis-based persistence for server state. When enabled,
-the server can recover sessions, training runs, and pending futures after a restart.
+TuFT supports optional persistence for server state. When enabled, the server can recover sessions, training runs, sampling sessions, and futures after a restart (and then restore runtime model state from checkpoints).
-To use persistence, install the optional dependency:
+See [docs/persistence.md](docs/persistence.md) for full details (key layout, restore semantics, and safety checks).
 ```bash
-uv pip install tuft[persistence]
+uv pip install "tuft[persistence]"
 ```
-### Persistence Modes
-TuFT provides three persistence modes:
-| Mode | Description | Use Case |
-|------|-------------|----------|
-| `disabled` | No persistence, data in-memory only | Development, testing without state recovery |
-| `redis_url` | External Redis server | Production, multi-instance deployments |
-| `file_redis` | File-backed store | Demos, small-scale testing |
-### Configuration
-Add a `persistence` section to your `tuft_config.yaml` configuration file and choose one of the following modes.
-#### Mode 1: Disabled (Default)
-No configuration needed. All data is stored in memory and lost on restart.
-```yaml
-# tuft_config.yaml
-persistence:
-  mode: disabled
-```
-#### Mode 2: External Redis Server
-Use an external Redis server for production deployments:
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: redis_url
+  mode: REDIS
   redis_url: "redis://localhost:6379/0"
-  namespace: "tuft"
-```
-You can start a local Redis instance using Docker:
-```bash
-docker run -d --name TuFT-redis -p 6379:6379 redis:7-alpine
-```
-#### Mode 3: File-backed Store
-Use the file-backed store for demos or small-scale testing:
-```yaml
-# tuft_config.yaml
-persistence:
-  mode: file_redis
-  file_path: "~/.cache/tuft/file_redis.json"
-  namespace: "tuft"
+  namespace: "persistence-tuft-server"
 ```
 ## Observability (OpenTelemetry)
-TuFT supports optional OpenTelemetry integration for distributed tracing, metrics, and logging.
-This allows you to monitor your TuFT server using observability tools like SigNoz, Jaeger, or Grafana.
-### Configuration
-Add the following `telemetry` section to your `tuft_config.yaml` configuration file:
+TuFT supports optional OpenTelemetry integration for tracing, metrics, and logs. See [docs/telemetry.md](docs/telemetry.md) for details (what TuFT records, correlation keys, Ray context propagation, and collector setup).
 ```yaml
 # tuft_config.yaml
@@ -395,10 +343,6 @@ telemetry:
   service_name: tuft
   otlp_endpoint: http://localhost:4317  # Your OTLP collector endpoint
   resource_attributes: {}
-    # example:
-    # deployment.environment: production
-    # service.version: 1.0.0
-    # service.namespace: my-namespace
 ```
 Alternatively, use environment variables:

{tuft-0.1.1 → tuft-0.1.3}/config/tuft_config.example.yaml RENAMED Viewed

@@ -4,7 +4,7 @@
 # Copy this file to your desired location and modify as needed.
 #
 # Usage:
-#   tuft --config /path/to/your/tuft_config.yaml
+#   tuft launch --config /path/to/your/tuft_config.yaml
 # =============================================================================
 # Checkpoint Directory
@@ -79,23 +79,38 @@ authorized_users:
 # Persistence Configuration
 # =============================================================================
 # Configure state persistence for recovery after server restart.
+# For detailed documentation, see the "Persistence" section in README.md.
 #
 # Available modes:
-#   - disabled: No persistence (default)
-#   - redis_url: External Redis server
-#   - file_redis: File-backed store
+#   - DISABLE: No persistence (default)
+#   - REDIS: External Redis server
+#   - FILE: File-backed store
 persistence:
-  mode: disabled  # Options: disabled, redis_url, file_redis
+  mode: DISABLE  # Options: DISABLE, REDIS, FILE
-  # For redis_url mode:
+  # For REDIS mode:
   # redis_url: "redis://localhost:6379/0"
-  # For file_redis mode:
+  # For FILE mode:
   # file_path: "~/.cache/tuft/file_redis.json"
-  # Namespace prefix for Redis keys (optional)
-  # namespace: "tuft"
+  # Namespace prefix for Redis keys. (optional, defaults to "persistence-tuft-server".)
+  # namespace: "persistence-tuft-server"
+  # TTL (Time-To-Live) for future records in seconds.
+  # Futures are short-lived async operation results that expire after this duration.
+  # Set to null for no expiry (not recommended for production).
+  # Default: 86400 (1 day)
+  # future_ttl_seconds: 86400
+  # Fields to validate on server restart for config consistency.
+  # For detailed documentation on available fields and config validation,
+  # see the "Configuration Validation" section in README.md.
+  # Defaults to ["SUPPORTED_MODELS"]. SUPPORTED_MODELS is always checked.
+  # check_fields:
+  #   - SUPPORTED_MODELS
+  #   - CHECKPOINT_DIR
 # =============================================================================
 # Telemetry Configuration (OpenTelemetry)

{tuft-0.1.1 → tuft-0.1.3}/docker/Dockerfile RENAMED Viewed

@@ -7,7 +7,7 @@
 #
 # Note:
 # This Dockerfile uses 'uv' to create a virtual environment for better package management.
-# The uv virtual environment is created at `/opt/venv`, use `source /opt/venv/bin/activate` to activate it.
+# The uv virtual environment is created at `/root/.tuft/venv`, use `source /root/.tuft/venv/bin/activate` to activate it.
 # Make sure to use `uv pip` to install packages within the virtual environment.
 FROM nvcr.io/nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04
@@ -23,13 +23,14 @@ RUN chmod 1777 /tmp && apt update && apt install -y --no-install-recommends \
     && ln -sf /usr/bin/python3 /usr/bin/python \
     && ln -sf /usr/bin/pip3 /usr/bin/pip
-ENV VIRTUAL_ENV=/opt/venv
+ENV VIRTUAL_ENV=/root/.tuft/venv
 # copy the TuFT dir into the workspace
 COPY ./pyproject.toml .
 COPY ./LICENSE .
 COPY ./README.md .
 COPY ./src ./src
+COPY ./scripts ./scripts
 #  Uncomment the following line if you want to use AliCloud Mirror to speed up pip install
 # ENV UV_DEFAULT_INDEX=http://mirrors.cloud.aliyuncs.com/pypi/simple/
@@ -37,14 +38,11 @@ COPY ./src ./src
 # Uncomment the following line to use a Hugging Face mirror if you have network connection problem with Hugging Face
 # ENV HF_ENDPOINT=https://hf-mirror.com
-# Install uv
-RUN pip install uv && uv venv ${VIRTUAL_ENV} --python=python3.12
-# Install minimal TuFT
-RUN . ${VIRTUAL_ENV}/bin/activate && uv pip install -e .[dev,backend,persistence]
-# Install flash_attn
-RUN . ${VIRTUAL_ENV}/bin/activate && uv pip install flash_attn==2.8.1 --no-build-isolation
+# Install
+RUN bash ./scripts/install.sh --local-source /workspace \
+    && . $HOME/.local/bin/env \
+    && . /root/.tuft/venv/bin/activate \
+    && uv pip install .[dev]
 ENTRYPOINT ["/bin/bash", "-c", "source ${VIRTUAL_ENV}/bin/activate && exec \"$@\"", "--"]
 CMD ["bash"]

tuft 0.1.1__tar.gz → 0.1.3__tar.gz

tuft 0.1.1__tar.gz → 0.1.3__tar.gz