PyPI - truss - Versions diffs - 0.11.6rc102__py3-none-any.whl → 0.11.24rc2__py3-none-any.whl - Mend

truss 0.11.6rc102__py3-none-any.whl → 0.11.24rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

truss/api/__init__.py +5 -2
truss/base/constants.py +1 -0
truss/base/trt_llm_config.py +14 -3
truss/base/truss_config.py +19 -4
truss/cli/chains_commands.py +49 -1
truss/cli/cli.py +38 -7
truss/cli/logs/base_watcher.py +31 -12
truss/cli/logs/model_log_watcher.py +24 -1
truss/cli/remote_cli.py +29 -0
truss/cli/resolvers/chain_team_resolver.py +82 -0
truss/cli/resolvers/model_team_resolver.py +90 -0
truss/cli/resolvers/training_project_team_resolver.py +81 -0
truss/cli/train/cache.py +332 -0
truss/cli/train/core.py +57 -163
truss/cli/train/deploy_checkpoints/__init__.py +2 -2
truss/cli/train/deploy_checkpoints/deploy_checkpoints.py +236 -103
truss/cli/train/deploy_checkpoints/deploy_checkpoints_helpers.py +1 -52
truss/cli/train/deploy_checkpoints/deploy_full_checkpoints.py +1 -86
truss/cli/train/deploy_checkpoints/deploy_lora_checkpoints.py +1 -85
truss/cli/train/deploy_checkpoints/deploy_whisper_checkpoints.py +1 -56
truss/cli/train/types.py +18 -9
truss/cli/train_commands.py +180 -35
truss/cli/utils/common.py +40 -3
truss/contexts/image_builder/serving_image_builder.py +17 -4
truss/remote/baseten/api.py +215 -9
truss/remote/baseten/core.py +63 -7
truss/remote/baseten/custom_types.py +1 -0
truss/remote/baseten/remote.py +42 -2
truss/remote/baseten/service.py +0 -7
truss/remote/baseten/utils/transfer.py +5 -2
truss/templates/base.Dockerfile.jinja +8 -4
truss/templates/control/control/application.py +51 -26
truss/templates/control/control/endpoints.py +1 -5
truss/templates/control/control/helpers/inference_server_process_controller.py +10 -4
truss/templates/control/control/helpers/truss_patch/model_container_patch_applier.py +33 -18
truss/templates/control/control/server.py +1 -1
truss/templates/control/requirements.txt +1 -2
truss/templates/docker_server/proxy.conf.jinja +13 -0
truss/templates/docker_server/supervisord.conf.jinja +2 -1
truss/templates/no_build.Dockerfile.jinja +1 -0
truss/templates/server/requirements.txt +2 -3
truss/templates/server/truss_server.py +2 -5
truss/templates/server.Dockerfile.jinja +12 -12
truss/templates/shared/lazy_data_resolver.py +214 -2
truss/templates/shared/util.py +6 -5
truss/tests/cli/chains/test_chains_team_parameter.py +443 -0
truss/tests/cli/test_chains_cli.py +144 -0
truss/tests/cli/test_cli.py +134 -1
truss/tests/cli/test_cli_utils_common.py +11 -0
truss/tests/cli/test_model_team_resolver.py +279 -0
truss/tests/cli/train/test_cache_view.py +240 -3
truss/tests/cli/train/test_deploy_checkpoints.py +2 -846
truss/tests/cli/train/test_train_cli_core.py +2 -2
truss/tests/cli/train/test_train_team_parameter.py +395 -0
truss/tests/conftest.py +187 -0
truss/tests/contexts/image_builder/test_serving_image_builder.py +10 -5
truss/tests/remote/baseten/test_api.py +122 -3
truss/tests/remote/baseten/test_chain_upload.py +294 -0
truss/tests/remote/baseten/test_core.py +86 -0
truss/tests/remote/baseten/test_remote.py +216 -288
truss/tests/remote/baseten/test_service.py +56 -0
truss/tests/templates/control/control/conftest.py +20 -0
truss/tests/templates/control/control/test_endpoints.py +4 -0
truss/tests/templates/control/control/test_server.py +8 -24
truss/tests/templates/control/control/test_server_integration.py +4 -2
truss/tests/test_config.py +21 -12
truss/tests/test_data/server.Dockerfile +3 -1
truss/tests/test_data/test_build_commands_truss/__init__.py +0 -0
truss/tests/test_data/test_build_commands_truss/config.yaml +14 -0
truss/tests/test_data/test_build_commands_truss/model/model.py +12 -0
truss/tests/test_data/test_build_commands_truss/packages/constants/constants.py +1 -0
truss/tests/test_data/test_truss_server_model_cache_v1/config.yaml +1 -0
truss/tests/test_model_inference.py +13 -0
truss/tests/util/test_env_vars.py +8 -3
truss/util/__init__.py +0 -0
truss/util/env_vars.py +19 -8
truss/util/error_utils.py +37 -0
{truss-0.11.6rc102.dist-info → truss-0.11.24rc2.dist-info}/METADATA +2 -2
{truss-0.11.6rc102.dist-info → truss-0.11.24rc2.dist-info}/RECORD +88 -70
{truss-0.11.6rc102.dist-info → truss-0.11.24rc2.dist-info}/WHEEL +1 -1
truss_chains/deployment/deployment_client.py +16 -4
truss_chains/private_types.py +18 -0
truss_chains/public_api.py +3 -0
truss_train/definitions.py +6 -4
truss_train/deployment.py +43 -21
truss_train/public_api.py +4 -2
{truss-0.11.6rc102.dist-info → truss-0.11.24rc2.dist-info}/entry_points.txt +0 -0
{truss-0.11.6rc102.dist-info → truss-0.11.24rc2.dist-info}/licenses/LICENSE +0 -0

truss/remote/baseten/remote.py CHANGED Viewed

@@ -31,6 +31,7 @@ from truss.remote.baseten.core import (
     get_model_and_versions,
     get_prod_version_from_versions,
     get_truss_watch_state,
+    upload_chain_artifact,
     upload_truss,
     validate_truss_config_against_backend,
 )
@@ -68,6 +69,7 @@ class FinalPushData(custom_types.OracleData):
     origin: Optional[custom_types.ModelOrigin] = None
     environment: Optional[str] = None
     allow_truss_download: bool
+    team_id: Optional[str] = None
 class BasetenRemote(TrussRemote):
@@ -126,6 +128,8 @@ class BasetenRemote(TrussRemote):
         origin: Optional[custom_types.ModelOrigin] = None,
         environment: Optional[str] = None,
         progress_bar: Optional[Type["progress.Progress"]] = None,
+        deploy_timeout_minutes: Optional[int] = None,
+        team_id: Optional[str] = None,
     ) -> FinalPushData:
         if model_name.isspace():
             raise ValueError("Model name cannot be empty")
@@ -163,6 +167,13 @@ class BasetenRemote(TrussRemote):
                 "Deployment name must only contain alphanumeric, -, _ and . characters"
             )
+        if deploy_timeout_minutes is not None and (
+            deploy_timeout_minutes < 10 or deploy_timeout_minutes > 1440
+        ):
+            raise ValueError(
+                "deploy-timeout-minutes must be between 10 minutes and 1440 minutes (24 hours)"
+            )
         model_id = exists_model(self._api, model_name)
         if model_id is not None and disable_truss_download:
@@ -187,6 +198,7 @@ class BasetenRemote(TrussRemote):
             origin=origin,
             environment=environment,
             allow_truss_download=not disable_truss_download,
+            team_id=team_id,
         )
     def push(  # type: ignore
@@ -204,6 +216,8 @@ class BasetenRemote(TrussRemote):
         progress_bar: Optional[Type["progress.Progress"]] = None,
         include_git_info: bool = False,
         preserve_env_instance_type: bool = True,
+        deploy_timeout_minutes: Optional[int] = None,
+        team_id: Optional[str] = None,
     ) -> BasetenService:
         push_data = self._prepare_push(
             truss_handle=truss_handle,
@@ -216,6 +230,8 @@ class BasetenRemote(TrussRemote):
             origin=origin,
             environment=environment,
             progress_bar=progress_bar,
+            deploy_timeout_minutes=deploy_timeout_minutes,
+            team_id=team_id,
         )
         if include_git_info:
@@ -241,6 +257,8 @@ class BasetenRemote(TrussRemote):
             environment=push_data.environment,
             truss_user_env=truss_user_env,
             preserve_env_instance_type=preserve_env_instance_type,
+            deploy_timeout_minutes=deploy_timeout_minutes,
+            team_id=push_data.team_id,
         )
         if model_version_handle.instance_type_name:
@@ -263,9 +281,13 @@ class BasetenRemote(TrussRemote):
         entrypoint_artifact: custom_types.ChainletArtifact,
         dependency_artifacts: List[custom_types.ChainletArtifact],
         truss_user_env: b10_types.TrussUserEnv,
+        chain_root: Optional[Path] = None,
         publish: bool = False,
         environment: Optional[str] = None,
         progress_bar: Optional[Type["progress.Progress"]] = None,
+        disable_chain_download: bool = False,
+        deployment_name: Optional[str] = None,
+        team_id: Optional[str] = None,
     ) -> ChainDeploymentHandleAtomic:
         # If we are promoting a model to an environment after deploy, it must be published.
         # Draft models cannot be promoted.
@@ -285,6 +307,8 @@ class BasetenRemote(TrussRemote):
                 publish=publish,
                 origin=custom_types.ModelOrigin.CHAINS,
                 progress_bar=progress_bar,
+                disable_truss_download=disable_chain_download,
+                deployment_name=deployment_name,
             )
             oracle_data = custom_types.OracleData(
                 model_name=push_data.model_name,
@@ -300,6 +324,18 @@ class BasetenRemote(TrussRemote):
                 )
             )
+        # Upload raw chain artifact if chain_root is provided
+        raw_chain_s3_key = None
+        if chain_root is not None:
+            logging.info("Uploading source artifact")
+            # Create a tar file from the chain root directory
+            original_source_tar = archive_dir(dir=chain_root, progress_bar=progress_bar)
+            # Upload the chain artifact to S3
+            raw_chain_s3_key = upload_chain_artifact(
+                api=self._api,
+                serialize_file=original_source_tar,
+                progress_bar=progress_bar,
+            )
         chain_deployment_handle = create_chain_atomic(
             api=self._api,
             chain_name=chain_name,
@@ -308,6 +344,10 @@ class BasetenRemote(TrussRemote):
             is_draft=not publish,
             truss_user_env=truss_user_env,
             environment=environment,
+            original_source_artifact_s3_key=raw_chain_s3_key,
+            allow_truss_download=not disable_chain_download,
+            deployment_name=deployment_name,
+            team_id=team_id,
         )
         logging.info("Successfully pushed to baseten. Chain is building and deploying.")
         return chain_deployment_handle
@@ -571,5 +611,5 @@ class BasetenRemote(TrussRemote):
     ) -> PatchResult:
         return self._patch(watch_path, truss_ignore_patterns, console=None)
-    def upsert_training_project(self, training_project):
-        return self._api.upsert_training_project(training_project)
+    def upsert_training_project(self, training_project, team_id=None):
+        return self._api.upsert_training_project(training_project, team_id=team_id)

truss/remote/baseten/service.py CHANGED Viewed

@@ -137,13 +137,6 @@ class BasetenService(TrussService):
             return decode_content()
-        parsed_response = response.json()
-        if "error" in parsed_response:
-            # In the case that the model is in a non-ready state, the response
-            # will be a json with an `error` key.
-            return parsed_response
         return response.json()
     def authenticate(self) -> dict:

truss/remote/baseten/utils/transfer.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Optional, Type
 import boto3
 from boto3.s3.transfer import TransferConfig
-from truss.util.env_vars import override_env_vars
+from truss.util.env_vars import modify_env_vars
 if TYPE_CHECKING:
     from rich import progress
@@ -26,7 +26,10 @@ def multipart_upload_boto3(
 ) -> None:
     # In the CLI flow, ignore any local ~/.aws/config files,
     # which can interfere with uploading the Truss to S3.
-    with override_env_vars({"AWS_CONFIG_FILE": ""}):
+    aws_env_vars = set(
+        env_var for env_var in os.environ.keys() if env_var.startswith("AWS_")
+    )
+    with modify_env_vars(deletions=aws_env_vars):
         s3_resource = boto3.resource("s3", **credentials)
         filesize = os.stat(file_path).st_size

truss/templates/base.Dockerfile.jinja CHANGED Viewed

@@ -33,7 +33,7 @@ RUN useradd -u {{ app_user_uid }} -ms /bin/bash {{ app_username }}
 ENV DEBIAN_FRONTEND=noninteractive
-{%- set UV_VERSION = "0.7.19" %}
+{%- set UV_VERSION = "0.8.22" %}
 {#
 NB(nikhil): We use a semi-complex uv installation command across the board:
 - A generous UV_HTTP_TIMEOUT (5m) for packages that take a long time to install.
@@ -59,10 +59,12 @@ RUN {{ python_exec_path }} -c "import sys; \
 {% endblock %}
 {% block install_uv %}
-{# Install `uv` and `curl` if not already present in the image. #}
+{# Install `uv` and `curl` if not already present in the image. We validate the expected location for `uv` at the very end
+   due to limitations with `pipefail` in Docker context. #}
 RUN if ! command -v uv >/dev/null 2>&1; then \
     command -v curl >/dev/null 2>&1 || (apt update && apt install -y curl) && \
-    curl -LsSf --retry 5 --retry-delay 5 https://astral.sh/uv/{{ UV_VERSION }}/install.sh | sh; \
+    curl -LsSf --retry 5 --retry-delay 5 https://astral.sh/uv/{{ UV_VERSION }}/install.sh | sh && \
+    test -x ${HOME}/.local/bin/uv; \
 fi
 {# Add the user's local bin to the path, used by uv. #}
 ENV PATH=${PATH}:${HOME}/.local/bin
@@ -113,9 +115,11 @@ WORKDIR $APP_HOME
 {% endblock %}
+{% set packages_dir = "/packages" %}
+RUN mkdir -p {{ packages_dir }}
 {% block bundled_packages_copy %}
     {%- if bundled_packages_dir_exists %}
-COPY --chown={{ default_owner }} ./{{ config.bundled_packages_dir }} /packages
+COPY --chown={{ default_owner }} ./{{ config.bundled_packages_dir }} {{ packages_dir }}
 {%- endif %}
 {% endblock %}

truss/templates/control/control/application.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import asyncio
+import http
 import logging
 import logging.config
 import re
+import traceback
 from pathlib import Path
-from typing import Dict
+from typing import Awaitable, Callable, Dict
 import httpx
 from endpoints import control_app
-from fastapi import FastAPI
+from fastapi import FastAPI, Request, Response
 from fastapi.responses import JSONResponse
 from helpers.errors import ModelLoadFailed, PatchApplicatonError
 from helpers.inference_server_controller import InferenceServerController
@@ -16,22 +18,50 @@ from helpers.inference_server_starter import async_inference_server_startup_flow
 from helpers.truss_patch.model_container_patch_applier import ModelContainerPatchApplier
 from shared import log_config
 from starlette.datastructures import State
-async def handle_patch_error(_, exc):
-    error_type = _camel_to_snake_case(type(exc).__name__)
-    return JSONResponse(content={"error": {"type": error_type, "msg": str(exc)}})
-async def generic_error_handler(_, exc):
-    return JSONResponse(
-        content={"error": {"type": "unknown", "msg": f"{type(exc)}: {exc}"}}
-    )
-async def handle_model_load_failed(_, error):
-    # Model load failures should result in 503 status
-    return JSONResponse({"error": str(error)}, 503)
+from starlette.middleware.base import BaseHTTPMiddleware
+SANITIZED_EXCEPTION_FRAMES = 2
+# NB(nikhil): SanitizedExceptionMiddleware will reduce the noise of control server stack frames, since
+# users often complain about the verbosity. Now, if any exceptions are explicitly raised during a proxied
+# request, we'll log the last two stack frames which should be sufficient for debugging while significantly
+# cutting down the volume.
+class SanitizedExceptionMiddleware(BaseHTTPMiddleware):
+    def __init__(self, app, num_frames: int = SANITIZED_EXCEPTION_FRAMES):
+        super().__init__(app)
+        self.num_frames = num_frames
+    async def dispatch(
+        self, request: Request, call_next: Callable[[Request], Awaitable[Response]]
+    ) -> Response:
+        try:
+            return await call_next(request)
+        except Exception as exc:
+            # NB(nikhil): Intentionally bypass error logging for ModelLoadFailed, since health checks
+            # are noisy. The underlying model logs for why the load failed will still be visible.
+            if isinstance(exc, ModelLoadFailed):
+                return JSONResponse(
+                    {"error": str(exc)}, status_code=http.HTTPStatus.BAD_GATEWAY.value
+                )
+            sanitized_traceback = self._create_sanitized_traceback(exc)
+            request.app.state.logger.error(sanitized_traceback)
+            if isinstance(exc, PatchApplicatonError):
+                error_type = _camel_to_snake_case(type(exc).__name__)
+                return JSONResponse({"error": {"type": error_type, "msg": str(exc)}})
+            else:
+                return JSONResponse(
+                    {"error": {"type": "unknown", "msg": str(exc)}},
+                    status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR.value,
+                )
+    def _create_sanitized_traceback(self, error: Exception) -> str:
+        tb_lines = traceback.format_tb(error.__traceback__)
+        if tb_lines and self.num_frames > 0:
+            return "".join(tb_lines[-self.num_frames :])
+        return f"{type(error).__name__}: {error}"
 def create_app(base_config: Dict):
@@ -57,10 +87,9 @@ def create_app(base_config: Dict):
         base_url=f"http://localhost:{app_state.inference_server_port}", limits=limits
     )
-    pip_path = getattr(app_state, "pip_path", None)
+    uv_path = getattr(app_state, "uv_path", None)
     patch_applier = ModelContainerPatchApplier(
-        Path(app_state.inference_server_home), app_logger, pip_path
+        Path(app_state.inference_server_home), app_logger, uv_path
     )
     oversee_inference_server = getattr(app_state, "oversee_inference_server", True)
@@ -82,14 +111,10 @@ def create_app(base_config: Dict):
     app = FastAPI(
         title="Truss Live Reload Server",
         on_startup=[start_background_inference_startup],
-        exception_handlers={
-            PatchApplicatonError: handle_patch_error,
-            ModelLoadFailed: handle_model_load_failed,
-            Exception: generic_error_handler,
-        },
     )
     app.state = app_state
     app.include_router(control_app)
+    app.add_middleware(SanitizedExceptionMiddleware)
     @app.on_event("shutdown")
     def on_shutdown():

truss/templates/control/control/endpoints.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, Optional, Protocol
 import httpx
 from fastapi import APIRouter, WebSocket
 from fastapi.responses import JSONResponse, StreamingResponse
+from helpers.errors import ModelLoadFailed, ModelNotReady
 from httpx_ws import AsyncWebSocketSession, WebSocketDisconnect, aconnect_ws
 from httpx_ws import _exceptions as httpx_ws_exceptions
 from starlette.requests import ClientDisconnect, Request
@@ -13,11 +14,6 @@ from starlette.websockets import WebSocketDisconnect as StartletteWebSocketDisco
 from tenacity import RetryCallState, Retrying, retry_if_exception_type, wait_fixed
 from wsproto.events import BytesMessage, TextMessage
-from truss.templates.control.control.helpers.errors import (
-    ModelLoadFailed,
-    ModelNotReady,
-)
 INFERENCE_SERVER_START_WAIT_SECS = 60
 BASE_RETRY_EXCEPTIONS = (
     retry_if_exception_type(httpx.ConnectError)

truss/templates/control/control/helpers/inference_server_process_controller.py CHANGED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 from typing import List, Optional
 from helpers.context_managers import current_directory
+from shared.util import kill_child_processes
 INFERENCE_SERVER_FAILED_FILE = Path("~/inference_server_crashed.txt").expanduser()
 TERMINATION_TIMEOUT_SECS = 120.0
@@ -46,17 +47,22 @@ class InferenceServerProcessController:
             self._inference_server_ever_started = True
             self._logged_unrecoverable_since_last_restart = False
+    def _terminate_children_and_process(self):
+        """Kill child processes first, then parent. Prevents port binding conflicts."""
+        # Use a shorter timeout than the truss patch read timeout (=120s):
+        # see remote/baseten/api.py:_post_graphql_query()
+        kill_child_processes(self._inference_server_process.pid, timeout_seconds=30)
+        self._inference_server_process.terminate()
     def stop(self):
         if self._inference_server_process is not None:
-            self._inference_server_process.terminate()
+            self._terminate_children_and_process()
             self._inference_server_process.wait()
-            # Introduce delay to avoid failing to grab the port
-            time.sleep(3)
         self._inference_server_started = False
     def terminate_with_wait(self):
-        self._inference_server_process.terminate()
+        self._terminate_children_and_process()
         self._inference_server_terminated = True
         termination_check_attempts = int(
             TERMINATION_TIMEOUT_SECS / TERMINATION_CHECK_INTERVAL_SECS

truss/templates/control/control/helpers/truss_patch/model_container_patch_applier.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import logging
+import os
+import shutil
 import subprocess
 from pathlib import Path
 from typing import Optional
@@ -30,7 +32,7 @@ class ModelContainerPatchApplier:
         self,
         inference_server_home: Path,
         app_logger: logging.Logger,
-        pip_path: Optional[str] = None,  # Only meant for testing
+        uv_path: Optional[str] = None,  # Only meant for testing
     ) -> None:
         self._inference_server_home = inference_server_home
         self._model_module_dir = (
@@ -41,9 +43,19 @@ class ModelContainerPatchApplier:
         ).resolve()
         self._data_dir = self._inference_server_home / self._truss_config.data_dir
         self._app_logger = app_logger
-        self._pip_path_cached = None
-        if pip_path is not None:
-            self._pip_path_cached = "pip"
+        self._uv_path_cached = None
+        if uv_path is not None:
+            self._uv_path_cached = uv_path
+        self._python_executable = self._get_python_executable()
+    def _get_python_executable(self) -> str:
+        # NB(nikhil): `uv` requires the full path to the python interpreter for patching
+        # python modules. We expect PYTHON_EXECUTABLE to exist in all development images, but
+        # we fallback to python3 as a default.
+        python_executable = os.environ.get("PYTHON_EXECUTABLE", "python3")
+        full_executable_path = shutil.which(python_executable)
+        return full_executable_path or python_executable
     def __call__(self, patch: Patch, inf_env: dict):
         self._app_logger.debug(f"Applying patch {patch.to_dict()}")
@@ -79,10 +91,10 @@ class ModelContainerPatchApplier:
         return TrussConfig.from_yaml(self._inference_server_home / "config.yaml")
     @property
-    def _pip_path(self) -> str:
-        if self._pip_path_cached is None:
-            self._pip_path_cached = _identify_pip_path()
-        return self._pip_path_cached
+    def _uv_path(self) -> str:
+        if self._uv_path_cached is None:
+            self._uv_path_cached = _identify_uv_path()
+        return self._uv_path_cached
     def _apply_python_requirement_patch(
         self, python_requirement_patch: PythonRequirementPatch
@@ -95,20 +107,25 @@ class ModelContainerPatchApplier:
         if action == Action.REMOVE:
             subprocess.run(
                 [
-                    self._pip_path,
+                    self._uv_path,
+                    "pip",
                     "uninstall",
-                    "-y",
                     python_requirement_patch.requirement,
+                    "--python",
+                    self._python_executable,
                 ],
                 check=True,
             )
         elif action in [Action.ADD, Action.UPDATE]:
             subprocess.run(
                 [
-                    self._pip_path,
+                    self._uv_path,
+                    "pip",
                     "install",
                     python_requirement_patch.requirement,
                     "--upgrade",
+                    "--python",
+                    self._python_executable,
                 ],
                 check=True,
             )
@@ -158,11 +175,9 @@ class ModelContainerPatchApplier:
             raise ValueError(f"Unknown patch action {action}")
-def _identify_pip_path() -> str:
-    if Path("/usr/local/bin/pip3").exists():
-        return "/usr/local/bin/pip3"
-    if Path("/usr/local/bin/pip").exists():
-        return "/usr/local/bin/pip"
+def _identify_uv_path() -> str:
+    uv_path = shutil.which("uv")
+    if not uv_path:
+        raise RuntimeError("Unable to find `uv`, make sure it's installed.")
-    raise RuntimeError("Unable to find pip, make sure it's installed.")
+    return uv_path

truss/templates/control/control/server.py CHANGED Viewed

@@ -72,9 +72,9 @@ class ControlServer:
             # httptools installed, which does not work with our requests & version
             # of uvicorn.
             http="h11",
+            loop="uvloop",
             **extra_kwargs,
         )
-        cfg.setup_event_loop()
         server = uvicorn.Server(cfg)
         asyncio.run(server.serve())

truss/templates/control/requirements.txt CHANGED Viewed

@@ -7,7 +7,6 @@ python-json-logger>=2.0.2
 tenacity>=8.1.0
  # To avoid divergence, this should follow the latest release.
 truss==0.11.1
-# NB(nikhil): Uvicorn 0.36.0 has breaking changes for the event loop, so we pin to a lower version.
-uvicorn>=0.24.0,<0.36.0
+uvicorn>=0.24.0
 uvloop>=0.19.0
 websockets>=10.0

truss/templates/docker_server/proxy.conf.jinja CHANGED Viewed

@@ -45,6 +45,19 @@ server {
         proxy_pass http://127.0.0.1:{{server_port}};
     }
+    location ~ ^/v1/websocket$ {
+        proxy_redirect off;
+        proxy_read_timeout 18030s;
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $upgrade_header;
+        proxy_set_header Connection $connection_header;
+        rewrite ^/v1/websocket$ {{server_endpoint}} break;
+        proxy_pass http://127.0.0.1:{{server_port}};
+    }
     # Forward all other paths
     location / {
         proxy_redirect off;

truss/templates/docker_server/supervisord.conf.jinja CHANGED Viewed

@@ -7,8 +7,9 @@ logfile_maxbytes=0           ; No size limit on logfile (since logging is disabl
 [program:model-server]
 command={{start_command}}    ; Command to start the model server (provided by Jinja variable)
 startsecs=30                 ; Wait 30 seconds before assuming the server is running
+startretries=0               ; Do not retry if server fails to start
 autostart=true               ; Automatically start the program when supervisord starts
-autorestart=true             ; Always restart the program if it exits, no matter what the exit code
+autorestart=false             ; Don't restart the program
 stdout_logfile=/dev/fd/1     ; Send stdout to the first file descriptor (stdout)
 stdout_logfile_maxbytes=0    ; No size limit on stdout log
 redirect_stderr=true         ; Redirect stderr to stdout

truss/templates/no_build.Dockerfile.jinja ADDED Viewed

@@ -0,0 +1 @@
+FROM {{ config.base_image.image }}

truss/templates/server/requirements.txt CHANGED Viewed

@@ -18,8 +18,7 @@ psutil>=5.9.4
 python-json-logger>=2.0.2
 pyyaml>=6.0.0
 requests>=2.31.0
-truss-transfer==0.0.31
-# NB(nikhil): Uvicorn 0.36.0 has breaking changes for the event loop, so we pin to a lower version.
-uvicorn>=0.24.0,<0.36.0
+truss-transfer==0.0.38
+uvicorn>=0.24.0
 uvloop>=0.19.0
 websockets>=10.0

truss/templates/server/truss_server.py CHANGED Viewed

@@ -185,7 +185,7 @@ class BasetenEndpoints:
         request_id = request.headers.get("x-baseten-request-id")
         logging.debug(
-            f"Request received - {request.method} {method.__name__} "
+            f"[DEBUG] Request received - {request.method} /{method.__name__} "
             f", Request ID: {request_id}"
         )
         self.check_healthy()
@@ -470,9 +470,6 @@ class TrussServer:
             if self._config["runtime"].get("enable_debug_logs", False)
             else "INFO"
         )
-        logging.info(f"Starting truss server with log level {log_level}")
-        logging.info(f"Config: {self._config["runtime"]}")
         extra_kwargs = {}
         # We don't pass these if not set, to not override the default.
         if (
@@ -500,9 +497,9 @@ class TrussServer:
             timeout_graceful_shutdown=TIMEOUT_GRACEFUL_SHUTDOWN,
             log_config=log_config.make_log_config(log_level),
             ws_max_size=WS_MAX_MSG_SZ_BYTES,
+            loop="uvloop",
             **extra_kwargs,
         )
-        cfg.setup_event_loop()  # Call this so uvloop gets used
         server = uvicorn.Server(config=cfg)
         self._server = server
         asyncio.run(server.serve())

truss/templates/server.Dockerfile.jinja CHANGED Viewed

@@ -56,12 +56,6 @@ RUN mkdir -p {{ dst.parent }}; curl -L "{{ url }}" -o {{ dst }}
 {% endfor %} {#- endfor external_data_files #}
 {%- endif %} {#- endif external_data_files #}
-{%- if build_commands %}
-{% for command in build_commands %}
-RUN {% for secret,path in config.build.secret_to_path_mapping.items() %} --mount=type=secret,id={{ secret }},target={{ path }}{%- endfor %} {{ command }}
-{% endfor %} {#- endfor build_commands #}
-{%- endif %} {#- endif build_commands #}
 {# Copy data before code for better caching #}
 {%- if data_dir_exists %}
 COPY --chown={{ default_owner }} ./{{ config.data_dir }} ${APP_HOME}/data
@@ -69,7 +63,7 @@ COPY --chown={{ default_owner }} ./{{ config.data_dir }} ${APP_HOME}/data
 {%- if model_cache_v2 %}
 {# v0.0.9, keep synced with server_requirements.txt #}
-RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.10.11rc1/truss-transfer-cli-v0.10.11rc1-linux-x86_64-unknown-linux-musl
+RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.13rc3/truss-transfer-cli-v0.11.13rc3-linux-x86_64-unknown-linux-musl
 RUN chmod +x /usr/local/bin/truss-transfer-cli
 RUN mkdir /static-bptr
 RUN echo "hash {{model_cache_hash}}"
@@ -104,12 +98,18 @@ COPY --chown={{ default_owner }} ./{{ config.model_module_dir }} ${APP_HOME}/mod
 {# Macro to change ownership of directories and switch to regular user #}
 {%- macro chown_and_switch_to_regular_user_if_enabled(additional_chown_dirs=[]) -%}
 {%- if non_root_user %}
-RUN chown -R {{ app_username }}:{{ app_username }} {% for dir in additional_chown_dirs %}{{ dir }} {% endfor %}${HOME} ${APP_HOME}
+RUN chown -R {{ app_username }}:{{ app_username }} ${HOME} ${APP_HOME} {{ packages_dir }} {% for dir in additional_chown_dirs %}{{ dir }} {% endfor %}
 USER {{ app_username }}
 {%- endif %} {#- endif non_root_user #}
 {%- endmacro -%}
-    {%- if config.docker_server %}
+{%- if build_commands %}
+{% for command in build_commands %}
+RUN {% for secret,path in config.build.secret_to_path_mapping.items() %} --mount=type=secret,id={{ secret }},target={{ path }}{%- endfor %} {{ command }}
+{% endfor %} {#- endfor build_commands #}
+{%- endif %} {#- endif build_commands #}
+{%- if config.docker_server %}
 RUN apt-get update -y && apt-get install -y --no-install-recommends \
         curl nginx && rm -rf /var/lib/apt/lists/*
 COPY --chown={{ default_owner }} ./docker_server_requirements.txt ${APP_HOME}/docker_server_requirements.txt
@@ -131,7 +131,7 @@ RUN rm -f /etc/nginx/sites-enabled/default
 {{ chown_and_switch_to_regular_user_if_enabled(["/var/lib/nginx", "/var/log/nginx", "/run"]) }}
 ENTRYPOINT ["/docker_server/.venv/bin/supervisord", "-c", "{{ supervisor_config_path }}"]
-    {%- elif requires_live_reload %} {#- elif requires_live_reload #}
+{%- elif requires_live_reload %} {#- elif requires_live_reload #}
 ENV HASH_TRUSS="{{ truss_hash }}"
 ENV CONTROL_SERVER_PORT="8080"
 ENV INFERENCE_SERVER_PORT="8090"
@@ -139,11 +139,11 @@ ENV SERVER_START_CMD="/control/.env/bin/python /control/control/server.py"
 {{ chown_and_switch_to_regular_user_if_enabled() }}
 ENTRYPOINT ["/control/.env/bin/python", "/control/control/server.py"]
-    {%- else %} {#- else (default inference server) #}
+{%- else %} {#- else (default inference server) #}
 ENV INFERENCE_SERVER_PORT="8080"
 ENV SERVER_START_CMD="{{ python_executable }} /app/main.py"
 {{ chown_and_switch_to_regular_user_if_enabled() }}
 ENTRYPOINT ["{{ python_executable }}", "/app/main.py"]
-    {%- endif %} {#- endif config.docker_server / live_reload #}
+{%- endif %} {#- endif config.docker_server / live_reload #}
 {% endblock %} {#- endblock run #}

truss 0.11.6rc102__py3-none-any.whl → 0.11.24rc2__py3-none-any.whl

truss 0.11.6rc102__py3-none-any.whl → 0.11.24rc2__py3-none-any.whl