truss 0.11.8rc7__py3-none-any.whl → 0.11.9rc1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truss might be problematic. Click here for more details.

@@ -329,9 +329,16 @@ pip install truss==0.10.8
329
329
  raise ValueError("Using fp8 context fmha requires paged context fmha")
330
330
  if (
331
331
  self.plugin_configuration.use_fp8_context_fmha
332
- and not self.quantization_type == TrussTRTLLMQuantizationType.FP8_KV
332
+ and self.quantization_type
333
+ not in (
334
+ TrussTRTLLMQuantizationType.FP8_KV,
335
+ TrussTRTLLMQuantizationType.FP4_KV,
336
+ )
333
337
  ):
334
- raise ValueError("Using fp8 context fmha requires fp8 kv cache dtype")
338
+ raise ValueError(
339
+ "Using fp8 context fmha requires fp8 kv, or fp4 with kv cache dtype"
340
+ )
341
+
335
342
  return self
336
343
 
337
344
  def _validate_speculator_config(self):
@@ -2,13 +2,12 @@ import asyncio
2
2
  import logging
3
3
  import logging.config
4
4
  import re
5
- import traceback
6
5
  from pathlib import Path
7
- from typing import Awaitable, Callable, Dict
6
+ from typing import Dict
8
7
 
9
8
  import httpx
10
9
  from endpoints import control_app
11
- from fastapi import FastAPI, Request, Response
10
+ from fastapi import FastAPI
12
11
  from fastapi.responses import JSONResponse
13
12
  from helpers.errors import ModelLoadFailed, PatchApplicatonError
14
13
  from helpers.inference_server_controller import InferenceServerController
@@ -17,45 +16,22 @@ from helpers.inference_server_starter import async_inference_server_startup_flow
17
16
  from helpers.truss_patch.model_container_patch_applier import ModelContainerPatchApplier
18
17
  from shared import log_config
19
18
  from starlette.datastructures import State
20
- from starlette.middleware.base import BaseHTTPMiddleware
21
-
22
- SANITIZED_EXCEPTION_FRAMES = 2
23
-
24
-
25
- class SanitizedExceptionMiddleware(BaseHTTPMiddleware):
26
- def __init__(self, app, num_frames: int = SANITIZED_EXCEPTION_FRAMES):
27
- super().__init__(app)
28
- self.num_frames = num_frames
29
-
30
- async def dispatch(
31
- self, request: Request, call_next: Callable[[Request], Awaitable[Response]]
32
- ) -> Response:
33
- try:
34
- return await call_next(request)
35
- except Exception as exc:
36
- sanitized_traceback = self._create_sanitized_traceback(exc)
37
- if hasattr(request.app.state, "logger"):
38
- request.app.state.logger.error(f"Error:\n{sanitized_traceback}")
39
-
40
- if isinstance(exc, ModelLoadFailed):
41
- return JSONResponse({"error": str(exc)}, status_code=503)
42
- elif isinstance(exc, PatchApplicatonError):
43
- error_type = _camel_to_snake_case(type(exc).__name__)
44
- return JSONResponse(
45
- {"error": {"type": error_type, "msg": str(exc)}}, status_code=400
46
- )
47
- else:
48
- return JSONResponse(
49
- {"error": {"type": "unknown", "msg": str(exc)}}, status_code=500
50
- )
51
-
52
- def _create_sanitized_traceback(self, error: Exception) -> str:
53
- tb_lines = traceback.format_tb(error.__traceback__)
54
- if tb_lines and self.num_frames > 0:
55
- selected_frames = tb_lines[-self.num_frames :]
56
- return "".join(selected_frames).rstrip()
57
- else:
58
- return f"{type(error).__name__}: {error}"
19
+
20
+
21
+ async def handle_patch_error(_, exc):
22
+ error_type = _camel_to_snake_case(type(exc).__name__)
23
+ return JSONResponse(content={"error": {"type": error_type, "msg": str(exc)}})
24
+
25
+
26
+ async def generic_error_handler(_, exc):
27
+ return JSONResponse(
28
+ content={"error": {"type": "unknown", "msg": f"{type(exc)}: {exc}"}}
29
+ )
30
+
31
+
32
+ async def handle_model_load_failed(_, error):
33
+ # Model load failures should result in 503 status
34
+ return JSONResponse({"error": str(error)}, 503)
59
35
 
60
36
 
61
37
  def create_app(base_config: Dict):
@@ -106,10 +82,14 @@ def create_app(base_config: Dict):
106
82
  app = FastAPI(
107
83
  title="Truss Live Reload Server",
108
84
  on_startup=[start_background_inference_startup],
85
+ exception_handlers={
86
+ PatchApplicatonError: handle_patch_error,
87
+ ModelLoadFailed: handle_model_load_failed,
88
+ Exception: generic_error_handler,
89
+ },
109
90
  )
110
91
  app.state = app_state
111
92
  app.include_router(control_app)
112
- app.add_middleware(SanitizedExceptionMiddleware)
113
93
 
114
94
  @app.on_event("shutdown")
115
95
  def on_shutdown():
@@ -5,7 +5,6 @@ from typing import Any, Callable, Dict, Optional, Protocol
5
5
  import httpx
6
6
  from fastapi import APIRouter, WebSocket
7
7
  from fastapi.responses import JSONResponse, StreamingResponse
8
- from helpers.errors import ModelLoadFailed, ModelNotReady
9
8
  from httpx_ws import AsyncWebSocketSession, WebSocketDisconnect, aconnect_ws
10
9
  from httpx_ws import _exceptions as httpx_ws_exceptions
11
10
  from starlette.requests import ClientDisconnect, Request
@@ -14,6 +13,11 @@ from starlette.websockets import WebSocketDisconnect as StartletteWebSocketDisco
14
13
  from tenacity import RetryCallState, Retrying, retry_if_exception_type, wait_fixed
15
14
  from wsproto.events import BytesMessage, TextMessage
16
15
 
16
+ from truss.templates.control.control.helpers.errors import (
17
+ ModelLoadFailed,
18
+ ModelNotReady,
19
+ )
20
+
17
21
  INFERENCE_SERVER_START_WAIT_SECS = 60
18
22
  BASE_RETRY_EXCEPTIONS = (
19
23
  retry_if_exception_type(httpx.ConnectError)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: truss
3
- Version: 0.11.8rc7
3
+ Version: 0.11.9rc1
4
4
  Summary: A seamless bridge from model development to model delivery
5
5
  Project-URL: Repository, https://github.com/basetenlabs/truss
6
6
  Project-URL: Homepage, https://truss.baseten.co
@@ -5,7 +5,7 @@ truss/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  truss/base/constants.py,sha256=sExArdnuGg83z83XMgaQ4b8SS3V_j_bJEpOATDGJzpE,3600
6
6
  truss/base/custom_types.py,sha256=FUSIT2lPOQb6gfg6IzT63YBV8r8L6NIZ0D74Fp3e_jQ,2835
7
7
  truss/base/errors.py,sha256=zDVLEvseTChdPP0oNhBBQCtQUtZJUaof5zeWMIjqz6o,691
8
- truss/base/trt_llm_config.py,sha256=CRz3AqGDAyv8YpcBWXUrnfjvNAauyo3yf8ZOGVsSt6g,32782
8
+ truss/base/trt_llm_config.py,sha256=-hRpRsbxnfDaKS-5112yT0iP6R0evOtoTvnn557cwvc,32926
9
9
  truss/base/truss_config.py,sha256=7CtiJIwMHtDU8Wzn8UTJUVVunD0pWFl4QUVycK2aIpY,28055
10
10
  truss/base/truss_spec.py,sha256=jFVF79CXoEEspl2kXBAPyi-rwISReIGTdobGpaIhwJw,5979
11
11
  truss/cli/chains_commands.py,sha256=Kpa5mCg6URAJQE2ZmZfVQFhjBHEitKT28tKiW0H6XAI,17406
@@ -73,8 +73,8 @@ truss/templates/copy_cache_files.Dockerfile.jinja,sha256=Os5zFdYLZ_AfCRGq4RcpVTO
73
73
  truss/templates/docker_server_requirements.txt,sha256=PyhOPKAmKW1N2vLvTfLMwsEtuGpoRrbWuNo7tT6v2Mc,18
74
74
  truss/templates/server.Dockerfile.jinja,sha256=CUYnF_hgxPGq2re7__0UPWlwzOHMoFkxp6NVKi3U16s,7071
75
75
  truss/templates/control/requirements.txt,sha256=nqqNmlTwFeV8sV4fqwItwzzd_egADBP_e-cEopXBJ4k,358
76
- truss/templates/control/control/application.py,sha256=XfA5udraulB0z6s4J_S05w0y1TJGcoMuC3jJOjbPVu4,4839
77
- truss/templates/control/control/endpoints.py,sha256=KzqsLVNJE6r6TCPW8D5FMCtsfHadTwR15A3z_viGxmM,11782
76
+ truss/templates/control/control/application.py,sha256=jYeta6hWe1SkfLL3W4IDmdYjg3ZuKqI_UagWYs5RB_E,3793
77
+ truss/templates/control/control/endpoints.py,sha256=VQ1lvZjFvR091yRkiFdvXw1Q7PiNGXT9rJwY7_sX6yg,11828
78
78
  truss/templates/control/control/server.py,sha256=R4Y219i1dcz0kkksN8obLoX-YXWGo9iW1igindyG50c,3128
79
79
  truss/templates/control/control/helpers/context_managers.py,sha256=W6dyFgLBhPa5meqrOb3w_phMtKfaJI-GhwUfpiycDc8,413
80
80
  truss/templates/control/control/helpers/custom_types.py,sha256=n_lTudtLTpy4oPV3aDdJ4X2rh3KCV5btYO9UnTeUouQ,5471
@@ -368,8 +368,8 @@ truss_train/deployment.py,sha256=lWWANSuzBWu2M4oK4qD7n-oVR1JKdmw2Pn5BJQHg-Ck,307
368
368
  truss_train/loader.py,sha256=0o66EjBaHc2YY4syxxHVR4ordJWs13lNXnKjKq2wq0U,1630
369
369
  truss_train/public_api.py,sha256=9N_NstiUlmBuLUwH_fNG_1x7OhGCytZLNvqKXBlStrM,1220
370
370
  truss_train/restore_from_checkpoint.py,sha256=8hdPm-WSgkt74HDPjvCjZMBpvA9MwtoYsxVjOoa7BaM,1176
371
- truss-0.11.8rc7.dist-info/METADATA,sha256=37bkH41Vl6H2ftd6R4aJZWnxqanKAupqM4uOgFNmYlA,6680
372
- truss-0.11.8rc7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
373
- truss-0.11.8rc7.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
374
- truss-0.11.8rc7.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
375
- truss-0.11.8rc7.dist-info/RECORD,,
371
+ truss-0.11.9rc1.dist-info/METADATA,sha256=nvg4yIXu46U0vuOQcptDUP_17YL3LGeFmsFmLPMgZbY,6680
372
+ truss-0.11.9rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
373
+ truss-0.11.9rc1.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
374
+ truss-0.11.9rc1.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
375
+ truss-0.11.9rc1.dist-info/RECORD,,