truss 0.11.8rc6__py3-none-any.whl → 0.11.8rc8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truss might be problematic. Click here for more details.

@@ -329,9 +329,16 @@ pip install truss==0.10.8
329
329
  raise ValueError("Using fp8 context fmha requires paged context fmha")
330
330
  if (
331
331
  self.plugin_configuration.use_fp8_context_fmha
332
- and not self.quantization_type == TrussTRTLLMQuantizationType.FP8_KV
332
+ and self.quantization_type
333
+ not in (
334
+ TrussTRTLLMQuantizationType.FP8_KV,
335
+ TrussTRTLLMQuantizationType.FP4_KV,
336
+ )
333
337
  ):
334
- raise ValueError("Using fp8 context fmha requires fp8 kv cache dtype")
338
+ raise ValueError(
339
+ "Using fp8 context fmha requires fp8 kv, or fp4 with kv cache dtype"
340
+ )
341
+
335
342
  return self
336
343
 
337
344
  def _validate_speculator_config(self):
@@ -10,61 +10,63 @@ import httpx
10
10
  from endpoints import control_app
11
11
  from fastapi import FastAPI, Request, Response
12
12
  from fastapi.responses import JSONResponse
13
- from helpers.errors import ModelLoadFailed, PatchApplicatonError
14
- from helpers.inference_server_controller import InferenceServerController
15
- from helpers.inference_server_process_controller import InferenceServerProcessController
16
- from helpers.inference_server_starter import async_inference_server_startup_flow
17
- from helpers.truss_patch.model_container_patch_applier import ModelContainerPatchApplier
18
13
  from shared import log_config
19
14
  from starlette.datastructures import State
15
+ from starlette.middleware.base import BaseHTTPMiddleware
16
+
17
+ from truss.templates.control.control.helpers.errors import (
18
+ ModelLoadFailed,
19
+ PatchApplicatonError,
20
+ )
21
+ from truss.templates.control.control.helpers.inference_server_controller import (
22
+ InferenceServerController,
23
+ )
24
+ from truss.templates.control.control.helpers.inference_server_process_controller import (
25
+ InferenceServerProcessController,
26
+ )
27
+ from truss.templates.control.control.helpers.inference_server_starter import (
28
+ async_inference_server_startup_flow,
29
+ )
30
+ from truss.templates.control.control.helpers.truss_patch.model_container_patch_applier import (
31
+ ModelContainerPatchApplier,
32
+ )
20
33
 
21
34
  SANITIZED_EXCEPTION_FRAMES = 2
22
35
 
23
36
 
24
- def create_sanitized_traceback(error: Exception, num_frames: int) -> str:
25
- tb_lines = traceback.format_tb(error.__traceback__)
26
- if tb_lines and num_frames > 0:
27
- selected_frames = tb_lines[-num_frames:]
28
- return "".join(selected_frames).rstrip()
29
- else:
30
- return f"{type(error).__name__}: {error}"
31
-
32
-
33
- def sanitize_exception(num_frames=SANITIZED_EXCEPTION_FRAMES):
34
- def decorator(
35
- handler_func: Callable[[Request, Exception], Awaitable[Response]],
36
- ) -> Callable[[Request, Exception], Awaitable[Response]]:
37
- async def wrapper(request: Request, exc: Exception) -> Response:
38
- sanitized_traceback = create_sanitized_traceback(exc, num_frames)
39
- if hasattr(request.app.state, "logger"):
40
- request.app.state.logger.error(sanitized_traceback)
41
-
42
- return await handler_func(request, exc)
43
-
44
- return wrapper
45
-
46
- return decorator
47
-
48
-
49
- @sanitize_exception()
50
- async def handle_patch_error(request, exc):
51
- error_type = _camel_to_snake_case(type(exc).__name__)
52
- return JSONResponse(content={"error": {"type": error_type, "msg": str(exc)}})
53
-
54
-
55
- @sanitize_exception()
56
- async def generic_error_handler(request, exc):
57
- print("CALLED GENERIC ========== ")
58
- return JSONResponse(
59
- content={"error": {"type": "unknown", "msg": f"{type(exc)}: {exc}"}}
60
- )
61
-
62
-
63
- @sanitize_exception()
64
- async def handle_model_load_failed(request, error):
65
- # Model load failures should result in 503 status
66
- print("CALLED MODEL LOAD ========== ")
67
- return JSONResponse({"error": str(error)}, 503)
37
+ class SanitizedExceptionMiddleware(BaseHTTPMiddleware):
38
+ def __init__(self, app, num_frames: int = SANITIZED_EXCEPTION_FRAMES):
39
+ super().__init__(app)
40
+ self.num_frames = num_frames
41
+
42
+ async def dispatch(
43
+ self, request: Request, call_next: Callable[[Request], Awaitable[Response]]
44
+ ) -> Response:
45
+ try:
46
+ return await call_next(request)
47
+ except Exception as exc:
48
+ sanitized_traceback = self._create_sanitized_traceback(exc)
49
+ request.app.state.logger.error(sanitized_traceback)
50
+
51
+ if isinstance(exc, ModelLoadFailed):
52
+ return JSONResponse({"error": str(exc)}, status_code=503)
53
+ elif isinstance(exc, PatchApplicatonError):
54
+ error_type = _camel_to_snake_case(type(exc).__name__)
55
+ return JSONResponse(
56
+ {"error": {"type": error_type, "msg": str(exc)}}, status_code=400
57
+ )
58
+ else:
59
+ return JSONResponse(
60
+ {"error": {"type": "unknown", "msg": str(exc)}}, status_code=500
61
+ )
62
+
63
+ def _create_sanitized_traceback(self, error: Exception) -> str:
64
+ tb_lines = traceback.format_tb(error.__traceback__)
65
+ if tb_lines and self.num_frames > 0:
66
+ selected_frames = tb_lines[-self.num_frames :]
67
+ return "".join(selected_frames).rstrip()
68
+ else:
69
+ return f"{type(error).__name__}: {error}"
68
70
 
69
71
 
70
72
  def create_app(base_config: Dict):
@@ -115,14 +117,10 @@ def create_app(base_config: Dict):
115
117
  app = FastAPI(
116
118
  title="Truss Live Reload Server",
117
119
  on_startup=[start_background_inference_startup],
118
- exception_handlers={
119
- PatchApplicatonError: handle_patch_error,
120
- ModelLoadFailed: handle_model_load_failed,
121
- Exception: generic_error_handler,
122
- },
123
120
  )
124
121
  app.state = app_state
125
122
  app.include_router(control_app)
123
+ app.add_middleware(SanitizedExceptionMiddleware)
126
124
 
127
125
  @app.on_event("shutdown")
128
126
  def on_shutdown():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: truss
3
- Version: 0.11.8rc6
3
+ Version: 0.11.8rc8
4
4
  Summary: A seamless bridge from model development to model delivery
5
5
  Project-URL: Repository, https://github.com/basetenlabs/truss
6
6
  Project-URL: Homepage, https://truss.baseten.co
@@ -5,7 +5,7 @@ truss/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  truss/base/constants.py,sha256=sExArdnuGg83z83XMgaQ4b8SS3V_j_bJEpOATDGJzpE,3600
6
6
  truss/base/custom_types.py,sha256=FUSIT2lPOQb6gfg6IzT63YBV8r8L6NIZ0D74Fp3e_jQ,2835
7
7
  truss/base/errors.py,sha256=zDVLEvseTChdPP0oNhBBQCtQUtZJUaof5zeWMIjqz6o,691
8
- truss/base/trt_llm_config.py,sha256=CRz3AqGDAyv8YpcBWXUrnfjvNAauyo3yf8ZOGVsSt6g,32782
8
+ truss/base/trt_llm_config.py,sha256=-hRpRsbxnfDaKS-5112yT0iP6R0evOtoTvnn557cwvc,32926
9
9
  truss/base/truss_config.py,sha256=7CtiJIwMHtDU8Wzn8UTJUVVunD0pWFl4QUVycK2aIpY,28055
10
10
  truss/base/truss_spec.py,sha256=jFVF79CXoEEspl2kXBAPyi-rwISReIGTdobGpaIhwJw,5979
11
11
  truss/cli/chains_commands.py,sha256=Kpa5mCg6URAJQE2ZmZfVQFhjBHEitKT28tKiW0H6XAI,17406
@@ -73,7 +73,7 @@ truss/templates/copy_cache_files.Dockerfile.jinja,sha256=Os5zFdYLZ_AfCRGq4RcpVTO
73
73
  truss/templates/docker_server_requirements.txt,sha256=PyhOPKAmKW1N2vLvTfLMwsEtuGpoRrbWuNo7tT6v2Mc,18
74
74
  truss/templates/server.Dockerfile.jinja,sha256=CUYnF_hgxPGq2re7__0UPWlwzOHMoFkxp6NVKi3U16s,7071
75
75
  truss/templates/control/requirements.txt,sha256=nqqNmlTwFeV8sV4fqwItwzzd_egADBP_e-cEopXBJ4k,358
76
- truss/templates/control/control/application.py,sha256=FuL4DibeWy9ux81B5JhUnXuyu0Ro1t4UnmE-_W89gg4,4967
76
+ truss/templates/control/control/application.py,sha256=bmAMh1hxEPu509O7KRUyp_Vaz92xbzgA6vRHhaNt5ts,4979
77
77
  truss/templates/control/control/endpoints.py,sha256=VQ1lvZjFvR091yRkiFdvXw1Q7PiNGXT9rJwY7_sX6yg,11828
78
78
  truss/templates/control/control/server.py,sha256=R4Y219i1dcz0kkksN8obLoX-YXWGo9iW1igindyG50c,3128
79
79
  truss/templates/control/control/helpers/context_managers.py,sha256=W6dyFgLBhPa5meqrOb3w_phMtKfaJI-GhwUfpiycDc8,413
@@ -368,8 +368,8 @@ truss_train/deployment.py,sha256=lWWANSuzBWu2M4oK4qD7n-oVR1JKdmw2Pn5BJQHg-Ck,307
368
368
  truss_train/loader.py,sha256=0o66EjBaHc2YY4syxxHVR4ordJWs13lNXnKjKq2wq0U,1630
369
369
  truss_train/public_api.py,sha256=9N_NstiUlmBuLUwH_fNG_1x7OhGCytZLNvqKXBlStrM,1220
370
370
  truss_train/restore_from_checkpoint.py,sha256=8hdPm-WSgkt74HDPjvCjZMBpvA9MwtoYsxVjOoa7BaM,1176
371
- truss-0.11.8rc6.dist-info/METADATA,sha256=d2tJzaGAOT3N68nSAYVF3DmmN4SMdm29LYpmnvkjqmo,6680
372
- truss-0.11.8rc6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
373
- truss-0.11.8rc6.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
374
- truss-0.11.8rc6.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
375
- truss-0.11.8rc6.dist-info/RECORD,,
371
+ truss-0.11.8rc8.dist-info/METADATA,sha256=sFf-bspU7seibbvhZolfFQxsX-B6dKktsUVY1-InAew,6680
372
+ truss-0.11.8rc8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
373
+ truss-0.11.8rc8.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
374
+ truss-0.11.8rc8.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
375
+ truss-0.11.8rc8.dist-info/RECORD,,