truss 0.11.13rc3__py3-none-any.whl → 0.11.13rc500__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truss might be problematic. Click here for more details.
- truss/base/trt_llm_config.py +4 -1
- truss/templates/base.Dockerfile.jinja +3 -1
- truss/templates/server.Dockerfile.jinja +2 -2
- truss/templates/shared/lazy_data_resolver.py +30 -16
- truss/tests/test_data/server.Dockerfile +1 -0
- {truss-0.11.13rc3.dist-info → truss-0.11.13rc500.dist-info}/METADATA +2 -2
- {truss-0.11.13rc3.dist-info → truss-0.11.13rc500.dist-info}/RECORD +10 -10
- {truss-0.11.13rc3.dist-info → truss-0.11.13rc500.dist-info}/WHEEL +0 -0
- {truss-0.11.13rc3.dist-info → truss-0.11.13rc500.dist-info}/entry_points.txt +0 -0
- {truss-0.11.13rc3.dist-info → truss-0.11.13rc500.dist-info}/licenses/LICENSE +0 -0
truss/base/trt_llm_config.py
CHANGED
|
@@ -68,6 +68,7 @@ class TrussTRTLLMQuantizationType(str, Enum):
|
|
|
68
68
|
FP8_KV = "fp8_kv"
|
|
69
69
|
FP4 = "fp4"
|
|
70
70
|
FP4_KV = "fp4_kv"
|
|
71
|
+
FP4_MLP_ONLY = "fp4_mlp_only"
|
|
71
72
|
|
|
72
73
|
|
|
73
74
|
class TrussTRTLLMPluginConfiguration(PydanticTrTBaseModel):
|
|
@@ -713,7 +714,9 @@ def trt_llm_common_validation(config: "TrussConfig"):
|
|
|
713
714
|
"accelerators or newer (CUDA_COMPUTE>=89)"
|
|
714
715
|
)
|
|
715
716
|
elif trt_llm_config.build.quantization_type in [
|
|
716
|
-
TrussTRTLLMQuantizationType.FP4
|
|
717
|
+
TrussTRTLLMQuantizationType.FP4,
|
|
718
|
+
TrussTRTLLMQuantizationType.FP4_KV,
|
|
719
|
+
TrussTRTLLMQuantizationType.FP4_MLP_ONLY,
|
|
717
720
|
] and config.resources.accelerator.accelerator in [
|
|
718
721
|
truss_config.Accelerator.H100,
|
|
719
722
|
truss_config.Accelerator.L4,
|
|
@@ -115,9 +115,11 @@ WORKDIR $APP_HOME
|
|
|
115
115
|
{% endblock %}
|
|
116
116
|
|
|
117
117
|
|
|
118
|
+
{% set packages_dir = "/packages" %}
|
|
119
|
+
RUN mkdir -p {{ packages_dir }}
|
|
118
120
|
{% block bundled_packages_copy %}
|
|
119
121
|
{%- if bundled_packages_dir_exists %}
|
|
120
|
-
COPY --chown={{ default_owner }} ./{{ config.bundled_packages_dir }}
|
|
122
|
+
COPY --chown={{ default_owner }} ./{{ config.bundled_packages_dir }} {{ packages_dir }}
|
|
121
123
|
{%- endif %}
|
|
122
124
|
{% endblock %}
|
|
123
125
|
|
|
@@ -69,7 +69,7 @@ COPY --chown={{ default_owner }} ./{{ config.data_dir }} ${APP_HOME}/data
|
|
|
69
69
|
|
|
70
70
|
{%- if model_cache_v2 %}
|
|
71
71
|
{# v0.0.9, keep synced with server_requirements.txt #}
|
|
72
|
-
RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.
|
|
72
|
+
RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.13rc3/truss-transfer-cli-v0.11.13rc3-linux-x86_64-unknown-linux-musl
|
|
73
73
|
RUN chmod +x /usr/local/bin/truss-transfer-cli
|
|
74
74
|
RUN mkdir /static-bptr
|
|
75
75
|
RUN echo "hash {{model_cache_hash}}"
|
|
@@ -104,7 +104,7 @@ COPY --chown={{ default_owner }} ./{{ config.model_module_dir }} ${APP_HOME}/mod
|
|
|
104
104
|
{# Macro to change ownership of directories and switch to regular user #}
|
|
105
105
|
{%- macro chown_and_switch_to_regular_user_if_enabled(additional_chown_dirs=[]) -%}
|
|
106
106
|
{%- if non_root_user %}
|
|
107
|
-
RUN chown -R {{ app_username }}:{{ app_username }} {% for dir in additional_chown_dirs %}{{ dir }} {% endfor %}
|
|
107
|
+
RUN chown -R {{ app_username }}:{{ app_username }} ${HOME} ${APP_HOME} {{ packages_dir }} {% for dir in additional_chown_dirs %}{{ dir }} {% endfor %}
|
|
108
108
|
USER {{ app_username }}
|
|
109
109
|
{%- endif %} {#- endif non_root_user #}
|
|
110
110
|
{%- endmacro -%}
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import atexit
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
import time
|
|
6
5
|
from dataclasses import dataclass
|
|
7
6
|
from functools import lru_cache
|
|
8
7
|
from pathlib import Path
|
|
9
8
|
from threading import Lock, Thread
|
|
10
|
-
from typing import Optional, Union
|
|
9
|
+
from typing import List, Optional, Union
|
|
11
10
|
|
|
12
11
|
try:
|
|
13
12
|
from prometheus_client import Counter, Gauge, Histogram
|
|
@@ -31,7 +30,7 @@ class TrussTransferStats:
|
|
|
31
30
|
total_manifest_size_bytes: int
|
|
32
31
|
total_download_time_secs: float
|
|
33
32
|
total_aggregated_mb_s: Optional[float]
|
|
34
|
-
file_downloads:
|
|
33
|
+
file_downloads: List[FileDownloadMetric]
|
|
35
34
|
b10fs_read_speed_mbps: Optional[float]
|
|
36
35
|
b10fs_decision_to_use: bool
|
|
37
36
|
b10fs_enabled: bool
|
|
@@ -70,26 +69,35 @@ class TrussTransferStats:
|
|
|
70
69
|
except Exception:
|
|
71
70
|
return None
|
|
72
71
|
|
|
73
|
-
def publish_to_prometheus(self):
|
|
72
|
+
def publish_to_prometheus(self, hidden_time: float = 0.0):
|
|
74
73
|
"""Publish transfer stats to Prometheus metrics. Only runs once."""
|
|
75
74
|
if not PROMETHEUS_AVAILABLE:
|
|
76
75
|
return
|
|
77
76
|
global METRICS_REGISTERED
|
|
78
|
-
|
|
77
|
+
|
|
78
|
+
if METRICS_REGISTERED:
|
|
79
|
+
logging.info(
|
|
80
|
+
"Model cache metrics already registered, skipping."
|
|
81
|
+
) # this should never happen
|
|
82
|
+
return
|
|
83
|
+
else:
|
|
79
84
|
# Ensure metrics are only registered once
|
|
80
85
|
METRICS_REGISTERED = True
|
|
81
86
|
|
|
82
|
-
# Define metrics with model_cache
|
|
87
|
+
# Define metrics with model_cache prefix
|
|
83
88
|
manifest_size_gauge = Gauge(
|
|
84
89
|
"model_cache_manifest_size_bytes", "Total manifest size in bytes"
|
|
85
90
|
)
|
|
91
|
+
# histograms have intentionally wide buckets to capture a variety of download times
|
|
86
92
|
download_time_histogram = Histogram(
|
|
87
93
|
"model_cache_download_time_seconds",
|
|
88
94
|
"Total download time in seconds",
|
|
89
|
-
buckets=[
|
|
95
|
+
buckets=[0]
|
|
96
|
+
+ [
|
|
90
97
|
2**i
|
|
91
98
|
for i in range(-3, 11) # = [0.125, .. 1024] seconds
|
|
92
|
-
]
|
|
99
|
+
]
|
|
100
|
+
+ [float("inf")],
|
|
93
101
|
)
|
|
94
102
|
download_speed_gauge = Gauge(
|
|
95
103
|
"model_cache_download_speed_mbps", "Aggregated download speed in MB/s"
|
|
@@ -103,21 +111,29 @@ class TrussTransferStats:
|
|
|
103
111
|
"model_cache_file_size_bytes_total",
|
|
104
112
|
"Total size of downloaded files in bytes",
|
|
105
113
|
)
|
|
114
|
+
file_download_hidden_time_gauge = Gauge(
|
|
115
|
+
"model_cache_file_download_hidden_time_seconds",
|
|
116
|
+
"Total time hidden from user by starting the import before user code (seconds)",
|
|
117
|
+
)
|
|
106
118
|
file_download_time_histogram = Histogram(
|
|
107
119
|
"model_cache_file_download_time_seconds",
|
|
108
120
|
"File download time distribution",
|
|
109
|
-
buckets=[
|
|
121
|
+
buckets=[0]
|
|
122
|
+
+ [
|
|
110
123
|
2**i
|
|
111
124
|
for i in range(-3, 11) # = [0.125, .. 1024] seconds
|
|
112
|
-
]
|
|
125
|
+
]
|
|
126
|
+
+ [float("inf")],
|
|
113
127
|
)
|
|
114
128
|
file_download_speed_histogram = Histogram(
|
|
115
129
|
"model_cache_file_download_speed_mbps",
|
|
116
130
|
"File download speed distribution",
|
|
117
|
-
buckets=[
|
|
131
|
+
buckets=[0]
|
|
132
|
+
+ [
|
|
118
133
|
2**i
|
|
119
134
|
for i in range(-1, 12) # = [0.5, .. 2048] MB/s
|
|
120
|
-
]
|
|
135
|
+
]
|
|
136
|
+
+ [float("inf")],
|
|
121
137
|
)
|
|
122
138
|
|
|
123
139
|
# B10FS specific metrics
|
|
@@ -153,6 +169,7 @@ class TrussTransferStats:
|
|
|
153
169
|
# Set main transfer metrics
|
|
154
170
|
manifest_size_gauge.set(self.total_manifest_size_bytes)
|
|
155
171
|
download_time_histogram.observe(self.total_download_time_secs)
|
|
172
|
+
file_download_hidden_time_gauge.set(hidden_time)
|
|
156
173
|
|
|
157
174
|
if self.total_aggregated_mb_s is not None:
|
|
158
175
|
download_speed_gauge.set(self.total_aggregated_mb_s)
|
|
@@ -331,10 +348,7 @@ class LazyDataResolverV2:
|
|
|
331
348
|
if stats and publish_stats:
|
|
332
349
|
self.logger.info(f"model_cache: {stats}")
|
|
333
350
|
# Publish stats to Prometheus
|
|
334
|
-
|
|
335
|
-
os.getenv("TRUSS_MODEL_CACHE_PROMETHEUS", "0") == "1"
|
|
336
|
-
): # Hide behind feature flag for core-product to enabled.
|
|
337
|
-
stats.publish_to_prometheus()
|
|
351
|
+
stats.publish_to_prometheus()
|
|
338
352
|
self.logger.info(
|
|
339
353
|
f"model_cache: Fetch took {fetch_t:.2f} seconds, of which {start_lock_t:.2f} seconds were spent blocking."
|
|
340
354
|
)
|
|
@@ -36,6 +36,7 @@ COPY --chown= ./data ${APP_HOME}/data
|
|
|
36
36
|
COPY --chown= ./server ${APP_HOME}
|
|
37
37
|
COPY --chown= ./config.yaml ${APP_HOME}/config.yaml
|
|
38
38
|
COPY --chown= ./model ${APP_HOME}/model
|
|
39
|
+
RUN mkdir -p /packages
|
|
39
40
|
COPY --chown= ./packages /packages
|
|
40
41
|
ENV INFERENCE_SERVER_PORT="8080"
|
|
41
42
|
ENV SERVER_START_CMD="/usr/local/bin/python3 /app/main.py"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: truss
|
|
3
|
-
Version: 0.11.
|
|
3
|
+
Version: 0.11.13rc500
|
|
4
4
|
Summary: A seamless bridge from model development to model delivery
|
|
5
5
|
Project-URL: Repository, https://github.com/basetenlabs/truss
|
|
6
6
|
Project-URL: Homepage, https://truss.baseten.co
|
|
@@ -37,7 +37,7 @@ Requires-Dist: rich<14,>=13.4.2
|
|
|
37
37
|
Requires-Dist: ruff>=0.4.8
|
|
38
38
|
Requires-Dist: tenacity>=8.0.1
|
|
39
39
|
Requires-Dist: tomlkit>=0.13.2
|
|
40
|
-
Requires-Dist: truss-transfer<0.0.40,>=0.0.
|
|
40
|
+
Requires-Dist: truss-transfer<0.0.40,>=0.0.37
|
|
41
41
|
Requires-Dist: watchfiles<0.20,>=0.19.0
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
|
|
@@ -5,7 +5,7 @@ truss/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
5
5
|
truss/base/constants.py,sha256=sExArdnuGg83z83XMgaQ4b8SS3V_j_bJEpOATDGJzpE,3600
|
|
6
6
|
truss/base/custom_types.py,sha256=FUSIT2lPOQb6gfg6IzT63YBV8r8L6NIZ0D74Fp3e_jQ,2835
|
|
7
7
|
truss/base/errors.py,sha256=zDVLEvseTChdPP0oNhBBQCtQUtZJUaof5zeWMIjqz6o,691
|
|
8
|
-
truss/base/trt_llm_config.py,sha256=
|
|
8
|
+
truss/base/trt_llm_config.py,sha256=rEtBVFg2QnNMxnaz11s5Z69dJB1w7Bpt48Wf6jSsVZI,33087
|
|
9
9
|
truss/base/truss_config.py,sha256=7CtiJIwMHtDU8Wzn8UTJUVVunD0pWFl4QUVycK2aIpY,28055
|
|
10
10
|
truss/base/truss_spec.py,sha256=jFVF79CXoEEspl2kXBAPyi-rwISReIGTdobGpaIhwJw,5979
|
|
11
11
|
truss/cli/chains_commands.py,sha256=Kpa5mCg6URAJQE2ZmZfVQFhjBHEitKT28tKiW0H6XAI,17406
|
|
@@ -66,12 +66,12 @@ truss/remote/baseten/utils/time.py,sha256=Ry9GMjYnbIGYVIGwtmv4V8ljWjvdcaCf5NOQzl
|
|
|
66
66
|
truss/remote/baseten/utils/transfer.py,sha256=d3VptuQb6M1nyS6kz0BAfeOYDLkMKUjatJXpY-mp-As,1548
|
|
67
67
|
truss/templates/README.md.jinja,sha256=N7CJdyldZuJamj5jLh47le0hFBdu9irVsTBqoxhPNPQ,2476
|
|
68
68
|
truss/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
|
-
truss/templates/base.Dockerfile.jinja,sha256=
|
|
69
|
+
truss/templates/base.Dockerfile.jinja,sha256=tdMmK5TeiQuYbz4gqbACM3R-l-mazqL9tAZtJ4sxC4g,5331
|
|
70
70
|
truss/templates/cache.Dockerfile.jinja,sha256=1qZqDo1phrcqi-Vwol-VafYJkADsBbQWU6huQ-_1x00,1146
|
|
71
71
|
truss/templates/cache_requirements.txt,sha256=xoPoJ-OVnf1z6oq_RVM3vCr3ionByyqMLj7wGs61nUs,87
|
|
72
72
|
truss/templates/copy_cache_files.Dockerfile.jinja,sha256=Os5zFdYLZ_AfCRGq4RcpVTObOTwL7zvmwYcvOzd_Zqo,126
|
|
73
73
|
truss/templates/docker_server_requirements.txt,sha256=PyhOPKAmKW1N2vLvTfLMwsEtuGpoRrbWuNo7tT6v2Mc,18
|
|
74
|
-
truss/templates/server.Dockerfile.jinja,sha256=
|
|
74
|
+
truss/templates/server.Dockerfile.jinja,sha256=Mu5_ZxuAknwaEOsF0l-XssA9pDg3pD3eLl6JBzNJ4rg,7091
|
|
75
75
|
truss/templates/control/requirements.txt,sha256=tJGr83WoE0CZm2FrloZ9VScK84q-_FTuVXjDYrexhW0,250
|
|
76
76
|
truss/templates/control/control/application.py,sha256=5Kam6M-XtfKGaXQz8cc3d0bwDkB80o2MskABWROx1gk,5321
|
|
77
77
|
truss/templates/control/control/endpoints.py,sha256=KzqsLVNJE6r6TCPW8D5FMCtsfHadTwR15A3z_viGxmM,11782
|
|
@@ -107,7 +107,7 @@ truss/templates/server/common/tracing.py,sha256=XSTXNoRtV8vXwveJoX3H32go0JKnLmzn
|
|
|
107
107
|
truss/templates/server/common/patches/whisper/patch.py,sha256=kDECQ-wmEpeAZFhUTQP457ofueeMsm7DgNy9tqinhJQ,2383
|
|
108
108
|
truss/templates/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
109
109
|
truss/templates/shared/dynamic_config_resolver.py,sha256=75s42NFhQI5jL7BqlJH_UkuQS7ptbtFh13f2nh6X5Wo,920
|
|
110
|
-
truss/templates/shared/lazy_data_resolver.py,sha256=
|
|
110
|
+
truss/templates/shared/lazy_data_resolver.py,sha256=HxrZz6X30j2LbsExYSqhuOGoYEffGpd7FPBtJexI7TQ,14064
|
|
111
111
|
truss/templates/shared/log_config.py,sha256=l9udyu4VKHZePlfK9LQEd5TOUUodPuehypsXRSUL4Ac,5411
|
|
112
112
|
truss/templates/shared/secrets_resolver.py,sha256=3prDe3Q06NTmUEe7KCW-W4TD1CzGck9lpDG789209z4,2110
|
|
113
113
|
truss/templates/shared/serialization.py,sha256=_WC_2PPkRi-MdTwxwjG8LKQptnHi4sANfpOlKWevqWc,3736
|
|
@@ -185,7 +185,7 @@ truss/tests/test_data/pima-indians-diabetes.csv,sha256=BvW3ws17ymhv2k-S6rX2Hn_2Q
|
|
|
185
185
|
truss/tests/test_data/readme_int_example.md,sha256=fuHvpLtdkJy1f4NAR_djotVBdzusHYNXc-Fwh588XAE,1586
|
|
186
186
|
truss/tests/test_data/readme_no_example.md,sha256=T2CzFMRvICXeX3_5XbFoqhHchcHGot-xM7izx34B3aQ,1607
|
|
187
187
|
truss/tests/test_data/readme_str_example.md,sha256=fP4pvMqgLdIapaOf_BgRiV0H7pw4so0RNxrlq5lbROE,1726
|
|
188
|
-
truss/tests/test_data/server.Dockerfile,sha256=
|
|
188
|
+
truss/tests/test_data/server.Dockerfile,sha256=KoQN5qBpiXL93qbjbG76kfRVapwfV5CNJzQcpjPocz0,2047
|
|
189
189
|
truss/tests/test_data/annotated_types_truss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
190
190
|
truss/tests/test_data/annotated_types_truss/config.yaml,sha256=B-ZyyjLLqtxGfXj2tkH68Hy7NOMB_coYvoWyWom61g0,147
|
|
191
191
|
truss/tests/test_data/annotated_types_truss/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -370,8 +370,8 @@ truss_train/deployment.py,sha256=lWWANSuzBWu2M4oK4qD7n-oVR1JKdmw2Pn5BJQHg-Ck,307
|
|
|
370
370
|
truss_train/loader.py,sha256=0o66EjBaHc2YY4syxxHVR4ordJWs13lNXnKjKq2wq0U,1630
|
|
371
371
|
truss_train/public_api.py,sha256=9N_NstiUlmBuLUwH_fNG_1x7OhGCytZLNvqKXBlStrM,1220
|
|
372
372
|
truss_train/restore_from_checkpoint.py,sha256=8hdPm-WSgkt74HDPjvCjZMBpvA9MwtoYsxVjOoa7BaM,1176
|
|
373
|
-
truss-0.11.
|
|
374
|
-
truss-0.11.
|
|
375
|
-
truss-0.11.
|
|
376
|
-
truss-0.11.
|
|
377
|
-
truss-0.11.
|
|
373
|
+
truss-0.11.13rc500.dist-info/METADATA,sha256=fO90Qd0TBo1ZirChnji7aZh2vHpqYmuUGsCYMZGe4Oo,6683
|
|
374
|
+
truss-0.11.13rc500.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
375
|
+
truss-0.11.13rc500.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
|
|
376
|
+
truss-0.11.13rc500.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
|
|
377
|
+
truss-0.11.13rc500.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|