truss 0.11.13rc2__py3-none-any.whl → 0.11.13rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of truss might be problematic. Click here for more details.
- truss/base/trt_llm_config.py +4 -1
- truss/templates/server.Dockerfile.jinja +1 -1
- truss/templates/shared/lazy_data_resolver.py +143 -2
- {truss-0.11.13rc2.dist-info → truss-0.11.13rc4.dist-info}/METADATA +2 -2
- {truss-0.11.13rc2.dist-info → truss-0.11.13rc4.dist-info}/RECORD +8 -8
- {truss-0.11.13rc2.dist-info → truss-0.11.13rc4.dist-info}/WHEEL +0 -0
- {truss-0.11.13rc2.dist-info → truss-0.11.13rc4.dist-info}/entry_points.txt +0 -0
- {truss-0.11.13rc2.dist-info → truss-0.11.13rc4.dist-info}/licenses/LICENSE +0 -0
truss/base/trt_llm_config.py
CHANGED
|
@@ -68,6 +68,7 @@ class TrussTRTLLMQuantizationType(str, Enum):
|
|
|
68
68
|
FP8_KV = "fp8_kv"
|
|
69
69
|
FP4 = "fp4"
|
|
70
70
|
FP4_KV = "fp4_kv"
|
|
71
|
+
FP4_MLP_ONLY = "fp4_mlp_only"
|
|
71
72
|
|
|
72
73
|
|
|
73
74
|
class TrussTRTLLMPluginConfiguration(PydanticTrTBaseModel):
|
|
@@ -713,7 +714,9 @@ def trt_llm_common_validation(config: "TrussConfig"):
|
|
|
713
714
|
"accelerators or newer (CUDA_COMPUTE>=89)"
|
|
714
715
|
)
|
|
715
716
|
elif trt_llm_config.build.quantization_type in [
|
|
716
|
-
TrussTRTLLMQuantizationType.FP4
|
|
717
|
+
TrussTRTLLMQuantizationType.FP4,
|
|
718
|
+
TrussTRTLLMQuantizationType.FP4_KV,
|
|
719
|
+
TrussTRTLLMQuantizationType.FP4_MLP_ONLY,
|
|
717
720
|
] and config.resources.accelerator.accelerator in [
|
|
718
721
|
truss_config.Accelerator.H100,
|
|
719
722
|
truss_config.Accelerator.L4,
|
|
@@ -69,7 +69,7 @@ COPY --chown={{ default_owner }} ./{{ config.data_dir }} ${APP_HOME}/data
|
|
|
69
69
|
|
|
70
70
|
{%- if model_cache_v2 %}
|
|
71
71
|
{# v0.0.9, keep synced with server_requirements.txt #}
|
|
72
|
-
RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.
|
|
72
|
+
RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.13rc3/truss-transfer-cli-v0.11.13rc3-linux-x86_64-unknown-linux-musl
|
|
73
73
|
RUN chmod +x /usr/local/bin/truss-transfer-cli
|
|
74
74
|
RUN mkdir /static-bptr
|
|
75
75
|
RUN echo "hash {{model_cache_hash}}"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import atexit
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
+
import os
|
|
4
5
|
import time
|
|
5
6
|
from dataclasses import dataclass
|
|
6
7
|
from functools import lru_cache
|
|
@@ -8,6 +9,14 @@ from pathlib import Path
|
|
|
8
9
|
from threading import Lock, Thread
|
|
9
10
|
from typing import Optional, Union
|
|
10
11
|
|
|
12
|
+
try:
|
|
13
|
+
from prometheus_client import Counter, Gauge, Histogram
|
|
14
|
+
|
|
15
|
+
PROMETHEUS_AVAILABLE = True
|
|
16
|
+
except ImportError:
|
|
17
|
+
PROMETHEUS_AVAILABLE = False
|
|
18
|
+
METRICS_REGISTERED = False
|
|
19
|
+
|
|
11
20
|
|
|
12
21
|
@dataclass(frozen=True)
|
|
13
22
|
class FileDownloadMetric:
|
|
@@ -61,6 +70,128 @@ class TrussTransferStats:
|
|
|
61
70
|
except Exception:
|
|
62
71
|
return None
|
|
63
72
|
|
|
73
|
+
def publish_to_prometheus(self):
|
|
74
|
+
"""Publish transfer stats to Prometheus metrics. Only runs once."""
|
|
75
|
+
if not PROMETHEUS_AVAILABLE:
|
|
76
|
+
return
|
|
77
|
+
global METRICS_REGISTERED
|
|
78
|
+
|
|
79
|
+
if METRICS_REGISTERED:
|
|
80
|
+
logging.info(
|
|
81
|
+
"Model cache metrics already registered, skipping."
|
|
82
|
+
) # this should never happen
|
|
83
|
+
return
|
|
84
|
+
else:
|
|
85
|
+
# Ensure metrics are only registered once
|
|
86
|
+
METRICS_REGISTERED = True
|
|
87
|
+
|
|
88
|
+
# Define metrics with model_cache prefix
|
|
89
|
+
manifest_size_gauge = Gauge(
|
|
90
|
+
"model_cache_manifest_size_bytes", "Total manifest size in bytes"
|
|
91
|
+
)
|
|
92
|
+
# histograms have intentionally wide buckets to capture a variety of download times
|
|
93
|
+
download_time_histogram = Histogram(
|
|
94
|
+
"model_cache_download_time_seconds",
|
|
95
|
+
"Total download time in seconds",
|
|
96
|
+
buckets=[
|
|
97
|
+
2**i
|
|
98
|
+
for i in range(-3, 11) # = [0.125, .. 2048] seconds
|
|
99
|
+
],
|
|
100
|
+
)
|
|
101
|
+
download_speed_gauge = Gauge(
|
|
102
|
+
"model_cache_download_speed_mbps", "Aggregated download speed in MB/s"
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# File download metrics (aggregated)
|
|
106
|
+
files_downloaded_counter = Counter(
|
|
107
|
+
"model_cache_files_downloaded_total", "Total number of files downloaded"
|
|
108
|
+
)
|
|
109
|
+
total_file_size_counter = Counter(
|
|
110
|
+
"model_cache_file_size_bytes_total",
|
|
111
|
+
"Total size of downloaded files in bytes",
|
|
112
|
+
)
|
|
113
|
+
file_download_time_histogram = Histogram(
|
|
114
|
+
"model_cache_file_download_time_seconds",
|
|
115
|
+
"File download time distribution",
|
|
116
|
+
buckets=[
|
|
117
|
+
2**i
|
|
118
|
+
for i in range(-3, 11) # = [0.125, .. 2048] seconds
|
|
119
|
+
],
|
|
120
|
+
)
|
|
121
|
+
file_download_speed_histogram = Histogram(
|
|
122
|
+
"model_cache_file_download_speed_mbps",
|
|
123
|
+
"File download speed distribution",
|
|
124
|
+
buckets=[
|
|
125
|
+
2**i
|
|
126
|
+
for i in range(-1, 12) # = [0.5, .. 4096] MB/s
|
|
127
|
+
],
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# B10FS specific metrics
|
|
131
|
+
b10fs_enabled_gauge = Gauge(
|
|
132
|
+
"model_cache_b10fs_enabled", "Whether B10FS is enabled"
|
|
133
|
+
)
|
|
134
|
+
b10fs_decision_gauge = Gauge(
|
|
135
|
+
"model_cache_b10fs_decision_to_use", "Whether B10FS was chosen for use"
|
|
136
|
+
)
|
|
137
|
+
b10fs_read_speed_gauge = Gauge(
|
|
138
|
+
"model_cache_b10fs_read_speed_mbps", "B10FS read speed in Mbps"
|
|
139
|
+
)
|
|
140
|
+
b10fs_hot_files_gauge = Gauge(
|
|
141
|
+
"model_cache_b10fs_hot_starts_files", "Number of hot start files"
|
|
142
|
+
)
|
|
143
|
+
b10fs_hot_bytes_gauge = Gauge(
|
|
144
|
+
"model_cache_b10fs_hot_starts_bytes", "Number of hot start bytes"
|
|
145
|
+
)
|
|
146
|
+
b10fs_cold_files_gauge = Gauge(
|
|
147
|
+
"model_cache_b10fs_cold_starts_files", "Number of cold start files"
|
|
148
|
+
)
|
|
149
|
+
b10fs_cold_bytes_gauge = Gauge(
|
|
150
|
+
"model_cache_b10fs_cold_starts_bytes", "Number of cold start bytes"
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
# Transfer success metric
|
|
154
|
+
transfer_success_counter = Counter(
|
|
155
|
+
"model_cache_transfer_success_total",
|
|
156
|
+
"Total successful transfers",
|
|
157
|
+
["success"],
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# Set main transfer metrics
|
|
161
|
+
manifest_size_gauge.set(self.total_manifest_size_bytes)
|
|
162
|
+
download_time_histogram.observe(self.total_download_time_secs)
|
|
163
|
+
|
|
164
|
+
if self.total_aggregated_mb_s is not None:
|
|
165
|
+
download_speed_gauge.set(self.total_aggregated_mb_s)
|
|
166
|
+
|
|
167
|
+
# Aggregate file download metrics
|
|
168
|
+
total_files = len(self.file_downloads)
|
|
169
|
+
total_file_bytes = sum(fd.file_size_bytes for fd in self.file_downloads)
|
|
170
|
+
|
|
171
|
+
files_downloaded_counter.inc(total_files)
|
|
172
|
+
total_file_size_counter.inc(total_file_bytes)
|
|
173
|
+
|
|
174
|
+
# Record individual file metrics for distribution
|
|
175
|
+
for fd in self.file_downloads:
|
|
176
|
+
if fd.file_size_bytes > 1 * 1024 * 1024: # Only log files larger than 1MB
|
|
177
|
+
file_download_time_histogram.observe(fd.download_time_secs)
|
|
178
|
+
file_download_speed_histogram.observe(fd.download_speed_mb_s)
|
|
179
|
+
|
|
180
|
+
# B10FS metrics
|
|
181
|
+
b10fs_enabled_gauge.set(1 if self.b10fs_enabled else 0)
|
|
182
|
+
b10fs_decision_gauge.set(1 if self.b10fs_decision_to_use else 0)
|
|
183
|
+
|
|
184
|
+
if self.b10fs_read_speed_mbps is not None:
|
|
185
|
+
b10fs_read_speed_gauge.set(self.b10fs_read_speed_mbps)
|
|
186
|
+
|
|
187
|
+
b10fs_hot_files_gauge.set(self.b10fs_hot_starts_files)
|
|
188
|
+
b10fs_hot_bytes_gauge.set(self.b10fs_hot_starts_bytes)
|
|
189
|
+
b10fs_cold_files_gauge.set(self.b10fs_cold_starts_files)
|
|
190
|
+
b10fs_cold_bytes_gauge.set(self.b10fs_cold_starts_bytes)
|
|
191
|
+
|
|
192
|
+
# Success metric
|
|
193
|
+
transfer_success_counter.labels(success=str(self.success)).inc()
|
|
194
|
+
|
|
64
195
|
|
|
65
196
|
LAZY_DATA_RESOLVER_PATH = [
|
|
66
197
|
# synced with pub static LAZY_DATA_RESOLVER_PATHS: &[&str]
|
|
@@ -185,6 +316,9 @@ class LazyDataResolverV2:
|
|
|
185
316
|
|
|
186
317
|
"""
|
|
187
318
|
start_lock = time.time()
|
|
319
|
+
publish_stats = (
|
|
320
|
+
log_stats and not self._is_collected_by_user
|
|
321
|
+
) # only publish results once per resolver
|
|
188
322
|
self._is_collected_by_user = issue_collect or self._is_collected_by_user
|
|
189
323
|
with self._lock:
|
|
190
324
|
result = self._fetch()
|
|
@@ -196,13 +330,20 @@ class LazyDataResolverV2:
|
|
|
196
330
|
# TODO: instrument the stats, which are written to /tmp/truss_transfer_stats.json
|
|
197
331
|
# also add fetch time, and blocking time
|
|
198
332
|
# TrussTransferStats
|
|
333
|
+
fetch_t = time.time() - self._start_time
|
|
334
|
+
start_lock_t = time.time() - start_lock
|
|
199
335
|
stats = TrussTransferStats.from_json_file(
|
|
200
336
|
Path("/tmp/truss_transfer_stats.json")
|
|
201
337
|
)
|
|
202
|
-
if stats
|
|
338
|
+
if stats and publish_stats:
|
|
203
339
|
self.logger.info(f"model_cache: {stats}")
|
|
340
|
+
# Publish stats to Prometheus
|
|
341
|
+
if (
|
|
342
|
+
os.getenv("TRUSS_MODEL_CACHE_PROMETHEUS", "0") == "1"
|
|
343
|
+
): # Hidden behind a feature flag so core-product can enable it.
|
|
344
|
+
stats.publish_to_prometheus()
|
|
204
345
|
self.logger.info(
|
|
205
|
-
f"model_cache: Fetch took {
|
|
346
|
+
f"model_cache: Fetch took {fetch_t:.2f} seconds, of which {start_lock_t:.2f} seconds were spent blocking."
|
|
206
347
|
)
|
|
207
348
|
return result
|
|
208
349
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: truss
|
|
3
|
-
Version: 0.11.
|
|
3
|
+
Version: 0.11.13rc4
|
|
4
4
|
Summary: A seamless bridge from model development to model delivery
|
|
5
5
|
Project-URL: Repository, https://github.com/basetenlabs/truss
|
|
6
6
|
Project-URL: Homepage, https://truss.baseten.co
|
|
@@ -37,7 +37,7 @@ Requires-Dist: rich<14,>=13.4.2
|
|
|
37
37
|
Requires-Dist: ruff>=0.4.8
|
|
38
38
|
Requires-Dist: tenacity>=8.0.1
|
|
39
39
|
Requires-Dist: tomlkit>=0.13.2
|
|
40
|
-
Requires-Dist: truss-transfer<0.0.40,>=0.0.
|
|
40
|
+
Requires-Dist: truss-transfer<0.0.40,>=0.0.37
|
|
41
41
|
Requires-Dist: watchfiles<0.20,>=0.19.0
|
|
42
42
|
Description-Content-Type: text/markdown
|
|
43
43
|
|
|
@@ -5,7 +5,7 @@ truss/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
5
5
|
truss/base/constants.py,sha256=sExArdnuGg83z83XMgaQ4b8SS3V_j_bJEpOATDGJzpE,3600
|
|
6
6
|
truss/base/custom_types.py,sha256=FUSIT2lPOQb6gfg6IzT63YBV8r8L6NIZ0D74Fp3e_jQ,2835
|
|
7
7
|
truss/base/errors.py,sha256=zDVLEvseTChdPP0oNhBBQCtQUtZJUaof5zeWMIjqz6o,691
|
|
8
|
-
truss/base/trt_llm_config.py,sha256=
|
|
8
|
+
truss/base/trt_llm_config.py,sha256=rEtBVFg2QnNMxnaz11s5Z69dJB1w7Bpt48Wf6jSsVZI,33087
|
|
9
9
|
truss/base/truss_config.py,sha256=7CtiJIwMHtDU8Wzn8UTJUVVunD0pWFl4QUVycK2aIpY,28055
|
|
10
10
|
truss/base/truss_spec.py,sha256=jFVF79CXoEEspl2kXBAPyi-rwISReIGTdobGpaIhwJw,5979
|
|
11
11
|
truss/cli/chains_commands.py,sha256=Kpa5mCg6URAJQE2ZmZfVQFhjBHEitKT28tKiW0H6XAI,17406
|
|
@@ -71,7 +71,7 @@ truss/templates/cache.Dockerfile.jinja,sha256=1qZqDo1phrcqi-Vwol-VafYJkADsBbQWU6
|
|
|
71
71
|
truss/templates/cache_requirements.txt,sha256=xoPoJ-OVnf1z6oq_RVM3vCr3ionByyqMLj7wGs61nUs,87
|
|
72
72
|
truss/templates/copy_cache_files.Dockerfile.jinja,sha256=Os5zFdYLZ_AfCRGq4RcpVTObOTwL7zvmwYcvOzd_Zqo,126
|
|
73
73
|
truss/templates/docker_server_requirements.txt,sha256=PyhOPKAmKW1N2vLvTfLMwsEtuGpoRrbWuNo7tT6v2Mc,18
|
|
74
|
-
truss/templates/server.Dockerfile.jinja,sha256=
|
|
74
|
+
truss/templates/server.Dockerfile.jinja,sha256=FdxCIXBjBMXVQy0fFxEY2acL-MAZGOD8JKKWAhu3M24,7071
|
|
75
75
|
truss/templates/control/requirements.txt,sha256=tJGr83WoE0CZm2FrloZ9VScK84q-_FTuVXjDYrexhW0,250
|
|
76
76
|
truss/templates/control/control/application.py,sha256=5Kam6M-XtfKGaXQz8cc3d0bwDkB80o2MskABWROx1gk,5321
|
|
77
77
|
truss/templates/control/control/endpoints.py,sha256=KzqsLVNJE6r6TCPW8D5FMCtsfHadTwR15A3z_viGxmM,11782
|
|
@@ -107,7 +107,7 @@ truss/templates/server/common/tracing.py,sha256=XSTXNoRtV8vXwveJoX3H32go0JKnLmzn
|
|
|
107
107
|
truss/templates/server/common/patches/whisper/patch.py,sha256=kDECQ-wmEpeAZFhUTQP457ofueeMsm7DgNy9tqinhJQ,2383
|
|
108
108
|
truss/templates/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
109
109
|
truss/templates/shared/dynamic_config_resolver.py,sha256=75s42NFhQI5jL7BqlJH_UkuQS7ptbtFh13f2nh6X5Wo,920
|
|
110
|
-
truss/templates/shared/lazy_data_resolver.py,sha256=
|
|
110
|
+
truss/templates/shared/lazy_data_resolver.py,sha256=2QS_0Qac5MMJYhzW-DGgs9_Wh7BtIGLfBtnm29I7X8o,13818
|
|
111
111
|
truss/templates/shared/log_config.py,sha256=l9udyu4VKHZePlfK9LQEd5TOUUodPuehypsXRSUL4Ac,5411
|
|
112
112
|
truss/templates/shared/secrets_resolver.py,sha256=3prDe3Q06NTmUEe7KCW-W4TD1CzGck9lpDG789209z4,2110
|
|
113
113
|
truss/templates/shared/serialization.py,sha256=_WC_2PPkRi-MdTwxwjG8LKQptnHi4sANfpOlKWevqWc,3736
|
|
@@ -370,8 +370,8 @@ truss_train/deployment.py,sha256=lWWANSuzBWu2M4oK4qD7n-oVR1JKdmw2Pn5BJQHg-Ck,307
|
|
|
370
370
|
truss_train/loader.py,sha256=0o66EjBaHc2YY4syxxHVR4ordJWs13lNXnKjKq2wq0U,1630
|
|
371
371
|
truss_train/public_api.py,sha256=9N_NstiUlmBuLUwH_fNG_1x7OhGCytZLNvqKXBlStrM,1220
|
|
372
372
|
truss_train/restore_from_checkpoint.py,sha256=8hdPm-WSgkt74HDPjvCjZMBpvA9MwtoYsxVjOoa7BaM,1176
|
|
373
|
-
truss-0.11.
|
|
374
|
-
truss-0.11.
|
|
375
|
-
truss-0.11.
|
|
376
|
-
truss-0.11.
|
|
377
|
-
truss-0.11.
|
|
373
|
+
truss-0.11.13rc4.dist-info/METADATA,sha256=xxM0NsWyJwukE2KXOuzkZ--EuVl7gRNiJw2wBFwpPT0,6681
|
|
374
|
+
truss-0.11.13rc4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
375
|
+
truss-0.11.13rc4.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
|
|
376
|
+
truss-0.11.13rc4.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
|
|
377
|
+
truss-0.11.13rc4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|