truss 0.11.13rc2__py3-none-any.whl → 0.11.13rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truss might be problematic. Click here for more details.

@@ -68,6 +68,7 @@ class TrussTRTLLMQuantizationType(str, Enum):
68
68
  FP8_KV = "fp8_kv"
69
69
  FP4 = "fp4"
70
70
  FP4_KV = "fp4_kv"
71
+ FP4_MLP_ONLY = "fp4_mlp_only"
71
72
 
72
73
 
73
74
  class TrussTRTLLMPluginConfiguration(PydanticTrTBaseModel):
@@ -713,7 +714,9 @@ def trt_llm_common_validation(config: "TrussConfig"):
713
714
  "accelerators or newer (CUDA_COMPUTE>=89)"
714
715
  )
715
716
  elif trt_llm_config.build.quantization_type in [
716
- TrussTRTLLMQuantizationType.FP4
717
+ TrussTRTLLMQuantizationType.FP4,
718
+ TrussTRTLLMQuantizationType.FP4_KV,
719
+ TrussTRTLLMQuantizationType.FP4_MLP_ONLY,
717
720
  ] and config.resources.accelerator.accelerator in [
718
721
  truss_config.Accelerator.H100,
719
722
  truss_config.Accelerator.L4,
@@ -69,7 +69,7 @@ COPY --chown={{ default_owner }} ./{{ config.data_dir }} ${APP_HOME}/data
69
69
 
70
70
  {%- if model_cache_v2 %}
71
71
  {# v0.0.9, keep synced with server_requirements.txt #}
72
- RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.12rc4/truss-transfer-cli-v0.11.12rc4-linux-x86_64-unknown-linux-musl
72
+ RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.13rc3/truss-transfer-cli-v0.11.13rc3-linux-x86_64-unknown-linux-musl
73
73
  RUN chmod +x /usr/local/bin/truss-transfer-cli
74
74
  RUN mkdir /static-bptr
75
75
  RUN echo "hash {{model_cache_hash}}"
@@ -1,6 +1,7 @@
1
1
  import atexit
2
2
  import json
3
3
  import logging
4
+ import os
4
5
  import time
5
6
  from dataclasses import dataclass
6
7
  from functools import lru_cache
@@ -8,6 +9,14 @@ from pathlib import Path
8
9
  from threading import Lock, Thread
9
10
  from typing import Optional, Union
10
11
 
12
+ try:
13
+ from prometheus_client import Counter, Gauge, Histogram
14
+
15
+ PROMETHEUS_AVAILABLE = True
16
+ except ImportError:
17
+ PROMETHEUS_AVAILABLE = False
18
+ METRICS_REGISTERED = False
19
+
11
20
 
12
21
  @dataclass(frozen=True)
13
22
  class FileDownloadMetric:
@@ -61,6 +70,128 @@ class TrussTransferStats:
61
70
  except Exception:
62
71
  return None
63
72
 
73
+ def publish_to_prometheus(self):
74
+ """Publish transfer stats to Prometheus metrics. Only runs once."""
75
+ if not PROMETHEUS_AVAILABLE:
76
+ return
77
+ global METRICS_REGISTERED
78
+
79
+ if METRICS_REGISTERED:
80
+ logging.info(
81
+ "Model cache metrics already registered, skipping."
82
+ ) # this should never happen
83
+ return
84
+ else:
85
+ # Ensure metrics are only registered once
86
+ METRICS_REGISTERED = True
87
+
88
+ # Define metrics with model_cache prefix
89
+ manifest_size_gauge = Gauge(
90
+ "model_cache_manifest_size_bytes", "Total manifest size in bytes"
91
+ )
92
+ # histograms have intentionally wide buckets to capture a variety of download times
93
+ download_time_histogram = Histogram(
94
+ "model_cache_download_time_seconds",
95
+ "Total download time in seconds",
96
+ buckets=[
97
+ 2**i
98
+ for i in range(-3, 11) # = [0.125, .. 2048] seconds
99
+ ],
100
+ )
101
+ download_speed_gauge = Gauge(
102
+ "model_cache_download_speed_mbps", "Aggregated download speed in MB/s"
103
+ )
104
+
105
+ # File download metrics (aggregated)
106
+ files_downloaded_counter = Counter(
107
+ "model_cache_files_downloaded_total", "Total number of files downloaded"
108
+ )
109
+ total_file_size_counter = Counter(
110
+ "model_cache_file_size_bytes_total",
111
+ "Total size of downloaded files in bytes",
112
+ )
113
+ file_download_time_histogram = Histogram(
114
+ "model_cache_file_download_time_seconds",
115
+ "File download time distribution",
116
+ buckets=[
117
+ 2**i
118
+ for i in range(-3, 11) # = [0.125, .. 2048] seconds
119
+ ],
120
+ )
121
+ file_download_speed_histogram = Histogram(
122
+ "model_cache_file_download_speed_mbps",
123
+ "File download speed distribution",
124
+ buckets=[
125
+ 2**i
126
+ for i in range(-1, 12) # = [0.5, .. 4096] MB/s
127
+ ],
128
+ )
129
+
130
+ # B10FS specific metrics
131
+ b10fs_enabled_gauge = Gauge(
132
+ "model_cache_b10fs_enabled", "Whether B10FS is enabled"
133
+ )
134
+ b10fs_decision_gauge = Gauge(
135
+ "model_cache_b10fs_decision_to_use", "Whether B10FS was chosen for use"
136
+ )
137
+ b10fs_read_speed_gauge = Gauge(
138
+ "model_cache_b10fs_read_speed_mbps", "B10FS read speed in Mbps"
139
+ )
140
+ b10fs_hot_files_gauge = Gauge(
141
+ "model_cache_b10fs_hot_starts_files", "Number of hot start files"
142
+ )
143
+ b10fs_hot_bytes_gauge = Gauge(
144
+ "model_cache_b10fs_hot_starts_bytes", "Number of hot start bytes"
145
+ )
146
+ b10fs_cold_files_gauge = Gauge(
147
+ "model_cache_b10fs_cold_starts_files", "Number of cold start files"
148
+ )
149
+ b10fs_cold_bytes_gauge = Gauge(
150
+ "model_cache_b10fs_cold_starts_bytes", "Number of cold start bytes"
151
+ )
152
+
153
+ # Transfer success metric
154
+ transfer_success_counter = Counter(
155
+ "model_cache_transfer_success_total",
156
+ "Total successful transfers",
157
+ ["success"],
158
+ )
159
+
160
+ # Set main transfer metrics
161
+ manifest_size_gauge.set(self.total_manifest_size_bytes)
162
+ download_time_histogram.observe(self.total_download_time_secs)
163
+
164
+ if self.total_aggregated_mb_s is not None:
165
+ download_speed_gauge.set(self.total_aggregated_mb_s)
166
+
167
+ # Aggregate file download metrics
168
+ total_files = len(self.file_downloads)
169
+ total_file_bytes = sum(fd.file_size_bytes for fd in self.file_downloads)
170
+
171
+ files_downloaded_counter.inc(total_files)
172
+ total_file_size_counter.inc(total_file_bytes)
173
+
174
+ # Record individual file metrics for distribution
175
+ for fd in self.file_downloads:
176
+ if fd.file_size_bytes > 1 * 1024 * 1024: # Only log files larger than 1MB
177
+ file_download_time_histogram.observe(fd.download_time_secs)
178
+ file_download_speed_histogram.observe(fd.download_speed_mb_s)
179
+
180
+ # B10FS metrics
181
+ b10fs_enabled_gauge.set(1 if self.b10fs_enabled else 0)
182
+ b10fs_decision_gauge.set(1 if self.b10fs_decision_to_use else 0)
183
+
184
+ if self.b10fs_read_speed_mbps is not None:
185
+ b10fs_read_speed_gauge.set(self.b10fs_read_speed_mbps)
186
+
187
+ b10fs_hot_files_gauge.set(self.b10fs_hot_starts_files)
188
+ b10fs_hot_bytes_gauge.set(self.b10fs_hot_starts_bytes)
189
+ b10fs_cold_files_gauge.set(self.b10fs_cold_starts_files)
190
+ b10fs_cold_bytes_gauge.set(self.b10fs_cold_starts_bytes)
191
+
192
+ # Success metric
193
+ transfer_success_counter.labels(success=str(self.success)).inc()
194
+
64
195
 
65
196
  LAZY_DATA_RESOLVER_PATH = [
66
197
  # synced with pub static LAZY_DATA_RESOLVER_PATHS: &[&str]
@@ -185,6 +316,9 @@ class LazyDataResolverV2:
185
316
 
186
317
  """
187
318
  start_lock = time.time()
319
+ publish_stats = (
320
+ log_stats and not self._is_collected_by_user
321
+ ) # only publish results once per resolver
188
322
  self._is_collected_by_user = issue_collect or self._is_collected_by_user
189
323
  with self._lock:
190
324
  result = self._fetch()
@@ -196,13 +330,20 @@ class LazyDataResolverV2:
196
330
  # TODO: instrument the stats, which are written to /tmp/truss_transfer_stats.json
197
331
  # also add fetch time, and blocking time
198
332
  # TrussTransferStats
333
+ fetch_t = time.time() - self._start_time
334
+ start_lock_t = time.time() - start_lock
199
335
  stats = TrussTransferStats.from_json_file(
200
336
  Path("/tmp/truss_transfer_stats.json")
201
337
  )
202
- if stats is None:
338
+ if stats and publish_stats:
203
339
  self.logger.info(f"model_cache: {stats}")
340
+ # Publish stats to Prometheus
341
+ if (
342
+ os.getenv("TRUSS_MODEL_CACHE_PROMETHEUS", "0") == "1"
343
+ ): # Hidden behind a feature flag for core-product to enable.
344
+ stats.publish_to_prometheus()
204
345
  self.logger.info(
205
- f"model_cache: Fetch took {time.time() - self._start_time:.2f} seconds, of which {time.time() - start_lock:.2f} seconds were spent blocking."
346
+ f"model_cache: Fetch took {fetch_t:.2f} seconds, of which {start_lock_t:.2f} seconds were spent blocking."
206
347
  )
207
348
  return result
208
349
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: truss
3
- Version: 0.11.13rc2
3
+ Version: 0.11.13rc4
4
4
  Summary: A seamless bridge from model development to model delivery
5
5
  Project-URL: Repository, https://github.com/basetenlabs/truss
6
6
  Project-URL: Homepage, https://truss.baseten.co
@@ -37,7 +37,7 @@ Requires-Dist: rich<14,>=13.4.2
37
37
  Requires-Dist: ruff>=0.4.8
38
38
  Requires-Dist: tenacity>=8.0.1
39
39
  Requires-Dist: tomlkit>=0.13.2
40
- Requires-Dist: truss-transfer<0.0.40,>=0.0.36
40
+ Requires-Dist: truss-transfer<0.0.40,>=0.0.37
41
41
  Requires-Dist: watchfiles<0.20,>=0.19.0
42
42
  Description-Content-Type: text/markdown
43
43
 
@@ -5,7 +5,7 @@ truss/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  truss/base/constants.py,sha256=sExArdnuGg83z83XMgaQ4b8SS3V_j_bJEpOATDGJzpE,3600
6
6
  truss/base/custom_types.py,sha256=FUSIT2lPOQb6gfg6IzT63YBV8r8L6NIZ0D74Fp3e_jQ,2835
7
7
  truss/base/errors.py,sha256=zDVLEvseTChdPP0oNhBBQCtQUtZJUaof5zeWMIjqz6o,691
8
- truss/base/trt_llm_config.py,sha256=81ZZxRQF3o29HLCX6nlXtPwALejcdns6c4mbrExwASk,32958
8
+ truss/base/trt_llm_config.py,sha256=rEtBVFg2QnNMxnaz11s5Z69dJB1w7Bpt48Wf6jSsVZI,33087
9
9
  truss/base/truss_config.py,sha256=7CtiJIwMHtDU8Wzn8UTJUVVunD0pWFl4QUVycK2aIpY,28055
10
10
  truss/base/truss_spec.py,sha256=jFVF79CXoEEspl2kXBAPyi-rwISReIGTdobGpaIhwJw,5979
11
11
  truss/cli/chains_commands.py,sha256=Kpa5mCg6URAJQE2ZmZfVQFhjBHEitKT28tKiW0H6XAI,17406
@@ -71,7 +71,7 @@ truss/templates/cache.Dockerfile.jinja,sha256=1qZqDo1phrcqi-Vwol-VafYJkADsBbQWU6
71
71
  truss/templates/cache_requirements.txt,sha256=xoPoJ-OVnf1z6oq_RVM3vCr3ionByyqMLj7wGs61nUs,87
72
72
  truss/templates/copy_cache_files.Dockerfile.jinja,sha256=Os5zFdYLZ_AfCRGq4RcpVTObOTwL7zvmwYcvOzd_Zqo,126
73
73
  truss/templates/docker_server_requirements.txt,sha256=PyhOPKAmKW1N2vLvTfLMwsEtuGpoRrbWuNo7tT6v2Mc,18
74
- truss/templates/server.Dockerfile.jinja,sha256=BQpo2Mt_fBrdin1qD8HBKBo2N3Yr2lXrvV_a7J5WSzE,7071
74
+ truss/templates/server.Dockerfile.jinja,sha256=FdxCIXBjBMXVQy0fFxEY2acL-MAZGOD8JKKWAhu3M24,7071
75
75
  truss/templates/control/requirements.txt,sha256=tJGr83WoE0CZm2FrloZ9VScK84q-_FTuVXjDYrexhW0,250
76
76
  truss/templates/control/control/application.py,sha256=5Kam6M-XtfKGaXQz8cc3d0bwDkB80o2MskABWROx1gk,5321
77
77
  truss/templates/control/control/endpoints.py,sha256=KzqsLVNJE6r6TCPW8D5FMCtsfHadTwR15A3z_viGxmM,11782
@@ -107,7 +107,7 @@ truss/templates/server/common/tracing.py,sha256=XSTXNoRtV8vXwveJoX3H32go0JKnLmzn
107
107
  truss/templates/server/common/patches/whisper/patch.py,sha256=kDECQ-wmEpeAZFhUTQP457ofueeMsm7DgNy9tqinhJQ,2383
108
108
  truss/templates/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
109
  truss/templates/shared/dynamic_config_resolver.py,sha256=75s42NFhQI5jL7BqlJH_UkuQS7ptbtFh13f2nh6X5Wo,920
110
- truss/templates/shared/lazy_data_resolver.py,sha256=eOq7Fgr9QkAWpsxyDWkeZic1Z2S4Mt-drB1A7zNEkYE,8368
110
+ truss/templates/shared/lazy_data_resolver.py,sha256=2QS_0Qac5MMJYhzW-DGgs9_Wh7BtIGLfBtnm29I7X8o,13818
111
111
  truss/templates/shared/log_config.py,sha256=l9udyu4VKHZePlfK9LQEd5TOUUodPuehypsXRSUL4Ac,5411
112
112
  truss/templates/shared/secrets_resolver.py,sha256=3prDe3Q06NTmUEe7KCW-W4TD1CzGck9lpDG789209z4,2110
113
113
  truss/templates/shared/serialization.py,sha256=_WC_2PPkRi-MdTwxwjG8LKQptnHi4sANfpOlKWevqWc,3736
@@ -370,8 +370,8 @@ truss_train/deployment.py,sha256=lWWANSuzBWu2M4oK4qD7n-oVR1JKdmw2Pn5BJQHg-Ck,307
370
370
  truss_train/loader.py,sha256=0o66EjBaHc2YY4syxxHVR4ordJWs13lNXnKjKq2wq0U,1630
371
371
  truss_train/public_api.py,sha256=9N_NstiUlmBuLUwH_fNG_1x7OhGCytZLNvqKXBlStrM,1220
372
372
  truss_train/restore_from_checkpoint.py,sha256=8hdPm-WSgkt74HDPjvCjZMBpvA9MwtoYsxVjOoa7BaM,1176
373
- truss-0.11.13rc2.dist-info/METADATA,sha256=GCadCR-s-rOXbH__XXvO5wFJEp-sPcFjpRC1tlcRBwo,6681
374
- truss-0.11.13rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
375
- truss-0.11.13rc2.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
376
- truss-0.11.13rc2.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
377
- truss-0.11.13rc2.dist-info/RECORD,,
373
+ truss-0.11.13rc4.dist-info/METADATA,sha256=xxM0NsWyJwukE2KXOuzkZ--EuVl7gRNiJw2wBFwpPT0,6681
374
+ truss-0.11.13rc4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
375
+ truss-0.11.13rc4.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
376
+ truss-0.11.13rc4.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
377
+ truss-0.11.13rc4.dist-info/RECORD,,