truss 0.11.13rc3__py3-none-any.whl → 0.11.13rc500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of truss might be problematic. Click here for more details.

@@ -68,6 +68,7 @@ class TrussTRTLLMQuantizationType(str, Enum):
68
68
  FP8_KV = "fp8_kv"
69
69
  FP4 = "fp4"
70
70
  FP4_KV = "fp4_kv"
71
+ FP4_MLP_ONLY = "fp4_mlp_only"
71
72
 
72
73
 
73
74
  class TrussTRTLLMPluginConfiguration(PydanticTrTBaseModel):
@@ -713,7 +714,9 @@ def trt_llm_common_validation(config: "TrussConfig"):
713
714
  "accelerators or newer (CUDA_COMPUTE>=89)"
714
715
  )
715
716
  elif trt_llm_config.build.quantization_type in [
716
- TrussTRTLLMQuantizationType.FP4
717
+ TrussTRTLLMQuantizationType.FP4,
718
+ TrussTRTLLMQuantizationType.FP4_KV,
719
+ TrussTRTLLMQuantizationType.FP4_MLP_ONLY,
717
720
  ] and config.resources.accelerator.accelerator in [
718
721
  truss_config.Accelerator.H100,
719
722
  truss_config.Accelerator.L4,
@@ -115,9 +115,11 @@ WORKDIR $APP_HOME
115
115
  {% endblock %}
116
116
 
117
117
 
118
+ {% set packages_dir = "/packages" %}
119
+ RUN mkdir -p {{ packages_dir }}
118
120
  {% block bundled_packages_copy %}
119
121
  {%- if bundled_packages_dir_exists %}
120
- COPY --chown={{ default_owner }} ./{{ config.bundled_packages_dir }} /packages
122
+ COPY --chown={{ default_owner }} ./{{ config.bundled_packages_dir }} {{ packages_dir }}
121
123
  {%- endif %}
122
124
  {% endblock %}
123
125
 
@@ -69,7 +69,7 @@ COPY --chown={{ default_owner }} ./{{ config.data_dir }} ${APP_HOME}/data
69
69
 
70
70
  {%- if model_cache_v2 %}
71
71
  {# v0.0.9, keep synced with server_requirements.txt #}
72
- RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.12rc4/truss-transfer-cli-v0.11.12rc4-linux-x86_64-unknown-linux-musl
72
+ RUN curl -sSL --fail --retry 5 --retry-delay 2 -o /usr/local/bin/truss-transfer-cli https://github.com/basetenlabs/truss/releases/download/v0.11.13rc3/truss-transfer-cli-v0.11.13rc3-linux-x86_64-unknown-linux-musl
73
73
  RUN chmod +x /usr/local/bin/truss-transfer-cli
74
74
  RUN mkdir /static-bptr
75
75
  RUN echo "hash {{model_cache_hash}}"
@@ -104,7 +104,7 @@ COPY --chown={{ default_owner }} ./{{ config.model_module_dir }} ${APP_HOME}/mod
104
104
  {# Macro to change ownership of directories and switch to regular user #}
105
105
  {%- macro chown_and_switch_to_regular_user_if_enabled(additional_chown_dirs=[]) -%}
106
106
  {%- if non_root_user %}
107
- RUN chown -R {{ app_username }}:{{ app_username }} {% for dir in additional_chown_dirs %}{{ dir }} {% endfor %}${HOME} ${APP_HOME}
107
+ RUN chown -R {{ app_username }}:{{ app_username }} ${HOME} ${APP_HOME} {{ packages_dir }} {% for dir in additional_chown_dirs %}{{ dir }} {% endfor %}
108
108
  USER {{ app_username }}
109
109
  {%- endif %} {#- endif non_root_user #}
110
110
  {%- endmacro -%}
@@ -1,13 +1,12 @@
1
1
  import atexit
2
2
  import json
3
3
  import logging
4
- import os
5
4
  import time
6
5
  from dataclasses import dataclass
7
6
  from functools import lru_cache
8
7
  from pathlib import Path
9
8
  from threading import Lock, Thread
10
- from typing import Optional, Union
9
+ from typing import List, Optional, Union
11
10
 
12
11
  try:
13
12
  from prometheus_client import Counter, Gauge, Histogram
@@ -31,7 +30,7 @@ class TrussTransferStats:
31
30
  total_manifest_size_bytes: int
32
31
  total_download_time_secs: float
33
32
  total_aggregated_mb_s: Optional[float]
34
- file_downloads: list[FileDownloadMetric]
33
+ file_downloads: List[FileDownloadMetric]
35
34
  b10fs_read_speed_mbps: Optional[float]
36
35
  b10fs_decision_to_use: bool
37
36
  b10fs_enabled: bool
@@ -70,26 +69,35 @@ class TrussTransferStats:
70
69
  except Exception:
71
70
  return None
72
71
 
73
- def publish_to_prometheus(self):
72
+ def publish_to_prometheus(self, hidden_time: float = 0.0):
74
73
  """Publish transfer stats to Prometheus metrics. Only runs once."""
75
74
  if not PROMETHEUS_AVAILABLE:
76
75
  return
77
76
  global METRICS_REGISTERED
78
- if not METRICS_REGISTERED:
77
+
78
+ if METRICS_REGISTERED:
79
+ logging.info(
80
+ "Model cache metrics already registered, skipping."
81
+ ) # this should never happen
82
+ return
83
+ else:
79
84
  # Ensure metrics are only registered once
80
85
  METRICS_REGISTERED = True
81
86
 
82
- # Define metrics with model_cache label
87
+ # Define metrics with model_cache prefix
83
88
  manifest_size_gauge = Gauge(
84
89
  "model_cache_manifest_size_bytes", "Total manifest size in bytes"
85
90
  )
91
+ # histograms have intentionally wide buckets to capture a variety of download times
86
92
  download_time_histogram = Histogram(
87
93
  "model_cache_download_time_seconds",
88
94
  "Total download time in seconds",
89
- buckets=[
95
+ buckets=[0]
96
+ + [
90
97
  2**i
91
98
  for i in range(-3, 11) # = [0.125, .. 2048] seconds
92
- ],
99
+ ]
100
+ + [float("inf")],
93
101
  )
94
102
  download_speed_gauge = Gauge(
95
103
  "model_cache_download_speed_mbps", "Aggregated download speed in MB/s"
@@ -103,21 +111,29 @@ class TrussTransferStats:
103
111
  "model_cache_file_size_bytes_total",
104
112
  "Total size of downloaded files in bytes",
105
113
  )
114
+ file_download_hidden_time_gauge = Gauge(
115
+ "model_cache_file_download_hidden_time_seconds",
116
+ "Total time hidden from user by starting the import before user code (seconds)",
117
+ )
106
118
  file_download_time_histogram = Histogram(
107
119
  "model_cache_file_download_time_seconds",
108
120
  "File download time distribution",
109
- buckets=[
121
+ buckets=[0]
122
+ + [
110
123
  2**i
111
124
  for i in range(-3, 11) # = [0.125, .. 2048] seconds
112
- ],
125
+ ]
126
+ + [float("inf")],
113
127
  )
114
128
  file_download_speed_histogram = Histogram(
115
129
  "model_cache_file_download_speed_mbps",
116
130
  "File download speed distribution",
117
- buckets=[
131
+ buckets=[0]
132
+ + [
118
133
  2**i
119
134
  for i in range(-1, 12) # = [0.5, .. 4096] MB/s
120
- ],
135
+ ]
136
+ + [float("inf")],
121
137
  )
122
138
 
123
139
  # B10FS specific metrics
@@ -153,6 +169,7 @@ class TrussTransferStats:
153
169
  # Set main transfer metrics
154
170
  manifest_size_gauge.set(self.total_manifest_size_bytes)
155
171
  download_time_histogram.observe(self.total_download_time_secs)
172
+ file_download_hidden_time_gauge.set(hidden_time)
156
173
 
157
174
  if self.total_aggregated_mb_s is not None:
158
175
  download_speed_gauge.set(self.total_aggregated_mb_s)
@@ -331,10 +348,7 @@ class LazyDataResolverV2:
331
348
  if stats and publish_stats:
332
349
  self.logger.info(f"model_cache: {stats}")
333
350
  # Publish stats to Prometheus
334
- if (
335
- os.getenv("TRUSS_MODEL_CACHE_PROMETHEUS", "0") == "1"
336
- ): # Hide behind feature flag for core-product to enabled.
337
- stats.publish_to_prometheus()
351
+ stats.publish_to_prometheus()
338
352
  self.logger.info(
339
353
  f"model_cache: Fetch took {fetch_t:.2f} seconds, of which {start_lock_t:.2f} seconds were spent blocking."
340
354
  )
@@ -36,6 +36,7 @@ COPY --chown= ./data ${APP_HOME}/data
36
36
  COPY --chown= ./server ${APP_HOME}
37
37
  COPY --chown= ./config.yaml ${APP_HOME}/config.yaml
38
38
  COPY --chown= ./model ${APP_HOME}/model
39
+ RUN mkdir -p /packages
39
40
  COPY --chown= ./packages /packages
40
41
  ENV INFERENCE_SERVER_PORT="8080"
41
42
  ENV SERVER_START_CMD="/usr/local/bin/python3 /app/main.py"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: truss
3
- Version: 0.11.13rc3
3
+ Version: 0.11.13rc500
4
4
  Summary: A seamless bridge from model development to model delivery
5
5
  Project-URL: Repository, https://github.com/basetenlabs/truss
6
6
  Project-URL: Homepage, https://truss.baseten.co
@@ -37,7 +37,7 @@ Requires-Dist: rich<14,>=13.4.2
37
37
  Requires-Dist: ruff>=0.4.8
38
38
  Requires-Dist: tenacity>=8.0.1
39
39
  Requires-Dist: tomlkit>=0.13.2
40
- Requires-Dist: truss-transfer<0.0.40,>=0.0.36
40
+ Requires-Dist: truss-transfer<0.0.40,>=0.0.37
41
41
  Requires-Dist: watchfiles<0.20,>=0.19.0
42
42
  Description-Content-Type: text/markdown
43
43
 
@@ -5,7 +5,7 @@ truss/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  truss/base/constants.py,sha256=sExArdnuGg83z83XMgaQ4b8SS3V_j_bJEpOATDGJzpE,3600
6
6
  truss/base/custom_types.py,sha256=FUSIT2lPOQb6gfg6IzT63YBV8r8L6NIZ0D74Fp3e_jQ,2835
7
7
  truss/base/errors.py,sha256=zDVLEvseTChdPP0oNhBBQCtQUtZJUaof5zeWMIjqz6o,691
8
- truss/base/trt_llm_config.py,sha256=81ZZxRQF3o29HLCX6nlXtPwALejcdns6c4mbrExwASk,32958
8
+ truss/base/trt_llm_config.py,sha256=rEtBVFg2QnNMxnaz11s5Z69dJB1w7Bpt48Wf6jSsVZI,33087
9
9
  truss/base/truss_config.py,sha256=7CtiJIwMHtDU8Wzn8UTJUVVunD0pWFl4QUVycK2aIpY,28055
10
10
  truss/base/truss_spec.py,sha256=jFVF79CXoEEspl2kXBAPyi-rwISReIGTdobGpaIhwJw,5979
11
11
  truss/cli/chains_commands.py,sha256=Kpa5mCg6URAJQE2ZmZfVQFhjBHEitKT28tKiW0H6XAI,17406
@@ -66,12 +66,12 @@ truss/remote/baseten/utils/time.py,sha256=Ry9GMjYnbIGYVIGwtmv4V8ljWjvdcaCf5NOQzl
66
66
  truss/remote/baseten/utils/transfer.py,sha256=d3VptuQb6M1nyS6kz0BAfeOYDLkMKUjatJXpY-mp-As,1548
67
67
  truss/templates/README.md.jinja,sha256=N7CJdyldZuJamj5jLh47le0hFBdu9irVsTBqoxhPNPQ,2476
68
68
  truss/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- truss/templates/base.Dockerfile.jinja,sha256=DgvNqmkt5QRrqy7a9hw6HffmVTUrdGl06oVZ4eeha5Y,5253
69
+ truss/templates/base.Dockerfile.jinja,sha256=tdMmK5TeiQuYbz4gqbACM3R-l-mazqL9tAZtJ4sxC4g,5331
70
70
  truss/templates/cache.Dockerfile.jinja,sha256=1qZqDo1phrcqi-Vwol-VafYJkADsBbQWU6huQ-_1x00,1146
71
71
  truss/templates/cache_requirements.txt,sha256=xoPoJ-OVnf1z6oq_RVM3vCr3ionByyqMLj7wGs61nUs,87
72
72
  truss/templates/copy_cache_files.Dockerfile.jinja,sha256=Os5zFdYLZ_AfCRGq4RcpVTObOTwL7zvmwYcvOzd_Zqo,126
73
73
  truss/templates/docker_server_requirements.txt,sha256=PyhOPKAmKW1N2vLvTfLMwsEtuGpoRrbWuNo7tT6v2Mc,18
74
- truss/templates/server.Dockerfile.jinja,sha256=BQpo2Mt_fBrdin1qD8HBKBo2N3Yr2lXrvV_a7J5WSzE,7071
74
+ truss/templates/server.Dockerfile.jinja,sha256=Mu5_ZxuAknwaEOsF0l-XssA9pDg3pD3eLl6JBzNJ4rg,7091
75
75
  truss/templates/control/requirements.txt,sha256=tJGr83WoE0CZm2FrloZ9VScK84q-_FTuVXjDYrexhW0,250
76
76
  truss/templates/control/control/application.py,sha256=5Kam6M-XtfKGaXQz8cc3d0bwDkB80o2MskABWROx1gk,5321
77
77
  truss/templates/control/control/endpoints.py,sha256=KzqsLVNJE6r6TCPW8D5FMCtsfHadTwR15A3z_viGxmM,11782
@@ -107,7 +107,7 @@ truss/templates/server/common/tracing.py,sha256=XSTXNoRtV8vXwveJoX3H32go0JKnLmzn
107
107
  truss/templates/server/common/patches/whisper/patch.py,sha256=kDECQ-wmEpeAZFhUTQP457ofueeMsm7DgNy9tqinhJQ,2383
108
108
  truss/templates/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
109
109
  truss/templates/shared/dynamic_config_resolver.py,sha256=75s42NFhQI5jL7BqlJH_UkuQS7ptbtFh13f2nh6X5Wo,920
110
- truss/templates/shared/lazy_data_resolver.py,sha256=czfggu9DZ_qDnE2MxOdE2R8aZyJe2G1Cd-PL0AUGx-I,13561
110
+ truss/templates/shared/lazy_data_resolver.py,sha256=HxrZz6X30j2LbsExYSqhuOGoYEffGpd7FPBtJexI7TQ,14064
111
111
  truss/templates/shared/log_config.py,sha256=l9udyu4VKHZePlfK9LQEd5TOUUodPuehypsXRSUL4Ac,5411
112
112
  truss/templates/shared/secrets_resolver.py,sha256=3prDe3Q06NTmUEe7KCW-W4TD1CzGck9lpDG789209z4,2110
113
113
  truss/templates/shared/serialization.py,sha256=_WC_2PPkRi-MdTwxwjG8LKQptnHi4sANfpOlKWevqWc,3736
@@ -185,7 +185,7 @@ truss/tests/test_data/pima-indians-diabetes.csv,sha256=BvW3ws17ymhv2k-S6rX2Hn_2Q
185
185
  truss/tests/test_data/readme_int_example.md,sha256=fuHvpLtdkJy1f4NAR_djotVBdzusHYNXc-Fwh588XAE,1586
186
186
  truss/tests/test_data/readme_no_example.md,sha256=T2CzFMRvICXeX3_5XbFoqhHchcHGot-xM7izx34B3aQ,1607
187
187
  truss/tests/test_data/readme_str_example.md,sha256=fP4pvMqgLdIapaOf_BgRiV0H7pw4so0RNxrlq5lbROE,1726
188
- truss/tests/test_data/server.Dockerfile,sha256=3rWiU3RxBh0PIh8pS23FnP8UA-ErmzxTlgf_J22mMQ8,2024
188
+ truss/tests/test_data/server.Dockerfile,sha256=KoQN5qBpiXL93qbjbG76kfRVapwfV5CNJzQcpjPocz0,2047
189
189
  truss/tests/test_data/annotated_types_truss/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
190
190
  truss/tests/test_data/annotated_types_truss/config.yaml,sha256=B-ZyyjLLqtxGfXj2tkH68Hy7NOMB_coYvoWyWom61g0,147
191
191
  truss/tests/test_data/annotated_types_truss/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -370,8 +370,8 @@ truss_train/deployment.py,sha256=lWWANSuzBWu2M4oK4qD7n-oVR1JKdmw2Pn5BJQHg-Ck,307
370
370
  truss_train/loader.py,sha256=0o66EjBaHc2YY4syxxHVR4ordJWs13lNXnKjKq2wq0U,1630
371
371
  truss_train/public_api.py,sha256=9N_NstiUlmBuLUwH_fNG_1x7OhGCytZLNvqKXBlStrM,1220
372
372
  truss_train/restore_from_checkpoint.py,sha256=8hdPm-WSgkt74HDPjvCjZMBpvA9MwtoYsxVjOoa7BaM,1176
373
- truss-0.11.13rc3.dist-info/METADATA,sha256=cPpD-bEoveXxM_dTFQAJPui0DFO940QYrTTWlO7aivc,6681
374
- truss-0.11.13rc3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
375
- truss-0.11.13rc3.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
376
- truss-0.11.13rc3.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
377
- truss-0.11.13rc3.dist-info/RECORD,,
373
+ truss-0.11.13rc500.dist-info/METADATA,sha256=fO90Qd0TBo1ZirChnji7aZh2vHpqYmuUGsCYMZGe4Oo,6683
374
+ truss-0.11.13rc500.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
375
+ truss-0.11.13rc500.dist-info/entry_points.txt,sha256=-MwKfHHQHQ6j0HqIgvxrz3CehCmczDLTD-OsRHnjjuU,130
376
+ truss-0.11.13rc500.dist-info/licenses/LICENSE,sha256=FTqGzu85i-uw1Gi8E_o0oD60bH9yQ_XIGtZbA1QUYiw,1064
377
+ truss-0.11.13rc500.dist-info/RECORD,,