spanforge 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +695 -0
- spanforge/_batch_exporter.py +322 -0
- spanforge/_cli.py +3081 -0
- spanforge/_hooks.py +340 -0
- spanforge/_server.py +953 -0
- spanforge/_span.py +1015 -0
- spanforge/_store.py +287 -0
- spanforge/_stream.py +654 -0
- spanforge/_trace.py +334 -0
- spanforge/_tracer.py +253 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +464 -0
- spanforge/auto.py +181 -0
- spanforge/baseline.py +336 -0
- spanforge/config.py +460 -0
- spanforge/consent.py +227 -0
- spanforge/consumer.py +379 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1060 -0
- spanforge/cost.py +597 -0
- spanforge/debug.py +514 -0
- spanforge/drift.py +488 -0
- spanforge/egress.py +63 -0
- spanforge/eval.py +575 -0
- spanforge/event.py +1052 -0
- spanforge/exceptions.py +246 -0
- spanforge/explain.py +181 -0
- spanforge/export/__init__.py +50 -0
- spanforge/export/append_only.py +342 -0
- spanforge/export/cloud.py +349 -0
- spanforge/export/datadog.py +495 -0
- spanforge/export/grafana.py +331 -0
- spanforge/export/jsonl.py +198 -0
- spanforge/export/otel_bridge.py +291 -0
- spanforge/export/otlp.py +817 -0
- spanforge/export/otlp_bridge.py +231 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/webhook.py +302 -0
- spanforge/exporters/__init__.py +29 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/hitl.py +297 -0
- spanforge/inspect.py +429 -0
- spanforge/integrations/__init__.py +39 -0
- spanforge/integrations/_pricing.py +277 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/bedrock.py +306 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +349 -0
- spanforge/integrations/groq.py +444 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/llamaindex.py +370 -0
- spanforge/integrations/ollama.py +286 -0
- spanforge/integrations/openai.py +370 -0
- spanforge/integrations/together.py +485 -0
- spanforge/metrics.py +393 -0
- spanforge/metrics_export.py +342 -0
- spanforge/migrate.py +278 -0
- spanforge/model_registry.py +282 -0
- spanforge/models.py +407 -0
- spanforge/namespaces/__init__.py +215 -0
- spanforge/namespaces/audit.py +253 -0
- spanforge/namespaces/cache.py +209 -0
- spanforge/namespaces/chain.py +74 -0
- spanforge/namespaces/confidence.py +69 -0
- spanforge/namespaces/consent.py +85 -0
- spanforge/namespaces/cost.py +175 -0
- spanforge/namespaces/decision.py +135 -0
- spanforge/namespaces/diff.py +146 -0
- spanforge/namespaces/drift.py +79 -0
- spanforge/namespaces/eval_.py +232 -0
- spanforge/namespaces/fence.py +180 -0
- spanforge/namespaces/guard.py +104 -0
- spanforge/namespaces/hitl.py +92 -0
- spanforge/namespaces/latency.py +69 -0
- spanforge/namespaces/prompt.py +185 -0
- spanforge/namespaces/redact.py +172 -0
- spanforge/namespaces/template.py +197 -0
- spanforge/namespaces/tool_call.py +76 -0
- spanforge/namespaces/trace.py +1006 -0
- spanforge/normalizer.py +183 -0
- spanforge/presidio_backend.py +149 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +415 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +780 -0
- spanforge/sampling.py +500 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/signing.py +1152 -0
- spanforge/stream.py +559 -0
- spanforge/testing.py +376 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +304 -0
- spanforge/validate.py +383 -0
- spanforge-2.0.0.dist-info/METADATA +1777 -0
- spanforge-2.0.0.dist-info/RECORD +101 -0
- spanforge-2.0.0.dist-info/WHEEL +4 -0
- spanforge-2.0.0.dist-info/entry_points.txt +5 -0
- spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/sampling.py
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
"""spanforge.sampling — Sampling strategies for span/event emission.
|
|
2
|
+
|
|
3
|
+
Samplers decide **at observation time** whether a span or event should be
|
|
4
|
+
exported. They are composable: a :class:`ParentBasedSampler` delegates to a
|
|
5
|
+
root sampler for new traces and honours the parent's decision for child spans.
|
|
6
|
+
|
|
7
|
+
Configure via :func:`spanforge.configure`::
|
|
8
|
+
|
|
9
|
+
from spanforge import configure
|
|
10
|
+
from spanforge.sampling import RatioSampler, ParentBasedSampler
|
|
11
|
+
|
|
12
|
+
configure(sampler=ParentBasedSampler(root_sampler=RatioSampler(0.1)))
|
|
13
|
+
|
|
14
|
+
Built-in samplers
|
|
15
|
+
-----------------
|
|
16
|
+
|
|
17
|
+
========================================= =====================================
|
|
18
|
+
Class Description
|
|
19
|
+
========================================= =====================================
|
|
20
|
+
:class:`AlwaysOnSampler` Export every span (default).
|
|
21
|
+
:class:`AlwaysOffSampler` Drop every span.
|
|
22
|
+
:class:`RatioSampler` Probabilistic head-based sampling.
|
|
23
|
+
:class:`ParentBasedSampler` Honour parent trace flags; use
|
|
24
|
+
``root_sampler`` for new traces.
|
|
25
|
+
:class:`RuleBasedSampler` Per-operation / per-model rules.
|
|
26
|
+
:class:`TailBasedSampler` Buffer spans, decide after span ends
|
|
27
|
+
(e.g. always keep errors).
|
|
28
|
+
========================================= =====================================
|
|
29
|
+
|
|
30
|
+
Custom samplers
|
|
31
|
+
---------------
|
|
32
|
+
Implement the :class:`Sampler` protocol::
|
|
33
|
+
|
|
34
|
+
class MySampler:
|
|
35
|
+
def should_sample(self, span_or_event, cfg) -> bool:
|
|
36
|
+
return True # or False
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import contextlib
|
|
42
|
+
import hashlib
|
|
43
|
+
import logging
|
|
44
|
+
import random
|
|
45
|
+
import threading
|
|
46
|
+
from typing import TYPE_CHECKING, Any, Generator, Protocol, runtime_checkable
|
|
47
|
+
|
|
48
|
+
if TYPE_CHECKING:
|
|
49
|
+
pass
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"AlwaysOffSampler",
|
|
53
|
+
"AlwaysOnSampler",
|
|
54
|
+
"ComplianceSampler",
|
|
55
|
+
"ParentBasedSampler",
|
|
56
|
+
"RatioSampler",
|
|
57
|
+
"RuleBasedSampler",
|
|
58
|
+
"Sampler",
|
|
59
|
+
"TailBasedSampler",
|
|
60
|
+
"bypass_sampling",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
_log = logging.getLogger("spanforge.sampling")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
# Protocol
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@runtime_checkable
class Sampler(Protocol):
    """Structural interface shared by every sampler in this module.

    Any object exposing a ``should_sample(span_or_event, cfg)`` method
    satisfies the protocol; inheriting from this class is not required.
    The protocol is runtime-checkable, so ``isinstance(obj, Sampler)``
    works, but it only verifies that the method exists, not its signature.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Decide whether *span_or_event* is exported.

        Args:
            span_or_event: The :class:`~spanforge._span.Span` or
                :class:`~spanforge.event.Event` under consideration.
            cfg: The active :class:`~spanforge.config.SpanForgeConfig`.

        Returns:
            ``True`` to export the span/event, ``False`` to drop it.
        """
        ...
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
# Always-on / Always-off
|
|
91
|
+
# ---------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class AlwaysOnSampler:
    """Sampler that keeps everything.

    Used as the SDK default whenever no sampler has been configured.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return ``True`` unconditionally; both arguments are ignored."""
        return True

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return "AlwaysOnSampler()"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class AlwaysOffSampler:
    """Sampler that discards everything.

    Handy for silencing all span/event export, e.g. inside test code.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return ``False`` unconditionally; both arguments are ignored."""
        return False

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return "AlwaysOffSampler()"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
# Ratio / probabilistic
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class RatioSampler:
    """Head-based probabilistic sampler keyed on the trace ID.

    The decision is a pure function of the ``trace_id``: the ID is hashed
    with SHA-256, the first eight bytes are read as a big-endian unsigned
    integer, and the span is kept when that integer falls below
    ``rate * 2**64``.  Every span carrying the same ``trace_id`` therefore
    receives the same verdict.

    Args:
        rate: Fraction of traces to export.  ``1.0`` keeps all, ``0.0``
            keeps none, ``0.1`` keeps roughly one in ten.

    Raises:
        ValueError: If *rate* lies outside ``[0.0, 1.0]``.
    """

    def __init__(self, rate: float) -> None:
        within_bounds = 0.0 <= rate <= 1.0
        if not within_bounds:
            raise ValueError(f"RatioSampler.rate must be in [0.0, 1.0], got {rate!r}")
        self._rate = rate
        # Cut-off inside the 64-bit hash space; hashes below it are kept.
        self._threshold = int(rate * (1 << 64))

    @property
    def rate(self) -> float:
        """The configured sampling fraction."""
        return self._rate

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return whether the trace that *span_or_event* belongs to is kept.

        Objects without a usable trace ID are always exported.
        """
        rate = self._rate
        # The extremes are answered without hashing.
        if rate >= 1.0:
            return True
        if rate <= 0.0:
            return False

        trace_id = _get_trace_id(span_or_event)
        if trace_id is None:
            # No trace context to hash on: export rather than drop.
            return True

        # Hashing gives a uniform spread whatever the ID format is
        # (UUID, ULID, 32-hex string, ...).
        hashed = hashlib.sha256(trace_id.encode()).digest()
        bucket = int.from_bytes(hashed[:8], "big")
        return bucket < self._threshold

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return f"RatioSampler(rate={self._rate!r})"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
# Parent-based
|
|
165
|
+
# ---------------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class ParentBasedSampler:
    """Follow the parent's sampling decision; ask ``root_sampler`` for roots.

    Modelled on the OpenTelemetry ``ParentBased`` sampler so that a whole
    trace follows one consistent decision.  The checks run in this order:

    1. A ``traceparent`` attribute (remote parent): the W3C trace-flags
       field selects between ``remote_parent_sampled`` and
       ``remote_parent_not_sampled``.
    2. A ``parent_span_id`` attribute (local parent): the span is kept.
    3. Otherwise the span is a root and ``root_sampler`` decides.

    Args:
        root_sampler: The sampler to use for root spans (no parent).
            Defaults to :class:`AlwaysOnSampler`.
        remote_parent_sampled: Decision for remote-parent spans where the
            parent *was* sampled.  Defaults to ``True`` (always export).
        remote_parent_not_sampled: Decision for remote-parent spans where
            the parent was *not* sampled.  Defaults to ``False`` (always
            drop).
    """

    def __init__(
        self,
        root_sampler: Any | None = None,
        *,
        remote_parent_sampled: bool = True,
        remote_parent_not_sampled: bool = False,
    ) -> None:
        self._root = root_sampler if root_sampler is not None else AlwaysOnSampler()
        self._remote_sampled = remote_parent_sampled
        self._remote_not_sampled = remote_parent_not_sampled

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return the sampling decision for *span_or_event*.

        A ``traceparent`` that cannot be parsed, including one that is not
        a string at all, is treated as "parent not sampled" instead of
        raising out of the sampler.
        """
        # Remote parent: an incoming W3C traceparent header value.
        traceparent = getattr(span_or_event, "traceparent", None)
        if traceparent is not None:
            # Format: 00-{trace_id}-{parent_id}-{flags}; the flags field is
            # the last one and bit 0 is the "sampled" flag.
            try:
                flags = int(traceparent.rsplit("-", 1)[-1], 16)
            except (AttributeError, TypeError, ValueError, IndexError):
                # Conservative: corrupt or non-string value -> don't sample.
                sampled_flag = False
            else:
                sampled_flag = bool(flags & 0x01)
            return self._remote_sampled if sampled_flag else self._remote_not_sampled

        # Local parent: honour the parent decision.  The span is kept on
        # the premise that the parent was itself sampled to get this far.
        if getattr(span_or_event, "parent_span_id", None) is not None:
            return True

        # Root span: delegate to the root sampler.
        return self._root.should_sample(span_or_event, cfg)

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return (
            f"ParentBasedSampler(root_sampler={self._root!r}, "
            f"remote_parent_sampled={self._remote_sampled!r}, "
            f"remote_parent_not_sampled={self._remote_not_sampled!r})"
        )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# ---------------------------------------------------------------------------
|
|
226
|
+
# Rule-based
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class RuleBasedSampler:
    """Sample based on user-defined attribute rules.

    Rules are evaluated in order and the first matching rule wins.  Each
    rule is a ``dict`` with the keys:

    * ``match``: ``dict[str, Any]`` of attribute path -> expected value.
      The rule matches when *every* listed attribute equals its expected
      value.  A path may be dotted (``"model.name"``,
      ``"model.provider.region"``); each segment is resolved with
      ``getattr`` and a missing attribute resolves to ``None``.  A missing,
      ``None`` or empty ``match`` matches every span.
    * ``sample``: ``bool`` decision applied when the rule matches.  When
      omitted, ``default`` is used.

    Args:
        rules: Ordered list of rule dicts.  The list is copied.
        default: Sampling decision when no rule matches.  Defaults to
            ``True`` (export everything by default).

    Example::

        sampler = RuleBasedSampler(
            rules=[
                {"match": {"span_name": "health_check"}, "sample": False},
                {"match": {"operation": "chat", "model.name": "gpt-4o"}, "sample": True},
            ],
            default=True,
        )
    """

    def __init__(
        self,
        rules: list[dict[str, Any]] | None = None,
        *,
        default: bool = True,
    ) -> None:
        self._rules: list[dict[str, Any]] = list(rules or [])
        self._default = default

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return the decision of the first matching rule, else ``default``."""
        for rule in self._rules:
            # ``"match": None`` is treated like a missing key instead of
            # failing on ``None.items()``.
            criteria = rule.get("match") or {}
            if self._matches(span_or_event, criteria):
                return bool(rule.get("sample", self._default))
        return self._default

    @staticmethod
    def _matches(obj: Any, match: dict[str, Any]) -> bool:
        """Return ``True`` when every attribute path in *match* equals its target."""
        for key, expected in match.items():
            # Walk dotted attribute paths of any depth, e.g. "model.name".
            val = obj
            for part in key.split("."):
                val = getattr(val, part, None)
                if val is None:
                    # Nothing further to descend into; compare ``None``.
                    break
            if val != expected:
                return False
        return True

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return f"RuleBasedSampler(rules={self._rules!r}, default={self._default!r})"
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
# Tail-based
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class TailBasedSampler:
    """Decide whether to export a span from its *final* state.

    Tail sampling inspects the finished span (error status, latency)
    before making an export decision.  This enables use cases like:

    * Always export error spans.
    * Always export spans with ``duration_ms >= threshold``.
    * Sample only the remaining, normal spans at a given rate.

    Because the interesting attributes are only known once a span has
    ended, this sampler is designed to work alongside
    :class:`~spanforge.processor.SpanProcessor`, with
    :meth:`should_sample` called just before export.

    Args:
        always_sample_errors: If ``True``, spans with ``status == "error"``
            are always exported regardless of other rules.
            (Default: ``True``)
        always_sample_slow_ms: If set, spans with ``duration_ms >=`` this
            value are always exported.  (Default: ``None``)
        fallback_sampler: Sampler used for spans that don't match the above
            conditions.  Defaults to :class:`AlwaysOnSampler`.
        buffer_size: Capacity hint for pending span decisions.
            (Default: 1 000)  It is accepted so that the documented
            constructor call works and is stored on the instance, but
            this implementation does not consult it.

    Raises:
        ValueError: If *buffer_size* is smaller than 1.

    Note:
        The decision is made at the moment :meth:`should_sample` is called;
        this class does not buffer spans itself.
    """

    def __init__(
        self,
        *,
        always_sample_errors: bool = True,
        always_sample_slow_ms: float | None = None,
        fallback_sampler: Any | None = None,
        buffer_size: int = 1000,
    ) -> None:
        if buffer_size < 1:
            raise ValueError(f"TailBasedSampler.buffer_size must be >= 1, got {buffer_size!r}")
        self._always_errors = always_sample_errors
        self._slow_ms = always_sample_slow_ms
        self._fallback = fallback_sampler if fallback_sampler is not None else AlwaysOnSampler()
        self._buffer_size = buffer_size
        self._lock = threading.Lock()

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return ``True`` for error or slow spans, else ask the fallback."""
        # Error spans: always sample.  Only the literal string "error"
        # counts; any other status type falls through.
        if self._always_errors:
            status = getattr(span_or_event, "status", None)
            if isinstance(status, str) and status == "error":
                return True

        # Slow spans: always sample once the threshold is reached.
        if self._slow_ms is not None:
            duration = getattr(span_or_event, "duration_ms", None)
            if isinstance(duration, (int, float)) and duration >= self._slow_ms:
                return True

        # Everything else is left to the fallback sampler.
        return self._fallback.should_sample(span_or_event, cfg)

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return (
            f"TailBasedSampler("
            f"always_sample_errors={self._always_errors!r}, "
            f"always_sample_slow_ms={self._slow_ms!r}, "
            f"fallback_sampler={self._fallback!r})"
        )
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
# ---------------------------------------------------------------------------
|
|
368
|
+
# Helpers
|
|
369
|
+
# ---------------------------------------------------------------------------
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _get_trace_id(obj: Any) -> str | None:
    """Return the trace ID carried by *obj*, or ``None`` when there is none.

    Two locations are tried in order: the ``trace_id`` attribute (as on a
    Span), then ``payload["trace_id"]`` when ``payload`` is a ``dict`` (as
    on an Event).  Only non-empty strings count as a trace ID.
    """
    direct = getattr(obj, "trace_id", None)
    if isinstance(direct, str) and direct:
        return direct

    payload = getattr(obj, "payload", None)
    if not isinstance(payload, dict):
        return None

    nested = payload.get("trace_id")
    return nested if isinstance(nested, str) and nested else None
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _get_event_type(obj: Any) -> str | None:
    """Return ``str(obj.event_type)``, or ``None`` when it is absent or ``None``.

    The value is passed through ``str`` so that non-string event types
    yield their string form.
    """
    raw = getattr(obj, "event_type", None)
    return None if raw is None else str(raw)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
# Compliance-aware sampler (SF-16)
|
|
397
|
+
# ---------------------------------------------------------------------------
|
|
398
|
+
|
|
399
|
+
# Event-type prefixes that ComplianceSampler records at 100% by default.
_DEFAULT_ALWAYS_RECORD: frozenset[str] = frozenset(
    (
        "llm.redact.",
        "llm.audit.",
        "llm.guard.",
        "llm.cost.",
    )
)


class ComplianceSampler:
    """Sampler that never drops compliance-critical event types.

    An event whose ``event_type`` starts with any prefix in *always_record*
    is always exported.  Everything else is sampled at *base_rate* by
    hashing the trace ID, so all events of one trace share the same
    decision.  Events without a trace ID fall back to a random draw at the
    same rate.  While :func:`bypass_sampling` is active on the current
    thread, every call returns ``True``.

    Args:
        base_rate: Fraction of non-compliance events to export (0.0-1.0).
        always_record: Frozenset of event-type prefixes that bypass
            sampling.  Defaults to ``llm.redact.``, ``llm.audit.``,
            ``llm.guard.``, ``llm.cost.``.

    Raises:
        ValueError: If *base_rate* lies outside ``[0.0, 1.0]``.

    Example::

        sampler = ComplianceSampler(base_rate=0.1)
        # llm.audit.* events  -> always recorded
        # llm.trace.* events  -> ~10% recorded
    """

    def __init__(
        self,
        base_rate: float = 0.1,
        always_record: frozenset[str] | None = None,
    ) -> None:
        within_bounds = 0.0 <= base_rate <= 1.0
        if not within_bounds:
            raise ValueError(f"ComplianceSampler.base_rate must be in [0.0, 1.0], got {base_rate!r}")
        self._base_rate = base_rate
        if always_record is None:
            always_record = _DEFAULT_ALWAYS_RECORD
        self._always_record = always_record
        # Cut-off inside the 64-bit hash space; hashes below it are kept.
        self._threshold = int(base_rate * (1 << 64))

    @property
    def base_rate(self) -> float:
        """The sampling fraction applied to non-critical events."""
        return self._base_rate

    @property
    def always_record(self) -> frozenset[str]:
        """The event-type prefixes that are always exported."""
        return self._always_record

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return whether *span_or_event* is exported."""
        # An active bypass on this thread overrides everything else.
        if getattr(_bypass_active, "value", False):
            return True

        # Compliance-critical events are recorded unconditionally.
        event_type = _get_event_type(span_or_event)
        if event_type is not None and any(
            event_type.startswith(prefix) for prefix in self._always_record
        ):
            return True

        rate = self._base_rate
        if rate >= 1.0:
            return True
        if rate <= 0.0:
            return False

        trace_id = _get_trace_id(span_or_event)
        if trace_id is None:
            # Nothing to hash on: fall back to a random draw.
            return random.random() < rate  # noqa: S311

        # Deterministic per-trace decision from the first 8 digest bytes.
        hashed = hashlib.sha256(trace_id.encode()).digest()
        bucket = int.from_bytes(hashed[:8], "big")
        return bucket < self._threshold

    def __repr__(self) -> str:
        """Return a representation showing the base rate."""
        return f"ComplianceSampler(base_rate={self._base_rate!r})"
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
# ---------------------------------------------------------------------------
|
|
479
|
+
# Sampling bypass context manager (SF-16-D)
|
|
480
|
+
# ---------------------------------------------------------------------------
|
|
481
|
+
|
|
482
|
+
# Per-thread bypass flag; an unset ``value`` attribute means "not bypassing".
_bypass_active: threading.local = threading.local()


@contextlib.contextmanager
def bypass_sampling() -> Generator[None, None, None]:
    """Context manager that switches the sampling bypass on for this thread.

    While the block runs, the thread-local bypass flag is ``True``; on exit
    (normal or via an exception) the previous value is restored, so nested
    uses behave correctly.  Within this module the flag is consulted by
    :class:`ComplianceSampler`, which then returns ``True`` for every call.

    Intended for compliance report generation, so that reports reflect the
    complete audit trail rather than the sampled subset::

        with bypass_sampling():
            package = engine.generate_evidence_package(...)
    """
    previous = getattr(_bypass_active, "value", False)
    _bypass_active.value = True
    try:
        yield
    finally:
        # Restore rather than reset, so an enclosing bypass stays active.
        _bypass_active.value = previous
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://llm-toolkit-schema.dev/schemas/v1.0/schema.json",
|
|
4
|
+
"title": "llm-toolkit-schema Event Envelope",
|
|
5
|
+
"description": "Canonical JSON Schema for the llm-toolkit-schema v1.0 Event envelope. Every event emitted by a tool in the LLM Developer Toolkit must validate against this schema.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"schema_version",
|
|
9
|
+
"event_id",
|
|
10
|
+
"event_type",
|
|
11
|
+
"timestamp",
|
|
12
|
+
"source",
|
|
13
|
+
"payload"
|
|
14
|
+
],
|
|
15
|
+
"additionalProperties": false,
|
|
16
|
+
"properties": {
|
|
17
|
+
"schema_version": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "Schema version. Accepted values: '1.0' and '2.0' (RFC-0001 §15.5).",
|
|
20
|
+
"enum": ["1.0", "2.0"],
|
|
21
|
+
"examples": ["1.0", "2.0"]
|
|
22
|
+
},
|
|
23
|
+
"event_id": {
|
|
24
|
+
"type": "string",
|
|
25
|
+
"description": "Universally unique ULID identifier for this event. 26-character Crockford Base32 string; first character MUST be in [0-7].",
|
|
26
|
+
"pattern": "^[0-7][0-9A-HJKMNP-TV-Z]{25}$",
|
|
27
|
+
"minLength": 26,
|
|
28
|
+
"maxLength": 26,
|
|
29
|
+
"examples": ["01HZ8G3EPRP1YF2QV70NMBE6J4"]
|
|
30
|
+
},
|
|
31
|
+
"event_type": {
|
|
32
|
+
"type": "string",
|
|
33
|
+
"description": "Dot-separated namespaced event type, e.g. 'llm.trace.span.completed'.",
|
|
34
|
+
"oneOf": [
|
|
35
|
+
{
|
|
36
|
+
"enum": [
|
|
37
|
+
"llm.trace.span.started",
|
|
38
|
+
"llm.trace.span.completed",
|
|
39
|
+
"llm.trace.span.failed",
|
|
40
|
+
"llm.trace.agent.step",
|
|
41
|
+
"llm.trace.agent.completed",
|
|
42
|
+
"llm.trace.reasoning.step",
|
|
43
|
+
"llm.cost.token.recorded",
|
|
44
|
+
"llm.cost.session.recorded",
|
|
45
|
+
"llm.cost.attributed",
|
|
46
|
+
"llm.cache.hit",
|
|
47
|
+
"llm.cache.miss",
|
|
48
|
+
"llm.cache.evicted",
|
|
49
|
+
"llm.cache.written",
|
|
50
|
+
"llm.eval.score.recorded",
|
|
51
|
+
"llm.eval.regression.detected",
|
|
52
|
+
"llm.eval.scenario.started",
|
|
53
|
+
"llm.eval.scenario.completed",
|
|
54
|
+
"llm.guard.input.blocked",
|
|
55
|
+
"llm.guard.input.passed",
|
|
56
|
+
"llm.guard.output.blocked",
|
|
57
|
+
"llm.guard.output.passed",
|
|
58
|
+
"llm.fence.validated",
|
|
59
|
+
"llm.fence.retry.triggered",
|
|
60
|
+
"llm.fence.max_retries.exceeded",
|
|
61
|
+
"llm.prompt.rendered",
|
|
62
|
+
"llm.prompt.template.loaded",
|
|
63
|
+
"llm.prompt.version.changed",
|
|
64
|
+
"llm.redact.pii.detected",
|
|
65
|
+
"llm.redact.phi.detected",
|
|
66
|
+
"llm.redact.applied",
|
|
67
|
+
"llm.diff.computed",
|
|
68
|
+
"llm.diff.regression.flagged",
|
|
69
|
+
"llm.template.registered",
|
|
70
|
+
"llm.template.variable.bound",
|
|
71
|
+
"llm.template.validation.failed",
|
|
72
|
+
"llm.audit.key.rotated"
|
|
73
|
+
]
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"pattern": "^(?!llm\\.)[a-z][a-z0-9-]*(?:\\.[a-z][a-z0-9-]*)+\\.[a-z][a-z0-9_]*\\.[a-z][a-z0-9_]*$"
|
|
77
|
+
}
|
|
78
|
+
],
|
|
79
|
+
"examples": [
|
|
80
|
+
"llm.trace.span.completed",
|
|
81
|
+
"llm.prompt.rendered",
|
|
82
|
+
"llm.cost.token.recorded"
|
|
83
|
+
]
|
|
84
|
+
},
|
|
85
|
+
"timestamp": {
|
|
86
|
+
"type": "string",
|
|
87
|
+
"description": "UTC ISO-8601 timestamp with microsecond precision (exactly 6 decimal places). Format: YYYY-MM-DDThh:mm:ss.ffffffZ.",
|
|
88
|
+
"pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z$",
|
|
89
|
+
"examples": ["2024-05-13T12:00:00.000000Z"]
|
|
90
|
+
},
|
|
91
|
+
"source": {
|
|
92
|
+
"type": "string",
|
|
93
|
+
"description": "Emitting tool in 'name@semver' format, e.g. 'my-agent@1.0.0'.",
|
|
94
|
+
"pattern": "^[a-zA-Z][a-zA-Z0-9._-]*@\\d+\\.\\d+\\.\\d+(?:[.-][a-zA-Z0-9.]+)?$",
|
|
95
|
+
"examples": ["llm-trace@0.3.1", "promptlock@1.0.0", "MyAgent@2.1.0-beta.1"]
|
|
96
|
+
},
|
|
97
|
+
"payload": {
|
|
98
|
+
"type": "object",
|
|
99
|
+
"description": "Tool-specific event data. Must contain at least one property.",
|
|
100
|
+
"minProperties": 1
|
|
101
|
+
},
|
|
102
|
+
"trace_id": {
|
|
103
|
+
"type": "string",
|
|
104
|
+
"description": "OpenTelemetry trace ID — exactly 32 lowercase hexadecimal characters.",
|
|
105
|
+
"pattern": "^[0-9a-f]{32}$",
|
|
106
|
+
"examples": ["4bf92f3577b34da6a3ce929d0e0e4736"]
|
|
107
|
+
},
|
|
108
|
+
"span_id": {
|
|
109
|
+
"type": "string",
|
|
110
|
+
"description": "OpenTelemetry span ID — exactly 16 lowercase hexadecimal characters.",
|
|
111
|
+
"pattern": "^[0-9a-f]{16}$",
|
|
112
|
+
"examples": ["00f067aa0ba902b7"]
|
|
113
|
+
},
|
|
114
|
+
"parent_span_id": {
|
|
115
|
+
"type": "string",
|
|
116
|
+
"description": "Parent span ID — exactly 16 lowercase hexadecimal characters.",
|
|
117
|
+
"pattern": "^[0-9a-f]{16}$"
|
|
118
|
+
},
|
|
119
|
+
"org_id": {
|
|
120
|
+
"type": "string",
|
|
121
|
+
"description": "Organisation identifier for multi-tenant deployments.",
|
|
122
|
+
"minLength": 1
|
|
123
|
+
},
|
|
124
|
+
"team_id": {
|
|
125
|
+
"type": "string",
|
|
126
|
+
"description": "Team identifier.",
|
|
127
|
+
"minLength": 1
|
|
128
|
+
},
|
|
129
|
+
"actor_id": {
|
|
130
|
+
"type": "string",
|
|
131
|
+
"description": "User or service-account identifier that triggered the event.",
|
|
132
|
+
"minLength": 1
|
|
133
|
+
},
|
|
134
|
+
"session_id": {
|
|
135
|
+
"type": "string",
|
|
136
|
+
"description": "Session identifier grouping a series of related events.",
|
|
137
|
+
"minLength": 1
|
|
138
|
+
},
|
|
139
|
+
"tags": {
|
|
140
|
+
"type": "object",
|
|
141
|
+
"description": "Arbitrary string key→value metadata attached to the event.",
|
|
142
|
+
"maxProperties": 50,
|
|
143
|
+
"propertyNames": {
|
|
144
|
+
"type": "string",
|
|
145
|
+
"minLength": 1
|
|
146
|
+
},
|
|
147
|
+
"additionalProperties": {
|
|
148
|
+
"type": "string",
|
|
149
|
+
"minLength": 1
|
|
150
|
+
}
|
|
151
|
+
},
|
|
152
|
+
"checksum": {
|
|
153
|
+
"type": "string",
|
|
154
|
+
"description": "SHA-256 digest of the canonical payload. Format: 'sha256:' + 64 lowercase hex characters. Set by sign().",
|
|
155
|
+
"pattern": "^sha256:[0-9a-f]{64}$"
|
|
156
|
+
},
|
|
157
|
+
"signature": {
|
|
158
|
+
"type": "string",
|
|
159
|
+
"description": "HMAC-SHA256 audit chain signature. Format: 'hmac-sha256:' + 64 lowercase hex characters. Set by sign().",
|
|
160
|
+
"pattern": "^hmac-sha256:[0-9a-f]{64}$"
|
|
161
|
+
},
|
|
162
|
+
"prev_id": {
|
|
163
|
+
"type": "string",
|
|
164
|
+
"description": "ULID of the previous event in an audit chain. First character MUST be in [0-7].",
|
|
165
|
+
"pattern": "^[0-7][0-9A-HJKMNP-TV-Z]{25}$",
|
|
166
|
+
"minLength": 26,
|
|
167
|
+
"maxLength": 26
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|