spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/sampling.py ADDED
@@ -0,0 +1,500 @@
1
+ """spanforge.sampling — Sampling strategies for span/event emission.
2
+
3
+ Samplers decide **at observation time** whether a span or event should be
4
+ exported. They are composable: a :class:`ParentBasedSampler` delegates to a
5
+ root sampler for new traces and honours the parent's decision for child spans.
6
+
7
+ Configure via :func:`spanforge.configure`::
8
+
9
+ from spanforge import configure
10
+ from spanforge.sampling import RatioSampler, ParentBasedSampler
11
+
12
+ configure(sampler=ParentBasedSampler(root_sampler=RatioSampler(0.1)))
13
+
14
+ Built-in samplers
15
+ -----------------
16
+
17
+ ========================================= =====================================
18
+ Class Description
19
+ ========================================= =====================================
20
+ :class:`AlwaysOnSampler` Export every span (default).
21
+ :class:`AlwaysOffSampler` Drop every span.
22
+ :class:`RatioSampler` Probabilistic head-based sampling.
23
+ :class:`ParentBasedSampler` Honour parent trace flags; use
24
+ ``root_sampler`` for new traces.
25
+ :class:`RuleBasedSampler` Per-operation / per-model rules.
26
+ :class:`TailBasedSampler` Buffer spans, decide after span ends
27
+ (e.g. always keep errors).
28
+ ========================================= =====================================
29
+
30
+ Custom samplers
31
+ ---------------
32
+ Implement the :class:`Sampler` protocol::
33
+
34
+ class MySampler:
35
+ def should_sample(self, span_or_event, cfg) -> bool:
36
+ return True # or False
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import contextlib
42
+ import hashlib
43
+ import logging
44
+ import random
45
+ import threading
46
+ from typing import TYPE_CHECKING, Any, Generator, Protocol, runtime_checkable
47
+
48
+ if TYPE_CHECKING:
49
+ pass
50
+
51
# Public names exported by ``from spanforge.sampling import *``.
__all__ = [
    "AlwaysOffSampler",
    "AlwaysOnSampler",
    "ComplianceSampler",
    "ParentBasedSampler",
    "RatioSampler",
    "RuleBasedSampler",
    "Sampler",
    "TailBasedSampler",
    "bypass_sampling",
]

# Module-level logger.
# NOTE(review): no code visible in this module emits through it — confirm
# whether it is still needed.
_log = logging.getLogger("spanforge.sampling")
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # Protocol
68
+ # ---------------------------------------------------------------------------
69
+
70
+
71
@runtime_checkable
class Sampler(Protocol):
    """Structural interface satisfied by every sampler.

    Any object that exposes a ``should_sample(span_or_event, cfg)`` method
    conforms; inheriting from this class is not required.  The protocol is
    ``runtime_checkable``, so ``isinstance(obj, Sampler)`` works, but that
    check only verifies the method is present, not its signature.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Decide whether *span_or_event* should be exported.

        Args:
            span_or_event: The :class:`~spanforge._span.Span` or
                :class:`~spanforge.event.Event` being considered.
            cfg: The active :class:`~spanforge.config.SpanForgeConfig`.

        Returns:
            ``True`` to export the span/event, ``False`` to drop it.
        """
87
+
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # Always-on / Always-off
91
+ # ---------------------------------------------------------------------------
92
+
93
+
94
class AlwaysOnSampler:
    """Sampler that keeps everything.

    This is the SDK default when no sampler is set.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return ``True`` unconditionally; both arguments are ignored."""
        keep = True
        return keep

    def __repr__(self) -> str:
        """Return the constructor expression for this sampler."""
        return "AlwaysOnSampler()"
102
+
103
+
104
class AlwaysOffSampler:
    """Sampler that drops everything.

    Useful for completely silencing test code.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return ``False`` unconditionally; both arguments are ignored."""
        keep = False
        return keep

    def __repr__(self) -> str:
        """Return the constructor expression for this sampler."""
        return "AlwaysOffSampler()"
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # Ratio / probabilistic
116
+ # ---------------------------------------------------------------------------
117
+
118
+
119
class RatioSampler:
    """Probabilistic head-based sampler keyed on the trace id.

    The decision is a pure function of the span's ``trace_id``: the id is
    hashed and compared against a threshold derived from *rate*, so every
    span belonging to one trace gets the *same* answer.

    Args:
        rate: Fraction of traces to export.  ``1.0`` exports all,
            ``0.0`` exports none, ``0.1`` exports roughly one-in-ten.

    Raises:
        ValueError: If *rate* is not in ``[0.0, 1.0]``.
    """

    def __init__(self, rate: float) -> None:
        # Chained comparison on purpose: it is False for NaN, so NaN is
        # rejected along with out-of-range values.
        in_range = 0.0 <= rate <= 1.0
        if not in_range:
            raise ValueError(f"RatioSampler.rate must be in [0.0, 1.0], got {rate!r}")
        self._rate = rate
        # Hash values below this cut-off (out of 2**64) are sampled.
        self._threshold = int(rate * (2**64))

    @property
    def rate(self) -> float:
        """The configured sampling fraction."""
        return self._rate

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return ``True`` if the trace owning *span_or_event* is sampled.

        Objects without a usable trace id are always exported.
        """
        rate = self._rate
        # Short-circuit the two extremes so no hashing is needed.
        if rate >= 1.0:
            return True
        if rate <= 0.0:
            return False

        trace_id = _get_trace_id(span_or_event)
        if trace_id is None:
            # No trace context — fall through to export.
            return True

        # SHA-256 gives a uniform spread whatever the id format (UUID, ULID,
        # 32-hex); the first 8 bytes are read as a big-endian uint64.
        hashed = hashlib.sha256(trace_id.encode()).digest()
        bucket = int.from_bytes(hashed[:8], "big")
        return bucket < self._threshold

    def __repr__(self) -> str:
        """Return the constructor expression for this sampler."""
        return f"RatioSampler(rate={self._rate!r})"
161
+
162
+
163
+ # ---------------------------------------------------------------------------
164
+ # Parent-based
165
+ # ---------------------------------------------------------------------------
166
+
167
+
168
class ParentBasedSampler:
    """Honour the parent span's sampling decision; use ``root_sampler`` for roots.

    This mirrors the OpenTelemetry ``ParentBased`` sampler spec so that the
    entire trace follows a single consistent decision.

    Args:
        root_sampler: The sampler to use for root spans (no parent).
            Defaults to :class:`AlwaysOnSampler`.
        remote_parent_sampled: Decision for remote-parent spans where the
            parent *was* sampled.  Defaults to ``True`` (always export).
        remote_parent_not_sampled: Decision for remote-parent spans where the
            parent was *not* sampled.  Defaults to ``False`` (always drop).
    """

    def __init__(
        self,
        root_sampler: Any | None = None,
        *,
        remote_parent_sampled: bool = True,
        remote_parent_not_sampled: bool = False,
    ) -> None:
        self._root = root_sampler if root_sampler is not None else AlwaysOnSampler()
        self._remote_sampled = remote_parent_sampled
        self._remote_not_sampled = remote_parent_not_sampled

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return the sampling decision for *span_or_event*.

        Resolution order:

        1. A non-``None`` ``traceparent`` attribute marks a remote parent; the
           W3C sampled flag selects ``remote_parent_sampled`` or
           ``remote_parent_not_sampled``.  An unparseable value counts as
           "not sampled".
        2. A non-``None`` ``parent_span_id`` marks a local parent; the span is
           kept.
        3. Otherwise the span is a root and ``root_sampler`` decides.
        """
        traceparent = getattr(span_or_event, "traceparent", None)
        if traceparent is not None:
            if self._parent_sampled_flag(traceparent):
                return self._remote_sampled
            return self._remote_not_sampled

        # Local parent via spanforge's context stack.
        # NOTE(review): this keeps the span without consulting the parent's
        # actual decision.  If the SDK also calls should_sample() for children
        # of a root that root_sampler dropped, those children would still be
        # exported — confirm against the caller.
        if getattr(span_or_event, "parent_span_id", None) is not None:
            return True

        # Root span — delegate to root_sampler.
        return self._root.should_sample(span_or_event, cfg)

    @staticmethod
    def _parent_sampled_flag(traceparent: Any) -> bool:
        """Return whether the sampled bit is set in a W3C ``traceparent``.

        The format is ``00-{trace_id}-{parent_id}-{flags}``; the flags field
        is hexadecimal and bit 0 is the sampled flag.  Anything that cannot
        be parsed — bad hex, or a value that is not a ``str`` at all, such as
        ``bytes`` taken straight from a header — is conservatively treated as
        "not sampled" instead of raising out of the sampling path.
        """
        try:
            flags = int(traceparent.rsplit("-", 1)[-1], 16)
        except (AttributeError, IndexError, TypeError, ValueError):
            return False
        return bool(flags & 0x01)

    def __repr__(self) -> str:
        """Return the constructor expression for this sampler."""
        return (
            f"ParentBasedSampler(root_sampler={self._root!r}, "
            f"remote_parent_sampled={self._remote_sampled!r}, "
            f"remote_parent_not_sampled={self._remote_not_sampled!r})"
        )
223
+
224
+
225
+ # ---------------------------------------------------------------------------
226
+ # Rule-based
227
+ # ---------------------------------------------------------------------------
228
+
229
+
230
class RuleBasedSampler:
    """Sample based on user-defined attribute rules.

    Each rule is a ``dict`` mapping span attribute names to match values.
    A rule matches when *all* specified attributes equal their target values
    on the span.  The first matching rule wins.

    Rules list entries are dicts with keys:

    * ``match``: ``dict[str, Any]`` — attribute → expected-value pairs.
      Keys may be dotted paths of any depth (``"model.name"``,
      ``"payload.model.name"``); each segment is resolved as an attribute
      and, when the current value is a ``dict``, as a key.  A missing or
      empty ``match`` matches every span.
    * ``sample``: ``bool`` — whether to export when matched.  Falls back to
      ``default`` when omitted.

    A default decision (``default``) applies when no rule matches.

    Args:
        rules: Ordered list of rule dicts.
        default: Sampling decision when no rule matches.  Defaults to
            ``True`` (export everything by default).

    Example::

        sampler = RuleBasedSampler(
            rules=[
                {"match": {"span_name": "health_check"}, "sample": False},
                {"match": {"operation": "chat", "model.name": "gpt-4o"}, "sample": True},
            ],
            default=True,
        )
    """

    def __init__(
        self,
        rules: list[dict[str, Any]] | None = None,
        *,
        default: bool = True,
    ) -> None:
        # Copy the list so later mutation of the caller's list has no effect.
        self._rules: list[dict[str, Any]] = list(rules or [])
        self._default = default

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return the decision of the first matching rule, else ``default``."""
        for rule in self._rules:
            # ``or {}`` also covers an explicit ``"match": None``.
            match = rule.get("match") or {}
            if self._matches(span_or_event, match):
                return bool(rule.get("sample", self._default))
        return self._default

    @staticmethod
    def _lookup(node: Any, name: str) -> Any:
        """Resolve one path segment on *node*; ``None`` when absent.

        Attribute access is tried first.  Dict keys are consulted only when
        that yields ``None``, so attribute results are never shadowed.
        """
        found = getattr(node, name, None)
        if found is None and isinstance(node, dict):
            found = node.get(name)
        return found

    @staticmethod
    def _matches(obj: Any, match: dict[str, Any]) -> bool:
        """Return ``True`` when every ``path -> expected`` pair holds on *obj*."""
        for key, expected in match.items():
            val = obj
            # Walk every dotted segment, stopping at the first missing link
            # (so an unresolved path compares as ``None``).
            for part in key.split("."):
                val = RuleBasedSampler._lookup(val, part)
                if val is None:
                    break
            if val != expected:
                return False
        return True

    def __repr__(self) -> str:
        """Return the constructor expression for this sampler."""
        return f"RuleBasedSampler(rules={self._rules!r}, default={self._default!r})"
291
+
292
+
293
+ # ---------------------------------------------------------------------------
294
+ # Tail-based
295
+ # ---------------------------------------------------------------------------
296
+
297
+
298
class TailBasedSampler:
    """Decide whether to export a span from its *final* state.

    Tail sampling inspects the finished span (e.g. error status, latency)
    before making an export decision.  This enables use cases like:

    * Always export error spans.
    * Always export spans with ``duration_ms > threshold``.
    * Sample only the slow-path at a given rate.

    Because decisions are made at ``on_end``, this sampler is designed to
    work alongside :class:`~spanforge.processor.SpanProcessor`.  The
    :meth:`should_sample` method is called by the SDK just before export.

    Args:
        always_sample_errors: If ``True``, spans with ``status == "error"``
            are always exported regardless of other rules.  (Default: ``True``)
        always_sample_slow_ms: If set, spans with ``duration_ms >=`` this
            value are always exported.  (Default: ``None``)
        fallback_sampler: Sampler used for spans that don't match the above
            conditions.  Defaults to :class:`AlwaysOnSampler`.
        buffer_size: Accepted so that the documented constructor signature
            works.  The value is stored but has no effect on decisions.
            (Default: 1 000)

    Note:
        This implementation makes the sampling decision at the time
        :meth:`should_sample` is called (typically just before export).
        It holds no span buffer and does not cache earlier decisions.
    """

    def __init__(
        self,
        *,
        always_sample_errors: bool = True,
        always_sample_slow_ms: float | None = None,
        fallback_sampler: Any | None = None,
        buffer_size: int = 1000,
    ) -> None:
        self._always_errors = always_sample_errors
        self._slow_ms = always_sample_slow_ms
        self._fallback = fallback_sampler if fallback_sampler is not None else AlwaysOnSampler()
        # Stored for API compatibility only; nothing in this class reads it.
        self._buffer_size = buffer_size
        # Kept from the original implementation; no method visible in this
        # class acquires it.
        self._lock = threading.Lock()

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return ``True`` for error or slow spans, else defer to the fallback.

        Only a ``status`` that is the string ``"error"`` counts as an error,
        and only a numeric ``duration_ms`` is compared with the slow
        threshold.
        """
        # Error spans — always sample.
        if self._always_errors:
            status = getattr(span_or_event, "status", None)
            if isinstance(status, str) and status == "error":
                return True

        # Slow spans — always sample.
        if self._slow_ms is not None:
            duration = getattr(span_or_event, "duration_ms", None)
            if isinstance(duration, (int, float)) and duration >= self._slow_ms:
                return True

        # Fallback sampler for normal spans.
        return self._fallback.should_sample(span_or_event, cfg)

    def __repr__(self) -> str:
        """Return a constructor-style description (``buffer_size`` omitted)."""
        return (
            f"TailBasedSampler("
            f"always_sample_errors={self._always_errors!r}, "
            f"always_sample_slow_ms={self._slow_ms!r}, "
            f"fallback_sampler={self._fallback!r})"
        )
365
+
366
+
367
+ # ---------------------------------------------------------------------------
368
+ # Helpers
369
+ # ---------------------------------------------------------------------------
370
+
371
+
372
def _get_trace_id(obj: Any) -> str | None:
    """Return the trace id carried by a Span or Event, or ``None``.

    The ``trace_id`` attribute is preferred.  When it is missing, empty or
    not a string, ``payload["trace_id"]`` is tried, provided ``payload`` is
    a ``dict``.  Only non-empty strings are returned.
    """
    tid = getattr(obj, "trace_id", None)
    if not (isinstance(tid, str) and tid):
        # Fall back to the id nested inside the payload dict
        # (Event.payload["trace_id"]).
        payload = getattr(obj, "payload", None)
        if not isinstance(payload, dict):
            return None
        tid = payload.get("trace_id")
        if not (isinstance(tid, str) and tid):
            return None
    return tid
385
+
386
+
387
def _get_event_type(obj: Any) -> str | None:
    """Return the ``event_type`` of a Span or Event as a string, or ``None``.

    Enum members whose value is a string are returned by value.  ``str()``
    on a plain ``Enum`` (including a ``str``-mixin one) yields
    ``"ClassName.MEMBER"``, which would never match a dotted prefix such as
    ``"llm.audit."``.  Every other non-``None`` value is converted with
    ``str()``.
    """
    # Function-scope import keeps this helper self-contained.
    from enum import Enum

    et = getattr(obj, "event_type", None)
    if et is None:
        return None
    if isinstance(et, Enum) and isinstance(et.value, str):
        return et.value
    return str(et)
393
+
394
+
395
+ # ---------------------------------------------------------------------------
396
+ # Compliance-aware sampler (SF-16)
397
+ # ---------------------------------------------------------------------------
398
+
399
# Event-type prefixes that ComplianceSampler always records by default.
_DEFAULT_ALWAYS_RECORD: frozenset[str] = frozenset(
    (
        "llm.redact.",
        "llm.audit.",
        "llm.guard.",
        "llm.cost.",
    )
)


class ComplianceSampler:
    """Compliance-aware sampler that never drops critical event types.

    Events whose ``event_type`` starts with any prefix in *always_record*
    are always exported (100% recording).  All other events are sampled
    at *base_rate* using deterministic trace-ID-based hashing so entire
    traces are kept or dropped together.  Events without a trace id fall
    back to a random draw at *base_rate*.

    Args:
        base_rate: Fraction of non-compliance events to export (0.0–1.0).
        always_record: Frozenset of event-type prefixes that bypass sampling.
            Defaults to ``llm.redact.``, ``llm.audit.``, ``llm.guard.``,
            ``llm.cost.``.

    Raises:
        ValueError: If *base_rate* is not in ``[0.0, 1.0]``.

    Example::

        sampler = ComplianceSampler(base_rate=0.1)
        # llm.audit.* events  → always recorded
        # llm.trace.* events  → ~10% recorded
    """

    def __init__(
        self,
        base_rate: float = 0.1,
        always_record: frozenset[str] | None = None,
    ) -> None:
        # Chained comparison on purpose: it is False for NaN, so NaN is
        # rejected along with out-of-range values.
        in_range = 0.0 <= base_rate <= 1.0
        if not in_range:
            raise ValueError(f"ComplianceSampler.base_rate must be in [0.0, 1.0], got {base_rate!r}")
        self._base_rate = base_rate
        if always_record is None:
            always_record = _DEFAULT_ALWAYS_RECORD
        self._always_record = always_record
        # Hash values below this cut-off (out of 2**64) are sampled.
        self._threshold = int(base_rate * (2**64))

    @property
    def base_rate(self) -> float:
        """The sampling fraction applied to non-critical events."""
        return self._base_rate

    @property
    def always_record(self) -> frozenset[str]:
        """The event-type prefixes that are always exported."""
        return self._always_record

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:  # noqa: ARG002
        """Return ``True`` if *span_or_event* should be exported."""
        # An active bypass_sampling() context on this thread wins outright.
        if getattr(_bypass_active, "value", False):
            return True

        # Compliance-critical events are never dropped.
        event_type = _get_event_type(span_or_event)
        if event_type is not None and any(
            event_type.startswith(prefix) for prefix in self._always_record
        ):
            return True

        # Everything else is sampled at base_rate; handle the extremes first.
        rate = self._base_rate
        if rate >= 1.0:
            return True
        if rate <= 0.0:
            return False

        trace_id = _get_trace_id(span_or_event)
        if trace_id is None:
            # No trace_id — fall back to random.
            return random.random() < rate  # noqa: S311

        # Deterministic per-trace decision: first 8 bytes of SHA-256 as uint64.
        hashed = hashlib.sha256(trace_id.encode()).digest()
        bucket = int.from_bytes(hashed[:8], "big")
        return bucket < self._threshold

    def __repr__(self) -> str:
        """Return a short description (``always_record`` is not shown)."""
        return f"ComplianceSampler(base_rate={self._base_rate!r})"
476
+
477
+
478
+ # ---------------------------------------------------------------------------
479
+ # Sampling bypass context manager (SF-16-D)
480
+ # ---------------------------------------------------------------------------
481
+
482
# Per-thread bypass flag; its ``value`` attribute is set only while a
# bypass_sampling() context is active on that thread.
_bypass_active: threading.local = threading.local()


@contextlib.contextmanager
def bypass_sampling() -> Generator[None, None, None]:
    """Context manager that sets the sampling-bypass flag for this thread.

    Used by compliance report generation to ensure reports reflect the
    complete audit trail, not the sampled subset::

        with bypass_sampling():
            package = engine.generate_evidence_package(...)

    While the context is active, samplers that check the flag return
    ``True``.  Within this module only :class:`ComplianceSampler` does so.
    The flag lives in a ``threading.local``, so it affects only sampling
    calls made on the thread that entered the context.  Contexts may be
    nested: the previous state is restored on exit, even if the body raises.
    """
    was_active = getattr(_bypass_active, "value", False)
    _bypass_active.value = True
    try:
        yield
    finally:
        # Restore rather than reset, so an outer bypass stays in force.
        _bypass_active.value = was_active
@@ -0,0 +1,170 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://llm-toolkit-schema.dev/schemas/v1.0/schema.json",
4
+ "title": "llm-toolkit-schema Event Envelope",
5
+ "description": "Canonical JSON Schema for the llm-toolkit-schema v1.0 Event envelope. Every event emitted by a tool in the LLM Developer Toolkit must validate against this schema.",
6
+ "type": "object",
7
+ "required": [
8
+ "schema_version",
9
+ "event_id",
10
+ "event_type",
11
+ "timestamp",
12
+ "source",
13
+ "payload"
14
+ ],
15
+ "additionalProperties": false,
16
+ "properties": {
17
+ "schema_version": {
18
+ "type": "string",
19
+ "description": "Schema version. Accepted values: '1.0' and '2.0' (RFC-0001 §15.5).",
20
+ "enum": ["1.0", "2.0"],
21
+ "examples": ["1.0", "2.0"]
22
+ },
23
+ "event_id": {
24
+ "type": "string",
25
+ "description": "Universally unique ULID identifier for this event. 26-character Crockford Base32 string; first character MUST be in [0-7].",
26
+ "pattern": "^[0-7][0-9A-HJKMNP-TV-Z]{25}$",
27
+ "minLength": 26,
28
+ "maxLength": 26,
29
+ "examples": ["01HZ8G3EPRP1YF2QV70NMBE6J4"]
30
+ },
31
+ "event_type": {
32
+ "type": "string",
33
+ "description": "Dot-separated namespaced event type, e.g. 'llm.trace.span.completed'.",
34
+ "oneOf": [
35
+ {
36
+ "enum": [
37
+ "llm.trace.span.started",
38
+ "llm.trace.span.completed",
39
+ "llm.trace.span.failed",
40
+ "llm.trace.agent.step",
41
+ "llm.trace.agent.completed",
42
+ "llm.trace.reasoning.step",
43
+ "llm.cost.token.recorded",
44
+ "llm.cost.session.recorded",
45
+ "llm.cost.attributed",
46
+ "llm.cache.hit",
47
+ "llm.cache.miss",
48
+ "llm.cache.evicted",
49
+ "llm.cache.written",
50
+ "llm.eval.score.recorded",
51
+ "llm.eval.regression.detected",
52
+ "llm.eval.scenario.started",
53
+ "llm.eval.scenario.completed",
54
+ "llm.guard.input.blocked",
55
+ "llm.guard.input.passed",
56
+ "llm.guard.output.blocked",
57
+ "llm.guard.output.passed",
58
+ "llm.fence.validated",
59
+ "llm.fence.retry.triggered",
60
+ "llm.fence.max_retries.exceeded",
61
+ "llm.prompt.rendered",
62
+ "llm.prompt.template.loaded",
63
+ "llm.prompt.version.changed",
64
+ "llm.redact.pii.detected",
65
+ "llm.redact.phi.detected",
66
+ "llm.redact.applied",
67
+ "llm.diff.computed",
68
+ "llm.diff.regression.flagged",
69
+ "llm.template.registered",
70
+ "llm.template.variable.bound",
71
+ "llm.template.validation.failed",
72
+ "llm.audit.key.rotated"
73
+ ]
74
+ },
75
+ {
76
+ "pattern": "^(?!llm\\.)[a-z][a-z0-9-]*(?:\\.[a-z][a-z0-9-]*)+\\.[a-z][a-z0-9_]*\\.[a-z][a-z0-9_]*$"
77
+ }
78
+ ],
79
+ "examples": [
80
+ "llm.trace.span.completed",
81
+ "llm.prompt.rendered",
82
+ "llm.cost.token.recorded"
83
+ ]
84
+ },
85
+ "timestamp": {
86
+ "type": "string",
87
+ "description": "UTC ISO-8601 timestamp with microsecond precision (exactly 6 decimal places). Format: YYYY-MM-DDThh:mm:ss.ffffffZ.",
88
+ "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z$",
89
+ "examples": ["2024-05-13T12:00:00.000000Z"]
90
+ },
91
+ "source": {
92
+ "type": "string",
93
+ "description": "Emitting tool in 'name@semver' format, e.g. 'my-agent@1.0.0'.",
94
+ "pattern": "^[a-zA-Z][a-zA-Z0-9._-]*@\\d+\\.\\d+\\.\\d+(?:[.-][a-zA-Z0-9.]+)?$",
95
+ "examples": ["llm-trace@0.3.1", "promptlock@1.0.0", "MyAgent@2.1.0-beta.1"]
96
+ },
97
+ "payload": {
98
+ "type": "object",
99
+ "description": "Tool-specific event data. Must contain at least one property.",
100
+ "minProperties": 1
101
+ },
102
+ "trace_id": {
103
+ "type": "string",
104
+ "description": "OpenTelemetry trace ID — exactly 32 lowercase hexadecimal characters.",
105
+ "pattern": "^[0-9a-f]{32}$",
106
+ "examples": ["4bf92f3577b34da6a3ce929d0e0e4736"]
107
+ },
108
+ "span_id": {
109
+ "type": "string",
110
+ "description": "OpenTelemetry span ID — exactly 16 lowercase hexadecimal characters.",
111
+ "pattern": "^[0-9a-f]{16}$",
112
+ "examples": ["00f067aa0ba902b7"]
113
+ },
114
+ "parent_span_id": {
115
+ "type": "string",
116
+ "description": "Parent span ID — exactly 16 lowercase hexadecimal characters.",
117
+ "pattern": "^[0-9a-f]{16}$"
118
+ },
119
+ "org_id": {
120
+ "type": "string",
121
+ "description": "Organisation identifier for multi-tenant deployments.",
122
+ "minLength": 1
123
+ },
124
+ "team_id": {
125
+ "type": "string",
126
+ "description": "Team identifier.",
127
+ "minLength": 1
128
+ },
129
+ "actor_id": {
130
+ "type": "string",
131
+ "description": "User or service-account identifier that triggered the event.",
132
+ "minLength": 1
133
+ },
134
+ "session_id": {
135
+ "type": "string",
136
+ "description": "Session identifier grouping a series of related events.",
137
+ "minLength": 1
138
+ },
139
+ "tags": {
140
+ "type": "object",
141
+ "description": "Arbitrary string key→value metadata attached to the event.",
142
+ "maxProperties": 50,
143
+ "propertyNames": {
144
+ "type": "string",
145
+ "minLength": 1
146
+ },
147
+ "additionalProperties": {
148
+ "type": "string",
149
+ "minLength": 1
150
+ }
151
+ },
152
+ "checksum": {
153
+ "type": "string",
154
+ "description": "SHA-256 digest of the canonical payload. Format: 'sha256:' + 64 lowercase hex characters. Set by sign().",
155
+ "pattern": "^sha256:[0-9a-f]{64}$"
156
+ },
157
+ "signature": {
158
+ "type": "string",
159
+ "description": "HMAC-SHA256 audit chain signature. Format: 'hmac-sha256:' + 64 lowercase hex characters. Set by sign().",
160
+ "pattern": "^hmac-sha256:[0-9a-f]{64}$"
161
+ },
162
+ "prev_id": {
163
+ "type": "string",
164
+ "description": "ULID of the previous event in an audit chain. First character MUST be in [0-7].",
165
+ "pattern": "^[0-7][0-9A-HJKMNP-TV-Z]{25}$",
166
+ "minLength": 26,
167
+ "maxLength": 26
168
+ }
169
+ }
170
+ }