spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/sampling.py ADDED
@@ -0,0 +1,511 @@
1
+ """spanforge.sampling — Sampling strategies for span/event emission.
2
+
3
+ Samplers decide **at observation time** whether a span or event should be
4
+ exported. They are composable: a :class:`ParentBasedSampler` delegates to a
5
+ root sampler for new traces and honours the parent's decision for child spans.
6
+
7
+ Configure via :func:`spanforge.configure`::
8
+
9
+ from spanforge import configure
10
+ from spanforge.sampling import RatioSampler, ParentBasedSampler
11
+
12
+ configure(sampler=ParentBasedSampler(root_sampler=RatioSampler(0.1)))
13
+
14
+ Built-in samplers
15
+ -----------------
16
+
17
+ ========================================= =====================================
18
+ Class Description
19
+ ========================================= =====================================
20
+ :class:`AlwaysOnSampler` Export every span (default).
21
+ :class:`AlwaysOffSampler` Drop every span.
22
+ :class:`RatioSampler` Probabilistic head-based sampling.
23
+ :class:`ParentBasedSampler` Honour parent trace flags; use
24
+ ``root_sampler`` for new traces.
25
+ :class:`RuleBasedSampler` Per-operation / per-model rules.
26
+ :class:`TailBasedSampler` Buffer spans, decide after span ends
27
+ (e.g. always keep errors).
28
+ ========================================= =====================================
29
+
30
+ Custom samplers
31
+ ---------------
32
+ Implement the :class:`Sampler` protocol::
33
+
34
+ class MySampler:
35
+ def should_sample(self, span_or_event, cfg) -> bool:
36
+ return True # or False
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import contextlib
42
+ import hashlib
43
+ import logging
44
+ import secrets
45
+ import threading
46
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
47
+
48
+ if TYPE_CHECKING:
49
+ from collections.abc import Generator
50
+
51
+ __all__ = [
52
+ "AlwaysOffSampler",
53
+ "AlwaysOnSampler",
54
+ "ComplianceSampler",
55
+ "ParentBasedSampler",
56
+ "RatioSampler",
57
+ "RuleBasedSampler",
58
+ "Sampler",
59
+ "TailBasedSampler",
60
+ "bypass_sampling",
61
+ ]
62
+
63
+ _log = logging.getLogger("spanforge.sampling")
64
+
65
+
66
+ # ---------------------------------------------------------------------------
67
+ # Protocol
68
+ # ---------------------------------------------------------------------------
69
+
70
+
71
@runtime_checkable
class Sampler(Protocol):
    """Structural interface shared by every sampler in this module.

    Any object that exposes a compatible ``should_sample`` method satisfies
    the protocol; because it is runtime-checkable, ``isinstance(obj, Sampler)``
    can be used to verify that the method is present.

    Args:
        span_or_event: The :class:`~spanforge._span.Span` or
            :class:`~spanforge.event.Event` being considered.
        cfg: The active :class:`~spanforge.config.SpanForgeConfig`.

    Returns:
        ``True`` when the span/event should be exported, ``False`` when it
        should be dropped.
    """

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Decide whether to export (``True``) or drop (``False``) the item."""
        ...
87
+
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # Always-on / Always-off
91
+ # ---------------------------------------------------------------------------
92
+
93
+
94
class AlwaysOnSampler:
    """Export every span.  This is the SDK default when no sampler is set."""

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Always returns True — every span is sampled."""
        return True

    def __repr__(self) -> str:
        # Mirrors AlwaysOffSampler.__repr__ so that composite samplers which
        # embed this instance in their own repr stay readable instead of
        # showing "<... object at 0x...>".
        return "AlwaysOnSampler()"
100
+
101
+
102
class AlwaysOffSampler:
    """Sampler that rejects everything — handy for silencing test code."""

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Unconditionally answer False, so nothing is exported."""
        return False

    def __repr__(self) -> str:
        """Return the constructor-style text for this sampler."""
        return "AlwaysOffSampler()"
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Ratio / probabilistic
115
+ # ---------------------------------------------------------------------------
116
+
117
+
118
class RatioSampler:
    """Head-based sampler that keeps a fixed fraction of traces.

    The verdict is derived from a hash of the span's ``trace_id``, so every
    span belonging to one trace gets the *same* answer.

    Args:
        rate: Fraction of traces to export.  ``1.0`` exports all,
            ``0.0`` exports none, ``0.1`` exports roughly one-in-ten.

    Raises:
        ValueError: If *rate* is not in ``[0.0, 1.0]``.
    """

    def __init__(self, rate: float) -> None:
        within_bounds = 0.0 <= rate <= 1.0
        if not within_bounds:
            raise ValueError(f"RatioSampler.rate must be in [0.0, 1.0], got {rate!r}")
        self._rate = rate
        # Scale the fraction onto the uint64 range used by the hash bucket.
        self._threshold = int(rate * (2**64))

    @property
    def rate(self) -> float:
        """Sampling fraction supplied at construction, within [0.0, 1.0]."""
        return self._rate

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return True when the hashed trace_id falls under the threshold."""
        fraction = self._rate
        # The extreme rates short-circuit without looking at the span at all.
        if fraction >= 1.0 or fraction <= 0.0:
            return fraction >= 1.0

        tid = _get_trace_id(span_or_event)
        if tid is None:
            # Without trace context there is nothing to hash — export it.
            return True

        # Hash with SHA-256 so UUIDs, ULIDs and 32-hex ids all spread evenly,
        # then read the leading 8 bytes as a big-endian unsigned integer.
        leading = hashlib.sha256(tid.encode()).digest()[:8]
        bucket = int.from_bytes(leading, "big")
        return bucket < self._threshold

    def __repr__(self) -> str:
        return f"RatioSampler(rate={self._rate!r})"
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Parent-based
166
+ # ---------------------------------------------------------------------------
167
+
168
+
169
class ParentBasedSampler:
    """Honour the parent span's sampling decision; use ``root_sampler`` for roots.

    This mirrors the OpenTelemetry ``ParentBased`` sampler spec so that the
    entire trace follows a single consistent decision.

    Decision order:

    1. If the object carries a ``traceparent`` attribute (remote parent), the
       W3C *sampled* flag selects between ``remote_parent_sampled`` and
       ``remote_parent_not_sampled``.  A malformed traceparent is treated as
       "not sampled".
    2. If the object has a ``parent_span_id`` (local parent), it is kept.
    3. Otherwise it is a root and ``root_sampler`` decides.

    Args:
        root_sampler: The sampler to use for root spans (no parent).
            Defaults to :class:`AlwaysOnSampler`.
        remote_parent_sampled: Decision for remote-parent spans where the
            parent *was* sampled.  Defaults to ``True`` (always export).
        remote_parent_not_sampled: Decision for remote-parent spans where the
            parent was *not* sampled.  Defaults to ``False`` (always drop).
    """

    # Minimum number of dash-separated fields in a W3C traceparent:
    # version, trace-id, parent-id, trace-flags.
    _TRACEPARENT_MIN_FIELDS = 4

    def __init__(
        self,
        root_sampler: Any | None = None,
        *,
        remote_parent_sampled: bool = True,
        remote_parent_not_sampled: bool = False,
    ) -> None:
        self._root = root_sampler if root_sampler is not None else AlwaysOnSampler()
        self._remote_sampled = remote_parent_sampled
        self._remote_not_sampled = remote_parent_not_sampled

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Delegate to root_sampler for roots; honour parent decision for child spans."""
        # Remote parent: an incoming traceparent decides.
        traceparent = getattr(span_or_event, "traceparent", None)
        if traceparent is not None:
            if self._parent_sampled_flag(traceparent):
                return self._remote_sampled
            return self._remote_not_sampled

        # Local parent: keep the span, on the premise that the parent was
        # already sampled for this child to exist.
        if getattr(span_or_event, "parent_span_id", None) is not None:
            return True

        # Root span — delegate to root_sampler.
        return self._root.should_sample(span_or_event, cfg)

    @classmethod
    def _parent_sampled_flag(cls, traceparent: Any) -> bool:
        """Return the *sampled* bit of a W3C traceparent, or False if malformed.

        The expected layout is ``{version}-{trace_id}-{parent_id}-{flags}``.
        The flags are read from the fourth field rather than the last one, so
        a truncated header (missing flags) is not misread by parsing the
        parent-id as flags.
        """
        if not isinstance(traceparent, str):
            return False  # conservative: cannot parse → don't sample
        fields = traceparent.split("-")
        if len(fields) < cls._TRACEPARENT_MIN_FIELDS:
            return False  # conservative: truncated header → don't sample
        try:
            flags = int(fields[3], 16)
        except ValueError:
            return False  # conservative: corrupt flags → don't sample
        return bool(flags & 0x01)

    def __repr__(self) -> str:
        return (
            f"ParentBasedSampler(root_sampler={self._root!r}, "
            f"remote_parent_sampled={self._remote_sampled!r}, "
            f"remote_parent_not_sampled={self._remote_not_sampled!r})"
        )
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # Rule-based
229
+ # ---------------------------------------------------------------------------
230
+
231
+
232
class RuleBasedSampler:
    """Sample based on user-defined attribute rules.

    Each rule is a ``dict`` mapping span attribute names to match values.
    A rule matches when *all* specified attributes equal their target values
    on the span.  The first matching rule wins.

    Rules list entries are dicts with keys:

    * ``match``: ``dict[str, Any]`` — attribute → expected-value pairs.  Keys
      may be dotted paths of any depth (``"model.name"``,
      ``"model.provider.region"``); each segment is resolved with
      ``getattr``.  A missing ``match`` key is an empty match, which matches
      every span.
    * ``sample``: ``bool`` — whether to export when matched.  Falls back to
      ``default`` when omitted.

    A default decision (``default``) applies when no rule matches.

    Args:
        rules: Ordered list of rule dicts.
        default: Sampling decision when no rule matches.  Defaults to
            ``True`` (export everything by default).

    Example::

        sampler = RuleBasedSampler(
            rules=[
                {"match": {"span_name": "health_check"}, "sample": False},
                {"match": {"operation": "chat", "model.name": "gpt-4o"}, "sample": True},
            ],
            default=True,
        )
    """

    def __init__(
        self,
        rules: list[dict[str, Any]] | None = None,
        *,
        default: bool = True,
    ) -> None:
        # Copy the list so later mutation of the caller's list has no effect.
        self._rules: list[dict[str, Any]] = list(rules or [])
        self._default = default

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return the first matching rule's decision, or the default."""
        for rule in self._rules:
            if self._matches(span_or_event, rule.get("match", {})):
                return bool(rule.get("sample", self._default))
        return self._default

    @staticmethod
    def _matches(obj: Any, match: dict[str, Any]) -> bool:
        """Return True when every ``path → expected`` pair holds on *obj*."""
        for key, expected in match.items():
            # Walk the dotted path one attribute at a time.  A missing or
            # ``None`` link ends the walk with ``None`` as the resolved value.
            val = obj
            for segment in key.split("."):
                val = getattr(val, segment, None)
                if val is None:
                    break
            if val != expected:
                return False
        return True

    def __repr__(self) -> str:
        return f"RuleBasedSampler(rules={self._rules!r}, default={self._default!r})"
294
+
295
+
296
+ # ---------------------------------------------------------------------------
297
+ # Tail-based
298
+ # ---------------------------------------------------------------------------
299
+
300
+
301
class TailBasedSampler:
    """Decide whether to export a span from its *final* state.

    Tail sampling inspects the finished span (e.g. error status, latency)
    before making an export decision.  This enables use cases like:

    * Always export error spans.
    * Always export spans with ``duration_ms >= threshold``.
    * Sample only the remaining "normal" spans at a given rate via
      ``fallback_sampler``.

    Args:
        always_sample_errors: If ``True``, spans with ``status == "error"``
            are always exported regardless of other rules.  (Default: ``True``)
        always_sample_slow_ms: If set, spans with ``duration_ms >=`` this
            value are always exported.  (Default: ``None``)
        fallback_sampler: Sampler used for spans that don't match the above
            conditions.  Defaults to :class:`AlwaysOnSampler`.
        buffer_size: Accepted so that the documented keyword does not raise
            ``TypeError``.  The value is stored but not otherwise used by
            this implementation (see Note).  (Default: 1 000)

    Note:
        The decision is made at the moment :meth:`should_sample` is called,
        from the attributes present on the object at that time.  Nothing is
        buffered and no per-span state is kept between calls.
    """

    def __init__(
        self,
        *,
        always_sample_errors: bool = True,
        always_sample_slow_ms: float | None = None,
        fallback_sampler: Any | None = None,
        buffer_size: int = 1000,
    ) -> None:
        self._always_errors = always_sample_errors
        self._slow_ms = always_sample_slow_ms
        self._fallback = fallback_sampler if fallback_sampler is not None else AlwaysOnSampler()
        # Retained for API compatibility; no buffer is maintained.
        self._buffer_size = buffer_size
        # Not used by the methods of this class.
        self._lock = threading.Lock()

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Return True if the span should be exported based on error/latency rules."""
        # Error spans — always sample.  Only a string status equal to
        # "error" counts; other status representations fall through.
        if self._always_errors:
            status = getattr(span_or_event, "status", None)
            if isinstance(status, str) and status == "error":
                return True

        # Slow spans — always sample.
        if self._slow_ms is not None:
            duration = getattr(span_or_event, "duration_ms", None)
            if isinstance(duration, (int, float)) and duration >= self._slow_ms:
                return True

        # Fallback sampler for normal spans.
        return self._fallback.should_sample(span_or_event, cfg)

    def __repr__(self) -> str:
        return (
            f"TailBasedSampler("
            f"always_sample_errors={self._always_errors!r}, "
            f"always_sample_slow_ms={self._slow_ms!r}, "
            f"fallback_sampler={self._fallback!r})"
        )
369
+
370
+
371
+ # ---------------------------------------------------------------------------
372
+ # Helpers
373
+ # ---------------------------------------------------------------------------
374
+
375
+
376
+ def _get_trace_id(obj: Any) -> str | None:
377
+ """Extract trace_id from a Span or Event."""
378
+ # Direct attribute on Span.
379
+ tid = getattr(obj, "trace_id", None)
380
+ if isinstance(tid, str) and tid:
381
+ return tid
382
+ # Nested inside payload dict (Event.payload["trace_id"]).
383
+ payload = getattr(obj, "payload", None)
384
+ if isinstance(payload, dict):
385
+ tid = payload.get("trace_id")
386
+ if isinstance(tid, str) and tid:
387
+ return tid
388
+ return None
389
+
390
+
391
+ def _get_event_type(obj: Any) -> str | None:
392
+ """Extract event_type string from a Span or Event."""
393
+ et = getattr(obj, "event_type", None)
394
+ if et is not None:
395
+ return str(et)
396
+ return None
397
+
398
+
399
+ # ---------------------------------------------------------------------------
400
+ # Compliance-aware sampler (SF-16)
401
+ # ---------------------------------------------------------------------------
402
+
403
# Event-type prefixes that ComplianceSampler records unconditionally unless
# the caller supplies its own set.
_DEFAULT_ALWAYS_RECORD: frozenset[str] = frozenset(
    ("llm.redact.", "llm.audit.", "llm.guard.", "llm.cost.")
)


class ComplianceSampler:
    """Sampler that always keeps compliance-critical event types.

    An event whose ``event_type`` begins with one of the *always_record*
    prefixes is exported every time.  Everything else is sampled at
    *base_rate* by hashing the trace id, so a whole trace is kept or dropped
    together; items without a trace id get a random draw instead.

    Args:
        base_rate: Fraction of non-compliance events to export (0.0-1.0).
        always_record: Frozenset of event-type prefixes that bypass sampling.
            Defaults to ``llm.redact.``, ``llm.audit.``, ``llm.guard.``,
            ``llm.cost.``.

    Raises:
        ValueError: If *base_rate* is outside ``[0.0, 1.0]``.

    Example::

        sampler = ComplianceSampler(base_rate=0.1)
        # llm.audit.* events  → always recorded
        # llm.trace.* events  → ~10% recorded
    """

    def __init__(
        self,
        base_rate: float = 0.1,
        always_record: frozenset[str] | None = None,
    ) -> None:
        rate_ok = 0.0 <= base_rate <= 1.0
        if not rate_ok:
            raise ValueError(
                f"ComplianceSampler.base_rate must be in [0.0, 1.0], got {base_rate!r}"
            )
        self._base_rate = base_rate
        self._always_record = _DEFAULT_ALWAYS_RECORD if always_record is None else always_record
        # The rate scaled onto the uint64 range of the trace-id hash bucket.
        self._threshold = int(base_rate * (2**64))

    @property
    def base_rate(self) -> float:
        """Sampling fraction applied to events outside the always-record set."""
        return self._base_rate

    @property
    def always_record(self) -> frozenset[str]:
        """Event-type prefixes that are exported unconditionally."""
        return self._always_record

    def should_sample(self, span_or_event: Any, cfg: Any) -> bool:
        """Keep compliance-critical events; sample the rest at ``base_rate``."""
        # An active bypass on this thread overrides everything else.
        if getattr(_bypass_active, "value", False):
            return True

        # Compliance-critical prefixes are never dropped.
        kind = _get_event_type(span_or_event)
        if kind is not None and any(kind.startswith(prefix) for prefix in self._always_record):
            return True

        # The extreme rates need neither a hash nor a random draw.
        fraction = self._base_rate
        if fraction >= 1.0 or fraction <= 0.0:
            return fraction >= 1.0

        tid = _get_trace_id(span_or_event)
        if tid is None:
            # No trace id to hash — decide with a random draw instead.
            return (secrets.randbits(32) / 0xFFFF_FFFF) < fraction

        # Deterministic per-trace verdict: leading 8 bytes of SHA-256 as uint64.
        bucket = int.from_bytes(hashlib.sha256(tid.encode()).digest()[:8], "big")
        return bucket < self._threshold

    def __repr__(self) -> str:
        return f"ComplianceSampler(base_rate={self._base_rate!r})"
487
+
488
+
489
+ # ---------------------------------------------------------------------------
490
+ # Sampling bypass context manager (SF-16-D)
491
+ # ---------------------------------------------------------------------------
492
+
493
+ _bypass_active: threading.local = threading.local()
494
+
495
+
496
+ @contextlib.contextmanager
497
+ def bypass_sampling() -> Generator[None, None, None]:
498
+ """Context manager that forces all sampling decisions to return ``True``.
499
+
500
+ Used by compliance report generation to ensure reports reflect the
501
+ complete audit trail, not the sampled subset::
502
+
503
+ with bypass_sampling():
504
+ package = engine.generate_evidence_package(...)
505
+ """
506
+ prev = getattr(_bypass_active, "value", False)
507
+ _bypass_active.value = True
508
+ try:
509
+ yield
510
+ finally:
511
+ _bypass_active.value = prev
spanforge/schema.py ADDED
@@ -0,0 +1,183 @@
1
+ """spanforge.schema — Lightweight JSON Schema validator.
2
+
3
+ Provides :func:`validate`, a zero-dependency validator that supports the most
4
+ commonly needed JSON Schema keywords: ``type``, ``required``, ``properties``,
5
+ ``items``, ``enum``, ``minimum``, ``maximum``, ``minLength``, and
6
+ ``maxLength``. It returns a list of human-readable error strings (empty list
7
+ = valid), making it easy to surface schema violations in log messages or
8
+ CI output without throwing exceptions.
9
+
10
+ Intended for validating structured LLM output (e.g. function-calling
11
+ responses, JSON-mode completions) anywhere in the spanforge ecosystem.
12
+
13
+ Usage::
14
+
15
+ from spanforge.schema import validate
16
+
17
+ schema = {
18
+ "type": "object",
19
+ "required": ["answer", "confidence"],
20
+ "properties": {
21
+ "answer": {"type": "string"},
22
+ "confidence": {"type": "number", "minimum": 0.0, "maximum": 1.0},
23
+ },
24
+ }
25
+
26
+ errors = validate({"answer": "Paris", "confidence": 0.95}, schema)
27
+ assert errors == []
28
+
29
+ errors = validate({"answer": 42}, schema)
30
+ # errors == ["$: missing required property 'confidence'",
31
+ # "$.answer: expected type string, got int"]
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ from typing import Any
37
+
38
# validate_strict is a public, documented entry point of this module, so it
# is exported alongside validate and the exception it raises.
__all__ = ["SchemaValidationError", "validate", "validate_strict"]
39
+
40
+
41
class SchemaValidationError(ValueError):
    """Exception raised by :func:`validate_strict` for an invalid instance.

    The exception message is the individual error strings joined by ``"; "``.

    Attributes:
        errors: The list of error strings from :func:`validate`.
    """

    def __init__(self, errors: list[str]) -> None:
        # Keep the caller's list on the instance for programmatic access.
        self.errors = errors
        summary = "; ".join(errors)
        super().__init__(summary)
51
+
52
+
53
# JSON Schema "type" → Python type(s) mapping
_TYPE_MAP: dict[str, type | tuple[type, ...]] = {
    "string": str,
    "number": (int, float),
    "integer": int,
    "boolean": bool,
    "array": list,
    "object": dict,
    "null": type(None),
}


def _declared_types(schema_type: Any) -> tuple[str, ...]:
    """Normalise a schema's ``type`` value to a tuple of type names.

    A single string becomes a one-element tuple; a list/tuple keeps its
    string members; anything else (including ``None``) yields ``()``.
    """
    if isinstance(schema_type, str):
        return (schema_type,)
    if isinstance(schema_type, (list, tuple)):
        return tuple(name for name in schema_type if isinstance(name, str))
    return ()


def _matches_type(instance: Any, type_name: str) -> bool:
    """Return True when *instance* satisfies the JSON Schema type *type_name*."""
    expected = _TYPE_MAP.get(type_name)
    if expected is None:
        return True  # unknown type names impose no constraint
    # bool is a subclass of int in Python, but JSON Schema treats them as
    # distinct types, so reject bools before the isinstance() check.
    if isinstance(instance, bool) and type_name in ("integer", "number"):
        return False
    return isinstance(instance, expected)


def validate(
    instance: Any,
    schema: dict[str, Any],
    path: str = "$",
) -> list[str]:
    """Validate *instance* against a JSON Schema subset.

    Supported keywords
    ------------------
    * ``type`` — ``"string"``, ``"number"``, ``"integer"``, ``"boolean"``,
      ``"array"``, ``"object"``, ``"null"``, or a list of these names (the
      instance must match at least one)
    * ``enum`` — list of allowed values
    * ``required`` — list of required property names (objects only)
    * ``properties`` — sub-schema per property name (objects only)
    * ``items`` — sub-schema for every array element (arrays only)
    * ``minimum`` / ``maximum`` — inclusive bounds (numbers only)
    * ``minLength`` / ``maxLength`` — length bounds (strings only)

    The object, array, numeric and string keywords are applied only when the
    corresponding type name is declared in ``type``.  A type mismatch is
    reported alone; no further checks run for that value.

    Args:
        instance: The Python value to validate.
        schema: A JSON Schema dict (subset supported as described above).
        path: JSONPath-style prefix used in error messages.  Defaults to
            ``"$"`` (document root).  Recursive calls set sub-paths
            automatically; callers usually leave this as default.

    Returns:
        A list of error strings.  An empty list means the instance is valid.

    Example::

        errors = validate("hello", {"type": "string", "minLength": 3})
        assert errors == []

        errors = validate(2, {"type": "string"})
        assert errors == ["$: expected type string, got int"]

        errors = validate(None, {"type": ["string", "null"]})
        assert errors == []
    """
    errors: list[str] = []
    declared = _declared_types(schema.get("type"))

    # --- type check ---
    if declared and not any(_matches_type(instance, name) for name in declared):
        wanted = "|".join(declared)
        errors.append(f"{path}: expected type {wanted}, got {type(instance).__name__}")
        return errors  # type mismatch; sub-checks are meaningless

    # --- enum check ---
    if "enum" in schema and instance not in schema["enum"]:
        errors.append(f"{path}: value {instance!r} not in enum {schema['enum']!r}")

    # --- object checks ---
    if "object" in declared and isinstance(instance, dict):
        errors.extend(
            f"{path}: missing required property {key!r}"
            for key in schema.get("required", [])
            if key not in instance
        )
        for key, sub_schema in schema.get("properties", {}).items():
            if key in instance:
                errors.extend(validate(instance[key], sub_schema, f"{path}.{key}"))

    # --- array checks ---
    if "array" in declared and isinstance(instance, list):
        items_schema = schema.get("items")
        if items_schema is not None:
            for i, item in enumerate(instance):
                errors.extend(validate(item, items_schema, f"{path}[{i}]"))

    # --- numeric bounds ---
    if (
        ("number" in declared or "integer" in declared)
        and isinstance(instance, (int, float))
        and not isinstance(instance, bool)
    ):
        if "minimum" in schema and instance < schema["minimum"]:
            errors.append(f"{path}: {instance} is less than minimum {schema['minimum']}")
        if "maximum" in schema and instance > schema["maximum"]:
            errors.append(f"{path}: {instance} is greater than maximum {schema['maximum']}")

    # --- string length ---
    if "string" in declared and isinstance(instance, str):
        if "minLength" in schema and len(instance) < schema["minLength"]:
            errors.append(
                f"{path}: string length {len(instance)} is less than "
                f"minLength {schema['minLength']}"
            )
        if "maxLength" in schema and len(instance) > schema["maxLength"]:
            errors.append(
                f"{path}: string length {len(instance)} exceeds maxLength {schema['maxLength']}"
            )

    return errors
164
+
165
+
166
def validate_strict(
    instance: Any,
    schema: dict[str, Any],
    path: str = "$",
) -> None:
    """Run :func:`validate` and raise if it reports anything.

    Args:
        instance: The value to validate.
        schema: JSON Schema dict.
        path: Starting path prefix (default ``"$"``).

    Raises:
        SchemaValidationError: When :func:`validate` returns any errors; the
            exception carries the full error list.
    """
    problems = validate(instance, schema, path)
    if not problems:
        return
    raise SchemaValidationError(problems)