spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
@@ -0,0 +1,205 @@
1
+ """spanforge.deprecations — Per-event-type deprecation tracking.
2
+
3
+ Provides a thread-safe registry for deprecation notices that can be queried at
4
+ runtime, used by the CLI ``spanforge deprecations`` command, and populated from
5
+ migration roadmaps.
6
+
7
+ Public API
8
+ ----------
9
+ DeprecationNotice Frozen dataclass describing a single deprecation.
10
+ DeprecationRegistry Thread-safe registry (use module-level helpers instead
11
+ of instantiating this directly in most cases).
12
+ get_registry() Return the global singleton registry.
13
+ mark_deprecated(...) Register a notice in the global registry.
14
+ get_deprecation_notice() Look up a notice by event type.
15
+ warn_if_deprecated() Issue DeprecationWarning if the type is registered.
16
+ list_deprecated() Return all registered notices sorted by event_type.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import threading
22
+ import warnings
23
+ from dataclasses import dataclass
24
+ from typing import Optional
25
+
26
+ __all__ = [
27
+ "DeprecationNotice",
28
+ "DeprecationRegistry",
29
+ "get_deprecation_notice",
30
+ "get_registry",
31
+ "list_deprecated",
32
+ "mark_deprecated",
33
+ "warn_if_deprecated",
34
+ ]
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # DeprecationNotice
39
+ # ---------------------------------------------------------------------------
40
+
41
+
42
@dataclass(frozen=True)
class DeprecationNotice:
    """Frozen value object recording that one event type is deprecated.

    Fields:
        event_type: The deprecated event type string.
        since: Version in which the deprecation was introduced.
        sunset: Version in which the event type will be removed.
        replacement: Optional event type to use instead.
        notes: Optional free-form migration guidance.
    """

    event_type: str
    since: str
    sunset: str
    replacement: Optional[str] = None
    notes: Optional[str] = None

    def format_message(self) -> str:
        """Render the notice as a single-line, human-readable message.

        The result always starts with the "deprecated since / removed in"
        sentence.  A "Use '<replacement>' instead." sentence and the notes
        are appended, space-separated, only when the respective field is
        truthy.

        Example output (one line)::

            'llm.legacy.trace' is deprecated since 1.0.0 and will be removed in 2.0.0. Use 'llm.trace.span.completed' instead. Use the trace namespace instead.
        """
        sentences = [
            f"'{self.event_type}' is deprecated since {self.since} "
            f"and will be removed in {self.sunset}."
        ]
        if self.replacement:
            sentences.append(f"Use '{self.replacement}' instead.")
        if self.notes:
            sentences.append(f"{self.notes}")
        return " ".join(sentences)
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # DeprecationRegistry
74
+ # ---------------------------------------------------------------------------
75
+
76
+
77
class DeprecationRegistry:
    """Lock-protected mapping from event type string to :class:`DeprecationNotice`.

    Every method that touches the internal dict does so while holding
    ``self._lock``.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._notices: dict[str, DeprecationNotice] = {}

    def mark_deprecated(
        self,
        event_type: str,
        *,
        since: str,
        sunset: str,
        replacement: Optional[str] = None,
        notes: Optional[str] = None,
    ) -> DeprecationNotice:
        """Create a notice for *event_type*, store it, and return it.

        An existing notice for the same *event_type* is overwritten.

        Args:
            event_type: The event type string being deprecated.
            since: Version when the deprecation was introduced.
            sunset: Version when the type will be removed.
            replacement: Optional suggested replacement event type.
            notes: Optional migration guidance.

        Returns:
            The :class:`DeprecationNotice` that was stored.
        """
        entry = DeprecationNotice(
            event_type=event_type,
            since=since,
            sunset=sunset,
            replacement=replacement,
            notes=notes,
        )
        with self._lock:
            self._notices[event_type] = entry
        return entry

    def get(self, event_type: str) -> Optional[DeprecationNotice]:
        """Look up the notice stored for *event_type*; ``None`` when absent."""
        with self._lock:
            found = self._notices.get(event_type)
        return found

    def is_deprecated(self, event_type: str) -> bool:
        """Tell whether a notice is registered under *event_type*."""
        with self._lock:
            registered = event_type in self._notices
        return registered

    def warn_if_deprecated(self, event_type: str) -> None:
        """Emit a :class:`DeprecationWarning` when *event_type* is registered.

        Does nothing for unregistered types.  The warning is raised from this
        frame with ``stacklevel=2`` so it is attributed to the caller of this
        method.
        """
        entry = self.get(event_type)
        if entry is None:
            return
        warnings.warn(entry.format_message(), DeprecationWarning, stacklevel=2)

    def list_all(self) -> list[DeprecationNotice]:
        """Return a new list of every notice, ordered by ``event_type``."""
        with self._lock:
            snapshot = list(self._notices.values())
        snapshot.sort(key=lambda entry: entry.event_type)
        return snapshot

    def remove(self, event_type: str) -> bool:
        """Drop the notice registered under *event_type*.

        Returns:
            ``True`` if a notice was removed, ``False`` if not found.
        """
        with self._lock:
            dropped = self._notices.pop(event_type, None)
        return dropped is not None

    def clear(self) -> None:
        """Forget every registered notice (handy for test isolation)."""
        with self._lock:
            self._notices.clear()
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # Global singleton
158
+ # ---------------------------------------------------------------------------
159
+
160
# Process-wide registry instance shared by the module-level helpers below.
_global_registry = DeprecationRegistry()


def get_registry() -> DeprecationRegistry:
    """Expose the module's shared :class:`DeprecationRegistry` instance."""
    registry = _global_registry
    return registry
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Module-level convenience helpers (operate on the global registry)
170
+ # ---------------------------------------------------------------------------
171
+
172
+
173
def mark_deprecated(
    event_type: str,
    *,
    since: str,
    sunset: str,
    replacement: Optional[str] = None,
    notes: Optional[str] = None,
) -> DeprecationNotice:
    """Record a deprecation for *event_type* in the global registry.

    All arguments are forwarded unchanged to
    ``DeprecationRegistry.mark_deprecated`` on the module-level singleton,
    and the notice it returns is passed back to the caller.
    """
    details = {
        "since": since,
        "sunset": sunset,
        "replacement": replacement,
        "notes": notes,
    }
    return _global_registry.mark_deprecated(event_type, **details)
189
+
190
+
191
def get_deprecation_notice(event_type: str) -> Optional[DeprecationNotice]:
    """Fetch the global registry's notice for *event_type* (``None`` if there is none)."""
    notice = _global_registry.get(event_type)
    return notice
194
+
195
+
196
def warn_if_deprecated(event_type: str) -> None:
    """Emit a :class:`DeprecationWarning` when the global registry lists *event_type*.

    No-op for unregistered types.  ``warnings.warn`` is called directly from
    this function (rather than delegating to the registry method) so that
    ``stacklevel=2`` attributes the warning to this function's caller.
    """
    entry = _global_registry.get(event_type)
    if entry is None:
        return
    warnings.warn(entry.format_message(), DeprecationWarning, stacklevel=2)
201
+
202
+
203
def list_deprecated() -> list[DeprecationNotice]:
    """List every notice in the global registry, ordered by ``event_type``."""
    notices = _global_registry.list_all()
    return notices
spanforge/drift.py ADDED
@@ -0,0 +1,482 @@
1
+ """spanforge.drift — Behavioural drift detection engine (Phase 3).
2
+
3
+ :class:`DriftDetector` maintains a sliding window of observed metric values
4
+ and compares them against a :class:`~spanforge.baseline.BehaviouralBaseline`
5
+ using Z-score and KL-divergence statistics. When a threshold is breached it
6
+ returns :class:`~spanforge.namespaces.drift.DriftPayload` objects that can be
7
+ emitted as RFC-0001 SPANFORGE ``drift.*`` events via
8
+ :func:`~spanforge._stream.emit_rfc_event`.
9
+
10
+ Usage::
11
+
12
+ from spanforge.baseline import BehaviouralBaseline
13
+ from spanforge.drift import DriftDetector
14
+ from spanforge._stream import emit_rfc_event
15
+ from spanforge.types import EventType
16
+
17
+ baseline = BehaviouralBaseline.load("baseline.json")
18
+ detector = DriftDetector(baseline, agent_id="my-agent", auto_emit=False)
19
+
20
+ for event in live_event_stream():
21
+ results = detector.record(event)
22
+ for result in results:
23
+ emit_rfc_event(
24
+ EventType("drift." + result.status),
25
+ result.payload.to_dict(),
26
+ )
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import contextlib
32
+ import math
33
+ import statistics
34
+ import threading
35
+ import time
36
+ from collections import deque
37
+ from dataclasses import dataclass
38
+ from typing import TYPE_CHECKING
39
+
40
+ from spanforge.namespaces.drift import DriftPayload
41
+
42
+ if TYPE_CHECKING:
43
+ from spanforge.baseline import BehaviouralBaseline
44
+ from spanforge.event import Event
45
+
46
+ __all__ = ["DriftDetector", "DriftResult"]
47
+
48
+ # Minimum observations required in the window before drift analysis is attempted.
49
+ _MIN_WINDOW_SAMPLES: int = 10
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Value object
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
@dataclass(frozen=True)
class DriftResult:
    """Drift assessment for a single metric observation.

    The field descriptions below reflect how :class:`DriftDetector` populates
    this object; other producers may differ.

    Attributes:
        metric_name:     Dot-separated metric identifier (e.g. ``"tokens"``,
                         ``"confidence.classification"``, ``"latency.chat"``).
        current_value:   The raw observed value that triggered the assessment.
        window_mean:     Mean of the rolling window after adding the observation.
        window_stddev:   Sample standard deviation of the rolling window.
        baseline_mean:   Mean from the :class:`~spanforge.baseline.BehaviouralBaseline`.
        baseline_stddev: Std-dev from the baseline.
        z_score:         ``abs(window_mean - baseline_mean) / baseline_stddev``
                         (non-negative; a tiny positive divisor is substituted
                         when the baseline std-dev is not positive).
        kl_divergence:   KL-divergence between the window and baseline
                         Gaussians; ``None`` when either the window or the
                         baseline standard deviation is not positive.
        threshold:       The configured Z-score threshold.
        status:          ``"ok"`` | ``"detected"`` | ``"threshold_breach"`` |
                         ``"resolved"``.  ``DriftDetector`` does not return a
                         result at all for ``"ok"`` assessments.
        payload:         Ready-to-emit :class:`~spanforge.namespaces.drift.DriftPayload`
                         (``None`` when status is ``"ok"``).
    """

    metric_name: str
    current_value: float
    window_mean: float
    window_stddev: float
    baseline_mean: float
    baseline_stddev: float
    z_score: float
    kl_divergence: float | None
    threshold: float
    status: str
    payload: DriftPayload | None
91
+
92
+
93
+ # ---------------------------------------------------------------------------
94
+ # KL-divergence (Gaussian approximation)
95
+ # ---------------------------------------------------------------------------
96
+
97
+
98
+ def _kl_divergence_gaussian(
99
+ mu_p: float,
100
+ sigma_p: float,
101
+ mu_q: float,
102
+ sigma_q: float,
103
+ ) -> float | None:
104
+ """KL-divergence KL(P || Q) between two univariate Gaussians.
105
+
106
+ KL(N(μ_P, σ_P²) || N(μ_Q, σ_Q²)) =
107
+ log(σ_Q / σ_P) + (σ_P² + (μ_P − μ_Q)²) / (2 σ_Q²) − 1/2
108
+
109
+ Returns ``None`` when σ_P ≤ 0 or σ_Q ≤ 0 (degenerate distribution).
110
+ """
111
+ if sigma_p <= 0.0 or sigma_q <= 0.0:
112
+ return None
113
+ return (
114
+ math.log(sigma_q / sigma_p) + (sigma_p**2 + (mu_p - mu_q) ** 2) / (2.0 * sigma_q**2) - 0.5
115
+ )
116
+
117
+
118
+ # ---------------------------------------------------------------------------
119
+ # DriftDetector
120
+ # ---------------------------------------------------------------------------
121
+
122
+
123
class DriftDetector:
    """Sliding-window behavioural drift detector.

    Maintains per-metric rolling windows and reports
    :class:`DriftResult` / :class:`~spanforge.namespaces.drift.DriftPayload`
    objects whenever the current window deviates significantly from the
    recorded :class:`~spanforge.baseline.BehaviouralBaseline`.

    Args:
        baseline:           Deployment-time statistical baseline.
        agent_id:           Identifier for the monitored agent (embedded in
                            every emitted
                            :class:`~spanforge.namespaces.drift.DriftPayload`).
        window_size:        Maximum number of observations per metric in the
                            rolling window (default 500).
        z_threshold:        Z-score that triggers a ``threshold_breach``
                            (default 3.0).
        kl_threshold:       KL-divergence that triggers a ``threshold_breach``
                            (default 0.5).
        window_seconds:     Nominal window duration embedded in emitted
                            payloads (default 3 600 s = 1 h).
        auto_emit:          When ``True`` (default), calls
                            :func:`~spanforge._stream.emit_rfc_event` for each
                            ``detected`` / ``threshold_breach`` / ``resolved``
                            result.
        metric_ttl_seconds: A metric that has not been observed for this many
                            seconds has its window and breach state evicted
                            (default 86 400 s = 24 h).

    Raises:
        ValueError: If ``agent_id`` is empty, ``window_size`` < 1,
            ``z_threshold`` is not a finite positive number, or
            ``window_seconds`` / ``metric_ttl_seconds`` is not positive.
    """

    def __init__(
        self,
        baseline: BehaviouralBaseline,
        agent_id: str,
        window_size: int = 500,
        z_threshold: float = 3.0,
        kl_threshold: float = 0.5,
        window_seconds: int = 3600,
        auto_emit: bool = True,
        metric_ttl_seconds: int = 86400,
    ) -> None:
        if not agent_id:
            raise ValueError("DriftDetector: agent_id must be non-empty")
        if window_size < 1:
            raise ValueError("DriftDetector: window_size must be >= 1")
        if not math.isfinite(z_threshold) or z_threshold <= 0:
            raise ValueError("DriftDetector: z_threshold must be a finite positive number")
        if window_seconds <= 0:
            raise ValueError("DriftDetector: window_seconds must be > 0")
        if metric_ttl_seconds <= 0:
            raise ValueError("DriftDetector: metric_ttl_seconds must be > 0")

        self._baseline = baseline
        self._agent_id = agent_id
        self._window_size = window_size
        self._z_threshold = z_threshold
        self._kl_threshold = kl_threshold
        self._window_seconds = window_seconds
        self._auto_emit = auto_emit
        self._metric_ttl_seconds = metric_ttl_seconds

        self._lock = threading.Lock()
        # metric_name → rolling deque of float observations
        self._windows: dict[str, deque[float]] = {}
        # metric_name → current breach state
        self._in_breach: dict[str, bool] = {}
        # metric_name → last observation time (monotonic clock)
        self._last_seen: dict[str, float] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def baseline(self) -> BehaviouralBaseline:
        """The baseline this detector is comparing against."""
        return self._baseline

    @property
    def agent_id(self) -> str:
        """The agent ID this detector is tracking."""
        return self._agent_id

    @property
    def window_size(self) -> int:
        """The rolling window size used for drift calculations."""
        return self._window_size

    def record(self, event: Event) -> list[DriftResult]:
        """Ingest *event*, update rolling windows, and return drift results.

        Extracts metric observations from the event payload based on its
        event type and compares the updated window statistics against the
        baseline.  When ``auto_emit`` is enabled the results are also emitted
        (after the internal lock has been released).

        Args:
            event: A :class:`~spanforge.event.Event` (any type; events that
                   carry no drift-relevant metrics are silently ignored).

        Returns:
            One :class:`DriftResult` per observed metric whose assessment is
            ``detected``, ``threshold_breach`` or ``resolved``.  Metrics that
            assess as ``ok`` produce no result, so the list is empty when
            nothing noteworthy was observed.
        """
        observations = _extract_metric_observations(event)
        if not observations:
            return []

        results: list[DriftResult] = []
        with self._lock:
            for metric_name, value in observations:
                result = self._assess(metric_name, value)
                if result is not None:
                    results.append(result)

        if self._auto_emit:
            self._emit_results(results)

        return results

    def window_stats(self, metric_name: str) -> tuple[float, float, int] | None:
        """Return ``(mean, stddev, count)`` for *metric_name*'s current window.

        ``stddev`` is the sample standard deviation, or ``0.0`` when the
        window holds a single observation.  Returns ``None`` if no data has
        been recorded for the metric yet.
        """
        with self._lock:
            window = self._windows.get(metric_name)
            if not window:
                return None
            data = list(window)
        mean = statistics.mean(data)
        stddev = statistics.stdev(data) if len(data) >= 2 else 0.0
        return mean, stddev, len(data)

    def reset_window(self, metric_name: str | None = None) -> None:
        """Clear the rolling window(s).

        Args:
            metric_name: If given, clears only that metric's window, breach
                         state and last-seen timestamp.  If ``None``, clears
                         all metrics.
        """
        with self._lock:
            if metric_name is None:
                self._windows.clear()
                self._in_breach.clear()
                # Drop the TTL bookkeeping too, so no orphaned keys linger
                # until the next eviction pass.
                self._last_seen.clear()
            else:
                self._windows.pop(metric_name, None)
                self._in_breach.pop(metric_name, None)
                self._last_seen.pop(metric_name, None)

    def in_breach(self, metric_name: str) -> bool:
        """Return ``True`` if *metric_name* is currently in threshold breach."""
        with self._lock:
            return self._in_breach.get(metric_name, False)

    # ------------------------------------------------------------------
    # Internal helpers (must be called with self._lock held)
    # ------------------------------------------------------------------

    def _get_baseline_stats(self, metric_name: str) -> tuple[float, float] | None:
        """Return (baseline_mean, baseline_stddev) for *metric_name*, or None.

        ``"tokens"`` maps to the baseline's token statistics;
        ``"confidence.<type>"`` and ``"latency.<operation>"`` are looked up in
        the baseline's per-type / per-operation mappings.
        """
        if metric_name == "tokens":
            return self._baseline.tokens.mean, self._baseline.tokens.stddev

        if metric_name.startswith("confidence."):
            dtype = metric_name[len("confidence.") :]
            stats = self._baseline.confidence_by_type.get(dtype)
            if stats is not None:
                return stats.mean, stats.stddev

        if metric_name.startswith("latency."):
            op = metric_name[len("latency.") :]
            stats = self._baseline.latency_by_operation.get(op)
            if stats is not None:
                return stats.mean, stats.stddev

        return None

    def _evict_stale(self) -> None:
        """Evict metrics that have not been observed within ``metric_ttl_seconds``.

        Called with ``self._lock`` already held.  Prevents unbounded memory
        growth when many short-lived agent instances write unique metric keys.
        """
        cutoff = time.monotonic() - self._metric_ttl_seconds
        stale = [k for k, ts in self._last_seen.items() if ts < cutoff]
        for k in stale:
            self._windows.pop(k, None)
            self._in_breach.pop(k, None)
            self._last_seen.pop(k, None)

    def _assess(self, metric_name: str, value: float) -> DriftResult | None:
        """Update the window for *metric_name* with *value* and return a result.

        Returns ``None`` when *value* is not finite, when there is no baseline
        for the metric, when the window has fewer than ``_MIN_WINDOW_SAMPLES``
        observations, or when the assessment is ``ok``.
        """
        # A NaN/inf sample would turn every window statistic non-finite: all
        # threshold comparisons then evaluate False, which reports a spurious
        # "resolved" for a metric in breach and blinds detection until the
        # sample ages out of the window.  Drop it before touching any state.
        if not math.isfinite(value):
            return None

        # Update rolling window
        window = self._windows.setdefault(metric_name, deque(maxlen=self._window_size))
        window.append(value)
        self._last_seen[metric_name] = time.monotonic()

        # Evict metrics that haven't been seen within the TTL.
        self._evict_stale()

        if len(window) < _MIN_WINDOW_SAMPLES:
            return None

        baseline_stats = self._get_baseline_stats(metric_name)
        if baseline_stats is None:
            return None

        baseline_mean, baseline_stddev = baseline_stats

        # Avoid division by zero for constant-baseline metrics
        effective_stddev = baseline_stddev if baseline_stddev > 0 else 1e-9

        data = list(window)
        win_mean = statistics.mean(data)
        win_stddev = statistics.stdev(data) if len(data) >= 2 else 0.0

        z_score = abs(win_mean - baseline_mean) / effective_stddev

        # None when either standard deviation is not positive.
        kl_div = _kl_divergence_gaussian(
            mu_p=win_mean,
            sigma_p=win_stddev,
            mu_q=baseline_mean,
            sigma_q=baseline_stddev,
        )

        # Determine status
        was_in_breach = self._in_breach.get(metric_name, False)

        if z_score >= self._z_threshold or (kl_div is not None and kl_div >= self._kl_threshold):
            new_status = "threshold_breach"
            self._in_breach[metric_name] = True
        # No active breach — resolve or downgrade
        elif was_in_breach:
            new_status = "resolved"
            self._in_breach[metric_name] = False
        elif z_score >= self._z_threshold * (2.0 / 3.0):
            # "detected" zone: Z is elevated but below the breach threshold
            new_status = "detected"
        else:
            new_status = "ok"

        if new_status == "ok":
            return None

        # The remaining statuses are used verbatim as the payload status.
        drift_payload = DriftPayload(
            metric_name=metric_name,
            agent_id=self._agent_id,
            current_value=value,
            baseline_mean=baseline_mean,
            baseline_stddev=baseline_stddev,
            z_score=round(z_score, 6),
            kl_divergence=round(kl_div, 6) if kl_div is not None else None,
            threshold=self._z_threshold,
            window_seconds=self._window_seconds,
            status=new_status,  # type: ignore[arg-type]
        )

        return DriftResult(
            metric_name=metric_name,
            current_value=value,
            window_mean=win_mean,
            window_stddev=win_stddev,
            baseline_mean=baseline_mean,
            baseline_stddev=baseline_stddev,
            z_score=z_score,
            kl_divergence=kl_div,
            threshold=self._z_threshold,
            status=new_status,
            payload=drift_payload,
        )

    # ------------------------------------------------------------------
    # Auto-emit
    # ------------------------------------------------------------------

    def _emit_results(self, results: list[DriftResult]) -> None:
        """Emit drift events for each non-ok result via emit_rfc_event.

        Best-effort: if the emit machinery cannot be imported nothing is
        emitted, and a failure while emitting one result is suppressed so it
        never disrupts the caller.
        """
        if not results:
            return
        try:
            # Imported lazily, at call time rather than module import time.
            from spanforge._stream import emit_rfc_event
            from spanforge.types import EventType
        except ImportError:
            return

        status_to_event_type = {
            "detected": EventType.DRIFT_DETECTED,
            "threshold_breach": EventType.DRIFT_THRESHOLD_BREACH,
            "resolved": EventType.DRIFT_RESOLVED,
        }
        for result in results:
            if result.payload is None:
                continue
            et = status_to_event_type.get(result.status)
            if et is None:
                continue
            # never let auto-emit failures disrupt the caller
            with contextlib.suppress(Exception):
                emit_rfc_event(et, result.payload.to_dict())
426
+
427
+
428
+ # ---------------------------------------------------------------------------
429
+ # Metric extraction helpers
430
+ # ---------------------------------------------------------------------------
431
+
432
+
433
+ def _event_type_str(event: Event) -> str:
434
+ et = event.event_type
435
+ return et.value if hasattr(et, "value") else str(et)
436
+
437
+
438
+ def _extract_metric_observations(
439
+ event: Event,
440
+ ) -> list[tuple[str, float]]:
441
+ """Extract (metric_name, value) pairs from *event*.
442
+
443
+ Returns an empty list for event types that carry no drift-relevant metrics.
444
+ """
445
+ etype = _event_type_str(event)
446
+ payload = event.payload
447
+ observations: list[tuple[str, float]] = []
448
+
449
+ # LLM span events — token count + latency per operation
450
+ if etype in ("llm.trace.span.completed", "llm.trace.span.failed"):
451
+ tu = payload.get("token_usage")
452
+ if tu:
453
+ total = int(tu.get("total_tokens", 0) or 0)
454
+ if total > 0:
455
+ observations.append(("tokens", float(total)))
456
+ dur = payload.get("duration_ms")
457
+ if dur is not None:
458
+ op = str(payload.get("operation", "unknown"))
459
+ observations.append((f"latency.{op}", float(dur)))
460
+
461
+ # Confidence namespace
462
+ elif etype == "confidence.sample":
463
+ dtype = str(payload.get("decision_type", "unknown"))
464
+ score = payload.get("score")
465
+ if score is not None:
466
+ observations.append((f"confidence.{dtype}", float(score)))
467
+
468
+ # Latency namespace
469
+ elif etype == "latency.sample":
470
+ op = str(payload.get("operation", "unknown"))
471
+ lat = payload.get("latency_ms")
472
+ if lat is not None:
473
+ observations.append((f"latency.{op}", float(lat)))
474
+
475
+ # Tool call namespace
476
+ elif etype.startswith("tool_call."):
477
+ lat = payload.get("latency_ms")
478
+ tool_name = str(payload.get("tool_name", "unknown"))
479
+ if lat is not None:
480
+ observations.append((f"latency.{tool_name}", float(lat)))
481
+
482
+ return observations