spanforge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +815 -0
- spanforge/_ansi.py +93 -0
- spanforge/_batch_exporter.py +409 -0
- spanforge/_cli.py +2094 -0
- spanforge/_cli_audit.py +639 -0
- spanforge/_cli_compliance.py +711 -0
- spanforge/_cli_cost.py +243 -0
- spanforge/_cli_ops.py +791 -0
- spanforge/_cli_phase11.py +356 -0
- spanforge/_hooks.py +337 -0
- spanforge/_server.py +1708 -0
- spanforge/_span.py +1036 -0
- spanforge/_store.py +288 -0
- spanforge/_stream.py +664 -0
- spanforge/_trace.py +335 -0
- spanforge/_tracer.py +254 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +469 -0
- spanforge/auto.py +464 -0
- spanforge/baseline.py +335 -0
- spanforge/cache.py +635 -0
- spanforge/compliance.py +325 -0
- spanforge/config.py +532 -0
- spanforge/consent.py +228 -0
- spanforge/consumer.py +377 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1254 -0
- spanforge/cost.py +600 -0
- spanforge/debug.py +548 -0
- spanforge/deprecations.py +205 -0
- spanforge/drift.py +482 -0
- spanforge/egress.py +58 -0
- spanforge/eval.py +648 -0
- spanforge/event.py +1064 -0
- spanforge/exceptions.py +240 -0
- spanforge/explain.py +178 -0
- spanforge/export/__init__.py +69 -0
- spanforge/export/append_only.py +337 -0
- spanforge/export/cloud.py +357 -0
- spanforge/export/datadog.py +497 -0
- spanforge/export/grafana.py +320 -0
- spanforge/export/jsonl.py +195 -0
- spanforge/export/openinference.py +158 -0
- spanforge/export/otel_bridge.py +294 -0
- spanforge/export/otlp.py +811 -0
- spanforge/export/otlp_bridge.py +233 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/siem_schema.py +98 -0
- spanforge/export/siem_splunk.py +264 -0
- spanforge/export/siem_syslog.py +212 -0
- spanforge/export/webhook.py +299 -0
- spanforge/exporters/__init__.py +30 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/exporters/sqlite.py +142 -0
- spanforge/gate.py +1150 -0
- spanforge/governance.py +181 -0
- spanforge/hitl.py +295 -0
- spanforge/http.py +187 -0
- spanforge/inspect.py +427 -0
- spanforge/integrations/__init__.py +45 -0
- spanforge/integrations/_pricing.py +280 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/azure_openai.py +133 -0
- spanforge/integrations/bedrock.py +292 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +351 -0
- spanforge/integrations/groq.py +442 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/langgraph.py +306 -0
- spanforge/integrations/llamaindex.py +373 -0
- spanforge/integrations/ollama.py +287 -0
- spanforge/integrations/openai.py +368 -0
- spanforge/integrations/together.py +483 -0
- spanforge/io.py +214 -0
- spanforge/lint.py +322 -0
- spanforge/metrics.py +417 -0
- spanforge/metrics_export.py +343 -0
- spanforge/migrate.py +402 -0
- spanforge/model_registry.py +278 -0
- spanforge/models.py +389 -0
- spanforge/namespaces/__init__.py +254 -0
- spanforge/namespaces/audit.py +256 -0
- spanforge/namespaces/cache.py +237 -0
- spanforge/namespaces/chain.py +77 -0
- spanforge/namespaces/confidence.py +72 -0
- spanforge/namespaces/consent.py +92 -0
- spanforge/namespaces/cost.py +179 -0
- spanforge/namespaces/decision.py +143 -0
- spanforge/namespaces/diff.py +157 -0
- spanforge/namespaces/drift.py +80 -0
- spanforge/namespaces/eval_.py +251 -0
- spanforge/namespaces/feedback.py +241 -0
- spanforge/namespaces/fence.py +193 -0
- spanforge/namespaces/guard.py +105 -0
- spanforge/namespaces/hitl.py +91 -0
- spanforge/namespaces/latency.py +72 -0
- spanforge/namespaces/prompt.py +190 -0
- spanforge/namespaces/redact.py +173 -0
- spanforge/namespaces/retrieval.py +379 -0
- spanforge/namespaces/runtime_governance.py +494 -0
- spanforge/namespaces/template.py +208 -0
- spanforge/namespaces/tool_call.py +77 -0
- spanforge/namespaces/trace.py +1029 -0
- spanforge/normalizer.py +171 -0
- spanforge/plugins.py +82 -0
- spanforge/presidio_backend.py +349 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +418 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +914 -0
- spanforge/regression.py +192 -0
- spanforge/runtime_policy.py +159 -0
- spanforge/sampling.py +511 -0
- spanforge/schema.py +183 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/sdk/__init__.py +625 -0
- spanforge/sdk/_base.py +584 -0
- spanforge/sdk/_base.pyi +71 -0
- spanforge/sdk/_exceptions.py +1096 -0
- spanforge/sdk/_types.py +2184 -0
- spanforge/sdk/alert.py +1514 -0
- spanforge/sdk/alert.pyi +56 -0
- spanforge/sdk/audit.py +1196 -0
- spanforge/sdk/audit.pyi +67 -0
- spanforge/sdk/cec.py +1215 -0
- spanforge/sdk/cec.pyi +37 -0
- spanforge/sdk/config.py +641 -0
- spanforge/sdk/config.pyi +55 -0
- spanforge/sdk/enterprise.py +714 -0
- spanforge/sdk/enterprise.pyi +79 -0
- spanforge/sdk/explain.py +170 -0
- spanforge/sdk/fallback.py +432 -0
- spanforge/sdk/feedback.py +351 -0
- spanforge/sdk/gate.py +874 -0
- spanforge/sdk/gate.pyi +51 -0
- spanforge/sdk/identity.py +2114 -0
- spanforge/sdk/identity.pyi +47 -0
- spanforge/sdk/lineage.py +175 -0
- spanforge/sdk/observe.py +1065 -0
- spanforge/sdk/observe.pyi +50 -0
- spanforge/sdk/operator.py +338 -0
- spanforge/sdk/pii.py +1473 -0
- spanforge/sdk/pii.pyi +119 -0
- spanforge/sdk/pipelines.py +458 -0
- spanforge/sdk/pipelines.pyi +39 -0
- spanforge/sdk/policy.py +930 -0
- spanforge/sdk/rag.py +594 -0
- spanforge/sdk/rbac.py +280 -0
- spanforge/sdk/registry.py +430 -0
- spanforge/sdk/registry.pyi +46 -0
- spanforge/sdk/scope.py +279 -0
- spanforge/sdk/secrets.py +293 -0
- spanforge/sdk/secrets.pyi +25 -0
- spanforge/sdk/security.py +560 -0
- spanforge/sdk/security.pyi +57 -0
- spanforge/sdk/trust.py +472 -0
- spanforge/sdk/trust.pyi +41 -0
- spanforge/secrets.py +799 -0
- spanforge/signing.py +1179 -0
- spanforge/stats.py +100 -0
- spanforge/stream.py +560 -0
- spanforge/testing.py +378 -0
- spanforge/testing_mocks.py +1052 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +300 -0
- spanforge/validate.py +379 -0
- spanforge-1.0.0.dist-info/METADATA +1509 -0
- spanforge-1.0.0.dist-info/RECORD +174 -0
- spanforge-1.0.0.dist-info/WHEEL +4 -0
- spanforge-1.0.0.dist-info/entry_points.txt +5 -0
- spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""spanforge.deprecations — Per-event-type deprecation tracking.
|
|
2
|
+
|
|
3
|
+
Provides a thread-safe registry for deprecation notices that can be queried at
|
|
4
|
+
runtime, used by the CLI ``spanforge deprecations`` command, and populated from
|
|
5
|
+
migration roadmaps.
|
|
6
|
+
|
|
7
|
+
Public API
|
|
8
|
+
----------
|
|
9
|
+
DeprecationNotice Frozen dataclass describing a single deprecation.
|
|
10
|
+
DeprecationRegistry Thread-safe registry (use module-level helpers instead
|
|
11
|
+
of instantiating this directly in most cases).
|
|
12
|
+
get_registry() Return the global singleton registry.
|
|
13
|
+
mark_deprecated(...) Register a notice in the global registry.
|
|
14
|
+
get_deprecation_notice() Look up a notice by event type.
|
|
15
|
+
warn_if_deprecated() Issue DeprecationWarning if the type is registered.
|
|
16
|
+
list_deprecated() Return all registered notices sorted by event_type.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import threading
|
|
22
|
+
import warnings
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"DeprecationNotice",
|
|
28
|
+
"DeprecationRegistry",
|
|
29
|
+
"get_deprecation_notice",
|
|
30
|
+
"get_registry",
|
|
31
|
+
"list_deprecated",
|
|
32
|
+
"mark_deprecated",
|
|
33
|
+
"warn_if_deprecated",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# DeprecationNotice
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class DeprecationNotice:
    """Frozen record of one event type's deprecation.

    ``event_type`` is the deprecated event type string, ``since`` the version
    that introduced the deprecation and ``sunset`` the version in which the
    type is removed.  ``replacement`` and ``notes`` are optional hints that
    are appended to the rendered message when they are non-empty.
    """

    event_type: str
    since: str
    sunset: str
    replacement: Optional[str] = None
    notes: Optional[str] = None

    def format_message(self) -> str:
        """Render the notice as a single human-readable sentence group.

        Example::

            'llm.legacy.trace' is deprecated since 1.0.0 and will be removed
            in 2.0.0. Use 'llm.trace.span.completed' instead.
            Use the trace namespace instead.
        """
        # Optional fragments collapse to "" when the field is unset or empty.
        replacement_hint = f" Use '{self.replacement}' instead." if self.replacement else ""
        notes_hint = f" {self.notes}" if self.notes else ""
        headline = (
            f"'{self.event_type}' is deprecated since {self.since} "
            f"and will be removed in {self.sunset}."
        )
        return headline + replacement_hint + notes_hint
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# DeprecationRegistry
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class DeprecationRegistry:
    """Lock-guarded mapping from event type strings to :class:`DeprecationNotice` records."""

    def __init__(self) -> None:
        self._notices: dict[str, DeprecationNotice] = {}
        # Every read and write of ``_notices`` goes through this lock.
        self._lock = threading.Lock()

    def mark_deprecated(
        self,
        event_type: str,
        *,
        since: str,
        sunset: str,
        replacement: Optional[str] = None,
        notes: Optional[str] = None,
    ) -> DeprecationNotice:
        """Store a notice for *event_type* and hand it back.

        A notice already stored under the same *event_type* is overwritten.

        Args:
            event_type: The event type string being deprecated.
            since: Version when the deprecation was introduced.
            sunset: Version when the type will be removed.
            replacement: Optional suggested replacement event type.
            notes: Optional migration guidance.

        Returns:
            The newly registered :class:`DeprecationNotice`.
        """
        entry = DeprecationNotice(
            event_type=event_type,
            since=since,
            sunset=sunset,
            replacement=replacement,
            notes=notes,
        )
        with self._lock:
            self._notices[event_type] = entry
        return entry

    def get(self, event_type: str) -> Optional[DeprecationNotice]:
        """Look up the notice stored for *event_type*; ``None`` when absent."""
        with self._lock:
            found = self._notices.get(event_type)
        return found

    def is_deprecated(self, event_type: str) -> bool:
        """Tell whether a notice is currently stored for *event_type*."""
        with self._lock:
            known = event_type in self._notices
        return known

    def warn_if_deprecated(self, event_type: str) -> None:
        """Raise a stdlib :class:`DeprecationWarning` for a deprecated *event_type*.

        The warning is issued with ``stacklevel=2`` so it is attributed to the
        caller of this method.  Nothing happens for unregistered types.
        """
        found = self.get(event_type)
        if found is None:
            return
        warnings.warn(found.format_message(), DeprecationWarning, stacklevel=2)

    def list_all(self) -> list[DeprecationNotice]:
        """Return every stored notice, ordered by ``event_type``."""
        with self._lock:
            snapshot = list(self._notices.values())
        snapshot.sort(key=lambda entry: entry.event_type)
        return snapshot

    def remove(self, event_type: str) -> bool:
        """Drop the notice stored for *event_type*.

        Returns:
            ``True`` if a notice was removed, ``False`` if not found.
        """
        with self._lock:
            try:
                del self._notices[event_type]
            except KeyError:
                return False
            return True

    def clear(self) -> None:
        """Forget every stored notice.  Useful in tests."""
        with self._lock:
            self._notices.clear()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ---------------------------------------------------------------------------
|
|
157
|
+
# Global singleton
|
|
158
|
+
# ---------------------------------------------------------------------------
|
|
159
|
+
|
|
160
|
+
_global_registry = DeprecationRegistry()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_registry() -> DeprecationRegistry:
    """Expose the module-wide :class:`DeprecationRegistry` instance.

    The same object is returned on every call; the module-level helper
    functions operate on it.
    """
    registry = _global_registry
    return registry
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
# Module-level convenience helpers (operate on the global registry)
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def mark_deprecated(
    event_type: str,
    *,
    since: str,
    sunset: str,
    replacement: Optional[str] = None,
    notes: Optional[str] = None,
) -> DeprecationNotice:
    """Record a deprecation for *event_type* in the global registry.

    All arguments are forwarded unchanged to
    :meth:`DeprecationRegistry.mark_deprecated`, whose return value (the
    stored notice) is returned.
    """
    stored = _global_registry.mark_deprecated(
        event_type,
        since=since,
        sunset=sunset,
        replacement=replacement,
        notes=notes,
    )
    return stored
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def get_deprecation_notice(event_type: str) -> Optional[DeprecationNotice]:
    """Fetch the global registry's notice for *event_type* (``None`` if unregistered)."""
    found = _global_registry.get(event_type)
    return found
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def warn_if_deprecated(event_type: str) -> None:
    """Emit a :class:`DeprecationWarning` when the global registry lists *event_type*.

    The warning is raised from this frame with ``stacklevel=2`` so that it is
    attributed to the caller.  Unregistered types are ignored.
    """
    found = _global_registry.get(event_type)
    if found is None:
        return
    warnings.warn(found.format_message(), DeprecationWarning, stacklevel=2)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def list_deprecated() -> list[DeprecationNotice]:
    """List the global registry's notices, ordered by ``event_type``."""
    ordered = _global_registry.list_all()
    return ordered
|
spanforge/drift.py
ADDED
|
@@ -0,0 +1,482 @@
|
|
|
1
|
+
"""spanforge.drift — Behavioural drift detection engine (Phase 3).
|
|
2
|
+
|
|
3
|
+
:class:`DriftDetector` maintains a sliding window of observed metric values
|
|
4
|
+
and compares them against a :class:`~spanforge.baseline.BehaviouralBaseline`
|
|
5
|
+
using Z-score and KL-divergence statistics. When a threshold is breached it
|
|
6
|
+
returns :class:`~spanforge.namespaces.drift.DriftPayload` objects that can be
|
|
7
|
+
emitted as RFC-0001 SPANFORGE ``drift.*`` events via
|
|
8
|
+
:func:`~spanforge._stream.emit_rfc_event`.
|
|
9
|
+
|
|
10
|
+
Usage::
|
|
11
|
+
|
|
12
|
+
from spanforge.baseline import BehaviouralBaseline
|
|
13
|
+
from spanforge.drift import DriftDetector
|
|
14
|
+
from spanforge._stream import emit_rfc_event
|
|
15
|
+
from spanforge.types import EventType
|
|
16
|
+
|
|
17
|
+
baseline = BehaviouralBaseline.load("baseline.json")
|
|
18
|
+
detector = DriftDetector(baseline, agent_id="my-agent", auto_emit=False)
|
|
19
|
+
|
|
20
|
+
for event in live_event_stream():
|
|
21
|
+
results = detector.record(event)
|
|
22
|
+
for result in results:
|
|
23
|
+
emit_rfc_event(
|
|
24
|
+
EventType("drift." + result.status),
|
|
25
|
+
result.payload.to_dict(),
|
|
26
|
+
)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import contextlib
|
|
32
|
+
import math
|
|
33
|
+
import statistics
|
|
34
|
+
import threading
|
|
35
|
+
import time
|
|
36
|
+
from collections import deque
|
|
37
|
+
from dataclasses import dataclass
|
|
38
|
+
from typing import TYPE_CHECKING
|
|
39
|
+
|
|
40
|
+
from spanforge.namespaces.drift import DriftPayload
|
|
41
|
+
|
|
42
|
+
if TYPE_CHECKING:
|
|
43
|
+
from spanforge.baseline import BehaviouralBaseline
|
|
44
|
+
from spanforge.event import Event
|
|
45
|
+
|
|
46
|
+
__all__ = ["DriftDetector", "DriftResult"]
|
|
47
|
+
|
|
48
|
+
# Minimum observations required in the window before drift analysis is attempted.
|
|
49
|
+
_MIN_WINDOW_SAMPLES: int = 10
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Value object
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class DriftResult:
    """Immutable outcome of assessing one metric observation for drift.

    Attributes:
        metric_name: Dot-separated metric identifier (e.g. ``"tokens"``,
            ``"confidence.classification"``, ``"latency.chat"``).
        current_value: Raw observation that triggered this assessment.
        window_mean: Mean of the rolling window at assessment time.
        window_stddev: Standard deviation of the rolling window.
        baseline_mean: Mean taken from the
            :class:`~spanforge.baseline.BehaviouralBaseline`.
        baseline_stddev: Standard deviation taken from the baseline.
        z_score: Distance of the window mean from the baseline mean in
            baseline standard deviations.  :class:`DriftDetector` stores
            the absolute value.
        kl_divergence: Gaussian KL-divergence between window and baseline,
            or ``None`` when it could not be computed (degenerate
            standard deviation).
        threshold: The configured Z-score threshold.
        status: ``"ok"`` | ``"detected"`` | ``"threshold_breach"`` |
            ``"resolved"``.
        payload: Ready-to-emit
            :class:`~spanforge.namespaces.drift.DriftPayload`
            (``None`` when status is ``"ok"``).
    """

    # Identity of the observation.
    metric_name: str
    current_value: float
    # Rolling-window statistics.
    window_mean: float
    window_stddev: float
    # Reference statistics from the baseline.
    baseline_mean: float
    baseline_stddev: float
    # Derived drift measures and the threshold they were judged against.
    z_score: float
    kl_divergence: float | None
    threshold: float
    # Verdict and the event payload built for it.
    status: str
    payload: DriftPayload | None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# KL-divergence (Gaussian approximation)
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _kl_divergence_gaussian(
|
|
99
|
+
mu_p: float,
|
|
100
|
+
sigma_p: float,
|
|
101
|
+
mu_q: float,
|
|
102
|
+
sigma_q: float,
|
|
103
|
+
) -> float | None:
|
|
104
|
+
"""KL-divergence KL(P || Q) between two univariate Gaussians.
|
|
105
|
+
|
|
106
|
+
KL(N(μ_P, σ_P²) || N(μ_Q, σ_Q²)) =
|
|
107
|
+
log(σ_Q / σ_P) + (σ_P² + (μ_P − μ_Q)²) / (2 σ_Q²) − 1/2
|
|
108
|
+
|
|
109
|
+
Returns ``None`` when σ_P ≤ 0 or σ_Q ≤ 0 (degenerate distribution).
|
|
110
|
+
"""
|
|
111
|
+
if sigma_p <= 0.0 or sigma_q <= 0.0:
|
|
112
|
+
return None
|
|
113
|
+
return (
|
|
114
|
+
math.log(sigma_q / sigma_p) + (sigma_p**2 + (mu_p - mu_q) ** 2) / (2.0 * sigma_q**2) - 0.5
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# DriftDetector
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class DriftDetector:
    """Sliding-window behavioural drift detector.

    Maintains per-metric rolling windows and reports
    :class:`DriftResult` / :class:`~spanforge.namespaces.drift.DriftPayload`
    objects whenever the current window deviates significantly from the
    recorded :class:`~spanforge.baseline.BehaviouralBaseline`.

    Args:
        baseline:       Deployment-time statistical baseline.
        agent_id:       Identifier for the monitored agent (embedded in every
                        emitted :class:`~spanforge.namespaces.drift.DriftPayload`).
        window_size:    Maximum number of observations per metric in the rolling
                        window (default 500).
        z_threshold:    Z-score that triggers a ``threshold_breach`` (default 3.0).
        kl_threshold:   KL-divergence that triggers a ``threshold_breach``
                        (default 0.5).
        window_seconds: Nominal window duration embedded in emitted payloads
                        (default 3 600 s = 1 h).
        auto_emit:      When ``True`` (default), calls
                        :func:`~spanforge._stream.emit_rfc_event` for each
                        ``detected`` / ``threshold_breach`` / ``resolved`` result.
        metric_ttl_seconds: Metrics that have not been observed for this many
                        seconds are dropped, together with their window and
                        breach state (default 86 400 s = 24 h).

    Raises:
        ValueError: If *agent_id* is empty, *window_size* is below 1,
            *z_threshold* is not a finite positive number, or
            *window_seconds* / *metric_ttl_seconds* is not positive.
    """

    def __init__(
        self,
        baseline: BehaviouralBaseline,
        agent_id: str,
        window_size: int = 500,
        z_threshold: float = 3.0,
        kl_threshold: float = 0.5,
        window_seconds: int = 3600,
        auto_emit: bool = True,
        metric_ttl_seconds: int = 86400,
    ) -> None:
        if not agent_id:
            raise ValueError("DriftDetector: agent_id must be non-empty")
        if window_size < 1:
            raise ValueError("DriftDetector: window_size must be >= 1")
        if not math.isfinite(z_threshold) or z_threshold <= 0:
            raise ValueError("DriftDetector: z_threshold must be a finite positive number")
        if window_seconds <= 0:
            raise ValueError("DriftDetector: window_seconds must be > 0")
        if metric_ttl_seconds <= 0:
            raise ValueError("DriftDetector: metric_ttl_seconds must be > 0")

        self._baseline = baseline
        self._agent_id = agent_id
        self._window_size = window_size
        self._z_threshold = z_threshold
        self._kl_threshold = kl_threshold
        self._window_seconds = window_seconds
        self._auto_emit = auto_emit
        self._metric_ttl_seconds = metric_ttl_seconds

        self._lock = threading.Lock()
        # metric_name → rolling deque of float observations
        self._windows: dict[str, deque[float]] = {}
        # metric_name → current breach state
        self._in_breach: dict[str, bool] = {}
        # metric_name → last observation time (monotonic clock)
        self._last_seen: dict[str, float] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def baseline(self) -> BehaviouralBaseline:
        """The baseline this detector is comparing against."""
        return self._baseline

    @property
    def agent_id(self) -> str:
        """The agent ID this detector is tracking."""
        return self._agent_id

    @property
    def window_size(self) -> int:
        """The rolling window size used for drift calculations."""
        return self._window_size

    def record(self, event: Event) -> list[DriftResult]:
        """Ingest *event*, update rolling windows, and return drift results.

        Extracts metric observations from the event payload based on its
        event type and compares the updated window statistics against the
        baseline.  When ``auto_emit`` is enabled the results are also
        emitted as drift events.

        Args:
            event: A :class:`~spanforge.event.Event` (any type; non-metric
                   events are silently ignored).

        Returns:
            One :class:`DriftResult` per assessed metric whose status is
            ``detected``, ``threshold_breach`` or ``resolved``.  Metrics
            assessed as ``ok`` produce no result, so the list is empty for
            most events.
        """
        observations = _extract_metric_observations(event)
        if not observations:
            return []

        results: list[DriftResult] = []
        with self._lock:
            for metric_name, value in observations:
                result = self._assess(metric_name, value)
                if result is not None:
                    results.append(result)

        if self._auto_emit:
            self._emit_results(results)

        return results

    def window_stats(self, metric_name: str) -> tuple[float, float, int] | None:
        """Return ``(mean, stddev, count)`` for *metric_name*'s current window.

        The standard deviation is reported as ``0.0`` while the window holds
        a single observation.  Returns ``None`` if no data has been recorded
        for the metric yet.
        """
        with self._lock:
            window = self._windows.get(metric_name)
            if not window:
                return None
            data = list(window)
            mean = statistics.mean(data)
            stddev = statistics.stdev(data) if len(data) >= 2 else 0.0
            return mean, stddev, len(data)

    def reset_window(self, metric_name: str | None = None) -> None:
        """Clear the rolling window(s).

        The last-seen timestamp is dropped together with the window and the
        breach state, so a reset metric leaves no bookkeeping behind.

        Args:
            metric_name: If given, clears only that metric's window and breach
                         state.  If ``None``, clears all metrics.
        """
        with self._lock:
            if metric_name is None:
                self._windows.clear()
                self._in_breach.clear()
                self._last_seen.clear()
            else:
                self._windows.pop(metric_name, None)
                self._in_breach.pop(metric_name, None)
                self._last_seen.pop(metric_name, None)

    def in_breach(self, metric_name: str) -> bool:
        """Return ``True`` if *metric_name* is currently in threshold breach."""
        with self._lock:
            return self._in_breach.get(metric_name, False)

    # ------------------------------------------------------------------
    # Internal helpers (must be called with self._lock held)
    # ------------------------------------------------------------------

    def _get_baseline_stats(self, metric_name: str) -> tuple[float, float] | None:
        """Return (baseline_mean, baseline_stddev) for *metric_name*, or None.

        ``"tokens"`` maps to the baseline's token statistics;
        ``"confidence.<type>"`` and ``"latency.<operation>"`` are looked up in
        the baseline's per-type / per-operation mappings.  Any other name, or
        a missing mapping entry, yields ``None``.
        """
        if metric_name == "tokens":
            return self._baseline.tokens.mean, self._baseline.tokens.stddev

        if metric_name.startswith("confidence."):
            dtype = metric_name[len("confidence.") :]
            stats = self._baseline.confidence_by_type.get(dtype)
            if stats is not None:
                return stats.mean, stats.stddev

        if metric_name.startswith("latency."):
            op = metric_name[len("latency.") :]
            stats = self._baseline.latency_by_operation.get(op)
            if stats is not None:
                return stats.mean, stats.stddev

        return None

    def _evict_stale(self) -> None:
        """Evict metrics that have not been observed within ``metric_ttl_seconds``.

        Called with ``self._lock`` already held.  Prevents unbounded memory
        growth when many short-lived agent instances write unique metric keys.
        """
        now = time.monotonic()
        cutoff = now - self._metric_ttl_seconds
        stale = [k for k, ts in self._last_seen.items() if ts < cutoff]
        for k in stale:
            self._windows.pop(k, None)
            self._in_breach.pop(k, None)
            self._last_seen.pop(k, None)

    def _assess(self, metric_name: str, value: float) -> DriftResult | None:
        """Update the window for *metric_name* with *value* and return a result.

        Returns ``None`` when *value* is not finite, when there is no
        baseline for the metric, when the window has fewer than
        ``_MIN_WINDOW_SAMPLES`` observations, or when the metric is assessed
        as ``ok``.
        """
        # A single NaN/inf would turn the window mean non-finite, which makes
        # every threshold comparison False until the value ages out of the
        # window (and can fake a "resolved" transition).  Drop it up front.
        if not math.isfinite(value):
            return None

        # Update rolling window (created lazily on first observation)
        window = self._windows.get(metric_name)
        if window is None:
            window = self._windows[metric_name] = deque(maxlen=self._window_size)
        window.append(value)
        self._last_seen[metric_name] = time.monotonic()

        # Evict metrics that haven't been seen within the TTL.
        self._evict_stale()

        if len(window) < _MIN_WINDOW_SAMPLES:
            return None

        baseline_stats = self._get_baseline_stats(metric_name)
        if baseline_stats is None:
            return None

        baseline_mean, baseline_stddev = baseline_stats

        # Avoid division by zero for constant-baseline metrics
        effective_stddev = baseline_stddev if baseline_stddev > 0 else 1e-9

        data = list(window)
        win_mean = statistics.mean(data)
        win_stddev = statistics.stdev(data) if len(data) >= 2 else 0.0

        z_score = abs(win_mean - baseline_mean) / effective_stddev

        kl_div = _kl_divergence_gaussian(
            mu_p=win_mean,
            sigma_p=win_stddev,
            mu_q=baseline_mean,
            sigma_q=baseline_stddev,
        )

        # Determine status
        was_in_breach = self._in_breach.get(metric_name, False)

        if z_score >= self._z_threshold or (kl_div is not None and kl_div >= self._kl_threshold):
            new_status = "threshold_breach"
            self._in_breach[metric_name] = True
        # No active breach — resolve or downgrade
        elif was_in_breach:
            new_status = "resolved"
            self._in_breach[metric_name] = False
        elif z_score >= self._z_threshold * (2.0 / 3.0):
            # "detected" zone: Z is elevated but below the breach threshold
            new_status = "detected"
        else:
            new_status = "ok"

        if new_status == "ok":
            return None

        # The remaining statuses ("threshold_breach", "detected", "resolved")
        # are passed to DriftPayload unchanged.
        drift_payload = DriftPayload(
            metric_name=metric_name,
            agent_id=self._agent_id,
            current_value=value,
            baseline_mean=baseline_mean,
            baseline_stddev=baseline_stddev,
            z_score=round(z_score, 6),
            kl_divergence=round(kl_div, 6) if kl_div is not None else None,
            threshold=self._z_threshold,
            window_seconds=self._window_seconds,
            status=new_status,  # type: ignore[arg-type]
        )

        return DriftResult(
            metric_name=metric_name,
            current_value=value,
            window_mean=win_mean,
            window_stddev=win_stddev,
            baseline_mean=baseline_mean,
            baseline_stddev=baseline_stddev,
            z_score=z_score,
            kl_divergence=kl_div,
            threshold=self._z_threshold,
            status=new_status,
            payload=drift_payload,
        )

    # ------------------------------------------------------------------
    # Auto-emit
    # ------------------------------------------------------------------

    def _emit_results(self, results: list[DriftResult]) -> None:
        """Emit drift events for each non-ok result via emit_rfc_event.

        Best-effort: a failing import of the emit machinery, or an exception
        raised while emitting a single result, is swallowed.
        """
        if not results:
            return
        try:
            from spanforge._stream import emit_rfc_event
            from spanforge.types import EventType

            _status_to_event_type = {
                "detected": EventType.DRIFT_DETECTED,
                "threshold_breach": EventType.DRIFT_THRESHOLD_BREACH,
                "resolved": EventType.DRIFT_RESOLVED,
            }
            for result in results:
                if result.payload is None:
                    continue
                et = _status_to_event_type.get(result.status)
                if et is not None:
                    with contextlib.suppress(Exception):
                        emit_rfc_event(
                            et, result.payload.to_dict()
                        )  # never let auto-emit failures disrupt the caller
        except ImportError:
            pass
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
# ---------------------------------------------------------------------------
|
|
429
|
+
# Metric extraction helpers
|
|
430
|
+
# ---------------------------------------------------------------------------
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _event_type_str(event: Event) -> str:
|
|
434
|
+
et = event.event_type
|
|
435
|
+
return et.value if hasattr(et, "value") else str(et)
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _extract_metric_observations(
    event: Event,
) -> list[tuple[str, float]]:
    """Pull the drift-relevant ``(metric_name, value)`` pairs out of *event*.

    Recognised event types and the metrics they yield:

    * ``llm.trace.span.completed`` / ``llm.trace.span.failed`` —
      ``"tokens"`` (when the total token count is positive) and
      ``"latency.<operation>"`` (when ``duration_ms`` is present).
    * ``confidence.sample`` — ``"confidence.<decision_type>"``.
    * ``latency.sample`` — ``"latency.<operation>"``.
    * ``tool_call.*`` — ``"latency.<tool_name>"``.

    Missing name fields default to ``"unknown"``.  Any other event type, or
    an event without the relevant value, yields an empty list.
    """
    kind = _event_type_str(event)
    body = event.payload

    # LLM span events — token count + latency per operation
    if kind in ("llm.trace.span.completed", "llm.trace.span.failed"):
        found: list[tuple[str, float]] = []
        usage = body.get("token_usage")
        if usage:
            token_total = int(usage.get("total_tokens", 0) or 0)
            if token_total > 0:
                found.append(("tokens", float(token_total)))
        duration = body.get("duration_ms")
        if duration is not None:
            operation = str(body.get("operation", "unknown"))
            found.append((f"latency.{operation}", float(duration)))
        return found

    # Confidence namespace
    if kind == "confidence.sample":
        decision = str(body.get("decision_type", "unknown"))
        score = body.get("score")
        if score is None:
            return []
        return [(f"confidence.{decision}", float(score))]

    # Latency namespace
    if kind == "latency.sample":
        operation = str(body.get("operation", "unknown"))
        latency = body.get("latency_ms")
        if latency is None:
            return []
        return [(f"latency.{operation}", float(latency))]

    # Tool call namespace
    if kind.startswith("tool_call."):
        latency = body.get("latency_ms")
        tool = str(body.get("tool_name", "unknown"))
        if latency is None:
            return []
        return [(f"latency.{tool}", float(latency))]

    return []
|