spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/baseline.py ADDED
@@ -0,0 +1,335 @@
1
+ """spanforge.baseline — Behavioural baseline construction for drift detection.
2
+
3
+ :class:`BehaviouralBaseline` captures the statistical summary of an agent's
4
+ typical behaviour over an initial traffic window (default: up to 1 000 events
5
+ or 24 hours). The baseline is serialisable to JSON so it can be persisted and
6
+ reloaded across restarts.
7
+
8
+ Usage::
9
+
10
+ from spanforge.baseline import BehaviouralBaseline
11
+ from spanforge.stream import iter_file
12
+
13
+ events = list(iter_file("events.jsonl"))
14
+ baseline = BehaviouralBaseline.from_events(events)
15
+ baseline.save("baseline.json")
16
+
17
+ # — on restart —
18
+ baseline = BehaviouralBaseline.load("baseline.json")
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import datetime
24
+ import json
25
+ import pathlib
26
+ import statistics
27
+ from dataclasses import dataclass, field
28
+ from typing import TYPE_CHECKING, Any
29
+
30
+ if TYPE_CHECKING:
31
+ from collections.abc import Iterable
32
+
33
+ from spanforge.event import Event
34
+
35
+ __all__ = ["BehaviouralBaseline", "DistributionStats"]
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Statistical helpers
40
+ # ---------------------------------------------------------------------------
41
+
42
+
43
+ def _percentile(sorted_data: list[float], pct: float) -> float:
44
+ """Return the *pct*-th percentile of an already-sorted list."""
45
+ if not sorted_data:
46
+ return 0.0
47
+ if len(sorted_data) == 1:
48
+ return float(sorted_data[0])
49
+ idx = (pct / 100.0) * (len(sorted_data) - 1)
50
+ lo = int(idx)
51
+ hi = lo + 1
52
+ if hi >= len(sorted_data):
53
+ return float(sorted_data[-1])
54
+ frac = idx - lo
55
+ return sorted_data[lo] * (1.0 - frac) + sorted_data[hi] * frac
56
+
57
+
58
+ def _event_type_str(event: Event) -> str:
59
+ et = event.event_type
60
+ return et.value if hasattr(et, "value") else str(et)
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Value object
65
+ # ---------------------------------------------------------------------------
66
+
67
+
68
@dataclass(frozen=True)
class DistributionStats:
    """Immutable summary statistics for one numeric metric.

    Attributes:
        mean: Arithmetic mean of the observations.
        stddev: Sample standard deviation (0.0 when fewer than 2 samples).
        p50: 50th percentile (median).
        p95: 95th percentile.
        p99: 99th percentile.
        sample_count: Number of observations the statistics were computed from.
    """

    mean: float
    stddev: float
    p50: float
    p95: float
    p99: float
    sample_count: int

    # ------------------------------------------------------------------
    # Factory
    # ------------------------------------------------------------------

    @classmethod
    def from_samples(cls, samples: list[float]) -> DistributionStats:
        """Summarise *samples*; an empty input yields an all-zero instance."""
        if not samples:
            return cls(0.0, 0.0, 0.0, 0.0, 0.0, 0)
        ordered = sorted(samples)
        n = len(ordered)
        average = statistics.mean(ordered)
        # statistics.stdev needs at least two data points.
        spread = 0.0 if n < 2 else statistics.stdev(ordered)
        median, high, extreme = (_percentile(ordered, q) for q in (50, 95, 99))
        return cls(
            mean=average,
            stddev=spread,
            p50=median,
            p95=high,
            p99=extreme,
            sample_count=n,
        )

    # ------------------------------------------------------------------
    # Serialisation
    # ------------------------------------------------------------------

    def to_dict(self) -> dict[str, Any]:
        """Return the six statistics as a plain ``dict`` keyed by field name."""
        names = ("mean", "stddev", "p50", "p95", "p99", "sample_count")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> DistributionStats:
        """Rebuild an instance from a mapping shaped like :meth:`to_dict` output.

        The five statistics are coerced with ``float()`` and ``sample_count``
        with ``int()``; every key is required.
        """
        numeric = {
            name: float(d[name]) for name in ("mean", "stddev", "p50", "p95", "p99")
        }
        observed = int(d["sample_count"])
        return cls(sample_count=observed, **numeric)
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Baseline
139
+ # ---------------------------------------------------------------------------
140
+
141
+
142
@dataclass
class BehaviouralBaseline:
    """Statistical summary of an agent's typical behaviour.

    Built from an initial traffic window and used by
    :class:`~spanforge.drift.DriftDetector` to detect statistically significant
    deviations at runtime.

    Attributes:
        tokens: Token count distribution across all LLM spans.
        confidence_by_type: Per-decision-type confidence score distributions.
        latency_by_operation: Per-operation latency distributions (milliseconds).
        tool_rate_per_hour: Observed tool invocation rate per tool name (calls/h).
        decision_rate_per_hour: Observed decision rate per decision type (decisions/h).
        event_count: Number of events consumed to build this baseline.
        window_seconds: Duration of the baseline traffic window in seconds.
        recorded_at: ISO 8601 UTC timestamp when the baseline was created.
    """

    tokens: DistributionStats
    confidence_by_type: dict[str, DistributionStats] = field(default_factory=dict)
    latency_by_operation: dict[str, DistributionStats] = field(default_factory=dict)
    tool_rate_per_hour: dict[str, float] = field(default_factory=dict)
    decision_rate_per_hour: dict[str, float] = field(default_factory=dict)
    event_count: int = 0
    window_seconds: float = 86400.0
    recorded_at: str = ""

    # ------------------------------------------------------------------
    # Factory
    # ------------------------------------------------------------------

    @classmethod
    def from_events(
        cls,
        events: Iterable[Event],
        max_events: int = 1000,
        window_seconds: float = 86400.0,
    ) -> BehaviouralBaseline:
        """Build a baseline from a stream of events.

        Consumes at most *max_events* events from *events* (or the whole
        iterable, whichever comes first) and computes statistical distributions
        for the following metric groups:

        - **Tokens** — ``token_usage["total_tokens"]`` from
          ``llm.trace.span.completed`` and ``llm.trace.span.failed`` payloads;
          only positive totals are recorded.
        - **Confidence** — per-decision-type ``score`` from
          ``confidence.sample`` events.
        - **Latency** — per-operation ``duration_ms`` from the two LLM span
          event types, and ``latency_ms`` from ``tool_call.*`` (keyed by tool
          name) and ``latency.sample`` events.
        - **Tool invocation rates** — calls per hour from ``tool_call.*``
          events, keyed by ``tool_name``. LLM span events that carry a
          duration and whose ``operation`` is ``"tool_call"`` are additionally
          counted under the key ``"tool_call"``.
        - **Decision rates** — decisions per hour from ``decision.made`` events.

        Missing ``operation``, ``decision_type`` and ``tool_name`` payload keys
        fall back to ``"unknown"``. Events of any other type only contribute
        to ``event_count``.

        Args:
            events: Source iterable of :class:`~spanforge.event.Event`.
            max_events: Upper bound on events consumed (default 1 000). A
                non-positive value consumes nothing.
            window_seconds: Denominator for rate calculations (default
                86 400 s = 24 h). Non-positive values fall back to a
                one-hour denominator.

        Returns:
            A fully-populated :class:`BehaviouralBaseline`.
        """
        token_samples: list[float] = []
        confidence_samples: dict[str, list[float]] = {}
        latency_samples: dict[str, list[float]] = {}
        tool_counts: dict[str, int] = {}
        decision_counts: dict[str, int] = {}

        count = 0
        # The cap is checked *after* an event has been processed (and up front
        # for non-positive caps) so a lazy source such as a generator is never
        # advanced past the last event that is actually used.
        if max_events > 0:
            for event in events:
                count += 1
                etype = _event_type_str(event)
                payload = event.payload

                # LLM span events — tokens + latency
                if etype in ("llm.trace.span.completed", "llm.trace.span.failed"):
                    tu = payload.get("token_usage")
                    if tu:
                        # ``or 0`` also covers an explicit ``None`` total.
                        total = int(tu.get("total_tokens", 0) or 0)
                        if total > 0:
                            token_samples.append(float(total))
                    dur = payload.get("duration_ms")
                    if dur is not None:
                        op = str(payload.get("operation", "unknown"))
                        latency_samples.setdefault(op, []).append(float(dur))
                        if op == "tool_call":
                            tool_counts[op] = tool_counts.get(op, 0) + 1

                # Confidence namespace events
                elif etype == "confidence.sample":
                    dtype = str(payload.get("decision_type", "unknown"))
                    score = payload.get("score")
                    if score is not None:
                        confidence_samples.setdefault(dtype, []).append(float(score))

                # Decision namespace events
                elif etype == "decision.made":
                    dtype = str(payload.get("decision_type", "unknown"))
                    decision_counts[dtype] = decision_counts.get(dtype, 0) + 1

                # Tool call namespace events
                elif etype.startswith("tool_call."):
                    tool_name = str(payload.get("tool_name", "unknown"))
                    tool_counts[tool_name] = tool_counts.get(tool_name, 0) + 1
                    lat = payload.get("latency_ms")
                    if lat is not None:
                        latency_samples.setdefault(tool_name, []).append(float(lat))

                # Latency namespace events
                elif etype == "latency.sample":
                    op = str(payload.get("operation", "unknown"))
                    lat = payload.get("latency_ms")
                    if lat is not None:
                        latency_samples.setdefault(op, []).append(float(lat))

                if count >= max_events:
                    break

        # Avoid a zero or negative denominator in the rate calculations.
        hours = (window_seconds / 3600.0) if window_seconds > 0 else 1.0

        return cls(
            tokens=DistributionStats.from_samples(token_samples),
            confidence_by_type={
                dt: DistributionStats.from_samples(samples)
                for dt, samples in confidence_samples.items()
            },
            latency_by_operation={
                op: DistributionStats.from_samples(samples)
                for op, samples in latency_samples.items()
            },
            tool_rate_per_hour={op: cnt / hours for op, cnt in tool_counts.items()},
            decision_rate_per_hour={dt: cnt / hours for dt, cnt in decision_counts.items()},
            event_count=count,
            window_seconds=window_seconds,
            recorded_at=datetime.datetime.now(datetime.timezone.utc).strftime(
                "%Y-%m-%dT%H:%M:%S.%f"
            )
            + "Z",
        )

    # ------------------------------------------------------------------
    # Serialisation
    # ------------------------------------------------------------------

    def to_dict(self) -> dict[str, Any]:
        """Serialise to a plain dict.

        Nested distributions are converted through their own ``to_dict`` and
        the two rate mappings are shallow-copied.
        """
        return {
            "tokens": self.tokens.to_dict(),
            "confidence_by_type": {k: v.to_dict() for k, v in self.confidence_by_type.items()},
            "latency_by_operation": {k: v.to_dict() for k, v in self.latency_by_operation.items()},
            "tool_rate_per_hour": dict(self.tool_rate_per_hour),
            "decision_rate_per_hour": dict(self.decision_rate_per_hour),
            "event_count": self.event_count,
            "window_seconds": self.window_seconds,
            "recorded_at": self.recorded_at,
        }

    def to_json(self) -> str:
        """Serialise to a JSON string (keys sorted, two-space indentation)."""
        return json.dumps(self.to_dict(), sort_keys=True, indent=2)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> BehaviouralBaseline:
        """Deserialise from a plain dict.

        ``"tokens"`` is required; every other key is optional and falls back
        to the corresponding field default.
        """
        return cls(
            tokens=DistributionStats.from_dict(d["tokens"]),
            confidence_by_type={
                k: DistributionStats.from_dict(v)
                for k, v in d.get("confidence_by_type", {}).items()
            },
            latency_by_operation={
                k: DistributionStats.from_dict(v)
                for k, v in d.get("latency_by_operation", {}).items()
            },
            tool_rate_per_hour={k: float(v) for k, v in d.get("tool_rate_per_hour", {}).items()},
            decision_rate_per_hour={
                k: float(v) for k, v in d.get("decision_rate_per_hour", {}).items()
            },
            event_count=int(d.get("event_count", 0)),
            window_seconds=float(d.get("window_seconds", 86400.0)),
            recorded_at=str(d.get("recorded_at", "")),
        )

    @classmethod
    def from_json(cls, s: str) -> BehaviouralBaseline:
        """Deserialise from a JSON string produced by :meth:`to_json`."""
        return cls.from_dict(json.loads(s))

    def save(self, path: str | pathlib.Path) -> None:
        """Write the baseline to *path* as UTF-8 JSON."""
        pathlib.Path(path).write_text(self.to_json(), encoding="utf-8")

    @classmethod
    def load(cls, path: str | pathlib.Path) -> BehaviouralBaseline:
        """Load a baseline previously saved with :meth:`save`."""
        return cls.from_json(pathlib.Path(path).read_text(encoding="utf-8"))