spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/metrics.py ADDED
@@ -0,0 +1,417 @@
1
+ """spanforge.metrics — Programmatic metrics extraction from SpanForge traces.
2
+
3
+ Provides aggregation functions that accept any ``Iterable[Event]`` — such as
4
+ an in-memory list, an ``EventStream.from_file(...)`` iterator, or a
5
+ :class:`~spanforge._store.TraceStore` query result — and return structured
6
+ :class:`MetricsSummary` / :class:`LatencyStats` objects.
7
+
8
+ Usage::
9
+
10
+ import spanforge.metrics as metrics
11
+ from spanforge.stream import iter_file
12
+
13
+ events = list(iter_file("events.jsonl"))
14
+ summary = metrics.aggregate(events)
15
+ print(f"Success rate: {summary.agent_success_rate:.1%}")
16
+ print(f"p95 LLM latency: {summary.llm_latency_ms.p95:.1f} ms")
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import statistics
22
+ from collections import defaultdict
23
+ from dataclasses import dataclass, field
24
+ from typing import TYPE_CHECKING
25
+
26
+ if TYPE_CHECKING:
27
+ from collections.abc import Iterable
28
+
29
+ from spanforge.event import Event
30
+
31
+ __all__ = [
32
+ "LatencyStats",
33
+ "MetricsSummary",
34
+ "agent_success_rate",
35
+ "aggregate",
36
+ "llm_latency",
37
+ "token_usage",
38
+ "tool_failure_rate",
39
+ ]
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # EventType string constants (avoid circular import)
43
+ # ---------------------------------------------------------------------------
44
+
45
# Event-type strings are spelled out as plain literals here so this module
# does not have to import them (avoids a circular import).
_SPAN_COMPLETED = "llm.trace.span.completed"
_SPAN_FAILED = "llm.trace.span.failed"
_AGENT_COMPLETED = "llm.trace.agent.completed"

# Both span outcomes count as "span events" during aggregation.
_SPAN_EVENT_TYPES = frozenset((_SPAN_COMPLETED, _SPAN_FAILED))
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Value objects
53
+ # ---------------------------------------------------------------------------
54
+
55
+
56
@dataclass(frozen=True)
class LatencyStats:
    """Immutable summary of an LLM latency distribution, in milliseconds.

    Attributes:
        min: Smallest observed sample.
        max: Largest observed sample.
        p50: 50th percentile (median).
        p95: 95th percentile.
        p99: 99th percentile.
    """

    min: float
    max: float
    p50: float
    p95: float
    p99: float

    @classmethod
    def _from_samples(cls, samples: list[float]) -> LatencyStats:
        """Build the stats from raw samples; an empty list yields all zeros."""
        if not samples:
            return cls(0.0, 0.0, 0.0, 0.0, 0.0)
        ordered = sorted(samples)
        median, p95, p99 = (_percentile(ordered, q) for q in (50, 95, 99))
        return cls(min=ordered[0], max=ordered[-1], p50=median, p95=p95, p99=p99)
78
+
79
+
80
@dataclass
class MetricsSummary:
    """Roll-up of the metrics derived from a collection of SpanForge events.

    Attributes:
        trace_count: How many distinct ``trace_id`` values were seen.
        span_count: How many span events were seen in total.
        agent_success_rate: Share of traces without any error span
            (0.0 - 1.0).
        avg_trace_duration_ms: Mean duration over all agent-run events.
        p50_trace_duration_ms: Median trace duration.
        p95_trace_duration_ms: 95th-percentile trace duration.
        total_input_tokens: Input/prompt tokens summed over all spans.
        total_output_tokens: Output/completion tokens summed over all spans.
        total_cost_usd: Inferred cost in USD summed over all spans.
        llm_latency_ms: :class:`LatencyStats` computed over LLM-type spans.
        tool_failure_rate: Share of tool-call spans with ``status="error"``.
        token_usage_by_model: ``TokenUsage``-like dict per model
            (input/output/total).
        cost_by_model: Total cost in USD per model.
        drift_incidents: Number of ``drift.threshold_breach`` events seen.
        confidence_trend: Mean confidence score of each 50-event window;
            empty when the stream has no ``confidence.sample`` events.
        baseline_deviation_pct: Coefficient of variation of the observed
            confidence scores (``stddev / mean * 100``); 0.0 when it cannot
            be computed.
    """

    # Trace / span volume.
    trace_count: int = 0
    span_count: int = 0
    agent_success_rate: float = 1.0

    # Trace duration distribution.
    avg_trace_duration_ms: float = 0.0
    p50_trace_duration_ms: float = 0.0
    p95_trace_duration_ms: float = 0.0

    # Token and cost totals.
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0

    # LLM latency and tool reliability.
    llm_latency_ms: LatencyStats = field(
        default_factory=lambda: LatencyStats(min=0, max=0, p50=0, p95=0, p99=0)
    )
    tool_failure_rate: float = 0.0

    # Per-model breakdowns.
    token_usage_by_model: dict[str, dict[str, int]] = field(default_factory=dict)
    cost_by_model: dict[str, float] = field(default_factory=dict)

    # Drift and confidence signals.
    drift_incidents: int = 0
    confidence_trend: list[float] = field(default_factory=list)
    baseline_deviation_pct: float = 0.0
122
+
123
+
124
+ # ---------------------------------------------------------------------------
125
+ # Helpers
126
+ # ---------------------------------------------------------------------------
127
+
128
+
129
def _percentile(sorted_data: list[float], pct: float) -> float:
    """Return the *pct*-th percentile of an already-sorted list.

    The value is found by linear interpolation between the two closest ranks.

    Args:
        sorted_data: Samples in ascending order; the function does not sort.
        pct: Requested percentile. Values outside ``0..100`` are clamped to
            that range.

    Returns:
        The interpolated percentile as a float, or ``0.0`` for empty input.
    """
    if not sorted_data:
        return 0.0
    last = len(sorted_data) - 1
    # Clamp first: a negative pct would otherwise produce a negative rank,
    # which extrapolates below the minimum or indexes from the end of the list.
    rank = (min(max(pct, 0.0), 100.0) / 100.0) * last
    lo = int(rank)
    if lo >= last:
        # Covers the single-sample case and pct == 100.
        return float(sorted_data[last])
    frac = rank - lo
    return float(sorted_data[lo] * (1 - frac) + sorted_data[lo + 1] * frac)
142
+
143
+
144
def _event_type_str(event: Event) -> str:
    """Return ``event.event_type`` as a plain string.

    A value that exposes a ``value`` attribute is unwrapped through it;
    anything else is converted with ``str()``.
    """
    kind = event.event_type
    if hasattr(kind, "value"):
        return kind.value
    return str(kind)
148
+
149
+
150
def _is_span_event(event: Event) -> bool:
    """Tell whether *event* is a completed or failed span event."""
    kind = _event_type_str(event)
    return kind in _SPAN_EVENT_TYPES
152
+
153
+
154
def _is_agent_completed(event: Event) -> bool:
    """Tell whether *event* is an agent-completed event."""
    kind = _event_type_str(event)
    return kind == _AGENT_COMPLETED
156
+
157
+
158
def _is_llm_span(payload: dict[str, object]) -> bool:
    """Tell whether the payload's ``operation`` names an LLM call."""
    llm_operations = ("chat", "completion", "embedding", "chat_completion", "generate")
    return payload.get("operation", "") in llm_operations
161
+
162
+
163
def _is_tool_span(payload: dict[str, object]) -> bool:
    """Tell whether the payload's ``operation`` is ``"tool_call"``."""
    return payload.get("operation", "") == "tool_call"
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Aggregation helpers and public API
170
+ # ---------------------------------------------------------------------------
171
+
172
+
173
def _process_llm_span(
    payload: dict[str, object],
    duration_ms: float,
    llm_latencies: list[float],
    token_by_model: dict[str, dict[str, int]],
    cost_by_model: dict[str, float],
) -> tuple[int, int, float]:
    """Fold one LLM span into the running latency, token and cost tallies.

    Args:
        payload: Span payload. Reads ``token_usage``, ``cost`` and
            ``model["name"]`` when present.
        duration_ms: Span duration; appended to *llm_latencies* only when it
            is non-negative.
        llm_latencies: Mutated in place with the accepted duration.
        token_by_model: Mutated in place; per-model ``input_tokens`` /
            ``output_tokens`` / ``total_tokens`` counters.
        cost_by_model: Mutated in place; per-model cost in USD.

    Returns:
        ``(input_tokens, output_tokens, cost_usd)`` contributed by this span.
    """
    if duration_ms >= 0:
        llm_latencies.append(duration_ms)

    usage = payload.get("token_usage")
    cost = payload.get("cost")
    if not usage and not cost:
        return 0, 0, 0.0

    # Resolve the model name once; both tallies are keyed on it. A missing,
    # None or empty name is bucketed under "unknown".
    model = payload.get("model") or {}
    model_name = model.get("name") or "unknown"  # type: ignore[attr-defined]

    inp = out = 0
    if usage:
        # ``or 0`` tolerates keys that are present with a None value.
        inp = int(usage.get("input_tokens") or 0)  # type: ignore[attr-defined]
        out = int(usage.get("output_tokens") or 0)  # type: ignore[attr-defined]
        reported_total = usage.get("total_tokens")  # type: ignore[attr-defined]
        # When no total is reported, derive it so the per-model total never
        # lags behind input + output. An explicit 0 is kept as reported.
        tot = int(reported_total) if reported_total is not None else inp + out
        # setdefault keeps this working for a plain dict as well as a defaultdict.
        bucket = token_by_model.setdefault(
            model_name, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        )
        bucket["input_tokens"] += inp
        bucket["output_tokens"] += out
        bucket["total_tokens"] += tot

    cost_usd = 0.0
    if cost:
        cost_usd = float(cost.get("total_cost_usd") or 0.0)  # type: ignore[attr-defined]
        cost_by_model[model_name] = cost_by_model.get(model_name, 0.0) + cost_usd

    return inp, out, cost_usd
200
+
201
+
202
def _process_span_event(
    event: Event,
    span_count: int,
    trace_errors: dict[str, bool],
    llm_latencies: list[float],
    token_by_model: dict[str, dict[str, int]],
    cost_by_model: dict[str, float],
    tool_total: int,
    tool_errors: int,
    total_input_tokens: int,
    total_output_tokens: int,
    total_cost_usd: float,
) -> tuple[int, int, int, int, int, float]:
    """Fold one span event into the running aggregation state.

    The mutable containers (*trace_errors*, *llm_latencies*, *token_by_model*,
    *cost_by_model*) are updated in place. The scalar counters are passed in
    and handed back updated.

    Returns:
        ``(span_count, tool_total, tool_errors, total_input_tokens,
        total_output_tokens, total_cost_usd)``.
    """
    payload = event.payload
    failed = payload.get("status", "ok") == "error"
    trace_id = payload.get("trace_id", "")
    duration_ms = float(payload.get("duration_ms", 0.0))

    # A trace is registered on its first span and latches to "errored" as
    # soon as any of its spans reports an error.
    if trace_id:
        if failed:
            trace_errors[trace_id] = True
        else:
            trace_errors.setdefault(trace_id, False)

    if _is_llm_span(payload):  # type: ignore[arg-type]
        span_in, span_out, span_cost = _process_llm_span(
            dict(payload),
            duration_ms,
            llm_latencies,
            token_by_model,
            cost_by_model,
        )
        total_input_tokens += span_in
        total_output_tokens += span_out
        total_cost_usd += span_cost

    if _is_tool_span(payload):  # type: ignore[arg-type]
        tool_total += 1
        if failed:
            tool_errors += 1

    return (
        span_count + 1,
        tool_total,
        tool_errors,
        total_input_tokens,
        total_output_tokens,
        total_cost_usd,
    )
253
+
254
+
255
def aggregate(events: Iterable[Event]) -> MetricsSummary:
    """Aggregate a collection of SpanForge events into a :class:`MetricsSummary`.

    The iterable is consumed in a single pass and is never copied, so
    file-backed or generator-based streams can be aggregated without first
    being loaded into memory.

    Args:
        events: Any iterable of :class:`~spanforge.event.Event` objects.

    Returns:
        A fully-populated :class:`MetricsSummary`.
    """
    # Per-trace error status (trace_id -> has_error).
    trace_errors: dict[str, bool] = {}
    trace_durations: list[float] = []

    span_count = 0
    llm_latencies: list[float] = []
    tool_total = 0
    tool_errors = 0
    total_input_tokens = 0
    total_output_tokens = 0
    total_cost_usd = 0.0
    token_by_model: dict[str, dict[str, int]] = defaultdict(
        lambda: {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
    )
    cost_by_model: dict[str, float] = defaultdict(float)

    drift_incidents = 0
    confidence_scores: list[float] = []

    for event in events:
        # Resolve the type string once per event instead of once per branch.
        kind = _event_type_str(event)

        if kind in _SPAN_EVENT_TYPES:
            (
                span_count,
                tool_total,
                tool_errors,
                total_input_tokens,
                total_output_tokens,
                total_cost_usd,
            ) = _process_span_event(
                event,
                span_count,
                trace_errors,
                llm_latencies,
                token_by_model,
                cost_by_model,
                tool_total,
                tool_errors,
                total_input_tokens,
                total_output_tokens,
                total_cost_usd,
            )
        elif kind == _AGENT_COMPLETED:
            trace_durations.append(float(event.payload.get("duration_ms", 0.0)))
        elif kind == "drift.threshold_breach":
            drift_incidents += 1
        elif kind == "confidence.sample":
            score = event.payload.get("score")
            if score is not None:
                confidence_scores.append(float(score))

    # Success rate: with no traces observed there is nothing to count as a
    # failure, so the rate defaults to 1.0.
    if trace_errors:
        success_count = sum(1 for has_err in trace_errors.values() if not has_err)
        success_rate = success_count / len(trace_errors)
    else:
        success_rate = 1.0

    # Trace duration stats.
    sorted_durations = sorted(trace_durations)
    avg_dur = statistics.mean(sorted_durations) if sorted_durations else 0.0
    p50_dur = _percentile(sorted_durations, 50)
    p95_dur = _percentile(sorted_durations, 95)

    # Confidence trend: mean of each consecutive 50-score window. Every slice
    # produced by this range is non-empty, so no emptiness check is needed.
    window_size = 50
    confidence_trend = [
        statistics.mean(confidence_scores[start : start + window_size])
        for start in range(0, len(confidence_scores), window_size)
    ]

    # Baseline deviation: coefficient of variation (stddev / mean * 100).
    # stdev() needs at least two samples, and a non-positive mean is skipped.
    baseline_deviation_pct = 0.0
    if len(confidence_scores) >= 2:
        mean_conf = statistics.mean(confidence_scores)
        if mean_conf > 0:
            baseline_deviation_pct = (statistics.stdev(confidence_scores) / mean_conf) * 100.0

    return MetricsSummary(
        trace_count=len(trace_errors),
        span_count=span_count,
        agent_success_rate=success_rate,
        avg_trace_duration_ms=avg_dur,
        p50_trace_duration_ms=p50_dur,
        p95_trace_duration_ms=p95_dur,
        total_input_tokens=total_input_tokens,
        total_output_tokens=total_output_tokens,
        total_cost_usd=total_cost_usd,
        llm_latency_ms=LatencyStats._from_samples(llm_latencies),
        tool_failure_rate=tool_errors / tool_total if tool_total > 0 else 0.0,
        # Hand back plain dicts so callers do not get defaultdict auto-insertion.
        token_usage_by_model=dict(token_by_model),
        cost_by_model=dict(cost_by_model),
        drift_incidents=drift_incidents,
        confidence_trend=confidence_trend,
        baseline_deviation_pct=baseline_deviation_pct,
    )
368
+
369
+
370
def agent_success_rate(events: Iterable[Event]) -> float:
    """Compute the share of traces that contain no error spans.

    Args:
        events: Any iterable of :class:`~spanforge.event.Event` objects.

    Returns:
        A value between 0.0 and 1.0. When the input holds no span events the
        result is ``1.0``, since nothing can be interpreted as a failure.
    """
    summary = aggregate(events)
    return summary.agent_success_rate
381
+
382
+
383
def llm_latency(events: Iterable[Event]) -> LatencyStats:
    """Compute :class:`LatencyStats` over every LLM-operation span.

    Args:
        events: Any iterable of :class:`~spanforge.event.Event` objects.

    Returns:
        The latency percentiles, expressed in milliseconds.
    """
    summary = aggregate(events)
    return summary.llm_latency_ms
393
+
394
+
395
def tool_failure_rate(events: Iterable[Event]) -> float:
    """Compute the share of tool-call spans that ended with ``status="error"``.

    Args:
        events: Any iterable of :class:`~spanforge.event.Event` objects.

    Returns:
        A failure rate between 0.0 and 1.0.
    """
    summary = aggregate(events)
    return summary.tool_failure_rate
405
+
406
+
407
def token_usage(events: Iterable[Event]) -> dict[str, dict[str, int]]:
    """Compute token usage totals for each model.

    Args:
        events: Any iterable of :class:`~spanforge.event.Event` objects.

    Returns:
        A dict keyed by model name whose values have the shape
        ``{"input_tokens": int, "output_tokens": int, "total_tokens": int}``.
    """
    summary = aggregate(events)
    return summary.token_usage_by_model