spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/inspect.py ADDED
@@ -0,0 +1,429 @@
1
+ """spanforge.inspect — Tool Call Inspector (RFC-0001, Tool 3 / llm-inspect).
2
+
3
+ Surfaces every tool call in an agent run: function name, arguments, return
4
+ value, execution time, and whether the model actually used the tool's output.
5
+
6
+ Public API::
7
+
8
+ from spanforge.inspect import InspectorSession, inspect_trace
9
+
10
+ # --- Runtime inspection ---
11
+ session = InspectorSession()
12
+ tracer = spanforge.Tracer()
13
+ with tracer.agent_run("research") as run:
14
+ session.attach(run) # start recording tool spans
15
+ result = my_tool("query")
16
+ session.detach() # stop recording
17
+
18
+ for call in session.tool_calls:
19
+ print(call.name, call.duration_ms, call.was_result_used)
20
+ print(session.summary())
21
+
22
+ # --- Post-run replay from JSONL ---
23
+ calls = inspect_trace("events.jsonl", trace_id="01XXXX")
24
+ for call in calls:
25
+ print(call)
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import threading
31
+ from dataclasses import dataclass, field
32
+ from typing import Any, TYPE_CHECKING
33
+
34
+ if TYPE_CHECKING:
35
+ from spanforge._span import AgentRunContext, Span
36
+
37
+ __all__ = [
38
+ "InspectorSession",
39
+ "ToolCallRecord",
40
+ "inspect_trace",
41
+ ]
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # ToolCallRecord
45
+ # ---------------------------------------------------------------------------
46
+
47
+
48
@dataclass(frozen=True)
class ToolCallRecord:
    """Frozen snapshot describing a single tool invocation.

    Attributes:
        name:            Function name (taken from the span name).
        args:            Captured arguments as ``{param_name: repr_str}``.
        result:          Captured return value, or ``None`` when the return
                         value was not captured.
        duration_ms:     Duration in milliseconds, or ``None`` when the span
                         carried no duration.
        span_id:         Identifier of the span that produced this record.
        trace_id:        Identifier of the trace the span belongs to.
        timestamp:       Start time of the call as Unix seconds.
        status:          Span status string (for example ``"ok"`` or
                         ``"error"``).
        error:           Error message when the call failed, else ``None``.
        was_result_used: Heuristic tri-state flag:
                         ``True``  - the result text was seen in a later span,
                         ``False`` - it was not seen in any later span,
                         ``None``  - indeterminate (no result captured or no
                         later spans to compare against).
    """

    name: str
    args: dict[str, Any]
    result: Any
    duration_ms: float | None
    span_id: str
    trace_id: str
    timestamp: float
    status: str
    error: str | None
    was_result_used: bool | None = None

    def __str__(self) -> str:
        """Render a compact, single-line description of the call."""
        if self.duration_ms is None:
            duration_text = "?"
        else:
            duration_text = f"{self.duration_ms:.1f}ms"

        usage_labels = {True: "used", False: "discarded", None: "unknown"}
        usage_text = usage_labels[self.was_result_used]

        pieces = [
            f"ToolCallRecord(name={self.name!r}, duration={duration_text}, ",
            f"status={self.status!r}, result_used={usage_text}",
        ]
        # The error suffix is only shown for a non-empty error message.
        if self.error:
            pieces.append(f" error={self.error!r}")
        pieces.append(")")
        return "".join(pieces)
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # InspectorSession
98
+ # ---------------------------------------------------------------------------
99
+
100
# Operation names that mark a span as a tool invocation.
_TOOL_OPERATIONS = frozenset({"execute_tool", "tool_call"})


def _is_tool_span(span: "Span") -> bool:
    """Return True if *span* represents a tool call.

    A span counts as a tool call when its ``operation`` is one of
    ``_TOOL_OPERATIONS`` or when its ``attributes`` mapping has a truthy
    ``"tool"`` entry.  Missing or ``None`` attributes are treated as empty.

    Args:
        span: Any object exposing optional ``operation`` and ``attributes``
            attributes.

    Returns:
        ``True`` for tool spans, ``False`` otherwise.
    """
    raw_op = getattr(span, "operation", "") or ""
    # str() on an Enum member yields "ClassName.MEMBER", which would never
    # match the plain operation names, so unwrap ``.value`` when present.
    op = str(getattr(raw_op, "value", raw_op))
    if op in _TOOL_OPERATIONS:
        return True
    attrs = getattr(span, "attributes", {}) or {}
    return bool(attrs.get("tool"))
110
+
111
+
112
def _extract_args(span: "Span") -> dict[str, Any]:
    """Collect the captured call arguments stored on *span*.

    Every attribute whose key begins with ``arg.`` is copied into a new dict
    under the remainder of its key.  Missing or ``None`` attributes yield an
    empty dict.
    """
    prefix = "arg."
    captured: dict[str, Any] = {}
    attributes = getattr(span, "attributes", {}) or {}
    for key, value in attributes.items():
        if key.startswith(prefix):
            captured[key[len(prefix):]] = value
    return captured
116
+
117
+
118
def _extract_result(span: "Span") -> Any:
    """Fetch the ``return_value`` attribute recorded on *span*.

    Returns ``None`` when the span has no attributes or no such entry.
    """
    attributes = getattr(span, "attributes", {})
    if not attributes:
        return None
    return attributes.get("return_value")
122
+
123
+
124
def _check_result_used(tool_span: "Span", subsequent_spans: list["Span"]) -> bool | None:
    """Heuristic: does the tool result text reappear in any later span?

    The tool span's ``return_value`` is converted with ``str()`` and searched
    for as a substring of every string-valued attribute on each span in
    *subsequent_spans*.

    Returns:
        ``True`` if the text is found, ``False`` if it is not, and ``None``
        when the check is indeterminate: no result was captured, the result
        text is empty or an uninformative placeholder, or there are no
        subsequent spans.
    """
    result = _extract_result(tool_span)
    if result is None:
        return None

    needle = str(result)
    # Empty or placeholder texts would match trivially or carry no signal.
    uninformative = needle in ("", "None", "<unrepresentable>", "''", '""')
    if uninformative or not subsequent_spans:
        return None

    return any(
        isinstance(value, str) and needle in value
        for later in subsequent_spans
        for value in (getattr(later, "attributes", {}) or {}).values()
    )
147
+
148
+
149
class InspectorSession:
    """Collects tool call records from live span events.

    Attach to an :class:`~spanforge._span.AgentRunContext` (or globally) to
    intercept every span that closes.  Spans with a tool operation
    (``"execute_tool"`` / ``"tool_call"``) or a truthy ``attributes["tool"]``
    are reported as tool calls; all other captured spans are kept only to
    feed the ``was_result_used`` heuristic.

    Usage::

        session = InspectorSession()
        with tracer.agent_run("research") as run:
            session.attach(run)
            result = search("query")   # @trace(tool=True)
        session.detach()

        for call in session.tool_calls:
            print(call)

        print(session.summary())

    Recorded spans accumulate across attach/detach cycles; call
    :meth:`reset` to start a fresh recording on the same instance.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._spans: list["Span"] = []  # every captured span (tool + non-tool)
        self._active = False
        self._trace_id_filter: str | None = None
        # True once the span-end callback has been handed to the hook
        # registry; prevents duplicate registration on repeated attach().
        self._hook_registered = False

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def attach(self, run: "AgentRunContext | None" = None) -> "InspectorSession":
        """Start recording spans.

        Args:
            run: Optional :class:`~spanforge._span.AgentRunContext` returned
                 by ``tracer.agent_run()``.  When provided, only spans whose
                 ``trace_id`` equals the run's ``trace_id`` are recorded.
                 When ``None``, any filter left over from an earlier call is
                 dropped and all spans are captured.

        Returns:
            ``self`` for chaining.
        """
        # Always recompute the filter so attach() after attach(run) really
        # records globally instead of keeping the previous run's trace id.
        self._trace_id_filter = (
            getattr(run, "trace_id", None) if run is not None else None
        )
        self._active = True

        # detach() leaves the callback registered, so registering again on
        # every attach() would make each span arrive (and be stored) twice.
        # NOTE(review): assumes the global hook registry is not cleared
        # behind this session's back - confirm against spanforge._hooks.
        if not self._hook_registered:
            from spanforge._hooks import hooks  # noqa: PLC0415
            hooks.on_span_end(self._on_span_end)
            self._hook_registered = True
        return self

    def detach(self) -> "InspectorSession":
        """Stop recording new spans.

        The hook stays registered in the global registry but becomes a
        no-op while the session is inactive.  Recorded data is kept; call
        :meth:`reset` to clear it.

        Returns:
            ``self`` for chaining.
        """
        self._active = False
        return self

    def reset(self) -> "InspectorSession":
        """Clear all recorded spans, drop the trace filter and re-enable recording.

        Returns:
            ``self`` for chaining.
        """
        with self._lock:
            self._spans.clear()
        self._active = True
        self._trace_id_filter = None
        return self

    # ------------------------------------------------------------------
    # Hook callback
    # ------------------------------------------------------------------

    def _on_span_end(self, span: "Span") -> None:
        """Store *span* if the session is active and the span passes the filter."""
        if not self._active:
            return
        trace_id = getattr(span, "trace_id", None)
        if self._trace_id_filter and trace_id != self._trace_id_filter:
            return
        with self._lock:
            self._spans.append(span)

    # ------------------------------------------------------------------
    # Accessors
    # ------------------------------------------------------------------

    @property
    def tool_calls(self) -> "list[ToolCallRecord]":
        """Return tool call records with heuristic ``was_result_used`` flags.

        Records are returned in the order the spans were collected.  The
        ``was_result_used`` flag is computed on every access by scanning all
        spans captured after each tool span.
        """
        # Work on a snapshot so the lock is not held while building records.
        with self._lock:
            spans = list(self._spans)

        records: list[ToolCallRecord] = []
        for i, span in enumerate(spans):
            if not _is_tool_span(span):
                continue
            subsequent = spans[i + 1:]
            was_used = _check_result_used(span, subsequent)
            # Guard against a span whose start time is present but None.
            start_ns = getattr(span, "start_ns", 0) or 0
            records.append(
                ToolCallRecord(
                    name=getattr(span, "name", ""),
                    args=_extract_args(span),
                    result=_extract_result(span),
                    duration_ms=getattr(span, "duration_ms", None),
                    span_id=getattr(span, "span_id", ""),
                    trace_id=getattr(span, "trace_id", ""),
                    timestamp=start_ns / 1_000_000_000.0,
                    status=getattr(span, "status", "ok"),
                    error=getattr(span, "error", None),
                    was_result_used=was_used,
                )
            )
        return records

    @property
    def all_span_count(self) -> int:
        """Total number of spans captured (tool + non-tool)."""
        with self._lock:
            return len(self._spans)

    # ------------------------------------------------------------------
    # Display helpers
    # ------------------------------------------------------------------

    def summary(self) -> str:
        """Return a plain-text table of all recorded tool calls.

        Returns:
            Multi-line string suitable for ``print()``.
        """
        calls = self.tool_calls
        rule = "=" * 72
        lines: list[str] = [rule, " SpanForge Tool Call Inspector", rule]
        if not calls:
            lines.append(" No tool calls recorded.")
            lines.append(rule)
            return "\n".join(lines)

        lines.append(
            f" {'Name':<28} {'Duration':>10} {'Status':<8} {'Result Used':<12}"
        )
        lines.append("-" * 72)
        for r in calls:
            dur = f"{r.duration_ms:.1f}ms" if r.duration_ms is not None else "?"
            used = {True: "yes", False: "no", None: "?"}[r.was_result_used]
            lines.append(
                f" {r.name:<28} {dur:>10} {r.status:<8} {used:<12}"
            )
            if r.error:
                lines.append(f" error: {r.error}")
        lines.append(rule)
        lines.append(f" Total: {len(calls)} tool call(s)")
        lines.append(rule)
        return "\n".join(lines)

    def __repr__(self) -> str:
        return self.summary()

    def __len__(self) -> int:
        """Number of recorded tool calls (not the number of captured spans)."""
        return len(self.tool_calls)
326
+
327
+
328
+ # ---------------------------------------------------------------------------
329
+ # inspect_trace() — JSONL replay
330
+ # ---------------------------------------------------------------------------
331
+
332
+
333
def inspect_trace(
    path: str,
    *,
    trace_id: str | None = None,
    skip_errors: bool = False,
) -> "list[ToolCallRecord]":
    """Rebuild tool call records from a JSONL trace file.

    Every span-completed / span-failed event in *path* is collected; those
    that are tool spans (a tool ``operation`` or a truthy ``attributes.tool``)
    become :class:`ToolCallRecord` objects.  ``was_result_used`` is derived
    by checking each tool result against the span events that follow it in
    the file.

    Args:
        path:        Path to the NDJSON/JSONL events file.
        trace_id:    When given, only span events whose ``trace_id`` matches
                     are considered.  ``None`` keeps events from all traces.
        skip_errors: Forwarded to ``spanforge.stream.iter_file``; when
                     ``True`` malformed lines are skipped instead of raising.

    Returns:
        Tool call records in file order.

    Raises:
        DeserializationError: On the first malformed line when
            ``skip_errors=False``.
    """
    from spanforge.stream import iter_file  # noqa: PLC0415

    span_event_types = frozenset({
        "llm.trace.span.completed",
        "llm.trace.span.failed",
    })

    # Pass 1: keep the payload of every span event that passes the filter.
    span_payloads: list[dict] = []
    for event in iter_file(path, skip_errors=skip_errors):
        raw_type = event.event_type
        # Event types may be enum members or plain strings.
        type_name = raw_type.value if hasattr(raw_type, "value") else str(raw_type)
        if type_name not in span_event_types:
            continue
        payload = event.payload
        if trace_id and payload.get("trace_id") != trace_id:
            continue
        span_payloads.append(payload)

    # Pass 2: turn each tool span payload into a record.
    records: list[ToolCallRecord] = []
    for index, payload in enumerate(span_payloads):
        operation = payload.get("operation", "")
        attributes: dict = payload.get("attributes") or {}
        if operation not in _TOOL_OPERATIONS and not attributes.get("tool"):
            continue

        captured_result = attributes.get("return_value")
        later_payloads = span_payloads[index + 1:]
        used_flag = _check_result_used_from_dicts(captured_result, later_payloads)

        captured_args = {}
        for key, value in attributes.items():
            if key.startswith("arg."):
                captured_args[key[4:]] = value

        started_ns = payload.get("start_time_unix_nano") or 0
        record = ToolCallRecord(
            name=payload.get("span_name", ""),
            args=captured_args,
            result=captured_result,
            duration_ms=payload.get("duration_ms"),
            span_id=payload.get("span_id") or "",
            trace_id=payload.get("trace_id") or "",
            timestamp=started_ns / 1_000_000_000.0,
            status=payload.get("status", "ok"),
            error=payload.get("error"),
            was_result_used=used_flag,
        )
        records.append(record)
    return records
410
+
411
+
412
def _check_result_used_from_dicts(
    result: Any,
    subsequent_payloads: list[dict],
) -> bool | None:
    """Payload-dict flavour of the "was the result used" heuristic.

    ``str(result)`` is searched for as a substring of every string-valued
    entry in each later payload's ``"attributes"`` mapping.

    Returns:
        ``True`` when the text is found, ``False`` when it is not, and
        ``None`` when the check is indeterminate: *result* is ``None``, its
        text is empty or an uninformative placeholder, or there are no later
        payloads.
    """
    if result is None:
        return None

    needle = str(result)
    # Empty or placeholder texts would match trivially or carry no signal.
    uninformative = needle in ("", "None", "<unrepresentable>", "''", '""')
    if uninformative or not subsequent_payloads:
        return None

    return any(
        isinstance(value, str) and needle in value
        for later in subsequent_payloads
        for value in (later.get("attributes") or {}).values()
    )
@@ -0,0 +1,39 @@
1
+ """spanforge.integrations — Third-party provider and framework integrations.
2
+
3
+ Each sub-module is an optional extra that sits on top of the zero-dependency
4
+ core SDK. Install the relevant extra before importing:
5
+
6
+ pip install "spanforge[openai]" # OpenAI auto-instrumentation
7
+ pip install "spanforge[anthropic]" # Anthropic Claude auto-instrumentation
8
+ pip install "spanforge[gemini]" # Google Gemini auto-instrumentation
9
+ pip install "spanforge[bedrock]" # AWS Bedrock auto-instrumentation
10
+ pip install "spanforge[ollama]" # Ollama local model auto-instrumentation
11
+ pip install "spanforge[groq]" # Groq API auto-instrumentation
12
+ pip install "spanforge[together]" # Together AI auto-instrumentation
13
+ pip install "spanforge[langchain]" # LangChain callback handler
14
+ pip install "spanforge[llamaindex]" # LlamaIndex event handler
15
+
16
+ Available integrations
17
+ ----------------------
18
+ * :mod:`spanforge.integrations.openai` — OpenAI chat completions (Phase 6)
19
+ * :mod:`spanforge.integrations.anthropic` — Anthropic Claude (Phase 7)
20
+ * :mod:`spanforge.integrations.gemini` — Google Gemini (Phase 10)
21
+ * :mod:`spanforge.integrations.bedrock` — AWS Bedrock (Phase 10)
22
+ * :mod:`spanforge.integrations.ollama` — Ollama local models (Phase 7)
23
+ * :mod:`spanforge.integrations.groq` — Groq API (Phase 7)
24
+ * :mod:`spanforge.integrations.together` — Together AI (Phase 7)
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ __all__: list[str] = [
30
+ "anthropic",
31
+ "bedrock",
32
+ "gemini",
33
+ "groq",
34
+ "langchain",
35
+ "llamaindex",
36
+ "ollama",
37
+ "openai",
38
+ "together",
39
+ ]