spanforge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +815 -0
- spanforge/_ansi.py +93 -0
- spanforge/_batch_exporter.py +409 -0
- spanforge/_cli.py +2094 -0
- spanforge/_cli_audit.py +639 -0
- spanforge/_cli_compliance.py +711 -0
- spanforge/_cli_cost.py +243 -0
- spanforge/_cli_ops.py +791 -0
- spanforge/_cli_phase11.py +356 -0
- spanforge/_hooks.py +337 -0
- spanforge/_server.py +1708 -0
- spanforge/_span.py +1036 -0
- spanforge/_store.py +288 -0
- spanforge/_stream.py +664 -0
- spanforge/_trace.py +335 -0
- spanforge/_tracer.py +254 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +469 -0
- spanforge/auto.py +464 -0
- spanforge/baseline.py +335 -0
- spanforge/cache.py +635 -0
- spanforge/compliance.py +325 -0
- spanforge/config.py +532 -0
- spanforge/consent.py +228 -0
- spanforge/consumer.py +377 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1254 -0
- spanforge/cost.py +600 -0
- spanforge/debug.py +548 -0
- spanforge/deprecations.py +205 -0
- spanforge/drift.py +482 -0
- spanforge/egress.py +58 -0
- spanforge/eval.py +648 -0
- spanforge/event.py +1064 -0
- spanforge/exceptions.py +240 -0
- spanforge/explain.py +178 -0
- spanforge/export/__init__.py +69 -0
- spanforge/export/append_only.py +337 -0
- spanforge/export/cloud.py +357 -0
- spanforge/export/datadog.py +497 -0
- spanforge/export/grafana.py +320 -0
- spanforge/export/jsonl.py +195 -0
- spanforge/export/openinference.py +158 -0
- spanforge/export/otel_bridge.py +294 -0
- spanforge/export/otlp.py +811 -0
- spanforge/export/otlp_bridge.py +233 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/siem_schema.py +98 -0
- spanforge/export/siem_splunk.py +264 -0
- spanforge/export/siem_syslog.py +212 -0
- spanforge/export/webhook.py +299 -0
- spanforge/exporters/__init__.py +30 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/exporters/sqlite.py +142 -0
- spanforge/gate.py +1150 -0
- spanforge/governance.py +181 -0
- spanforge/hitl.py +295 -0
- spanforge/http.py +187 -0
- spanforge/inspect.py +427 -0
- spanforge/integrations/__init__.py +45 -0
- spanforge/integrations/_pricing.py +280 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/azure_openai.py +133 -0
- spanforge/integrations/bedrock.py +292 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +351 -0
- spanforge/integrations/groq.py +442 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/langgraph.py +306 -0
- spanforge/integrations/llamaindex.py +373 -0
- spanforge/integrations/ollama.py +287 -0
- spanforge/integrations/openai.py +368 -0
- spanforge/integrations/together.py +483 -0
- spanforge/io.py +214 -0
- spanforge/lint.py +322 -0
- spanforge/metrics.py +417 -0
- spanforge/metrics_export.py +343 -0
- spanforge/migrate.py +402 -0
- spanforge/model_registry.py +278 -0
- spanforge/models.py +389 -0
- spanforge/namespaces/__init__.py +254 -0
- spanforge/namespaces/audit.py +256 -0
- spanforge/namespaces/cache.py +237 -0
- spanforge/namespaces/chain.py +77 -0
- spanforge/namespaces/confidence.py +72 -0
- spanforge/namespaces/consent.py +92 -0
- spanforge/namespaces/cost.py +179 -0
- spanforge/namespaces/decision.py +143 -0
- spanforge/namespaces/diff.py +157 -0
- spanforge/namespaces/drift.py +80 -0
- spanforge/namespaces/eval_.py +251 -0
- spanforge/namespaces/feedback.py +241 -0
- spanforge/namespaces/fence.py +193 -0
- spanforge/namespaces/guard.py +105 -0
- spanforge/namespaces/hitl.py +91 -0
- spanforge/namespaces/latency.py +72 -0
- spanforge/namespaces/prompt.py +190 -0
- spanforge/namespaces/redact.py +173 -0
- spanforge/namespaces/retrieval.py +379 -0
- spanforge/namespaces/runtime_governance.py +494 -0
- spanforge/namespaces/template.py +208 -0
- spanforge/namespaces/tool_call.py +77 -0
- spanforge/namespaces/trace.py +1029 -0
- spanforge/normalizer.py +171 -0
- spanforge/plugins.py +82 -0
- spanforge/presidio_backend.py +349 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +418 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +914 -0
- spanforge/regression.py +192 -0
- spanforge/runtime_policy.py +159 -0
- spanforge/sampling.py +511 -0
- spanforge/schema.py +183 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/sdk/__init__.py +625 -0
- spanforge/sdk/_base.py +584 -0
- spanforge/sdk/_base.pyi +71 -0
- spanforge/sdk/_exceptions.py +1096 -0
- spanforge/sdk/_types.py +2184 -0
- spanforge/sdk/alert.py +1514 -0
- spanforge/sdk/alert.pyi +56 -0
- spanforge/sdk/audit.py +1196 -0
- spanforge/sdk/audit.pyi +67 -0
- spanforge/sdk/cec.py +1215 -0
- spanforge/sdk/cec.pyi +37 -0
- spanforge/sdk/config.py +641 -0
- spanforge/sdk/config.pyi +55 -0
- spanforge/sdk/enterprise.py +714 -0
- spanforge/sdk/enterprise.pyi +79 -0
- spanforge/sdk/explain.py +170 -0
- spanforge/sdk/fallback.py +432 -0
- spanforge/sdk/feedback.py +351 -0
- spanforge/sdk/gate.py +874 -0
- spanforge/sdk/gate.pyi +51 -0
- spanforge/sdk/identity.py +2114 -0
- spanforge/sdk/identity.pyi +47 -0
- spanforge/sdk/lineage.py +175 -0
- spanforge/sdk/observe.py +1065 -0
- spanforge/sdk/observe.pyi +50 -0
- spanforge/sdk/operator.py +338 -0
- spanforge/sdk/pii.py +1473 -0
- spanforge/sdk/pii.pyi +119 -0
- spanforge/sdk/pipelines.py +458 -0
- spanforge/sdk/pipelines.pyi +39 -0
- spanforge/sdk/policy.py +930 -0
- spanforge/sdk/rag.py +594 -0
- spanforge/sdk/rbac.py +280 -0
- spanforge/sdk/registry.py +430 -0
- spanforge/sdk/registry.pyi +46 -0
- spanforge/sdk/scope.py +279 -0
- spanforge/sdk/secrets.py +293 -0
- spanforge/sdk/secrets.pyi +25 -0
- spanforge/sdk/security.py +560 -0
- spanforge/sdk/security.pyi +57 -0
- spanforge/sdk/trust.py +472 -0
- spanforge/sdk/trust.pyi +41 -0
- spanforge/secrets.py +799 -0
- spanforge/signing.py +1179 -0
- spanforge/stats.py +100 -0
- spanforge/stream.py +560 -0
- spanforge/testing.py +378 -0
- spanforge/testing_mocks.py +1052 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +300 -0
- spanforge/validate.py +379 -0
- spanforge-1.0.0.dist-info/METADATA +1509 -0
- spanforge-1.0.0.dist-info/RECORD +174 -0
- spanforge-1.0.0.dist-info/WHEEL +4 -0
- spanforge-1.0.0.dist-info/entry_points.txt +5 -0
- spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/inspect.py
ADDED
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
"""spanforge.inspect — Tool Call Inspector (RFC-0001, Tool 3 / llm-inspect).
|
|
2
|
+
|
|
3
|
+
Surfaces every tool call in an agent run: function name, arguments, return
|
|
4
|
+
value, execution time, and whether the model actually used the tool's output.
|
|
5
|
+
|
|
6
|
+
Public API::
|
|
7
|
+
|
|
8
|
+
from spanforge.inspect import InspectorSession, inspect_trace
|
|
9
|
+
|
|
10
|
+
# --- Runtime inspection ---
|
|
11
|
+
session = InspectorSession()
|
|
12
|
+
tracer = spanforge.Tracer()
|
|
13
|
+
with tracer.agent_run("research") as run:
|
|
14
|
+
session.attach(run) # start recording tool spans
|
|
15
|
+
result = my_tool("query")
|
|
16
|
+
session.detach() # stop recording
|
|
17
|
+
|
|
18
|
+
for call in session.tool_calls:
|
|
19
|
+
print(call.name, call.duration_ms, call.was_result_used)
|
|
20
|
+
print(session.summary())
|
|
21
|
+
|
|
22
|
+
# --- Post-run replay from JSONL ---
|
|
23
|
+
calls = inspect_trace("events.jsonl", trace_id="01XXXX")
|
|
24
|
+
for call in calls:
|
|
25
|
+
print(call)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import threading
|
|
31
|
+
from dataclasses import dataclass
|
|
32
|
+
from typing import TYPE_CHECKING, Any
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from spanforge._span import AgentRunContext, Span
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"InspectorSession",
|
|
39
|
+
"ToolCallRecord",
|
|
40
|
+
"inspect_trace",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# ToolCallRecord
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class ToolCallRecord:
    """Frozen snapshot of a single tool function invocation.

    Attributes:
        name:            Function name (the span name).
        args:            Captured arguments as ``{param_name: repr_str}``.
        result:          Captured return value repr string, or ``None`` when
                         no return value was captured.
        duration_ms:     Wall-clock duration in milliseconds, or ``None`` when
                         the span carried no duration.
        span_id:         OTel-compatible 16-char hex span ID.
        trace_id:        OTel-compatible 32-char hex trace ID.
        timestamp:       Unix timestamp (seconds) at which the call started.
        status:          Span status: ``"ok"``, ``"error"``, or ``"timeout"``.
        error:           Error message when the call failed, else ``None``.
        was_result_used: Heuristic verdict on whether the result was consumed:
                         ``True`` — the result string was found in a later
                         span (likely used);
                         ``False`` — no later span contained it (likely
                         discarded);
                         ``None`` — indeterminate (no result captured, or no
                         later spans).
    """

    name: str
    args: dict[str, Any]
    result: Any
    duration_ms: float | None
    span_id: str
    trace_id: str
    timestamp: float
    status: str
    error: str | None
    was_result_used: bool | None = None

    def __str__(self) -> str:
        """Render a compact one-line description of the call."""
        usage_labels = {True: "used", False: "discarded", None: "unknown"}

        if self.duration_ms is None:
            elapsed = "?"
        else:
            elapsed = f"{self.duration_ms:.1f}ms"

        # Plain dict lookup on purpose: an unexpected flag value raises KeyError.
        usage = usage_labels[self.was_result_used]

        text = (
            f"ToolCallRecord(name={self.name!r}, duration={elapsed}, "
            f"status={self.status!r}, result_used={usage}"
        )
        if self.error:
            # The error is appended only when it is truthy.
            text += f" error={self.error!r}"
        return text + ")"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# InspectorSession
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
_TOOL_OPERATIONS = frozenset({"execute_tool", "tool_call"})
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _is_tool_span(span: Span) -> bool:
|
|
104
|
+
"""Return True if *span* represents a tool call."""
|
|
105
|
+
op = str(getattr(span, "operation", "") or "")
|
|
106
|
+
if op in _TOOL_OPERATIONS:
|
|
107
|
+
return True
|
|
108
|
+
attrs = getattr(span, "attributes", {}) or {}
|
|
109
|
+
return bool(attrs.get("tool"))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _extract_args(span: Span) -> dict[str, Any]:
|
|
113
|
+
"""Extract ``arg.*`` attributes from *span* into a plain dict."""
|
|
114
|
+
attrs = getattr(span, "attributes", {}) or {}
|
|
115
|
+
return {k[4:]: v for k, v in attrs.items() if k.startswith("arg.")}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _extract_result(span: Span) -> Any:
|
|
119
|
+
"""Return the ``return_value`` attribute of *span*, or ``None``."""
|
|
120
|
+
attrs = getattr(span, "attributes", {}) or {}
|
|
121
|
+
return attrs.get("return_value")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _check_result_used(tool_span: Span, subsequent_spans: list[Span]) -> bool | None:
|
|
125
|
+
"""Heuristic: did any subsequent span capture the tool result in its args?
|
|
126
|
+
|
|
127
|
+
Scans the ``arg.*`` attributes of every subsequent span for the tool
|
|
128
|
+
result string. Returns ``True`` if found, ``False`` if not found,
|
|
129
|
+
or ``None`` if the result was not captured or subsequent spans are absent.
|
|
130
|
+
"""
|
|
131
|
+
result = _extract_result(tool_span)
|
|
132
|
+
if result is None:
|
|
133
|
+
return None
|
|
134
|
+
result_str = str(result)
|
|
135
|
+
# Skip trivially empty or un-informative results.
|
|
136
|
+
if not result_str or result_str in ("None", "<unrepresentable>", "''", '""'):
|
|
137
|
+
return None
|
|
138
|
+
if not subsequent_spans:
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
for span in subsequent_spans:
|
|
142
|
+
attrs = getattr(span, "attributes", {}) or {}
|
|
143
|
+
for v in attrs.values():
|
|
144
|
+
if isinstance(v, str) and result_str in v:
|
|
145
|
+
return True
|
|
146
|
+
return False
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class InspectorSession:
    """Collects tool call records from live span events.

    Attach to an :class:`~spanforge._span.AgentRunContext` (or globally) to
    intercept every span that closes with an ``operation`` of
    ``"execute_tool"`` / ``"tool_call"`` or with ``attributes["tool"]`` set
    to a truthy value.

    Usage::

        session = InspectorSession()
        with tracer.agent_run("research") as run:
            session.attach(run)
            result = search("query")   # @trace(tool=True)
        session.detach()

        for call in session.tool_calls:
            print(call)

        print(session.summary())

    Recorded data accumulates across attach/detach cycles; call
    :meth:`reset` to start a fresh recording on the same instance.

    Note:
        Because :meth:`__len__` is defined, a session with no recorded tool
        calls is falsy; use ``session is not None`` for presence checks.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._spans: list[Span] = []  # all spans captured (tool + model)
        # id() of every span currently held in ``_spans``.  The spans are
        # kept alive by ``_spans``, so their ids cannot be recycled while
        # they are tracked here.
        self._seen_span_ids: set[int] = set()
        self._active = False
        self._trace_id_filter: str | None = None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def attach(self, run: AgentRunContext | None = None) -> InspectorSession:
        """Start recording tool call spans.

        Args:
            run: Optional :class:`~spanforge._span.AgentRunContext` returned
                 by ``tracer.agent_run()``.  When provided, only spans whose
                 ``trace_id`` equals the run's ``trace_id`` are recorded.
                 When ``None``, all spans are captured globally, even if an
                 earlier ``attach(run)`` had installed a filter.

        Returns:
            ``self`` for chaining.
        """
        # Derive the filter from *run* on every call so that a filter left
        # over from a previous attach(run) cannot silently narrow a later
        # global attach().  Set it before enabling recording.
        self._trace_id_filter = getattr(run, "trace_id", None) if run is not None else None
        self._active = True

        from spanforge._hooks import hooks

        # The callback is registered on every call.  Whether the registry
        # de-duplicates callbacks is not visible from here, so
        # ``_on_span_end`` ignores a span object it has already stored.
        hooks.on_span_end(self._on_span_end)
        return self

    def detach(self) -> InspectorSession:
        """Stop recording new spans.

        The callback is not unregistered; it simply returns early while
        ``_active`` is ``False``.  Call :meth:`reset` to clear recorded data.

        Returns:
            ``self`` for chaining.
        """
        self._active = False
        return self

    def reset(self) -> InspectorSession:
        """Clear all recorded spans, drop the trace filter, and re-enable recording.

        Returns:
            ``self`` for chaining.
        """
        with self._lock:
            self._spans.clear()
            self._seen_span_ids.clear()
        self._active = True
        self._trace_id_filter = None
        return self

    # ------------------------------------------------------------------
    # Hook callback
    # ------------------------------------------------------------------

    def _on_span_end(self, span: Span) -> None:
        """Store *span* if recording is active and it passes the trace filter.

        A span object that is already stored is ignored, so the callback may
        be invoked more than once for the same span (for example when
        :meth:`attach` was called repeatedly) without producing duplicate
        records.
        """
        if not self._active:
            return
        trace_id = getattr(span, "trace_id", None)
        if self._trace_id_filter and trace_id != self._trace_id_filter:
            return
        with self._lock:
            key = id(span)
            if key in self._seen_span_ids:
                # A duplicate would show up among its own "subsequent" spans
                # and make the was_result_used heuristic always report True.
                return
            self._seen_span_ids.add(key)
            self._spans.append(span)

    # ------------------------------------------------------------------
    # Accessors
    # ------------------------------------------------------------------

    @property
    def tool_calls(self) -> list[ToolCallRecord]:
        """Return tool call records with heuristic ``was_result_used`` flags.

        Records are returned in the order the spans were collected (typically
        chronological).  ``was_result_used`` is computed on each access by
        scanning all spans captured after each tool span.
        """
        with self._lock:
            spans = list(self._spans)

        records: list[ToolCallRecord] = []
        for i, span in enumerate(spans):
            if not _is_tool_span(span):
                continue
            subsequent = spans[i + 1 :]
            was_used = _check_result_used(span, subsequent)
            # Treat a missing or None start time as 0, mirroring the
            # ``or 0`` guard used by inspect_trace().
            start_ns = getattr(span, "start_ns", 0) or 0
            records.append(
                ToolCallRecord(
                    name=getattr(span, "name", ""),
                    args=_extract_args(span),
                    result=_extract_result(span),
                    duration_ms=getattr(span, "duration_ms", None),
                    span_id=getattr(span, "span_id", ""),
                    trace_id=getattr(span, "trace_id", ""),
                    timestamp=start_ns / 1_000_000_000.0,
                    status=getattr(span, "status", "ok"),
                    error=getattr(span, "error", None),
                    was_result_used=was_used,
                )
            )
        return records

    @property
    def all_span_count(self) -> int:
        """Total number of spans captured (tool + non-tool)."""
        with self._lock:
            return len(self._spans)

    # ------------------------------------------------------------------
    # Display helpers
    # ------------------------------------------------------------------

    def summary(self) -> str:
        """Return a plain-text table of all recorded tool calls.

        Returns:
            Multi-line string suitable for ``print()``.
        """
        calls = self.tool_calls
        rule = "=" * 72
        lines: list[str] = [rule, "  SpanForge Tool Call Inspector", rule]
        if not calls:
            lines.append("  No tool calls recorded.")
            lines.append(rule)
            return "\n".join(lines)

        lines.append(f"  {'Name':<28}  {'Duration':>10}  {'Status':<8}  {'Result Used':<12}")
        lines.append("-" * 72)
        for r in calls:
            dur = f"{r.duration_ms:.1f}ms" if r.duration_ms is not None else "?"
            used = {True: "yes", False: "no", None: "?"}[r.was_result_used]
            lines.append(f"  {r.name:<28}  {dur:>10}  {r.status:<8}  {used:<12}")
            if r.error:
                lines.append(f"    error: {r.error}")
        lines.append(rule)
        lines.append(f"  Total: {len(calls)} tool call(s)")
        lines.append(rule)
        return "\n".join(lines)

    def __repr__(self) -> str:
        """Return the same multi-line table as :meth:`summary`."""
        return self.summary()

    def __len__(self) -> int:
        """Return the number of recorded tool calls (not all spans)."""
        return len(self.tool_calls)
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
# inspect_trace() — JSONL replay
|
|
327
|
+
# ---------------------------------------------------------------------------
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def inspect_trace(
    path: str,
    *,
    trace_id: str | None = None,
    skip_errors: bool = False,
) -> list[ToolCallRecord]:
    """Rebuild tool call records from a JSONL trace file.

    Every event yielded by ``iter_file(path)`` whose type is
    ``llm.trace.span.completed`` or ``llm.trace.span.failed`` is collected.
    Payloads that describe a tool span (``operation`` in
    ``_TOOL_OPERATIONS`` or a truthy ``attributes["tool"]``) become
    :class:`ToolCallRecord` objects.  The ``was_result_used`` heuristic for a
    tool span looks only at collected span payloads that appear later in the
    file.

    Args:
        path:        Path to the NDJSON/JSONL events file.
        trace_id:    When truthy, span payloads with a different ``trace_id``
                     are dropped before any further processing.  ``None``
                     keeps spans from all traces.
        skip_errors: Forwarded to ``iter_file``; per the original
                     documentation, ``True`` skips malformed lines silently
                     instead of raising.

    Returns:
        Tool call records in file order.

    Raises:
        Whatever ``iter_file`` raises for a malformed line when
        ``skip_errors`` is ``False`` (documented upstream as
        ``DeserializationError`` — not verifiable from this module).
    """
    from spanforge.stream import iter_file

    wanted_types = frozenset(("llm.trace.span.completed", "llm.trace.span.failed"))

    # Pass 1: keep a shallow copy of every relevant span payload, in file order.
    span_payloads: list[dict[str, Any]] = []
    for event in iter_file(path, skip_errors=skip_errors):
        kind = event.event_type
        kind_name = kind.value if hasattr(kind, "value") else str(kind)
        if kind_name not in wanted_types:
            continue
        body = event.payload
        if trace_id and body.get("trace_id") != trace_id:
            continue
        span_payloads.append(dict(body))

    # Pass 2: turn tool spans into records, judging each against later spans.
    found: list[ToolCallRecord] = []
    for position, body in enumerate(span_payloads):
        attributes: dict[str, Any] = body.get("attributes") or {}
        operation = body.get("operation", "")
        if operation in _TOOL_OPERATIONS or bool(attributes.get("tool")):
            later_payloads = span_payloads[position + 1 :]
            returned = attributes.get("return_value")
            used_flag = _check_result_used_from_dicts(returned, later_payloads)

            started_ns = body.get("start_time_unix_nano") or 0
            call_args = {
                key[4:]: value for key, value in attributes.items() if key.startswith("arg.")
            }
            record = ToolCallRecord(
                name=body.get("span_name", ""),
                args=call_args,
                result=returned,
                duration_ms=body.get("duration_ms"),
                span_id=body.get("span_id") or "",
                trace_id=body.get("trace_id") or "",
                timestamp=started_ns / 1_000_000_000.0,
                status=body.get("status", "ok"),
                error=body.get("error"),
                was_result_used=used_flag,
            )
            found.append(record)
    return found
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _check_result_used_from_dicts(
|
|
411
|
+
result: Any,
|
|
412
|
+
subsequent_payloads: list[dict[str, Any]],
|
|
413
|
+
) -> bool | None:
|
|
414
|
+
"""Dict-based variant of the heuristic used by :func:`inspect_trace`."""
|
|
415
|
+
if result is None:
|
|
416
|
+
return None
|
|
417
|
+
result_str = str(result)
|
|
418
|
+
if not result_str or result_str in ("None", "<unrepresentable>", "''", '""'):
|
|
419
|
+
return None
|
|
420
|
+
if not subsequent_payloads:
|
|
421
|
+
return None
|
|
422
|
+
for payload in subsequent_payloads:
|
|
423
|
+
sp_attrs: dict[str, Any] = payload.get("attributes") or {}
|
|
424
|
+
for v in sp_attrs.values():
|
|
425
|
+
if isinstance(v, str) and result_str in v:
|
|
426
|
+
return True
|
|
427
|
+
return False
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Third-party provider and framework integrations for SpanForge.
|
|
2
|
+
|
|
3
|
+
Each sub-module is an optional extra that sits on top of the zero-dependency
|
|
4
|
+
core SDK. Install the relevant extra before importing:
|
|
5
|
+
|
|
6
|
+
pip install "spanforge[openai]" # OpenAI / Azure OpenAI instrumentation
|
|
7
|
+
pip install "spanforge[anthropic]" # Anthropic Claude auto-instrumentation
|
|
8
|
+
pip install "spanforge[gemini]" # Google Gemini auto-instrumentation
|
|
9
|
+
pip install "spanforge[bedrock]" # AWS Bedrock auto-instrumentation
|
|
10
|
+
pip install "spanforge[ollama]" # Ollama local model auto-instrumentation
|
|
11
|
+
pip install "spanforge[groq]" # Groq API auto-instrumentation
|
|
12
|
+
pip install "spanforge[together]" # Together AI auto-instrumentation
|
|
13
|
+
pip install "spanforge[langchain]" # LangChain callback handler
|
|
14
|
+
pip install "spanforge[langgraph]" # LangGraph governance handler
|
|
15
|
+
pip install "spanforge[llamaindex]" # LlamaIndex event handler
|
|
16
|
+
|
|
17
|
+
Available integrations
|
|
18
|
+
----------------------
|
|
19
|
+
* :mod:`spanforge.integrations.openai` - OpenAI chat completions
|
|
20
|
+
* :mod:`spanforge.integrations.azure_openai` - Azure OpenAI client instances
|
|
21
|
+
* :mod:`spanforge.integrations.anthropic` - Anthropic Claude
|
|
22
|
+
* :mod:`spanforge.integrations.gemini` - Google Gemini
|
|
23
|
+
* :mod:`spanforge.integrations.bedrock` - AWS Bedrock
|
|
24
|
+
* :mod:`spanforge.integrations.ollama` - Ollama local models
|
|
25
|
+
* :mod:`spanforge.integrations.groq` - Groq API
|
|
26
|
+
* :mod:`spanforge.integrations.together` - Together AI
|
|
27
|
+
* :mod:`spanforge.integrations.langchain` - LangChain callback handler
|
|
28
|
+
* :mod:`spanforge.integrations.langgraph` - LangGraph governance callbacks
* :mod:`spanforge.integrations.llamaindex` - LlamaIndex event handler
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
__all__: list[str] = [
|
|
34
|
+
"anthropic",
|
|
35
|
+
"azure_openai",
|
|
36
|
+
"bedrock",
|
|
37
|
+
"gemini",
|
|
38
|
+
"groq",
|
|
39
|
+
"langchain",
|
|
40
|
+
"langgraph",
|
|
41
|
+
"llamaindex",
|
|
42
|
+
"ollama",
|
|
43
|
+
"openai",
|
|
44
|
+
"together",
|
|
45
|
+
]
|