struct-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- struct_sdk/__init__.py +14 -0
- struct_sdk/anthropic.py +938 -0
- struct_sdk/claude_agent.py +85 -0
- struct_sdk/core.py +755 -0
- struct_sdk/langchain.py +1450 -0
- struct_sdk-0.1.0.dist-info/METADATA +333 -0
- struct_sdk-0.1.0.dist-info/RECORD +9 -0
- struct_sdk-0.1.0.dist-info/WHEEL +4 -0
- struct_sdk-0.1.0.dist-info/licenses/LICENSE +201 -0
struct_sdk/core.py
ADDED
|
@@ -0,0 +1,755 @@
|
|
|
1
|
+
"""Core SDK — wraps OpenTelemetry with Struct-specific defaults.
|
|
2
|
+
|
|
3
|
+
Users call struct.init() once at startup. The SDK auto-instruments any
|
|
4
|
+
supported libraries that are installed (anthropic, claude_agent_sdk, langchain_core).
|
|
5
|
+
|
|
6
|
+
OTel GenAI Semantic Conventions v1.37+ compliant.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import atexit
|
|
11
|
+
import contextvars
|
|
12
|
+
import functools
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import threading
|
|
16
|
+
import uuid
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, Callable, Optional
|
|
19
|
+
|
|
20
|
+
from opentelemetry import trace
|
|
21
|
+
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
|
|
22
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
23
|
+
from opentelemetry.sdk._logs import LoggerProvider
|
|
24
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
|
25
|
+
from opentelemetry.sdk.resources import Resource
|
|
26
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
27
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
28
|
+
from opentelemetry.trace import StatusCode
|
|
29
|
+
|
|
30
|
+
# Package-wide logger: every SDK diagnostic is emitted under the "struct_sdk" name.
logger = logging.getLogger("struct_sdk")

# Ingestion base URL used when init() is not given an explicit endpoint=.
DEFAULT_ENDPOINT = "https://ingest.struct.ai"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ContentCaptureMode(str, Enum):
    """Where captured LLM message content is allowed to appear in telemetry.

    Follows the content capture modes of the OTel GenAI spec. Members are also
    ``str`` instances, so each one compares equal to its raw string value.
    """

    # No message content is captured at all.
    NONE = "none"

    # Content is written to log events only (the default mode).
    EVENT_ONLY = "event_only"

    # Content is written to span attributes only (legacy).
    SPAN_ONLY = "span_only"

    # Content is written to both log events and span attributes.
    SPAN_AND_EVENT = "span_and_event"
|
|
44
|
+
|
|
45
|
+
# Context variables that carry the active session down to child spans.
_current_session_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
    "_current_session_id", default=None
)
_current_conversation_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
    "_current_conversation_id", default=None
)
_current_agent_span: contextvars.ContextVar[Optional[trace.Span]] = contextvars.ContextVar(
    "_current_agent_span", default=None
)

# Queue of pending tool_use ids: tool name -> ids in arrival (FIFO) order.
#
# The Anthropic monkey-patch fills it when a chat response arrives carrying
# tool_use blocks; @struct.tool() / struct.tool(...) drains it when the
# decorated function or context manager starts its span. That is what lets an
# execute_tool span be linked to the originating tool_use.id automatically,
# without the caller having to pass tool_call_id=. Callers that do pass it are
# unaffected, because an explicit value always takes priority.
_pending_tool_calls: contextvars.ContextVar[Optional[dict[str, list[str]]]] = contextvars.ContextVar(
    "_pending_tool_calls", default=None
)

# Names of integrations that were already patched; guards against patching twice.
_patched_integrations: set[str] = set()

# Call sites whose first failure has been reported at WARN already; any further
# failure at the same site is logged at DEBUG instead.
_first_failure_logged: set[str] = set()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _safe(fn: Callable[[], None], *, site: str) -> None:
    """Invoke ``fn`` and never let an ``Exception`` escape.

    Args:
        fn: Zero-argument callable to run; its return value is discarded.
        site: Label identifying the call site. It is included in the log
            message and used to remember which sites have already failed.

    The first failure seen for a given ``site`` is logged at WARNING with the
    traceback attached; every later failure at that site is logged at DEBUG.
    """
    try:
        fn()
    except Exception:
        already_reported = site in _first_failure_logged
        if not already_reported:
            _first_failure_logged.add(site)
        # Only the first failure per site is loud; repeats stay at DEBUG.
        level = logging.DEBUG if already_reported else logging.WARNING
        logger.log(level, "Struct SDK suppressed exception at %s", site, exc_info=True)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class StructSDK:
    """Global SDK instance. Call init() once at startup.

    Holds a private ``TracerProvider`` and ``LoggerProvider`` (neither is
    registered as the process-wide OpenTelemetry global), the resolved content
    capture mode, and the ``agent()`` / ``tool()`` entry points that create
    ``invoke_agent`` and ``execute_tool`` spans.
    """

    def __init__(self):
        self._initialized = False
        self._tracer_provider: Optional[TracerProvider] = None
        self._logger_provider: Optional[LoggerProvider] = None
        self._ingest_key: str = ""
        self._endpoint: str = DEFAULT_ENDPOINT
        self._content_capture: ContentCaptureMode = ContentCaptureMode.EVENT_ONLY
        # Atexit/_shutdown reads this even when init() was never called or
        # failed before reaching the assignment in init() — keep it set here.
        self._shutdown_timeout_seconds: float = 5.0
        # Serializes init() so concurrent callers cannot both pass the
        # early-return check and double-run the init body. A second concurrent
        # init blocks on this lock instead of racing.
        self._init_lock: threading.Lock = threading.Lock()

    def init(
        self,
        ingest_key: str,
        *,
        service_name: str = "default-agent",
        service_version: str = "0.0.0",
        environment: str = "development",
        endpoint: str = DEFAULT_ENDPOINT,
        shutdown_timeout_seconds: float = 5.0,
        capture_content: bool = True,
        content_capture: Optional[ContentCaptureMode] = None,
    ):
        """Initialize the Struct SDK and auto-instrument installed libraries.

        After calling this, any installed supported library is automatically
        instrumented — no additional setup required. A second call while the
        SDK is already initialized logs a warning and is ignored. If setup
        fails, the failure is logged at WARNING and the SDK stays disabled;
        any batch processor that had already been constructed is shut down so
        it is not left behind.

        Auto-detected integrations:
        - ``anthropic`` — patches messages.create() and messages.stream()
        - ``claude_agent_sdk`` — patches ClaudeAgentOptions to inject OTel env vars
        - ``langchain_core`` — patched through ``struct_sdk.langchain``

        Args:
            ingest_key: Your Struct ingest key (pk-...). Write-only, safe to expose.
            service_name: Name of your agent/service (e.g., "checkout-agent").
            service_version: Version string for your agent.
            environment: Deployment environment (development, staging, production).
            endpoint: Struct ingestion endpoint. Override for self-hosted or local dev.
            shutdown_timeout_seconds: Maximum time the SDK's atexit shutdown is
                allowed to spend flushing telemetry providers. If the ingest
                endpoint is dead or slow, shutdown returns within this budget
                rather than hanging the user's process exit. Defaults to 5.0.
            capture_content: Whether to capture LLM prompts/completions. Disable for privacy.
                Deprecated — use ``content_capture`` instead.
            content_capture: Controls how LLM message content is captured.
                Takes precedence over ``capture_content``. Defaults to EVENT_ONLY.
        """
        # Serialize init() so concurrent callers cannot both pass the early-return
        # check and double-run the body (which would leak the first provider — the
        # second assignment to self._tracer_provider would orphan the first without
        # ever shutting it down). The single-threaded healthy path is unaffected:
        # the lock is uncontended and acquired once.
        with self._init_lock:
            if self._initialized:
                logger.warning("struct.init() called multiple times — ignoring")
                return

            self._ingest_key = ingest_key
            self._endpoint = endpoint

            # Resolve content capture mode: explicit content_capture > capture_content bool
            if content_capture is not None:
                self._content_capture = content_capture
            elif not capture_content:
                self._content_capture = ContentCaptureMode.NONE
            else:
                self._content_capture = ContentCaptureMode.EVENT_ONLY

            # Declared outside the try so the failure path can see which batch
            # processors were already built and stop them.
            span_processor: Optional[BatchSpanProcessor] = None
            log_processor: Optional[BatchLogRecordProcessor] = None

            try:
                # Stored before atexit registers _shutdown so the hook always sees
                # the user-configured timeout (or the default).
                self._shutdown_timeout_seconds = shutdown_timeout_seconds

                headers = {"x-struct-ingest-key": ingest_key}
                base_url = endpoint.rstrip("/")

                # --- TracerProvider ---
                span_exporter = OTLPSpanExporter(
                    endpoint=f"{base_url}/v1/traces",
                    headers=headers,
                )

                span_processor = BatchSpanProcessor(
                    span_exporter,
                    max_queue_size=10000,
                    max_export_batch_size=100,
                    schedule_delay_millis=1000,
                )

                # Isolated TracerProvider — NOT set as the global.
                resource = Resource.create(
                    {
                        "service.name": service_name,
                        "service.version": service_version,
                        "deployment.environment": environment,
                    }
                )
                self._tracer_provider = TracerProvider(resource=resource)
                self._tracer_provider.add_span_processor(span_processor)

                # --- LoggerProvider (for gen_ai message events) ---
                log_exporter = OTLPLogExporter(
                    endpoint=f"{base_url}/v1/logs",
                    headers=headers,
                )

                log_processor = BatchLogRecordProcessor(
                    log_exporter,
                    max_queue_size=10000,
                    max_export_batch_size=100,
                    schedule_delay_millis=1000,
                )

                self._logger_provider = LoggerProvider(resource=resource)
                self._logger_provider.add_log_record_processor(log_processor)

                self._initialized = True

                self._auto_instrument()
            except Exception:
                logger.warning(
                    "Struct SDK init failed; SDK disabled: service=%s endpoint=%s",
                    service_name, endpoint, exc_info=True,
                )
                # Stop whatever was already built before dropping the
                # references; otherwise a half-constructed pipeline would be
                # orphaned with no way to shut it down later.
                for label, processor in (("span", span_processor), ("log", log_processor)):
                    if processor is not None:
                        _safe(processor.shutdown, site=f"init.rollback_{label}_processor")
                self._initialized = False
                self._tracer_provider = None
                self._logger_provider = None
                return

            atexit.register(self._shutdown)

            logger.info(
                "Struct SDK initialized: service=%s endpoint=%s content_capture=%s",
                service_name, endpoint, self._content_capture.value,
            )

    def _auto_instrument(self):
        """Detect and patch installed libraries.

        Each integration module is imported and its ``patch(self)`` is called.
        An ``ImportError`` skips that integration; any other exception is
        logged at DEBUG. Integrations already recorded in
        ``_patched_integrations`` are not patched again.
        """
        import importlib

        integrations = [
            ("anthropic", "struct_sdk.anthropic"),
            ("claude_agent_sdk", "struct_sdk.claude_agent"),
            ("langchain_core", "struct_sdk.langchain"),
        ]

        for lib_name, module_path in integrations:
            if lib_name in _patched_integrations:
                continue
            try:
                mod = importlib.import_module(module_path)
                mod.patch(self)  # type: ignore[attr-defined]
                _patched_integrations.add(lib_name)
                logger.info("Auto-instrumented: %s", lib_name)
            except ImportError:
                # Presumably the target library is not installed — skip quietly,
                # but leave a trace for anyone debugging a missing integration.
                logger.debug("Integration %s not available; skipping", lib_name)
            except Exception:
                logger.debug("Failed to instrument %s", lib_name, exc_info=True)

    def get_tracer(self, name: str = "struct-sdk") -> trace.Tracer:
        """Get an OTel tracer from our isolated provider.

        Raises:
            RuntimeError: If no tracer provider exists (init() not called, or it failed).
        """
        if self._tracer_provider is None:
            raise RuntimeError("Call struct.init() before using the SDK")
        return self._tracer_provider.get_tracer(name)

    def get_logger(self, name: str = "struct-sdk") -> Any:
        """Get an OTel logger from our isolated provider (for gen_ai log events).

        Raises:
            RuntimeError: If no logger provider exists (init() not called, or it failed).
        """
        if self._logger_provider is None:
            raise RuntimeError("Call struct.init() before using the SDK")
        return self._logger_provider.get_logger(name)

    @property
    def capture_content(self) -> bool:
        """Backward-compatible property. True if any content capture is enabled."""
        return self._content_capture != ContentCaptureMode.NONE

    @property
    def content_capture(self) -> ContentCaptureMode:
        """The active content capture mode."""
        return self._content_capture

    @property
    def emit_events(self) -> bool:
        """True if content should be emitted as log events."""
        return self._content_capture in (ContentCaptureMode.EVENT_ONLY, ContentCaptureMode.SPAN_AND_EVENT)

    @property
    def emit_span_content(self) -> bool:
        """True if content should be set as span attributes."""
        return self._content_capture in (ContentCaptureMode.SPAN_ONLY, ContentCaptureMode.SPAN_AND_EVENT)

    # ── Decorators / Context Managers ──

    def agent(
        self,
        fn: Any = None,
        *,
        name: Optional[str] = None,
        session_id: Optional[str] = None,
        agent_id: Optional[str] = None,
        version: Optional[str] = None,
        metadata: Optional[dict[str, str]] = None,
    ) -> Any:
        """Mark a function or block as an agent session.

        Creates an ``invoke_agent`` span per the OTel GenAI spec that groups
        all LLM and tool calls within the scope.

        The initialized check happens when ``agent()`` itself is called. If the
        SDK is not initialized at that moment (for a decorator: at decoration
        time), ``fn`` is returned unwrapped, or a no-op context is returned
        when ``fn`` is omitted.

        Usage::

            @struct.agent()
            async def my_agent():
                response = await client.messages.create(...)

            async with struct.agent(session_id=agent_id, name="checkout"):
                ...

        Raises:
            TypeError: If ``fn`` is given but is not callable.
        """
        if not self._initialized:
            return fn if fn is not None else _NoOpContext()

        ctx = _AgentContext(self, name=name, session_id=session_id, agent_id=agent_id, version=version, metadata=metadata)
        if fn is None:
            return ctx
        if callable(fn):
            return ctx(fn)
        raise TypeError("agent() argument must be callable or used as context manager")

    def tool(
        self,
        fn: Any = None,
        *,
        name: Optional[str] = None,
        tool_call_id: Optional[str] = None,
    ) -> Any:
        """Mark a function or block as a tool execution.

        Creates an ``execute_tool`` span per the OTel GenAI spec.

        ``gen_ai.tool.call.id`` is populated automatically when this is invoked
        after a patched Anthropic call flow — the SDK stashes each ``tool_use.id``
        from the assistant response and pops the first matching tool name when
        this span starts. Pass ``tool_call_id=`` explicitly to override.

        As with ``agent()``, the initialized check happens when ``tool()`` is
        called; while uninitialized, ``fn`` is returned unwrapped or a no-op
        context is returned.

        Usage::

            @struct.tool()
            async def search_investigations(query: str, limit: int):
                return await repo.search(query, limit)

            async with struct.tool(name="search"):
                result = await do_search()

        Raises:
            TypeError: If ``fn`` is given but is not callable.
        """
        if not self._initialized:
            return fn if fn is not None else _NoOpContext()

        ctx = _ToolContext(self, name=name, tool_call_id=tool_call_id)
        if fn is None:
            return ctx
        if callable(fn):
            return ctx(fn)
        raise TypeError("tool() argument must be callable or used as context manager")

    def _shutdown(self) -> None:
        """Best-effort shutdown bounded by ``shutdown_timeout_seconds``.

        Runs ``provider.shutdown()`` in a daemon worker thread and waits up
        to the configured timeout for it to finish. On timeout, the daemon
        is abandoned (it dies with the process). On any failure — thread
        creation, provider faults, attribute errors — we swallow the
        exception: this runs in atexit context where propagating to the
        interpreter would mean the user's process exits with a confusing
        traceback for telemetry the SDK was supposed to hide.
        """
        try:
            worker = threading.Thread(
                target=self._do_shutdown,
                name="struct-sdk-shutdown",
                daemon=True,
            )
            worker.start()
            worker.join(timeout=self._shutdown_timeout_seconds)
            if worker.is_alive():
                # Thread keeps running; daemon=True so it dies with the
                # process. We log at DEBUG: stderr WARNs at atexit time
                # clutter user output for a fault we already handled.
                logger.debug(
                    "Struct SDK shutdown timed out after %.1fs (ingest may be unreachable)",
                    self._shutdown_timeout_seconds,
                )
        except Exception:
            logger.debug("Struct SDK shutdown failed", exc_info=True)

    def _do_shutdown(self) -> None:
        """Worker that calls each provider's shutdown(); each call is isolated.

        Order matches the previous behavior (logger first, tracer second) so
        any consumer that depended on relative ordering keeps working.
        """
        if self._logger_provider is not None:
            try:
                self._logger_provider.shutdown()
            except Exception:
                logger.debug("LoggerProvider shutdown failed", exc_info=True)
        if self._tracer_provider is not None:
            try:
                self._tracer_provider.shutdown()
            except Exception:
                logger.debug("TracerProvider shutdown failed", exc_info=True)
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# ---------------------------------------------------------------------------
|
|
394
|
+
# No-op context (when SDK not initialized)
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
|
|
397
|
+
class _NoOpContext:
    """Inert stand-in handed out by agent()/tool() while the SDK is uninitialized.

    It can be used as a decorator, a sync context manager, or an async context
    manager, and does nothing in any of those roles.
    """

    def __call__(self, fn: Any) -> Any:
        # Decorator form: hand the function back untouched.
        return fn

    def __enter__(self) -> "_NoOpContext":
        return self

    def __exit__(self, *exc_info: Any) -> None:
        # Returning None lets any in-flight exception propagate.
        return None

    async def __aenter__(self) -> "_NoOpContext":
        return self

    async def __aexit__(self, *exc_info: Any) -> None:
        # Same as __exit__: never suppresses an exception.
        return None
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
# ---------------------------------------------------------------------------
|
|
412
|
+
# Agent context — invoke_agent span (GenAI spec)
|
|
413
|
+
# ---------------------------------------------------------------------------
|
|
414
|
+
|
|
415
|
+
class _AgentContext:
    """Creates an ``invoke_agent {name}`` span per the OTel GenAI spec.

    Usable as a decorator (sync or async functions) and as a sync or async
    context manager. While the scope is active, the session id, conversation
    id, agent span and a fresh pending-tool-call dict are published through
    module-level context variables; all of them are restored on exit.
    """

    def __init__(self, sdk: StructSDK, *, name: Optional[str] = None, session_id: Optional[str] = None, agent_id: Optional[str] = None, version: Optional[str] = None, metadata: Optional[dict[str, str]] = None):
        self._sdk = sdk
        self._name = name
        # What the caller actually passed. The decorator path forwards this
        # (not the generated fallback below) so that each call of a decorated
        # function gets its own session id unless one was given explicitly.
        self._explicit_session_id = session_id
        self._session_id = session_id or str(uuid.uuid4())
        self._agent_id = agent_id
        self._version = version
        self._metadata = metadata
        self._span: Optional[trace.Span] = None
        self._ctx_manager: Optional[Any] = None
        self._session_token: Optional[contextvars.Token[Optional[str]]] = None
        self._conversation_token: Optional[contextvars.Token[Optional[str]]] = None
        self._agent_span_token: Optional[contextvars.Token[Optional[trace.Span]]] = None
        self._pending_tool_token: Optional[contextvars.Token[Optional[dict[str, list[str]]]]] = None

    def __call__(self, fn: Any) -> Any:
        """Use as decorator.

        Every call of the returned wrapper runs ``fn`` inside its own
        ``_AgentContext``. The span name defaults to ``fn.__name__`` when no
        ``name`` was given. When no ``session_id`` was given, each call gets a
        freshly generated one, so separate invocations are not merged into a
        single conversation.
        """
        span_name = self._name or fn.__name__
        sdk = self._sdk
        # None here makes the per-call context generate a new id.
        session_id = self._explicit_session_id
        agent_id = self._agent_id
        version = self._version
        metadata = self._metadata

        if asyncio.iscoroutinefunction(fn):
            @functools.wraps(fn)
            async def wrapper(*args: Any, **kwargs: Any) -> Any:
                async with _AgentContext(sdk, name=span_name, session_id=session_id, agent_id=agent_id, version=version, metadata=metadata):
                    return await fn(*args, **kwargs)
            return wrapper
        else:
            @functools.wraps(fn)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                with _AgentContext(sdk, name=span_name, session_id=session_id, agent_id=agent_id, version=version, metadata=metadata):
                    return fn(*args, **kwargs)
            return wrapper

    def _reset_context_tokens(self, site_prefix: str) -> None:
        """Restore every context variable this scope set, then forget the tokens.

        Each reset is isolated through ``_safe`` so one failure cannot prevent
        the others. ``site_prefix`` keeps the log sites of the rollback path
        and the normal exit path distinct.
        """
        resets = (
            ("_pending_tool_token", _pending_tool_calls, "reset_pending_tool"),
            ("_agent_span_token", _current_agent_span, "reset_agent_span"),
            ("_session_token", _current_session_id, "reset_session"),
            ("_conversation_token", _current_conversation_id, "reset_conversation"),
        )
        for attr, var, suffix in resets:
            token = getattr(self, attr)
            if token is not None:
                _safe(functools.partial(var.reset, token), site=f"{site_prefix}.{suffix}")
                # A token may only be used once; drop it so a repeated exit is harmless.
                setattr(self, attr, None)

    def _start_span(self) -> None:
        """Start the ``invoke_agent`` span and publish the session context.

        Any exception raised while doing so is swallowed by ``_safe``; in that
        case all partially applied state is rolled back and the scope runs
        without telemetry.
        """
        started = False
        entered = False

        def body() -> None:
            nonlocal started, entered
            agent_name = self._name or "agent"
            tracer = self._sdk.get_tracer("struct-sdk")

            # Capture the outer session id BEFORE overwriting the contextvar so we
            # can link nested agents (subagents) back to the agent that spawned them.
            # Subagent pattern: an outer @struct.agent() wraps a function; that function
            # calls a tool that itself enters another @struct.agent() scope. The inner
            # scope's struct.agent.parent_session_id points to the outer session_id.
            parent_session_id = _current_session_id.get(None)

            self._span = tracer.start_span(
                f"invoke_agent {agent_name}",
                kind=trace.SpanKind.INTERNAL,
            )
            # Required
            self._span.set_attribute("gen_ai.operation.name", "invoke_agent")
            self._span.set_attribute("gen_ai.provider.name", "struct")
            # Conditionally required
            self._span.set_attribute("gen_ai.agent.name", agent_name)
            # gen_ai.agent.id is the stable agent-definition identifier per the
            # OTel GenAI spec — not a per-invocation value. Only set it when the
            # caller supplies one.
            if self._agent_id:
                self._span.set_attribute("gen_ai.agent.id", self._agent_id)
            if self._version:
                self._span.set_attribute("gen_ai.agent.version", self._version)
            # gen_ai.conversation.id is the spec-blessed name; we drop the
            # redundant session.id.
            self._span.set_attribute("gen_ai.conversation.id", self._session_id)
            # Link to the outer agent's session, if we're nested under one.
            if parent_session_id and parent_session_id != self._session_id:
                self._span.set_attribute("struct.agent.parent_session_id", parent_session_id)
            # Custom metadata
            if self._metadata:
                for key, value in self._metadata.items():
                    self._span.set_attribute(f"struct.metadata.{key}", value)

            self._ctx_manager = trace.use_span(self._span, end_on_exit=False)
            self._ctx_manager.__enter__()
            # Tracks whether the OTel context stack was actually pushed; only
            # then is it correct to call __exit__ on rollback. If body raised
            # between assigning self._ctx_manager and __enter__ returning,
            # nothing was pushed and __exit__ would corrupt the stack.
            entered = True
            # Set context vars so child spans inherit session context
            self._session_token = _current_session_id.set(self._session_id)
            self._conversation_token = _current_conversation_id.set(self._session_id)
            self._agent_span_token = _current_agent_span.set(self._span)
            # Fresh pending-tool-calls dict scoped to this agent run, so tool_use
            # ids from an outer agent cannot leak in or out.
            self._pending_tool_token = _pending_tool_calls.set({})
            started = True

        _safe(body, site="agent.start_span")
        if not started:
            # body() raised partway. Roll back any partial state so __exit__ /
            # _end_span see a clean "no telemetry" view: context variables are
            # restored best-effort, the OTel context stack is popped if it was
            # pushed, the span is ended if it was started, and references are
            # dropped.
            self._reset_context_tokens("agent.start_span")
            # Only pop the OTel context if __enter__ actually ran.
            ctx = self._ctx_manager
            if entered and ctx is not None:
                _safe(lambda: ctx.__exit__(None, None, None),
                      site="agent.start_span.rollback_ctx_exit")
            self._ctx_manager = None
            # Then end the span so it isn't leaked unended.
            span = self._span
            if span is not None:
                _safe(span.end, site="agent.start_span.rollback_span_end")
            self._span = None

    def _end_span(self, exc_val: Any = None) -> None:
        """Restore context, finalize the span status, end it, and pop the OTel context.

        Args:
            exc_val: Exception leaving the scope, or ``None`` on success. When
                set, the span records it and is marked ``ERROR``; otherwise the
                span is marked ``OK``.
        """
        # Contextvar resets must always run — they're cheap, can't fault on the
        # span, and leaving them set leaks session context into the caller.
        self._reset_context_tokens("agent.end_span")
        span = self._span
        if span is not None:
            # Compare against None rather than relying on truthiness: an
            # exception type may define __bool__/__len__ and evaluate falsy.
            if exc_val is not None:
                _safe(lambda: span.set_attribute("error.type", type(exc_val).__name__),
                      site="agent.end_span.error_type")
                _safe(lambda: span.set_status(StatusCode.ERROR, str(exc_val)),
                      site="agent.end_span.error_status")
                _safe(lambda: span.record_exception(exc_val),
                      site="agent.end_span.record_exception")
            else:
                _safe(lambda: span.set_status(StatusCode.OK),
                      site="agent.end_span.set_ok")
            _safe(span.end, site="agent.end_span.end")
        ctx = self._ctx_manager
        if ctx is not None:
            _safe(lambda: ctx.__exit__(None, None, None), site="agent.end_span.ctx_exit")
        # Drop references so a second exit is a no-op instead of ending the
        # span / popping the context again.
        self._span = None
        self._ctx_manager = None

    def __enter__(self) -> "_AgentContext":
        self._start_span()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Returns None, so an exception raised inside the scope propagates.
        self._end_span(exc_val)

    async def __aenter__(self) -> "_AgentContext":
        self._start_span()
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Returns None, so an exception raised inside the scope propagates.
        self._end_span(exc_val)

    @property
    def session_id(self) -> str:
        """Session id of this context (caller-supplied, or generated at construction)."""
        return self._session_id
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
# ---------------------------------------------------------------------------
|
|
608
|
+
# Tool context — execute_tool span (GenAI spec)
|
|
609
|
+
# ---------------------------------------------------------------------------
|
|
610
|
+
|
|
611
|
+
class _ToolContext:
    """Creates an ``execute_tool {name}`` span per the OTel GenAI spec.

    Usable as a decorator (sync or async functions) and as a sync or async
    context manager. In decorator form, keyword arguments and a non-``None``
    return value are recorded on the span when the SDK's ``capture_content``
    is true; positional arguments are not recorded.
    """

    def __init__(self, sdk: StructSDK, *, name: Optional[str] = None, tool_call_id: Optional[str] = None):
        self._sdk = sdk
        self._name = name
        self._tool_call_id = tool_call_id
        self._span: Optional[trace.Span] = None
        self._ctx_manager: Optional[Any] = None
        self._result: Any = None

    def __call__(self, fn: Any) -> Any:
        """Use as decorator. Captures function args and return value.

        Every call of the returned wrapper runs ``fn`` inside its own
        ``_ToolContext``; the tool name defaults to ``fn.__name__``.
        """
        tool_name = self._name or fn.__name__
        sdk = self._sdk
        tool_call_id = self._tool_call_id

        if asyncio.iscoroutinefunction(fn):
            @functools.wraps(fn)
            async def wrapper(*args: Any, **kwargs: Any) -> Any:
                async with _ToolContext(sdk, name=tool_name, tool_call_id=tool_call_id) as ctx:
                    # Capture arguments (opt-in)
                    if sdk.capture_content and kwargs:
                        ctx._set_arguments(kwargs)
                    result = await fn(*args, **kwargs)
                    # Capture result (opt-in)
                    if sdk.capture_content and result is not None:
                        ctx._set_result(result)
                    return result
            return wrapper
        else:
            @functools.wraps(fn)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                with _ToolContext(sdk, name=tool_name, tool_call_id=tool_call_id) as ctx:
                    # Capture arguments (opt-in)
                    if sdk.capture_content and kwargs:
                        ctx._set_arguments(kwargs)
                    result = fn(*args, **kwargs)
                    # Capture result (opt-in)
                    if sdk.capture_content and result is not None:
                        ctx._set_result(result)
                    return result
            return wrapper

    def _start_span(self) -> None:
        """Start the ``execute_tool`` span and make it the current span.

        Any exception raised while doing so is swallowed by ``_safe``; in that
        case partially applied state is rolled back and the scope runs without
        telemetry.
        """
        started = False
        entered = False

        def body() -> None:
            nonlocal started, entered
            tool_name = self._name or "tool"
            tracer = self._sdk.get_tracer("struct-sdk")
            self._span = tracer.start_span(
                f"execute_tool {tool_name}",
                kind=trace.SpanKind.INTERNAL,
            )
            # Required
            self._span.set_attribute("gen_ai.operation.name", "execute_tool")
            self._span.set_attribute("gen_ai.provider.name", "struct")
            # Recommended
            self._span.set_attribute("gen_ai.tool.name", tool_name)
            # Auto-link to the originating tool_use.id from the preceding chat
            # response if the caller didn't pass one explicitly. Explicit
            # tool_call_id= always wins — pop from the pending queue only when
            # the caller left it unset.
            if self._tool_call_id is None:
                pending = _pending_tool_calls.get()
                if pending:
                    ids = pending.get(tool_name)
                    if ids:
                        # Oldest pending id for this tool name first (FIFO).
                        self._tool_call_id = ids.pop(0)
            if self._tool_call_id:
                self._span.set_attribute("gen_ai.tool.call.id", self._tool_call_id)
            # Propagate session context
            session_id = _current_session_id.get(None)
            if session_id:
                self._span.set_attribute("gen_ai.conversation.id", session_id)

            self._ctx_manager = trace.use_span(self._span, end_on_exit=False)
            self._ctx_manager.__enter__()
            # Tracks whether the OTel context stack was actually pushed; only
            # then is it correct to call __exit__ on rollback.
            entered = True
            started = True

        _safe(body, site="tool.start_span")
        if not started:
            # Body raised partway. Pop the OTel context stack if it was pushed,
            # end the span if it was started, then drop references so
            # _end_span sees a clean "no telemetry" view.
            ctx = self._ctx_manager
            if entered and ctx is not None:
                _safe(lambda: ctx.__exit__(None, None, None),
                      site="tool.start_span.rollback_ctx_exit")
            self._ctx_manager = None
            span = self._span
            if span is not None:
                _safe(span.end, site="tool.start_span.rollback_span_end")
            self._span = None

    def _end_span(self, exc_val: Any = None) -> None:
        """Finalize the span status, end the span, and pop the OTel context.

        Args:
            exc_val: Exception leaving the scope, or ``None`` on success. When
                set, the span records it and is marked ``ERROR``; otherwise the
                span is marked ``OK``.
        """
        span = self._span
        if span is not None:
            # Compare against None rather than relying on truthiness: an
            # exception type may define __bool__/__len__ and evaluate falsy.
            if exc_val is not None:
                _safe(lambda: span.set_attribute("error.type", type(exc_val).__name__),
                      site="tool.end_span.error_type")
                _safe(lambda: span.set_status(StatusCode.ERROR, str(exc_val)),
                      site="tool.end_span.error_status")
                _safe(lambda: span.record_exception(exc_val),
                      site="tool.end_span.record_exception")
            else:
                _safe(lambda: span.set_status(StatusCode.OK),
                      site="tool.end_span.set_ok")
            _safe(span.end, site="tool.end_span.end")
        ctx = self._ctx_manager
        if ctx is not None:
            _safe(lambda: ctx.__exit__(None, None, None), site="tool.end_span.ctx_exit")
        # Drop references so a second exit is a no-op instead of ending the
        # span / popping the context again.
        self._span = None
        self._ctx_manager = None

    def _set_arguments(self, kwargs: dict) -> None:
        """Set tool call arguments (opt-in).

        The arguments are JSON-encoded (values JSON cannot encode are passed
        through ``str``) and truncated to 8192 characters. Failures are
        reported through ``_safe`` instead of being silently discarded.
        """
        span = self._span
        if span is not None:
            _safe(
                lambda: span.set_attribute(
                    "gen_ai.tool.call.arguments", json.dumps(kwargs, default=str)[:8192]
                ),
                site="tool.set_arguments",
            )

    def _set_result(self, result: Any) -> None:
        """Set tool call result (opt-in).

        The result is JSON-encoded (values JSON cannot encode are passed
        through ``str``) and truncated to 8192 characters. Failures are
        reported through ``_safe`` instead of being silently discarded.
        """
        span = self._span
        if span is not None:
            _safe(
                lambda: span.set_attribute(
                    "gen_ai.tool.call.result", json.dumps(result, default=str)[:8192]
                ),
                site="tool.set_result",
            )

    def __enter__(self) -> "_ToolContext":
        self._start_span()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Returns None, so an exception raised inside the scope propagates.
        self._end_span(exc_val)

    async def __aenter__(self) -> "_ToolContext":
        self._start_span()
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Returns None, so an exception raised inside the scope propagates.
        self._end_span(exc_val)
|