struct-sdk 0.2.5__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: struct-sdk
3
- Version: 0.2.5
3
+ Version: 0.2.8
4
4
  Summary: Struct agent observability SDK — auto-instruments AI agent frameworks with OpenTelemetry
5
5
  Project-URL: Homepage, https://struct.ai
6
6
  Project-URL: Documentation, https://struct.ai/docs
@@ -87,16 +87,18 @@ struct.init(
87
87
  import anthropic
88
88
  client = anthropic.AsyncAnthropic()
89
89
 
90
+ # Decorate each tool — auto-captures arguments + result + tool_call_id.
91
+ @struct.tool()
92
+ async def search(query: str):
93
+ ...
94
+
90
95
  async with struct.agent(name="checkout"):
91
96
  msg = await client.messages.create(
92
97
  model="claude-3-5-sonnet-20241022",
93
98
  max_tokens=1024,
94
99
  messages=[{"role": "user", "content": "plan my checkout flow"}],
95
100
  )
96
-
97
- # tool_call_id is auto-filled from the preceding Anthropic response
98
- async with struct.tool(name="search"):
99
- result = await search(msg)
101
+ result = await search(query="...")
100
102
  ```
101
103
 
102
104
  ## What gets traced
@@ -207,6 +209,14 @@ struct.init(ingest_key="pk-...", service_name="checkout-agent")
207
209
  import anthropic
208
210
  client = anthropic.AsyncAnthropic()
209
211
 
212
+ # Recommended: define each tool as a function and DECORATE it. The decorator
213
+ # auto-captures the tool's arguments + result on the execute_tool span and
214
+ # auto-fills tool_call_id from the preceding Anthropic response — no manual
215
+ # bookkeeping.
216
+ @struct.tool()
217
+ async def search(query: str):
218
+ ...
219
+
210
220
  # Required: wrap the agent loop yourself.
211
221
  async with struct.agent(name="checkout"):
212
222
  msg = await client.messages.create(
@@ -214,16 +224,60 @@ async with struct.agent(name="checkout"):
214
224
  max_tokens=1024,
215
225
  messages=[...],
216
226
  )
227
+ # Dispatching a decorated tool inside the agent emits a fully-populated
228
+ # execute_tool span (name, id, arguments, result):
229
+ result = await search(query="...")
230
+ ```
217
231
 
218
- # Required: wrap each tool execution.
219
- # tool_call_id is auto-filled from the preceding Anthropic response.
220
- async with struct.tool(name="search"):
221
- result = await search(...)
232
+ For **dynamic dispatch** (the LLM picks a tool from a registry at runtime),
233
+ apply the decorator at runtime — still automatic, just bind the name when you
234
+ wrap the callable:
235
+
236
+ ```python
237
+ registry = {t.name: struct.tool(name=t.name)(t.execute) for t in tools}
238
+ result = await registry[block.name](**block.input) # arguments + result captured
222
239
  ```
223
240
 
241
+ > `struct.tool()` can also be used as a context manager
242
+ > (`async with struct.tool(name=...): ...`) to instrument an arbitrary block of
243
+ > code as a tool span. That form is a **manual escape hatch** — it does NOT
244
+ > auto-capture arguments/result (a `with` block can't see the body's return
245
+ > value), so prefer the decorator for actual tool calls. See
246
+ > [Parallel tool calls](#parallel-tool-calls--pass-tool_call_id-explicitly) for
247
+ > the one runtime value (`tool_call_id`) you must supply under concurrency.
248
+
224
249
  `anthropic.Anthropic`, `anthropic.AsyncAnthropic`, and the bedrock/vertex
225
250
  clients are all auto-instrumented for chat spans.
226
251
 
252
+ #### Parallel tool calls — pass `tool_call_id` explicitly
253
+
254
+ When you execute an assistant turn's tool calls **sequentially** — one
255
+ `await` at a time, in the order the `tool_use` blocks appear — `struct.tool()`
256
+ auto-fills `gen_ai.tool.call.id` by matching each span to the next pending
257
+ `tool_use` of the same tool name. Nothing extra to do.
258
+
259
+ When you execute them **concurrently** (e.g. `asyncio.gather`), that
260
+ name-and-order matching is ambiguous: two `struct.tool(name="search")` spans
261
+ can start in any order, so the auto-fill may attach the wrong id (and thus the
262
+ wrong arguments/result) to a call. In that case **pass `tool_call_id`
263
+ explicitly** from the originating `tool_use` block — an explicit id always
264
+ overrides the auto-linkage:
265
+
266
+ ```python
267
+ async def run_one(block):
268
+ # The id from THIS block overrides the name/order auto-fill.
269
+ async with struct.tool(name=block.name, tool_call_id=block.id):
270
+ return await dispatch(block.name, **block.input)
271
+
272
+ # Concurrent execution — each tool span still carries the correct id.
273
+ results = await asyncio.gather(*[run_one(b) for b in tool_use_blocks])
274
+ ```
275
+
276
+ Rule of thumb: **serial tool execution → automatic; concurrent tool execution
277
+ → provide `tool_call_id=` yourself.** (Auto-instrumented frameworks such as
278
+ LangChain read the id from the framework's `ToolCall`, so this only applies
279
+ when you drive the tool loop directly against an LLM SDK.)
280
+
227
281
  #### LangChain `BaseChatModel` (no agent/graph)
228
282
 
229
283
  If you call `ChatAnthropic.invoke(...)` (or any other `BaseChatModel`)
@@ -41,16 +41,18 @@ struct.init(
41
41
  import anthropic
42
42
  client = anthropic.AsyncAnthropic()
43
43
 
44
+ # Decorate each tool — auto-captures arguments + result + tool_call_id.
45
+ @struct.tool()
46
+ async def search(query: str):
47
+ ...
48
+
44
49
  async with struct.agent(name="checkout"):
45
50
  msg = await client.messages.create(
46
51
  model="claude-3-5-sonnet-20241022",
47
52
  max_tokens=1024,
48
53
  messages=[{"role": "user", "content": "plan my checkout flow"}],
49
54
  )
50
-
51
- # tool_call_id is auto-filled from the preceding Anthropic response
52
- async with struct.tool(name="search"):
53
- result = await search(msg)
55
+ result = await search(query="...")
54
56
  ```
55
57
 
56
58
  ## What gets traced
@@ -161,6 +163,14 @@ struct.init(ingest_key="pk-...", service_name="checkout-agent")
161
163
  import anthropic
162
164
  client = anthropic.AsyncAnthropic()
163
165
 
166
+ # Recommended: define each tool as a function and DECORATE it. The decorator
167
+ # auto-captures the tool's arguments + result on the execute_tool span and
168
+ # auto-fills tool_call_id from the preceding Anthropic response — no manual
169
+ # bookkeeping.
170
+ @struct.tool()
171
+ async def search(query: str):
172
+ ...
173
+
164
174
  # Required: wrap the agent loop yourself.
165
175
  async with struct.agent(name="checkout"):
166
176
  msg = await client.messages.create(
@@ -168,16 +178,60 @@ async with struct.agent(name="checkout"):
168
178
  max_tokens=1024,
169
179
  messages=[...],
170
180
  )
181
+ # Dispatching a decorated tool inside the agent emits a fully-populated
182
+ # execute_tool span (name, id, arguments, result):
183
+ result = await search(query="...")
184
+ ```
171
185
 
172
- # Required: wrap each tool execution.
173
- # tool_call_id is auto-filled from the preceding Anthropic response.
174
- async with struct.tool(name="search"):
175
- result = await search(...)
186
+ For **dynamic dispatch** (the LLM picks a tool from a registry at runtime),
187
+ apply the decorator at runtime — still automatic, just bind the name when you
188
+ wrap the callable:
189
+
190
+ ```python
191
+ registry = {t.name: struct.tool(name=t.name)(t.execute) for t in tools}
192
+ result = await registry[block.name](**block.input) # arguments + result captured
176
193
  ```
177
194
 
195
+ > `struct.tool()` can also be used as a context manager
196
+ > (`async with struct.tool(name=...): ...`) to instrument an arbitrary block of
197
+ > code as a tool span. That form is a **manual escape hatch** — it does NOT
198
+ > auto-capture arguments/result (a `with` block can't see the body's return
199
+ > value), so prefer the decorator for actual tool calls. See
200
+ > [Parallel tool calls](#parallel-tool-calls--pass-tool_call_id-explicitly) for
201
+ > the one runtime value (`tool_call_id`) you must supply under concurrency.
202
+
178
203
  `anthropic.Anthropic`, `anthropic.AsyncAnthropic`, and the bedrock/vertex
179
204
  clients are all auto-instrumented for chat spans.
180
205
 
206
+ #### Parallel tool calls — pass `tool_call_id` explicitly
207
+
208
+ When you execute an assistant turn's tool calls **sequentially** — one
209
+ `await` at a time, in the order the `tool_use` blocks appear — `struct.tool()`
210
+ auto-fills `gen_ai.tool.call.id` by matching each span to the next pending
211
+ `tool_use` of the same tool name. Nothing extra to do.
212
+
213
+ When you execute them **concurrently** (e.g. `asyncio.gather`), that
214
+ name-and-order matching is ambiguous: two `struct.tool(name="search")` spans
215
+ can start in any order, so the auto-fill may attach the wrong id (and thus the
216
+ wrong arguments/result) to a call. In that case **pass `tool_call_id`
217
+ explicitly** from the originating `tool_use` block — an explicit id always
218
+ overrides the auto-linkage:
219
+
220
+ ```python
221
+ async def run_one(block):
222
+ # The id from THIS block overrides the name/order auto-fill.
223
+ async with struct.tool(name=block.name, tool_call_id=block.id):
224
+ return await dispatch(block.name, **block.input)
225
+
226
+ # Concurrent execution — each tool span still carries the correct id.
227
+ results = await asyncio.gather(*[run_one(b) for b in tool_use_blocks])
228
+ ```
229
+
230
+ Rule of thumb: **serial tool execution → automatic; concurrent tool execution
231
+ → provide `tool_call_id=` yourself.** (Auto-instrumented frameworks such as
232
+ LangChain read the id from the framework's `ToolCall`, so this only applies
233
+ when you drive the tool loop directly against an LLM SDK.)
234
+
181
235
  #### LangChain `BaseChatModel` (no agent/graph)
182
236
 
183
237
  If you call `ChatAnthropic.invoke(...)` (or any other `BaseChatModel`)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "struct-sdk"
7
- version = "0.2.5"
7
+ version = "0.2.8"
8
8
  description = "Struct agent observability SDK — auto-instruments AI agent frameworks with OpenTelemetry"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -72,6 +72,11 @@ override-dependencies = [
72
72
  "starlette>=1.3.1",
73
73
  ]
74
74
 
75
+ [tool.pytest.ini_options]
76
+ markers = [
77
+ "integration: real-model integration tests (requires ANTHROPIC_API_KEY / OPENAI_API_KEY; skipped in default CI)",
78
+ ]
79
+
75
80
  [tool.mypy]
76
81
  [[tool.mypy.overrides]]
77
82
  module = ["anthropic", "anthropic.*", "claude_agent_sdk", "claude_agent_sdk.*", "langchain_core", "langchain_core.*", "langchain", "langchain.*", "langgraph", "langgraph.*"]
@@ -119,44 +119,23 @@ def _create_common(
119
119
 
120
120
  Two paths:
121
121
 
122
- 1. **Enrich** — when ``_current_langchain_chat_span`` is set, this call
123
- is happening underneath a LangChain handler that's already created
124
- a ``chat <model>`` span. We do NOT create our own span (that's the
125
- duplicate-Anthropic-spans issue). Instead we attach HTTP-layer
126
- attrs (the real provider msg_id, exact response_model, usage,
127
- finish_reasons, error info on failure) onto the langchain span.
128
- Pre-call attrs are skipped — LangChain already set them.
129
-
130
- 2. **Standalone** — no LangChain in the picture. Create our own span
131
- and set the full attribute set as before.
122
+ 1. **Suppressed** — when ``is_genai_suppressed()`` is True, a framework
123
+ layer (e.g. the LangChain callback handler) already owns a ``chat
124
+ <model>`` span for this call. We run the original call to completion
125
+ and emit NO span — avoiding the duplicate-Anthropic-spans problem.
126
+
127
+ 2. **Standalone** — no framework suppression in the picture. Create our
128
+ own span and set the full attribute set as before.
132
129
  """
133
- from struct_sdk.core import _safe, _current_langchain_chat_span
130
+ from struct_sdk.core import _safe, is_genai_suppressed
134
131
 
135
132
  model = kwargs.get("model", "unknown")
136
133
 
137
- # Enrich path: a LangChain handler upstream already created a ``chat
138
- # <model>`` span for this call. Attach Anthropic HTTP-layer detail to it
139
- # without creating a duplicate span.
140
- host_span = _current_langchain_chat_span.get(None)
141
- if host_span is not None:
142
- try:
143
- result = yield f, args, kwargs
144
- except Exception as e:
145
- # Capture the type name OUTSIDE the lambda — ``except X as e``
146
- # binds ``e`` only for the duration of the except block, but
147
- # ``_safe`` is opaque to static analysis (ruff flags F841 +
148
- # F821 thinking the lambda outlives the binding). Snapshotting
149
- # to a local makes the closure capture trivially correct.
150
- err_type = type(e).__name__
151
- _safe(
152
- lambda: host_span.set_attribute("error.type", err_type),
153
- site="anthropic.create.enrich.error_type",
154
- )
155
- raise
156
- _safe(
157
- lambda: _set_response_attrs(host_span, sdk, model, result, otel_logger),
158
- site="anthropic.create.enrich.set_response_attrs",
159
- )
134
+ # Suppression path: a framework layer (LangChain handler) already owns the
135
+ # ``chat <model>`` span for this call. Run the original call to completion
136
+ # and emit NO span — the framework's span covers this invocation.
137
+ if is_genai_suppressed():
138
+ result = yield f, args, kwargs
160
139
  return result # noqa: B901
161
140
 
162
141
  with tracer.start_as_current_span(
@@ -326,14 +305,12 @@ def _wrap_stream(original: Any, tracer: trace.Tracer, sdk: StructSDK, otel_logge
326
305
  if is_async:
327
306
  @functools.wraps(original)
328
307
  async def wrapper(*args: Any, **kwargs: Any) -> Any:
329
- from struct_sdk.core import _safe, _current_session_id, _current_langchain_chat_span
308
+ from struct_sdk.core import _safe, _current_session_id, is_genai_suppressed
330
309
  model = kwargs.get("model", "unknown")
331
310
 
332
- # Enrich path: a LangChain handler upstream already owns a chat
333
- # span for this call. Don't create a duplicate; just pass through.
334
- # (Stream end-handling will set response attrs on the host span
335
- # when the LangChain handler's on_llm_end fires.)
336
- if _current_langchain_chat_span.get(None) is not None:
311
+ # Suppression path: a framework layer (LangChain handler) already
312
+ # owns the chat span. Don't create a duplicate; pass through.
313
+ if is_genai_suppressed():
337
314
  return await original(*args, **kwargs) if _is_coroutine(original) else original(*args, **kwargs)
338
315
 
339
316
  span: Optional[trace.Span] = None
@@ -378,12 +355,12 @@ def _wrap_stream(original: Any, tracer: trace.Tracer, sdk: StructSDK, otel_logge
378
355
  else:
379
356
  @functools.wraps(original)
380
357
  def wrapper(*args: Any, **kwargs: Any) -> Any:
381
- from struct_sdk.core import _safe, _current_session_id, _current_langchain_chat_span
358
+ from struct_sdk.core import _safe, _current_session_id, is_genai_suppressed
382
359
  model = kwargs.get("model", "unknown")
383
360
 
384
- # Enrich path: a LangChain handler upstream already owns a chat
385
- # span for this call. Don't create a duplicate; just pass through.
386
- if _current_langchain_chat_span.get(None) is not None:
361
+ # Suppression path: a framework layer (LangChain handler) already
362
+ # owns the chat span. Don't create a duplicate; pass through.
363
+ if is_genai_suppressed():
387
364
  return original(*args, **kwargs)
388
365
 
389
366
  span: Optional[trace.Span] = None
@@ -552,7 +529,7 @@ def _emit_message_events(
552
529
  etc.) — human-readable signal.
553
530
  - ``attributes['body']`` (log record attribute): the JSON-serialised
554
531
  structured payload ``{"role": ..., "parts": [...]}``.
555
- - Other attributes: ``event.name``, ``gen_ai.system``,
532
+ - Other attributes: ``event.name``, ``gen_ai.provider.name``,
556
533
  ``gen_ai.message.index``, ``gen_ai.conversation.id``.
557
534
 
558
535
  ``span`` — if provided, its span context is used for the LogRecord's
@@ -585,7 +562,7 @@ def _emit_message_events(
585
562
  attrs: dict[str, Any] = {
586
563
  "event.name": event_name,
587
564
  "body": payload,
588
- "gen_ai.system": "anthropic",
565
+ "gen_ai.provider.name": "anthropic",
589
566
  "gen_ai.message.index": msg_index,
590
567
  }
591
568
  if session_id:
@@ -618,7 +595,7 @@ def _emit_message_events(
618
595
  attrs = {
619
596
  "event.name": event_name,
620
597
  "body": payload,
621
- "gen_ai.system": "anthropic",
598
+ "gen_ai.provider.name": "anthropic",
622
599
  "gen_ai.message.index": msg_index,
623
600
  }
624
601
  if session_id:
@@ -696,7 +673,7 @@ def _emit_choice_event(
696
673
  attrs: dict[str, Any] = {
697
674
  "event.name": event_name,
698
675
  "body": payload,
699
- "gen_ai.system": "anthropic",
676
+ "gen_ai.provider.name": "anthropic",
700
677
  }
701
678
  if session_id:
702
679
  attrs["gen_ai.conversation.id"] = session_id
@@ -9,6 +9,7 @@ OTel GenAI Semantic Conventions v1.37+ compliant.
9
9
  import asyncio
10
10
  import atexit
11
11
  import contextvars
12
+ from contextvars import Token
12
13
  import functools
13
14
  import json
14
15
  import logging
@@ -16,8 +17,12 @@ import threading
16
17
  import uuid
17
18
  from enum import Enum
18
19
  from typing import Any, Callable, Optional
20
+ from importlib.metadata import version as _pkg_version
19
21
 
22
+ from opentelemetry import context as _otel_context
20
23
  from opentelemetry import trace
24
+ from opentelemetry.context import create_key
25
+ from opentelemetry.context.context import Context as _OtelContext
21
26
  from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
22
27
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
23
28
  from opentelemetry.sdk._logs import LoggerProvider
@@ -29,6 +34,11 @@ from opentelemetry.trace import StatusCode
29
34
 
30
35
  logger = logging.getLogger("struct_sdk")
31
36
 
37
+ try:
38
+ _SDK_VERSION = _pkg_version("struct-sdk")
39
+ except Exception: # noqa: BLE001
40
+ _SDK_VERSION = "0.0.0+local"
41
+
32
42
  DEFAULT_ENDPOINT = "https://ingest.struct.ai"
33
43
 
34
44
 
@@ -46,27 +56,11 @@ class ContentCaptureMode(str, Enum):
46
56
  _current_session_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar("_current_session_id", default=None)
47
57
  _current_conversation_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar("_current_conversation_id", default=None)
48
58
  _current_agent_span: contextvars.ContextVar[Optional[trace.Span]] = contextvars.ContextVar("_current_agent_span", default=None)
49
-
50
- # When the LangChain handler is creating a ``chat <model>`` span for an LLM
51
- # call that LangChain will dispatch through a provider SDK (anthropic,
52
- # openai, etc.) that we ALSO instrument, set this contextvar to the
53
- # in-progress langchain chat span. Provider-SDK instrumentations check it
54
- # at the top of their ``messages.create`` / equivalent wrapper:
55
- #
56
- # - If set: enrich the existing langchain span with HTTP-layer attributes
57
- # (real provider response.id, exact retries, rate-limit headers, etc.)
58
- # and SKIP creating their own span — there's already a span for this
59
- # call, we just want to attach more data to it.
60
- #
61
- # - If not set: this is a standalone provider-SDK invocation (no LangChain
62
- # in the picture); the provider instrumentation creates its own span as
63
- # usual.
64
- #
65
- # This eliminates the duplicate-span / orphan-Anthropic-span problem while
66
- # preserving both layers' data on a single span.
67
- _current_langchain_chat_span: contextvars.ContextVar[Optional[trace.Span]] = contextvars.ContextVar(
68
- "_current_langchain_chat_span", default=None
69
- )
59
+ # Set by struct.agent() when it creates the top-level invoke_agent span. The
60
+ # LangChain handler checks this in on_chain_start: when a manual agent already
61
+ # owns the scope (parent_run_id is None), the handler suppresses its own
62
+ # invoke_agent span and instead parents its children to this span.
63
+ _manual_agent_active: contextvars.ContextVar[Optional[trace.Span]] = contextvars.ContextVar("_manual_agent_active", default=None)
70
64
 
71
65
  # Pending tool_use ids keyed by tool name (FIFO per name).
72
66
  # Populated by the Anthropic monkey-patch when a chat response arrives with
@@ -77,6 +71,53 @@ _current_langchain_chat_span: contextvars.ContextVar[Optional[trace.Span]] = con
77
71
  # working unchanged (explicit override wins).
78
72
  _pending_tool_calls: contextvars.ContextVar[Optional[dict[str, list[str]]]] = contextvars.ContextVar("_pending_tool_calls", default=None)
79
73
 
74
+ # ---------------------------------------------------------------------------
75
+ # OTel GenAI suppression key
76
+ # ---------------------------------------------------------------------------
77
+ #
78
+ # When the LangChain callback handler owns a ``chat <model>`` span for an LLM
79
+ # call that also flows through a provider SDK (anthropic, etc.) that we
80
+ # instrument, the handler attaches this key to the OTel context for the
81
+ # duration of the call. The provider-SDK wrapper checks ``is_genai_suppressed()``
82
+ # at entry and, when True, runs the original call to completion WITHOUT creating
83
+ # a duplicate span. On exit the handler detaches the key via ``reset_genai()``.
84
+ #
85
+ # This replaces the fragile ``_current_langchain_chat_span`` enrich contextvar
86
+ # (which required the provider wrapper to set attributes on a span it didn't own,
87
+ # and whose contextvar token could raise ``ValueError: Token created in a different
88
+ # Context`` when detached from a different async context).
89
+ _GENAI_SUPPRESS_KEY = create_key("struct.suppress_genai")
90
+
91
+
92
+ def is_genai_suppressed() -> bool:
93
+ """Return True if a framework layer (e.g. LangChain handler) already owns
94
+ the chat span for the current call — provider SDK patches should skip
95
+ creating their own span."""
96
+ return bool(_otel_context.get_value(_GENAI_SUPPRESS_KEY))
97
+
98
+
99
+ def suppress_genai_token() -> Token[_OtelContext]:
100
+ """Attach the suppression key to the current OTel context.
101
+
102
+ Returns an opaque token that MUST be passed to ``reset_genai()`` when the
103
+ suppression window ends. Follows the same attach/detach contract as
104
+ ``opentelemetry.context.attach`` / ``detach``.
105
+ """
106
+ return _otel_context.attach(_otel_context.set_value(_GENAI_SUPPRESS_KEY, True))
107
+
108
+
109
+ def reset_genai(token: Token[_OtelContext]) -> None:
110
+ """Detach the suppression key token.
111
+
112
+ Tolerant of cross-context detach (e.g. async tasks that detach from a
113
+ different context than the one that attached): the exception is swallowed
114
+ so instrumentation never fails the host call.
115
+ """
116
+ try:
117
+ _otel_context.detach(token)
118
+ except Exception: # noqa: BLE001 — cross-context detach is a no-op, never fail the host
119
+ pass
120
+
80
121
  # Registry of patched integrations — prevents double-patching
81
122
  _patched_integrations: set[str] = set()
82
123
 
@@ -264,7 +305,7 @@ class StructSDK:
264
305
  """Get an OTel tracer from our isolated provider."""
265
306
  if self._tracer_provider is None:
266
307
  raise RuntimeError("Call struct.init() before using the SDK")
267
- return self._tracer_provider.get_tracer(name)
308
+ return self._tracer_provider.get_tracer(name, _SDK_VERSION)
268
309
 
269
310
  def get_logger(self, name: str = "struct-sdk") -> Any:
270
311
  """Get an OTel logger from our isolated provider (for gen_ai log events)."""
@@ -439,7 +480,13 @@ class _AgentContext:
439
480
  def __init__(self, sdk: StructSDK, *, name: Optional[str] = None, session_id: Optional[str] = None, agent_id: Optional[str] = None, version: Optional[str] = None, metadata: Optional[dict[str, str]] = None):
440
481
  self._sdk = sdk
441
482
  self._name = name
442
- self._session_id = session_id or str(uuid.uuid4())
483
+ # _explicit_session_id is the CALLER-SUPPLIED value (may be None).
484
+ # The resolved self._session_id is computed in _start_span after we can
485
+ # read the ambient _current_session_id — this lets us inherit the
486
+ # enclosing agent's id when the caller did not supply one, and defer the
487
+ # "mint a fresh UUID" case until start-span time.
488
+ self._explicit_session_id: Optional[str] = session_id
489
+ self._session_id: str = session_id or "" # placeholder; overwritten in _start_span
443
490
  self._agent_id = agent_id
444
491
  self._version = version
445
492
  self._metadata = metadata
@@ -449,12 +496,16 @@ class _AgentContext:
449
496
  self._conversation_token: Optional[contextvars.Token[Optional[str]]] = None
450
497
  self._agent_span_token: Optional[contextvars.Token[Optional[trace.Span]]] = None
451
498
  self._pending_tool_token: Optional[contextvars.Token[Optional[dict[str, list[str]]]]] = None
499
+ self._manual_token: Optional[contextvars.Token[Optional[trace.Span]]] = None
452
500
 
453
501
  def __call__(self, fn: Any) -> Any:
454
502
  """Use as decorator."""
455
503
  span_name = self._name or fn.__name__
456
504
  sdk = self._sdk
457
- session_id = self._session_id
505
+ # Preserve the CALLER-SUPPLIED value so each invocation resolves the
506
+ # ambient session fresh (rather than baking in the UUID minted at
507
+ # decoration time).
508
+ explicit_session_id = self._explicit_session_id
458
509
  agent_id = self._agent_id
459
510
  version = self._version
460
511
  metadata = self._metadata
@@ -462,13 +513,13 @@ class _AgentContext:
462
513
  if asyncio.iscoroutinefunction(fn):
463
514
  @functools.wraps(fn)
464
515
  async def wrapper(*args: Any, **kwargs: Any) -> Any:
465
- async with _AgentContext(sdk, name=span_name, session_id=session_id, agent_id=agent_id, version=version, metadata=metadata):
516
+ async with _AgentContext(sdk, name=span_name, session_id=explicit_session_id, agent_id=agent_id, version=version, metadata=metadata):
466
517
  return await fn(*args, **kwargs)
467
518
  return wrapper
468
519
  else:
469
520
  @functools.wraps(fn)
470
521
  def wrapper(*args: Any, **kwargs: Any) -> Any:
471
- with _AgentContext(sdk, name=span_name, session_id=session_id, agent_id=agent_id, version=version, metadata=metadata):
522
+ with _AgentContext(sdk, name=span_name, session_id=explicit_session_id, agent_id=agent_id, version=version, metadata=metadata):
472
523
  return fn(*args, **kwargs)
473
524
  return wrapper
474
525
 
@@ -481,17 +532,92 @@ class _AgentContext:
481
532
  agent_name = self._name or "agent"
482
533
  tracer = self._sdk.get_tracer("struct-sdk")
483
534
 
484
- # Capture the outer session id BEFORE overwriting the contextvar so we
485
- # can link nested agents (subagents) back to the agent that spawned them.
486
- # Subagent pattern: an outer @struct.agent() wraps a function; that function
487
- # calls a tool that itself enters another @struct.agent() scope. The inner
488
- # scope's struct.agent.parent_session_id points to the outer session_id.
489
- parent_session_id = _current_session_id.get(None)
535
+ # Capture the enclosing agent's session id and span BEFORE we overwrite
536
+ # the contextvars. These are used to:
537
+ # 1. Detect whether this agent is a break-out (explicit, different id).
538
+ # 2. Attach a spawned-by OTel Link if it is.
539
+ # 3. Set struct.agent.parent_session_id for the UI affordance.
540
+ enclosing_session_id = _current_session_id.get(None)
541
+ enclosing_agent_span = _current_agent_span.get(None)
542
+
543
+ # ── Resolve session_id (REVISION R1 grouping model) ──────────────
544
+ # Resolution order:
545
+ # explicit caller arg > ambient (enclosing agent) > fresh UUID
546
+ # A caller-supplied None means "inherit"; a caller-supplied value
547
+ # that equals the enclosing id also means "inline".
548
+ if self._explicit_session_id is not None:
549
+ self._session_id = self._explicit_session_id
550
+ elif enclosing_session_id is not None:
551
+ # No explicit id → inherit the enclosing agent's session (inline).
552
+ self._session_id = enclosing_session_id
553
+ else:
554
+ # No ambient context → mint a fresh id for this root agent.
555
+ self._session_id = str(uuid.uuid4())
556
+
557
+ # ── Break-out detection ──────────────────────────────────────────
558
+ # Condition: caller supplied an EXPLICIT id AND it differs from the
559
+ # enclosing agent's id AND there IS an enclosing agent span.
560
+ # In that case this agent starts a new root trace (no OTel parent)
561
+ # and carries a Link back to the enclosing span.
562
+ break_out = (
563
+ self._explicit_session_id is not None
564
+ and enclosing_session_id is not None
565
+ and self._explicit_session_id != enclosing_session_id
566
+ and enclosing_agent_span is not None
567
+ )
490
568
 
491
- self._span = tracer.start_span(
492
- f"invoke_agent {agent_name}",
493
- kind=trace.SpanKind.INTERNAL,
569
+ # ── Foreign-context guard (top-level agent run) ──────────────────
570
+ # A top-level agent run (no enclosing STRUCT agent) must NOT inherit
571
+ # whatever OTel span happens to be active. That active span may be a
572
+ # FOREIGN span leaked across an async boundary — e.g. a tool span from
573
+ # a PRIOR turn that a Temporal/queue context propagator carried into
574
+ # this later wake-up. Inheriting it would mis-parent this brand-new
575
+ # turn UNDER unrelated, long-finished work (it would show up nested in
576
+ # that old tool call instead of as its own turn).
577
+ #
578
+ # So: when there is no enclosing Struct agent but some other span is
579
+ # active, start a FRESH ROOT trace and record that active span as a
580
+ # causal OTel Link (preserving "this turn was triggered by that") —
581
+ # never as the parent.
582
+ #
583
+ # Genuine in-run sub-agents are unaffected: they always run with an
584
+ # enclosing Struct agent in scope (``enclosing_session_id`` set, because
585
+ # the contextvar is live in the same task), so they fall through to the
586
+ # inherit path below and stay nested in the same trace.
587
+ active_span_context = trace.get_current_span().get_span_context()
588
+ foreign_root = (
589
+ not break_out
590
+ and enclosing_session_id is None
591
+ and active_span_context.is_valid
494
592
  )
593
+
594
+ if break_out:
595
+ # Start a fresh root span: pass an empty context=_OtelContext() to create
596
+ # a span with no parent (new TraceId) while keeping the current
597
+ # context vars readable for the span's children.
598
+ assert enclosing_agent_span is not None # narrowing for mypy
599
+ links = [trace.Link(enclosing_agent_span.get_span_context())]
600
+ self._span = tracer.start_span(
601
+ f"invoke_agent {agent_name}",
602
+ kind=trace.SpanKind.INTERNAL,
603
+ context=_OtelContext(), # empty context → new root trace
604
+ links=links,
605
+ )
606
+ elif foreign_root:
607
+ # New root trace; the leaked/foreign active span becomes a Link
608
+ # (causal origin), NOT this turn's parent.
609
+ self._span = tracer.start_span(
610
+ f"invoke_agent {agent_name}",
611
+ kind=trace.SpanKind.INTERNAL,
612
+ context=_OtelContext(), # empty context → new root trace
613
+ links=[trace.Link(active_span_context)],
614
+ )
615
+ else:
616
+ self._span = tracer.start_span(
617
+ f"invoke_agent {agent_name}",
618
+ kind=trace.SpanKind.INTERNAL,
619
+ )
620
+
495
621
  # Required
496
622
  self._span.set_attribute("gen_ai.operation.name", "invoke_agent")
497
623
  self._span.set_attribute("gen_ai.provider.name", "struct")
@@ -508,8 +634,25 @@ class _AgentContext:
508
634
  # redundant session.id.
509
635
  self._span.set_attribute("gen_ai.conversation.id", self._session_id)
510
636
  # Link to the outer agent's session, if we're nested under one.
511
- if parent_session_id and parent_session_id != self._session_id:
512
- self._span.set_attribute("struct.agent.parent_session_id", parent_session_id)
637
+ # For break-out agents: the parent is the enclosing agent.
638
+ # For inline nested agents: parent is the same session (same id).
639
+ # ``struct.agent.parent_session_id`` is a SPAWNED-BY marker — set it
640
+ # ONLY when this agent broke out into its own root session. For an
641
+ # inline subagent (same session as the enclosing agent) the parent
642
+ # relationship is already encoded by the OTel span tree
643
+ # (ParentSpanId), so stamping parent_session_id = own-session-id would
644
+ # be self-referential noise. Structure comes from the tree/Link, not
645
+ # this attr (Link-canonical decision).
646
+ if break_out:
647
+ # parent_session_id is the spawner's session (enclosing_session_id).
648
+ # break_out=True implies enclosing_session_id is not None (see condition above).
649
+ assert enclosing_session_id is not None
650
+ self._span.set_attribute("struct.agent.parent_session_id", enclosing_session_id)
651
+ elif enclosing_session_id is not None and enclosing_session_id != self._session_id:
652
+ # Legacy path: enclosing session exists with a DIFFERENT id but no
653
+ # enclosing span (break_out was False). Records the cross-session
654
+ # parent. Same-session inline subagents fall through with nothing.
655
+ self._span.set_attribute("struct.agent.parent_session_id", enclosing_session_id)
513
656
  # Custom metadata
514
657
  if self._metadata:
515
658
  for key, value in self._metadata.items():
@@ -529,6 +672,11 @@ class _AgentContext:
529
672
  # Fresh pending-tool-calls dict scoped to this agent run, so tool_use
530
673
  # ids from an outer agent cannot leak in or out.
531
674
  self._pending_tool_token = _pending_tool_calls.set({})
675
+ # Signal to the LangChain handler that a manual struct.agent() owns
676
+ # this scope. The handler will suppress its own invoke_agent span for
677
+ # the same top-level chain (parent_run_id is None) and parent its
678
+ # children under this span instead.
679
+ self._manual_token = _manual_agent_active.set(self._span)
532
680
  started = True
533
681
 
534
682
  _safe(body, site="agent.start_span")
@@ -537,6 +685,11 @@ class _AgentContext:
537
685
  # _end_span see a clean "no telemetry" view: tokens are reset
538
686
  # best-effort, the OTel context stack is popped if it was pushed,
539
687
  # the span is ended if it was started, and references are dropped.
688
+ manual_tok = self._manual_token
689
+ if manual_tok is not None:
690
+ _safe(lambda: _manual_agent_active.reset(manual_tok),
691
+ site="agent.start_span.reset_manual")
692
+ self._manual_token = None
540
693
  pending_tok = self._pending_tool_token
541
694
  if pending_tok is not None:
542
695
  _safe(lambda: _pending_tool_calls.reset(pending_tok),
@@ -573,6 +726,10 @@ class _AgentContext:
573
726
  def _end_span(self, exc_val: Any = None) -> None:
574
727
  # Contextvar resets must always run — they're cheap, can't fault on the
575
728
  # span, and leaving them set leaks session context into the caller.
729
+ manual_tok = self._manual_token
730
+ if manual_tok is not None:
731
+ _safe(lambda: _manual_agent_active.reset(manual_tok),
732
+ site="agent.exit.manual_reset")
576
733
  pending_tok = self._pending_tool_token
577
734
  if pending_tok is not None:
578
735
  _safe(lambda: _pending_tool_calls.reset(pending_tok),
@@ -107,6 +107,7 @@ def patch(sdk: StructSDK) -> None:
107
107
  inheritable_callbacks = _inject_handler(
108
108
  inheritable_callbacks, _active_handler
109
109
  )
110
+ local_callbacks = _strip_struct(local_callbacks)
110
111
  return orig_func(
111
112
  cls,
112
113
  inheritable_callbacks,
@@ -153,6 +154,19 @@ def _build_handler(sdk: StructSDK) -> "StructCallbackHandler":
153
154
  )
154
155
 
155
156
 
157
def _strip_struct(cbs: Any) -> Any:
    """Drop every handler whose ``name`` is ``"struct"`` from a callbacks list.

    The configure wrapper calls this on the *local* callbacks right after our
    handler has been injected into the inheritable ones. Without it, a
    user-supplied ``config={"callbacks": [get_langchain_handler()]}`` would
    leave the handler in BOTH lists and every ``on_*`` callback would fire
    twice.

    Args:
        cbs: The local callbacks argument. Only a ``list`` is filtered.

    Returns:
        A new list without the ``"struct"``-named handlers when ``cbs`` is a
        list (the input list is not mutated); otherwise ``cbs`` itself,
        unchanged.
    """
    # Anything that is not a plain list (None, a manager object, ...) is
    # passed through untouched.
    if not isinstance(cbs, list):
        return cbs

    kept = []
    for handler in cbs:
        # Handlers with no ``name`` attribute compare as None and are kept.
        if getattr(handler, "name", None) != "struct":
            kept.append(handler)
    return kept
168
+
169
+
156
170
  def _inject_handler(existing: Any, handler: Optional["StructCallbackHandler"]) -> Any:
157
171
  """Merge our handler into the inheritable_handlers argument, with de-dup."""
158
172
  if handler is None:
@@ -308,6 +322,24 @@ _INTERNAL_RUN_NAME_PREFIXES = (
308
322
  _THREAD_KEYS: tuple[str, ...] = ("thread_id", "session_id", "conversation_id")
309
323
 
310
324
 
325
def _checkpoint_ns(metadata: Optional[dict[str, Any]]) -> Optional[str]:
    """Return the ``langgraph_checkpoint_ns`` value carried by *metadata*.

    LangGraph stamps a unique ``langgraph_checkpoint_ns`` (``tools:<uuid>``)
    on each tool-call branch, and the SAME value on the sub-agent graph that
    the tool triggers — even across parallel same-named tool calls. The
    handler uses it to re-parent a sub-agent's ``invoke_agent`` span under its
    triggering ``execute_tool`` span.

    Args:
        metadata: The LangChain ``metadata`` mapping for the run, or ``None``.

    Returns:
        The namespace when it is present as a non-empty string; ``None`` when
        ``metadata`` is not a dict, the key is missing, or the value is empty
        or not a string.
    """
    # Check the type, not just truthiness: LangChain normally passes a dict,
    # but a truthy non-dict would make ``.get`` raise. This runs inside the
    # callback handler in the customer's process, so it must never throw.
    if isinstance(metadata, dict):
        candidate = metadata.get("langgraph_checkpoint_ns")
        return candidate if isinstance(candidate, str) and candidate else None
    return None
341
+
342
+
311
343
  def _metadata_thread_id(metadata: Optional[dict[str, Any]]) -> Optional[str]:
312
344
  """Pull the conversation/thread id from a LangChain ``metadata`` dict.
313
345
 
@@ -536,25 +568,17 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
536
568
  in favour of the GenAI-spec name.
537
569
 
538
570
  For SUBAGENTS (an agent invoked from inside another's tool body) we
539
- deliberately assign a DIFFERENT ``conversation.id`` either the
540
- subagent's own ``thread_id`` if supplied, or a fresh UUID. The resulting
541
- subagent span is linked to the outer agent via our
542
- ``struct.agent.parent_session_id`` attribute (what powers "Spawned by"
543
- navigation in the UI). Without this split, subagent spans would collapse
544
- into the outer session and hide delegation.
545
-
546
- LangChain quirk (handled automatically): when ``agent.invoke(...)`` runs
547
- nested inside a parent call, LangChain's config-merge inherits the
548
- parent's ``metadata.thread_id`` onto the child — even if the child
549
- config supplied its own. We detect that by comparing against the
550
- nearest agent ancestor's session; if they match, treat as "inherited,
551
- not user-intended" and assign a fresh UUID.
571
+ INHERIT the parent's ``gen_ai.conversation.id`` so that the entire run
572
+ shares one id. If a subagent supplies its own ``thread_id`` in metadata,
573
+ that value is recorded as the non-grouping ``struct.agent.thread_id``
574
+ attribute — it is NOT used as the conversation grouping key. The
575
+ structural parent→child relationship is recorded via
576
+ ``struct.agent.parent_session_id``.
552
577
 
553
578
  End-user guidance:
554
579
  * Use thread_id per conversation; multi-turn chats reuse it.
555
- * For a subagent call, pass a DIFFERENT thread_id (or omit it and let
556
- LangGraph generate one). Subagents then surface as their own
557
- sessions in the UI, linked back via parent_session_id.
580
+ * A subagent's thread_id (if any) is preserved as ``struct.agent.thread_id``
581
+ and does not split the run into a new session.
558
582
  """
559
583
 
560
584
  name = "struct"
@@ -573,6 +597,13 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
573
597
  self._tracer = tracer
574
598
  self._logger = otel_logger
575
599
  self._runs: dict[str, _RunState] = {}
600
+ # LangChain agent-as-tool correlation: index live ``execute_tool`` spans
601
+ # by their ``langgraph_checkpoint_ns`` so a sub-agent graph triggered
602
+ # inside a tool (a SIBLING in the run tree, sharing that exact ns) can
603
+ # re-parent its ``invoke_agent`` span under the tool. ``_tool_ns_by_run``
604
+ # lets on_tool_end / on_tool_error remove the index entry.
605
+ self._tool_spans_by_ns: dict[str, trace.Span] = {}
606
+ self._tool_ns_by_run: dict[str, str] = {}
576
607
 
577
608
  # ── Chain / Agent ───────────────────────────────────────────────────────
578
609
 
@@ -616,9 +647,38 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
616
647
  session_id = self._resolve_agent_session_id(metadata, parent_agent_session_id)
617
648
  parent = self._resolve_parent(parent_key)
618
649
 
650
+ # Suppress twin invoke_agent: when struct.agent() already owns this top-level
651
+ # run (parent_key is None, manual ownership contextvar is set), record the
652
+ # run pointing at the manual span so descendants parent under it — but emit
653
+ # NO new invoke_agent span.
654
+ if parent_key is None:
655
+ from struct_sdk.core import _manual_agent_active, _current_session_id
656
+ manual_span = _manual_agent_active.get(None)
657
+ if manual_span is not None:
658
+ manual_session = _current_session_id.get(None) or session_id
659
+ self._runs[key] = _RunState(
660
+ span=manual_span,
661
+ effective_parent_span=manual_span,
662
+ session_id=manual_session,
663
+ nearest_agent_session_id=manual_session,
664
+ nearest_agent_span=manual_span,
665
+ kind="suppressed-twin",
666
+ )
667
+ return
668
+
619
669
  agent_name: str = "agent"
620
670
  span: Optional[trace.Span] = None
621
671
 
672
+ # LangChain agent-as-tool: a sub-agent graph runs as a SIBLING of its
673
+ # triggering execute_tool (parent_run_id points at the ToolNode, not the
674
+ # tool), so normal resolution would emit this invoke_agent as a sibling.
675
+ # But LangGraph stamps the tool branch and its sub-agent with the same
676
+ # ``langgraph_checkpoint_ns`` — unique even across parallel same-named
677
+ # calls — so re-parent under the matching live execute_tool span to nest
678
+ # natively (the UI's direct-tool-child path). Falls back to the resolved
679
+ # parent when there is no tool match (native struct-sdk / non-tool chains).
680
+ delegating_tool_span = self._tool_spans_by_ns.get(_checkpoint_ns(metadata) or "")
681
+
622
682
  def create_span() -> None:
623
683
  nonlocal span, agent_name
624
684
  agent_name = (
@@ -627,7 +687,8 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
627
687
  or (inputs.get("name") if isinstance(inputs, dict) else None)
628
688
  or "agent"
629
689
  )
630
- parent_ctx = trace.set_span_in_context(parent.span) if parent.span else None
690
+ ctx_span = delegating_tool_span or parent.span
691
+ parent_ctx = trace.set_span_in_context(ctx_span) if ctx_span else None
631
692
  span = self._tracer.start_span(
632
693
  f"invoke_agent {agent_name}",
633
694
  kind=trace.SpanKind.INTERNAL,
@@ -672,6 +733,13 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
672
733
  # existing "View sub-agent →" drill-in flow kicks in.
673
734
  if parent_agent_session_id:
674
735
  span.set_attribute("struct.agent.parent_session_id", parent_agent_session_id)
736
+ # If the caller supplied a local thread_id that differs from the
737
+ # inherited conversation id, preserve it as a non-grouping attribute.
738
+ # This lets downstream consumers see the subagent's own thread
739
+ # identity without splitting the run into separate sessions.
740
+ local_thread = _metadata_thread_id(metadata)
741
+ if local_thread and local_thread != session_id:
742
+ span.set_attribute("struct.agent.thread_id", local_thread)
675
743
 
676
744
  _safe(set_attrs, site="langchain.on_chain_start.start_attrs")
677
745
 
@@ -697,6 +765,10 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
697
765
  r = self._runs.pop(str(run_id), None)
698
766
  if not r or not r.span:
699
767
  return
768
+ if r.kind == "suppressed-twin":
769
+ # The span is the manual struct.agent() span owned by core.py's
770
+ # _AgentContext.__aexit__. Do not end it here.
771
+ return
700
772
  span = r.span
701
773
  _safe(lambda: span.set_status(StatusCode.OK),
702
774
  site="langchain.on_chain_end.set_status")
@@ -715,6 +787,10 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
715
787
  r = self._runs.pop(str(run_id), None)
716
788
  if not r or not r.span:
717
789
  return
790
+ if r.kind == "suppressed-twin":
791
+ # The span is the manual struct.agent() span owned by core.py's
792
+ # _AgentContext.__aexit__. Do not end or record error here.
793
+ return
718
794
  span = r.span
719
795
  _safe(lambda: _record_error(span, error),
720
796
  site="langchain.on_chain_error.record_error")
@@ -799,14 +875,15 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
799
875
 
800
876
  _safe(set_attrs, site="langchain.on_chat_model_start.start_attrs")
801
877
 
802
- # Announce this langchain chat span to any provider-SDK instrumentation
803
- # (anthropic, etc.) that runs UNDER LangChain they'll enrich this
804
- # span with HTTP-layer attrs instead of creating their own duplicate.
805
- # The token is saved on the RunState so on_llm_end can reset it.
806
- from struct_sdk.core import _current_langchain_chat_span
878
+ # Attach the OTel suppression key so any provider-SDK instrumentation
879
+ # (anthropic, etc.) running under LangChain skips creating a duplicate
880
+ # span — the handler already owns the ``chat <model>`` span.
881
+ # The token is saved on the RunState so on_llm_end / on_llm_error can
882
+ # detach it via reset_genai().
883
+ from struct_sdk.core import suppress_genai_token
807
884
  enrich_token = None
808
885
  try:
809
- enrich_token = _current_langchain_chat_span.set(span)
886
+ enrich_token = suppress_genai_token()
810
887
  except Exception: # noqa: BLE001 — never fail the host call on instrumentation
811
888
  enrich_token = None
812
889
 
@@ -850,29 +927,27 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
850
927
  parent_run_id: Optional[UUID] = None,
851
928
  **kwargs: Any,
852
929
  ) -> None:
853
- from struct_sdk.core import _safe, _current_langchain_chat_span
930
+ from struct_sdk.core import _safe, reset_genai
854
931
 
855
932
  r = self._runs.pop(str(run_id), None)
856
933
  if not r or not r.span:
857
934
  # Even if the span was never created (telemetry-disabled fallback),
858
- # we still need to reset the enrich-contextvar token so it doesn't
935
+ # we still need to detach the suppression token so it doesn't
859
936
  # leak into the next operation in this task.
860
937
  if r is not None and r.enrich_token is not None:
861
938
  _safe(
862
- lambda: _current_langchain_chat_span.reset(r.enrich_token),
863
- site="langchain.on_llm_end.reset_enrich_token",
939
+ lambda: reset_genai(r.enrich_token),
940
+ site="langchain.on_llm_end.reset_suppress",
864
941
  )
865
942
  return
866
943
  span = r.span
867
944
 
868
- # Reset the enrich-token contextvar BEFORE ending the span. Any
869
- # post-end attribute set by provider-SDK instrumentation would race
870
- # against ``span.end()`` and likely no-op anyway, so we close the
871
- # door before we close the span.
945
+ # Detach the OTel suppression key BEFORE ending the span so the
946
+ # provider-SDK window is cleanly closed before we finalize the span.
872
947
  if r.enrich_token is not None:
873
948
  _safe(
874
- lambda: _current_langchain_chat_span.reset(r.enrich_token),
875
- site="langchain.on_llm_end.reset_enrich_token",
949
+ lambda: reset_genai(r.enrich_token),
950
+ site="langchain.on_llm_end.reset_suppress",
876
951
  )
877
952
 
878
953
  def set_response_attrs() -> None:
@@ -918,18 +993,18 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
918
993
  parent_run_id: Optional[UUID] = None,
919
994
  **kwargs: Any,
920
995
  ) -> None:
921
- from struct_sdk.core import _safe, _current_langchain_chat_span
996
+ from struct_sdk.core import _safe, reset_genai
922
997
 
923
998
  r = self._runs.pop(str(run_id), None)
924
999
  if not r:
925
1000
  return
926
- # Always reset the enrich-token contextvar, even when there's no
927
- # span — leaving it set would leak the (now-defunct) span into the
928
- # next operation in this task.
1001
+ # Always detach the suppression token, even when there's no span —
1002
+ # leaving it attached would suppress the provider's span for the next
1003
+ # operation in this task.
929
1004
  if r.enrich_token is not None:
930
1005
  _safe(
931
- lambda: _current_langchain_chat_span.reset(r.enrich_token),
932
- site="langchain.on_llm_error.reset_enrich_token",
1006
+ lambda: reset_genai(r.enrich_token),
1007
+ site="langchain.on_llm_error.reset_suppress",
933
1008
  )
934
1009
  if not r.span:
935
1010
  return
@@ -997,7 +1072,11 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
997
1072
  span.set_attribute("gen_ai.provider.name", "langchain")
998
1073
  span.set_attribute("gen_ai.tool.name", str(tool_name))
999
1074
 
1000
- tool_call_id = _extract_tool_call_id_from_inputs(inputs) or _pop_pending_tool_call_id(str(tool_name))
1075
+ tool_call_id = (
1076
+ kwargs.get("tool_call_id")
1077
+ or _extract_tool_call_id_from_inputs(inputs)
1078
+ or _pop_pending_tool_call_id(str(tool_name))
1079
+ )
1001
1080
  if tool_call_id:
1002
1081
  span.set_attribute("gen_ai.tool.call.id", tool_call_id)
1003
1082
 
@@ -1020,6 +1099,13 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
1020
1099
  kind="tool",
1021
1100
  )
1022
1101
 
1102
+ # Index by checkpoint ns so a sub-agent graph triggered inside this tool
1103
+ # (sharing this exact ns) re-parents its invoke_agent span under us.
1104
+ tool_ns = _checkpoint_ns(metadata)
1105
+ if tool_ns is not None:
1106
+ self._tool_spans_by_ns[tool_ns] = span
1107
+ self._tool_ns_by_run[key] = tool_ns
1108
+
1023
1109
  def on_tool_end(
1024
1110
  self,
1025
1111
  output: Any,
@@ -1030,6 +1116,9 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
1030
1116
  ) -> None:
1031
1117
  from struct_sdk.core import _safe
1032
1118
 
1119
+ ns = self._tool_ns_by_run.pop(str(run_id), None)
1120
+ if ns is not None:
1121
+ self._tool_spans_by_ns.pop(ns, None)
1033
1122
  r = self._runs.pop(str(run_id), None)
1034
1123
  if not r or not r.span:
1035
1124
  return
@@ -1057,6 +1146,9 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
1057
1146
  ) -> None:
1058
1147
  from struct_sdk.core import _safe
1059
1148
 
1149
+ ns = self._tool_ns_by_run.pop(str(run_id), None)
1150
+ if ns is not None:
1151
+ self._tool_spans_by_ns.pop(ns, None)
1060
1152
  r = self._runs.pop(str(run_id), None)
1061
1153
  if not r or not r.span:
1062
1154
  return
@@ -1265,22 +1357,14 @@ class StructCallbackHandler(BaseCallbackHandler): # type: ignore[misc]
1265
1357
  metadata: Optional[dict[str, Any]],
1266
1358
  parent_agent_session_id: Optional[str] = None,
1267
1359
  ) -> str:
1268
- """For AGENT spans each invocation gets its own conversation id.
1269
-
1270
- Prefer config-supplied thread_id for multi-turn continuity, fall
1271
- back to a fresh UUID. Never inherit from the parent run —
1272
- subagents should surface as separate sessions in the UI.
1273
-
1274
- LangChain quirk: when a nested invoke runs inside a parent call,
1275
- LangChain inherits the parent's metadata.thread_id onto the child
1276
- even if the child supplied its own. Detect that by comparing
1277
- against the nearest-agent-ancestor's session and assign a fresh
1278
- UUID if they match.
1360
+ """Agent spans INHERIT the nearest-agent ancestor's conversation id so a
1361
+ whole run shares one id. A locally-supplied thread_id is preserved by the
1362
+ caller as ``struct.agent.thread_id`` (non-grouping) — it never splits the run.
1279
1363
  """
1364
+ if parent_agent_session_id:
1365
+ return parent_agent_session_id
1280
1366
  thread_id = _metadata_thread_id(metadata)
1281
1367
  if thread_id:
1282
- if parent_agent_session_id and thread_id == parent_agent_session_id:
1283
- return str(uuid.uuid4())
1284
1368
  return thread_id
1285
1369
  from struct_sdk.core import _current_session_id
1286
1370
  ambient = _current_session_id.get(None)
@@ -1304,12 +1388,11 @@ class _RunState:
1304
1388
  "nearest_agent_session_id",
1305
1389
  "nearest_agent_span",
1306
1390
  "kind",
1307
- # Only set on LLM / chat runs. Holds the ``contextvars.Token`` returned
1308
- # by ``_current_langchain_chat_span.set(span)`` so on_llm_end /
1309
- # on_llm_error can reset the contextvar. The contextvar's purpose:
1310
- # tell provider-SDK instrumentations (anthropic, openai, etc.)
1311
- # "you're running underneath this LangChain chat span — enrich it
1312
- # with your HTTP-layer attrs, don't create a duplicate span."
1391
+ # Only set on LLM / chat runs. Holds the OTel context token returned
1392
+ # by ``suppress_genai_token()`` so on_llm_end / on_llm_error can
1393
+ # detach it via ``reset_genai()``. The suppression key tells
1394
+ # provider-SDK instrumentations (anthropic, openai, etc.)
1395
+ # "a framework layer owns the chat span — skip creating a duplicate."
1313
1396
  "enrich_token",
1314
1397
  )
1315
1398
 
@@ -1407,25 +1490,24 @@ def _set_llm_response_attrs(
1407
1490
  mapped = _LANGCHAIN_FINISH_REASON_MAP.get(finish, finish)
1408
1491
  span.set_attribute("gen_ai.response.finish_reasons", [mapped])
1409
1492
 
1410
- resp_id = getattr(message, "id", None) or resp_meta.get("id")
1493
+ # Prefer the provider message id (``msg_…`` / ``chatcmpl-…``) from
1494
+ # response_metadata over LangChain's generated run id (``run-…`` /
1495
+ # ``lc_run--…``). ChatAnthropic (and most LangChain chat model adapters)
1496
+ # place the real API-level id in ``response_metadata["id"]`` while
1497
+ # ``message.id`` carries a LangChain-internal run UUID. We use
1498
+ # gen_ai.response.id as the duplicate-detection fingerprint, so the
1499
+ # provider id must take priority. The LangChain run id is preserved
1500
+ # under ``langchain.run.id`` for joining back to LangSmith / LangChain
1501
+ # run data.
1502
+ provider_id = resp_meta.get("id")
1503
+ lc_run_id = getattr(message, "id", None)
1504
+ resp_id = provider_id or lc_run_id
1411
1505
  if isinstance(resp_id, str):
1412
- # If a provider-SDK instrumentation (e.g. struct-sdk-anthropic via
1413
- # the enrich path) has already set gen_ai.response.id to the
1414
- # real provider message id (e.g. ``msg_…``), don't clobber it
1415
- # with LangChain's run UUID (``lc_run--…``). The provider id is
1416
- # more useful for API-level debugging. LangChain's run UUID is
1417
- # preserved under ``langchain.run.id`` for joining back to
1418
- # LangSmith / LangChain run data.
1419
- try:
1420
- existing = (span.attributes or {}).get("gen_ai.response.id") \
1421
- if hasattr(span, "attributes") else None
1422
- except Exception: # noqa: BLE001
1423
- existing = None
1424
- if not existing:
1425
- span.set_attribute("gen_ai.response.id", resp_id)
1426
- elif resp_id != existing:
1427
- # LangChain's id is distinct from the provider's — keep both.
1428
- span.set_attribute("langchain.run.id", resp_id)
1506
+ span.set_attribute("gen_ai.response.id", resp_id)
1507
+ # When the LangChain run id differs from the provider id, record
1508
+ # it separately so consumers can still join on the run UUID.
1509
+ if isinstance(lc_run_id, str) and lc_run_id != resp_id:
1510
+ span.set_attribute("langchain.run.id", lc_run_id)
1429
1511
 
1430
1512
  if sdk.emit_events and otel_logger:
1431
1513
  _emit_choice_event(otel_logger, message, provider or "langchain", session_id, span)
@@ -1535,7 +1617,7 @@ def _emit_message_events(
1535
1617
  attrs: dict[str, Any] = {
1536
1618
  "event.name": event_name,
1537
1619
  "body": payload,
1538
- "gen_ai.system": provider,
1620
+ "gen_ai.provider.name": provider,
1539
1621
  "gen_ai.message.index": idx,
1540
1622
  "gen_ai.conversation.id": session_id,
1541
1623
  }
@@ -1577,7 +1659,7 @@ def _emit_choice_event(
1577
1659
  attrs: dict[str, Any] = {
1578
1660
  "event.name": event_name,
1579
1661
  "body": payload,
1580
- "gen_ai.system": provider,
1662
+ "gen_ai.provider.name": provider,
1581
1663
  "gen_ai.conversation.id": session_id,
1582
1664
  }
1583
1665
  otel_logger.emit(LogRecord(
File without changes
File without changes