spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/_span.py ADDED
@@ -0,0 +1,1015 @@
1
+ """spanforge._span — Span, SpanContextManager, and agent context managers.
2
+
3
+ Provides the runtime tracing primitives that back ``tracer.span()``,
4
+ ``tracer.agent_run()``, and ``tracer.agent_step()``.
5
+
6
+ Design notes
7
+ ------------
8
+ * **Context-variable stacks** — uses :mod:`contextvars` so that context
9
+ propagates correctly across asyncio tasks, thread-pool executors, and
10
+ concurrent threads without manual ID management.
11
+ * **Immutable stack tuples** — each ``__enter__`` sets a *new* tuple on the
12
+ ContextVar and saves the reset token; ``__exit__`` calls
13
+ ``ContextVar.reset(token)`` so concurrent tasks each see their own stack
14
+ slice and cannot bleed into each other.
15
+ * **OTel-compatible IDs** — ``span_id`` is 8 random bytes (16 hex chars),
16
+ ``trace_id`` is 16 random bytes (32 hex chars), matching the OTel wire
17
+ format expected by :class:`~spanforge.namespaces.trace.SpanPayload`.
18
+ * **Zero external dependencies** — stdlib only (``collections``, ``contextvars``,
19
+ ``dataclasses``, ``os``, ``threading``, ``time``, ``types``, ``typing``).
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import contextvars
25
+ import os
26
+ import time
27
+ from collections import deque
28
+ from dataclasses import dataclass, field
29
+ from typing import TYPE_CHECKING, Any
30
+
31
+ from spanforge.namespaces.trace import (
32
+ AgentRunPayload,
33
+ AgentStepPayload,
34
+ CostBreakdown,
35
+ DecisionPoint,
36
+ GenAIOperationName,
37
+ GenAISystem,
38
+ ModelInfo,
39
+ ReasoningStep,
40
+ SpanEvent,
41
+ SpanKind,
42
+ SpanPayload,
43
+ TokenUsage,
44
+ ToolCall,
45
+ )
46
+
47
+ if TYPE_CHECKING:
48
+ import threading
49
+ from types import TracebackType
50
+
51
+ __all__ = [
52
+ "AgentRunContext",
53
+ "AgentRunContextManager",
54
+ "AgentStepContext",
55
+ "AgentStepContextManager",
56
+ "Span",
57
+ "SpanContextManager",
58
+ "copy_context",
59
+ "extract_traceparent",
60
+ "inject_traceparent",
61
+ ]
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # ID generation helpers
65
+ # ---------------------------------------------------------------------------
66
+
67
+
68
def _span_id() -> str:
    """Return a fresh OTel-compatible span ID (8 random bytes as 16 lowercase hex chars)."""
    raw = os.urandom(8)
    return raw.hex()
71
+
72
+
73
def _trace_id() -> str:
    """Return a fresh OTel-compatible trace ID (16 random bytes as 32 lowercase hex chars)."""
    raw = os.urandom(16)
    return raw.hex()
76
+
77
+
78
def _now_ns() -> int:
    """Return the wall-clock time as whole nanoseconds since the Unix epoch."""
    timestamp_ns = time.time_ns()
    return timestamp_ns
81
+
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Context-variable stacks (asyncio-safe, thread-safe)
85
+ # ---------------------------------------------------------------------------
86
+
87
# The stacks live in ContextVars as *immutable tuples*: an asyncio task spawned
# inside a span inherits the parent's stack slice and can never mutate it.
_span_stack_var: contextvars.ContextVar[tuple[Span, ...]] = contextvars.ContextVar(
    "spanforge_span_stack",
    default=(),
)
_run_stack_var: contextvars.ContextVar[tuple[AgentRunContext, ...]] = contextvars.ContextVar(
    "spanforge_run_stack",
    default=(),
)


def _span_stack() -> tuple[Span, ...]:
    """Fetch the span stack (an immutable tuple) visible in the current context."""
    current_spans = _span_stack_var.get()
    return current_spans


def _run_stack() -> tuple[AgentRunContext, ...]:
    """Fetch the agent-run stack (an immutable tuple) visible in the current context."""
    current_runs = _run_stack_var.get()
    return current_runs
105
+
106
+
107
def copy_context() -> contextvars.Context:
    """Snapshot the current :mod:`contextvars` context (shallow copy).

    Hand the snapshot's ``run`` method to a thread or to
    ``loop.run_in_executor`` so the spawned work inherits the active span::

        ctx = spanforge.copy_context()
        loop.run_in_executor(None, ctx.run, my_blocking_fn)
    """
    snapshot = contextvars.copy_context()
    return snapshot
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # W3C Trace Context helpers (RFC-0001 §15)
121
+ # ---------------------------------------------------------------------------
122
+
123
_TRACEPARENT_PARTS = 4
_TRACEPARENT_VERSION = "00"
_LOWER_HEX_DIGITS = frozenset("0123456789abcdef")


def _is_lower_hex(value: str, length: int) -> bool:
    """Return True when *value* is exactly *length* lowercase hex characters."""
    return len(value) == length and all(ch in _LOWER_HEX_DIGITS for ch in value)


def _parse_traceparent(header: str) -> tuple[str, str] | None:
    """Parse a W3C ``traceparent`` header value.

    Format: ``{version}-{trace-id}-{parent-id}-{trace-flags}``

    Only version ``00`` is accepted.  Every field is validated: ``trace-id``
    must be 32 lowercase hex chars, ``parent-id`` 16, and ``trace-flags`` 2.
    All-zero trace or parent IDs are rejected.

    Args:
        header: The raw ``traceparent`` header value (e.g. from an HTTP request).
            Surrounding whitespace is ignored.

    Returns:
        ``(trace_id, parent_span_id)`` if the header is valid, else ``None``.
        Non-string input also yields ``None`` rather than raising.
    """
    # Header values come from the network; never let a wrong type escape as
    # an AttributeError from ``.strip()``.
    if not isinstance(header, str):
        return None
    parts = header.strip().split("-")
    if len(parts) != _TRACEPARENT_PARTS:
        return None
    version, trace_id, parent_id, flags = parts
    if version != _TRACEPARENT_VERSION:
        return None
    if not _is_lower_hex(trace_id, 32):
        return None
    if not _is_lower_hex(parent_id, 16):
        return None
    # trace-flags is a mandatory 2-hex-digit field in version 00; a malformed
    # value makes the whole header invalid.
    if not _is_lower_hex(flags, 2):
        return None
    if trace_id == "0" * 32 or parent_id == "0" * 16:
        return None  # invalid all-zeros IDs per spec
    return trace_id, parent_id
151
+
152
+
153
def extract_traceparent(headers: dict[str, str]) -> tuple[str, str] | None:
    """Pull ``(trace_id, parent_span_id)`` out of W3C Trace Context headers.

    The ``traceparent`` key is matched case-insensitively; the first matching
    entry in iteration order wins.

    Args:
        headers: HTTP request headers dict.

    Returns:
        ``(trace_id, parent_span_id)`` when a non-empty, valid ``traceparent``
        header is present, otherwise ``None``.

    Example::

        ctx = extract_traceparent(request.headers)
        if ctx:
            trace_id, parent_id = ctx
    """
    # HTTP/1.1 header names are case-insensitive (RFC 7230 §3.2), so compare
    # on the lower-cased key instead of doing a direct dict lookup.
    for header_name, header_value in headers.items():
        if header_name.lower() != "traceparent":
            continue
        if not header_value:
            return None
        return _parse_traceparent(header_value)
    return None
176
+
177
+
178
def inject_traceparent(span: "Span", headers: dict[str, str]) -> None:
    """Write W3C Trace Context for *span* into *headers*.

    Thin functional wrapper that delegates to ``span.inject(headers)``.

    Args:
        span: The currently active :class:`Span`.
        headers: Mutable HTTP headers dict that receives the injected values.

    Example::

        headers = {}
        inject_traceparent(span, headers)
        httpx.get(url, headers=headers)
    """
    span.inject(headers)
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # Span helpers
198
+ # ---------------------------------------------------------------------------
199
+
200
+
201
def _default_span_events() -> "deque[SpanEvent]":
    """Build the event buffer for a new span, sized from the global config (H2).

    A positive ``max_span_events`` bounds the deque; zero or a negative value
    makes it unbounded.  If the config cannot be read for any reason the
    buffer falls back to a 1000-entry bound.
    """
    try:
        from spanforge.config import get_config  # noqa: PLC0415

        limit = get_config().max_span_events
        if limit > 0:
            buffer = deque(maxlen=limit)
        else:
            buffer = deque(maxlen=None)
        return buffer
    except Exception:  # config not yet initialised
        return deque(maxlen=1000)
209
+
210
+
211
+ # ---------------------------------------------------------------------------
212
+ # Span
213
+ # ---------------------------------------------------------------------------
214
+
215
+
216
@dataclass
class Span:
    """Mutable span record accumulated during a ``with tracer.span(...)`` block.

    Create via :class:`SpanContextManager` (i.e. ``tracer.span(...)``).
    Direct construction is supported for testing.

    Auto-populated fields
    ----------------------
    ``span_id``, ``trace_id``, and ``start_ns`` are assigned by
    :class:`SpanContextManager.__enter__`; do not set them manually unless
    you need custom IDs for testing.

    Attributes:
        name:           Human-readable span name.
        span_id:        16 lowercase hex chars (OTel span ID).
        trace_id:       32 lowercase hex chars (OTel trace ID).
        parent_span_id: Parent span ID if nested; ``None`` for root spans.
        agent_run_id:   ULID of the enclosing agent run, if any.
        model:          Model name string (e.g. ``"gpt-4o"``).
        operation:      GenAI operation name (default ``"chat"``).
        attributes:     Arbitrary key-value metadata set by the user.
        start_ns:       Start time as nanoseconds since Unix epoch.
        end_ns:         End time (set on :meth:`end`).
        duration_ms:    Computed duration in milliseconds.
        status:         ``"ok"`` or ``"error"`` or ``"timeout"``.
        error:          Error message if ``status == "error"``.
        error_type:     Exception class name if ``status == "error"``.
        token_usage:    Optional token counts (set by provider integrations).
        cost:           Optional cost breakdown (set by provider integrations).
        tool_calls:     Tool calls recorded on this span.
        events:         Named events added via :meth:`add_event`; the buffer is
                        created by ``_default_span_events``.
        temperature:    Sampling temperature, if supplied.
        top_p:          Nucleus-sampling parameter, if supplied.
        max_tokens:     Token limit, if supplied.
        error_category: Error category set by :meth:`record_error` or by the
                        timeout timer.
        session_id:     Conversation / session identifier.
        user_id:        End-user identifier.
        traceparent:    Incoming W3C ``traceparent`` value (for propagation).
    """

    name: str
    span_id: str = field(default_factory=_span_id)
    trace_id: str = field(default_factory=_trace_id)
    parent_span_id: str | None = None
    agent_run_id: str | None = None
    model: str | None = None
    operation: str = "chat"
    attributes: dict[str, Any] = field(default_factory=dict)
    start_ns: int = field(default_factory=_now_ns)
    end_ns: int | None = None
    duration_ms: float | None = None
    status: str = "ok"
    error: str | None = None
    error_type: str | None = None
    token_usage: TokenUsage | None = None
    cost: CostBreakdown | None = None
    tool_calls: list[ToolCall] = field(default_factory=list)
    events: deque[SpanEvent] = field(default_factory=_default_span_events)
    temperature: float | None = None
    top_p: float | None = None
    max_tokens: int | None = None
    error_category: str | None = None  # one of SpanErrorCategory literals
    session_id: str | None = None  # conversation / session identifier
    user_id: str | None = None  # end-user identifier
    traceparent: str | None = None  # incoming W3C traceparent (for propagation)
    _timeout_timer: "threading.Timer | None" = field(default=None, init=False, repr=False)

    # ------------------------------------------------------------------
    # Mutation methods (call from inside ``with tracer.span(...) as s:``)
    # ------------------------------------------------------------------

    def set_attribute(self, key: str, value: Any) -> None:  # noqa: ANN401
        """Add or update a key-value attribute on this span.

        Args:
            key:   Attribute name (non-empty string).
            value: Attribute value (any JSON-serialisable type).

        Raises:
            ValueError: If *key* is not a non-empty string.
        """
        if not isinstance(key, str) or not key:
            raise ValueError("set_attribute: key must be a non-empty string")
        self.attributes[key] = value

    def add_event(self, name: str, metadata: dict[str, Any] | None = None) -> None:
        """Record a named event at this point in time within the span.

        The event is appended to :attr:`events`.  No validation is performed
        here; *name* and *metadata* are passed straight to ``SpanEvent``.

        Args:
            name:     Event name (non-empty string).
            metadata: Optional key-value metadata for this event; ``None`` or
                an empty dict is stored as ``{}``.
        """
        self.events.append(SpanEvent(name=name, metadata=metadata or {}))

    def inject(self, headers: dict[str, str]) -> None:
        """Inject W3C Trace Context headers for downstream propagation.

        Sets ``traceparent`` on *headers* so that downstream services can
        correlate their spans with this one.  The trace-flags field is always
        written as ``01`` (sampled).  Only the ``traceparent`` key is written;
        any existing ``tracestate`` entry is left untouched.

        Args:
            headers: Mutable dict-like object representing outgoing HTTP headers.

        Example::

            headers = {}
            span.inject(headers)
            requests.get(url, headers=headers)
        """
        flags = "01"  # sampled
        headers["traceparent"] = f"00-{self.trace_id}-{self.span_id}-{flags}"

    def record_error(
        self,
        exc: Exception,
        category: str | None = None,
    ) -> None:
        """Record an exception on this span, setting ``status = "error"``.

        Args:
            exc:      The exception that caused the failure.
            category: Optional error category — one of ``"agent_error"``,
                      ``"llm_error"``, ``"tool_error"``, ``"timeout_error"``,
                      ``"unknown_error"``.  When omitted, :class:`TimeoutError`
                      is automatically mapped to ``"timeout_error"``; all
                      others default to ``"unknown_error"``.
        """
        self.status = "error"
        self.error = str(exc)
        self.error_type = type(exc).__qualname__
        if category is not None:
            self.error_category = category
        elif isinstance(exc, TimeoutError):
            self.error_category = "timeout_error"
        else:
            self.error_category = "unknown_error"

    def set_token_usage(self, token_usage: TokenUsage) -> None:
        """Attach token usage data (called by provider integrations)."""
        self.token_usage = token_usage

    def set_cost(self, cost: CostBreakdown) -> None:
        """Attach cost breakdown data (called by provider integrations)."""
        self.cost = cost

    # ------------------------------------------------------------------
    # Internal lifecycle
    # ------------------------------------------------------------------

    def set_timeout_deadline(self, seconds: float) -> None:
        """Schedule this span to auto-timeout if not closed within *seconds*.

        If the span is still open when the deadline passes, its ``status``
        is set to ``"timeout"`` and ``error_category`` to ``"timeout_error"``.
        The background timer is automatically cancelled when the span closes
        normally via :meth:`end`.  Calling this method again replaces any
        previously scheduled deadline.

        Args:
            seconds: Deadline in seconds (must be > 0).

        Raises:
            ValueError: If *seconds* is not greater than zero (this includes
                NaN).
        """
        # ``not seconds > 0`` instead of ``seconds <= 0``: NaN compares False
        # against everything, so the negated form is the one that rejects it
        # before it reaches ``threading.Timer`` as a meaningless delay.
        if not seconds > 0:
            raise ValueError(f"set_timeout_deadline: seconds must be > 0, got {seconds!r}")
        import threading  # noqa: PLC0415

        # Cancel any previously registered timer before installing a new one.
        # Without this guard, double-calling would orphan the first timer.
        if self._timeout_timer is not None:
            self._timeout_timer.cancel()
            self._timeout_timer = None

        def _timeout_fn() -> None:
            # Guard is evaluated on CPython under the GIL.  end_ns is set by
            # end() before cancel() is called; on CPython this sequence is
            # safe.  The double guard (end_ns + status) means a span that has
            # already errored or finished is never overwritten.
            if self.end_ns is None and self.status == "ok":
                self.status = "timeout"
                self.error = f"Span timed out after {seconds:.3f}s"
                self.error_category = "timeout_error"

        timer = threading.Timer(seconds, _timeout_fn)
        timer.daemon = True  # never keep the interpreter alive for a pending deadline
        timer.start()
        self._timeout_timer = timer

    def end(self) -> None:
        """Finalise the span by recording the end time and computing duration.

        Only the first call sets ``end_ns`` and ``duration_ms``; later calls
        leave them unchanged.  Any pending timeout timer is cancelled on every
        call.
        """
        if self.end_ns is None:
            self.end_ns = _now_ns()
            self.duration_ms = (self.end_ns - self.start_ns) / 1_000_000.0
        if self._timeout_timer is not None:
            self._timeout_timer.cancel()
            self._timeout_timer = None

    def to_span_payload(self) -> SpanPayload:
        """Serialise this span to a :class:`~spanforge.namespaces.trace.SpanPayload`.

        Called internally by :class:`SpanContextManager.__exit__` just before
        event emission.  If the span has not been ended yet, the current time
        is used as the end time for the payload without mutating the span.
        """
        end_ns = self.end_ns if self.end_ns is not None else _now_ns()
        duration_ms = (end_ns - self.start_ns) / 1_000_000.0

        # Resolve ModelInfo from the model name string.
        model_info: ModelInfo | None = None
        if self.model:
            model_info = _resolve_model_info(self.model)

        # Resolve operation enum; unknown operation names pass through as
        # plain strings.
        try:
            operation: GenAIOperationName | str = GenAIOperationName(self.operation)
        except ValueError:
            operation = self.operation

        return SpanPayload(
            span_id=self.span_id,
            trace_id=self.trace_id,
            span_name=self.name,
            operation=operation,
            span_kind=SpanKind.CLIENT,
            status=self.status,
            start_time_unix_nano=self.start_ns,
            end_time_unix_nano=end_ns,
            duration_ms=duration_ms,
            parent_span_id=self.parent_span_id,
            agent_run_id=self.agent_run_id,
            model=model_info,
            token_usage=self.token_usage,
            cost=self.cost,
            tool_calls=list(self.tool_calls),
            error=self.error,
            error_type=self.error_type,
            attributes=self.attributes if self.attributes else None,
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=self.max_tokens,
            error_category=self.error_category,
            events=list(self.events),
            session_id=self.session_id,
            user_id=self.user_id,
            incoming_traceparent=self.traceparent,
        )
451
+
452
+
453
+ # ---------------------------------------------------------------------------
454
+ # SpanContextManager
455
+ # ---------------------------------------------------------------------------
456
+
457
+
458
class SpanContextManager:
    """Context manager handed out by :meth:`~spanforge._tracer.Tracer.span`.

    Usage::

        with tracer.span("my-llm-call", model="gpt-4o") as span:
            span.set_attribute("prompt_length", 256)
            # ... call LLM ...
        # → SpanPayload event emitted on exit

    Entering creates a :class:`Span`, binds it to the ``as`` target and pushes
    it onto the context-variable span stack so nested spans inherit its
    ``trace_id``.  Exiting ends the span, pops the stack, runs processors and
    hooks, and emits the span.  Works with both ``with`` and ``async with``.
    """

    def __init__(
        self,
        name: str,
        model: str | None = None,
        operation: str = "chat",
        temperature: float | None = None,
        top_p: float | None = None,
        max_tokens: int | None = None,
        attributes: dict[str, Any] | None = None,
        incoming_traceparent: str | None = None,
        session_id: str | None = None,
        user_id: str | None = None,
    ) -> None:
        """Store the span parameters; the :class:`Span` itself is built on enter."""
        self._name = name
        self._model = model
        self._operation = operation
        self._temperature = temperature
        self._top_p = top_p
        self._max_tokens = max_tokens
        # Copied so later mutation of the caller's dict cannot leak in.
        self._initial_attributes = dict(attributes or {})
        self._incoming_traceparent = incoming_traceparent
        self._session_id = session_id
        self._user_id = user_id
        self._span: Span | None = None

    # ------------------------------------------------------------------
    # Context manager protocol
    # ------------------------------------------------------------------

    def __enter__(self) -> Span:
        """Create the span, push it on the stack, and fire start processors/hooks."""
        active_spans = _span_stack()
        active_runs = _run_stack()

        # Trace lineage, in priority order: enclosing span, then a valid
        # incoming W3C traceparent, then the enclosing agent run, then a
        # brand-new trace.
        parent_span_id: str | None = None
        if active_spans:
            enclosing = active_spans[-1]
            trace_id = enclosing.trace_id
            parent_span_id = enclosing.span_id
        else:
            remote = (
                _parse_traceparent(self._incoming_traceparent)
                if self._incoming_traceparent
                else None
            )
            if remote is not None:
                trace_id, parent_span_id = remote
            elif active_runs:
                # Share the run's trace_id so all spans in a Trace agree.
                trace_id = active_runs[-1].trace_id
            else:
                trace_id = _trace_id()

        # Inherit agent_run_id from the enclosing run context.
        if active_runs:
            agent_run_id = active_runs[-1].agent_run_id
        else:
            agent_run_id = None

        # Explicit session/user IDs win; otherwise use config defaults.  Any
        # failure while consulting the config falls back to the explicit
        # values for *both* fields.
        session_id, user_id = self._session_id, self._user_id
        try:
            from spanforge.config import get_config as _load_config  # noqa: PLC0415

            cfg = _load_config()
            resolved_ids = (
                self._session_id or cfg.default_session_id,
                self._user_id or cfg.default_user_id,
            )
        except Exception:  # NOSONAR
            pass
        else:
            session_id, user_id = resolved_ids

        self._span = Span(
            name=self._name,
            span_id=_span_id(),
            trace_id=trace_id,
            parent_span_id=parent_span_id,
            agent_run_id=agent_run_id,
            model=self._model,
            operation=self._operation,
            temperature=self._temperature,
            top_p=self._top_p,
            max_tokens=self._max_tokens,
            attributes=dict(self._initial_attributes),
            start_ns=_now_ns(),
            session_id=session_id,
            user_id=user_id,
            traceparent=self._incoming_traceparent,
        )
        # Install a new immutable tuple and keep the token for __exit__.
        self._stack_token: contextvars.Token[tuple[Span, ...]] = _span_stack_var.set(
            (*active_spans, self._span)
        )
        # Span processors: on_start (errors suppressed).
        try:
            from spanforge.processor import _run_on_start  # noqa: PLC0415

            _run_on_start(self._span)
        except Exception:  # NOSONAR
            pass
        # Start hooks (errors suppressed — hooks must never abort user code).
        try:
            from spanforge._hooks import hooks as hook_registry  # noqa: PLC0415

            hook_registry._fire_start(self._span)
        except Exception:  # NOSONAR
            pass
        return self._span

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """End the span, restore the stack, and emit; never suppresses exceptions."""
        assert self._span is not None, "SpanContextManager.__exit__ called before __enter__"
        span = self._span

        # Only genuine application errors (Exception subclasses) are recorded;
        # control-flow signals such as KeyboardInterrupt, SystemExit and
        # GeneratorExit are left alone, as is a span already marked non-ok.
        if isinstance(exc_val, Exception) and span.status == "ok":
            span.record_error(exc_val)

        span.end()

        # Put the stack back exactly as it was before __enter__.
        _span_stack_var.reset(self._stack_token)

        # Span processors: on_end (errors suppressed).
        try:
            from spanforge.processor import _run_on_end  # noqa: PLC0415

            _run_on_end(span)
        except Exception:  # NOSONAR
            pass
        # End hooks run before export (errors suppressed).
        try:
            from spanforge._hooks import hooks as hook_registry  # noqa: PLC0415

            hook_registry._fire_end(span)
        except Exception:  # NOSONAR
            pass

        # Emit the event; export failures are routed to the stream module's
        # error handler when that module could be imported at all.
        stream_mod = None
        try:
            from spanforge import _stream as stream_mod  # noqa: PLC0415

            stream_mod.emit_span(span)
        except Exception as exc:
            if stream_mod is not None:
                stream_mod._handle_export_error(exc)

        # Auto-emit cost event when configured (Tool 2).
        if span.cost is not None:
            try:
                from spanforge.config import get_config as _load_config  # noqa: PLC0415

                if _load_config().auto_emit_cost:
                    from spanforge.cost import emit_cost_event  # noqa: PLC0415

                    emit_cost_event(span)
            except Exception:  # NOSONAR — cost emission must never affect user code
                pass

        # Do NOT suppress the original exception.
        return False

    # ------------------------------------------------------------------
    # Async context manager protocol (delegates to sync implementation)
    # ------------------------------------------------------------------

    async def __aenter__(self) -> Span:
        """Async entry — identical to ``__enter__``; safe for ``async with``."""
        return self.__enter__()

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """Async exit — identical to ``__exit__``; safe for ``async with``."""
        return self.__exit__(exc_type, exc_val, exc_tb)
644
+
645
+
646
+ # ---------------------------------------------------------------------------
647
+ # Agent step context
648
+ # ---------------------------------------------------------------------------
649
+
650
+
651
@dataclass
class AgentStepContext:
    """Mutable record accumulated during ``with tracer.agent_step(...)``.

    Holds the identity, timing, status and collected artefacts (tool calls,
    reasoning steps, decision points) of one agent step, and converts them
    into an :class:`AgentStepPayload` via :meth:`to_agent_step_payload`.
    """

    step_name: str
    agent_run_id: str
    step_index: int
    span_id: str = field(default_factory=_span_id)
    trace_id: str = field(default_factory=_trace_id)
    parent_span_id: str | None = None
    operation: str = "invoke_agent"
    start_ns: int = field(default_factory=_now_ns)
    end_ns: int | None = None
    duration_ms: float | None = None
    status: str = "ok"
    error: str | None = None
    error_type: str | None = None
    model: str | None = None
    token_usage: TokenUsage | None = None
    cost: CostBreakdown | None = None
    tool_calls: list[ToolCall] = field(default_factory=list)
    reasoning_steps: list[ReasoningStep] = field(default_factory=list)
    decision_points: list[DecisionPoint] = field(default_factory=list)
    attributes: dict[str, Any] = field(default_factory=dict)

    def set_attribute(self, key: str, value: Any) -> None:  # noqa: ANN401
        """Add or update a key-value attribute; *key* must be a non-empty string."""
        if not (isinstance(key, str) and key):
            raise ValueError("set_attribute: key must be a non-empty string")
        self.attributes[key] = value

    def record_error(self, exc: Exception) -> None:
        """Mark the step as failed and capture the exception's message and class name."""
        self.status = "error"
        self.error = str(exc)
        self.error_type = type(exc).__qualname__

    def end(self) -> None:
        """Record the end time and duration; only the first call has any effect."""
        if self.end_ns is not None:
            return
        finished_ns = _now_ns()
        self.end_ns = finished_ns
        self.duration_ms = (finished_ns - self.start_ns) / 1_000_000.0

    def to_agent_step_payload(self) -> AgentStepPayload:
        """Serialise this step to an :class:`AgentStepPayload`.

        If the step has not been ended, the current time stands in as the end
        time without mutating the record.  Operation names that are not
        members of :class:`GenAIOperationName` pass through as plain strings.
        """
        if self.end_ns is not None:
            finished_ns = self.end_ns
        else:
            finished_ns = _now_ns()
        elapsed_ms = (finished_ns - self.start_ns) / 1_000_000.0

        try:
            resolved_operation: GenAIOperationName | str = GenAIOperationName(self.operation)
        except ValueError:
            resolved_operation = self.operation

        return AgentStepPayload(
            agent_run_id=self.agent_run_id,
            step_index=self.step_index,
            span_id=self.span_id,
            trace_id=self.trace_id,
            operation=resolved_operation,
            tool_calls=list(self.tool_calls),
            reasoning_steps=list(self.reasoning_steps),
            decision_points=list(self.decision_points),
            status=self.status,
            start_time_unix_nano=self.start_ns,
            end_time_unix_nano=finished_ns,
            duration_ms=elapsed_ms,
            parent_span_id=self.parent_span_id,
            model=_resolve_model_info(self.model) if self.model else None,
            token_usage=self.token_usage,
            cost=self.cost,
            error=self.error,
            error_type=self.error_type,
            step_name=self.step_name,
        )
719
+
720
+
721
class AgentStepContextManager:
    """Context manager returned by :meth:`~spanforge._tracer.Tracer.agent_step`.

    Entering creates an :class:`AgentStepContext` bound to the innermost
    active agent run.  Exiting finalises the step, registers it with that
    same run and emits the agent-step event.  Supports both ``with`` and
    ``async with``.
    """

    def __init__(
        self,
        step_name: str,
        operation: str = "invoke_agent",
        attributes: dict[str, Any] | None = None,
    ) -> None:
        """Store the step configuration; no step is created until entry.

        Args:
            step_name: Name given to the step context.
            operation: Operation name recorded on the step.
            attributes: Optional initial attributes.  The mapping is copied,
                so later changes by the caller do not affect the step.
        """
        self._step_name = step_name
        self._operation = operation
        self._initial_attributes = dict(attributes or {})
        self._ctx: AgentStepContext | None = None
        # Run that issued this step's index; set in __enter__.
        self._run: AgentRunContext | None = None

    def __enter__(self) -> AgentStepContext:
        """Create the step context and bind it to the innermost agent run.

        Returns:
            The freshly created :class:`AgentStepContext`.

        Raises:
            RuntimeError: If no agent run is active.
        """
        run_tuple = _run_stack()
        if not run_tuple:
            raise RuntimeError(
                "tracer.agent_step() must be used inside a tracer.agent_run() context"
            )
        run = run_tuple[-1]

        # Inherit trace_id + parent from any enclosing span; without one the
        # step joins the run's trace and has no parent span.
        span_tuple = _span_stack()
        if span_tuple:
            parent = span_tuple[-1]
            trace_id = parent.trace_id
            parent_span_id = parent.span_id
        else:
            trace_id = run.trace_id
            parent_span_id = None

        step_index = run.next_step_index()

        # Remember the owning run so __exit__ registers the step with the
        # run that numbered it, independent of the run stack at exit time.
        self._run = run
        self._ctx = AgentStepContext(
            step_name=self._step_name,
            agent_run_id=run.agent_run_id,
            step_index=step_index,
            span_id=_span_id(),
            trace_id=trace_id,
            parent_span_id=parent_span_id,
            operation=self._operation,
            start_ns=_now_ns(),
            attributes=dict(self._initial_attributes),
        )
        return self._ctx

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """Finalise the step, register it with its run and emit it.

        An in-flight exception is recorded on the step only while the step
        status is still ``"ok"``.  Failures while emitting are passed to the
        stream module's export-error handler; if the stream module itself
        cannot be imported the failure is dropped.

        Returns:
            ``False`` always, so an in-flight exception keeps propagating.
        """
        assert self._ctx is not None

        if exc_val is not None and self._ctx.status == "ok":
            self._ctx.record_error(exc_val)
        self._ctx.end()

        # Register with the run captured in __enter__ - the run that issued
        # this step's index - rather than re-reading the run stack, which
        # may differ (or be empty) when exit runs in another context.
        if self._run is not None:
            self._run.record_step(self._ctx)

        # Emit agent step event.
        _s = None
        try:
            from spanforge import _stream as _s  # noqa: PLC0415
            _s.emit_agent_step(self._ctx)
        except Exception as exc:
            if _s is not None:
                _s._handle_export_error(exc)

        return False

    # ------------------------------------------------------------------
    # Async context manager protocol
    # ------------------------------------------------------------------

    async def __aenter__(self) -> AgentStepContext:
        """Async entry — identical to ``__enter__``."""
        return self.__enter__()

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """Async exit — identical to ``__exit__``."""
        return self.__exit__(exc_type, exc_val, exc_tb)
812
+
813
+
814
+ # ---------------------------------------------------------------------------
815
+ # Agent run context
816
+ # ---------------------------------------------------------------------------
817
+
818
+
819
@dataclass
class AgentRunContext:
    """Mutable state gathered while a ``with tracer.agent_run(...)`` block is active.

    Holds the run's identifiers and timing, the steps registered so far and
    any costs reported by child runs, and can fold all of that into an
    :class:`AgentRunPayload`.
    """

    agent_name: str
    agent_run_id: str = field(default_factory=_span_id)  # 16 hex chars
    trace_id: str = field(default_factory=_trace_id)
    root_span_id: str = field(default_factory=_span_id)
    start_ns: int = field(default_factory=_now_ns)
    end_ns: int | None = None
    duration_ms: float | None = None
    status: str = "ok"
    error: str | None = None
    termination_reason: str | None = None
    _step_count: int = field(default=0, init=False, repr=False)
    _steps: list[AgentStepContext] = field(default_factory=list, init=False, repr=False)
    _child_run_costs: list[CostBreakdown] = field(default_factory=list, init=False, repr=False)

    def next_step_index(self) -> int:
        """Hand out the next step index (starting at 0) and advance the counter."""
        issued = self._step_count
        self._step_count = issued + 1
        return issued

    def record_step(self, step: AgentStepContext) -> None:
        """Add *step* to the list of steps aggregated by this run."""
        self._steps.append(step)

    def record_error(self, exc: Exception) -> None:
        """Mark the run as failed and keep the exception's message."""
        self.error = str(exc)
        self.status = "error"

    def record_child_run_cost(self, cost: CostBreakdown) -> None:
        """Remember the cost reported by a finished child agent run."""
        self._child_run_costs.append(cost)

    def end(self) -> None:
        """Close the run's timing window.

        Stamps ``end_ns`` when it is still unset (an existing value is never
        overwritten) and derives ``duration_ms`` from the stored stamps.
        """
        # NOTE(review): indentation was lost in the rendering this block was
        # recovered from; the duration assignment is assumed to follow the
        # guard rather than sit inside it - confirm against the package.
        still_open = self.end_ns is None
        if still_open:
            self.end_ns = _now_ns()
        elapsed_ns = self.end_ns - self.start_ns
        self.duration_ms = elapsed_ns / 1_000_000.0

    def to_agent_run_payload(self) -> AgentRunPayload:
        """Fold the recorded steps and child-run costs into an :class:`AgentRunPayload`.

        A run that has not been ended yet is reported with the current clock
        reading as its end time; the stored ``end_ns`` is left untouched.
        Token totals come from the recorded steps only, while cost totals
        also include the costs reported by child runs.
        """
        finished_ns = _now_ns() if self.end_ns is None else self.end_ns
        elapsed_ms = (finished_ns - self.start_ns) / 1_000_000.0

        # Per-step totals.
        # NOTE(review): nesting reconstructed from flattened source - the
        # model-call count is taken to be gated on token usage and the
        # tool-call count to apply to every step; confirm.
        input_tokens = 0
        output_tokens = 0
        all_tokens = 0
        input_cost = 0.0
        output_cost = 0.0
        model_calls = 0
        tool_calls = 0
        for recorded in self._steps:
            usage = recorded.token_usage
            if usage:
                input_tokens += usage.input_tokens
                output_tokens += usage.output_tokens
                all_tokens += usage.total_tokens
                model_calls += 1
            tool_calls += len(recorded.tool_calls)
            step_cost = recorded.cost
            if step_cost:
                input_cost += step_cost.input_cost_usd
                output_cost += step_cost.output_cost_usd

        # Costs handed up by child runs are summed on their own first and
        # then added to the step totals.
        child_input_cost = 0.0
        child_output_cost = 0.0
        for bubbled in self._child_run_costs:
            child_input_cost += bubbled.input_cost_usd
            child_output_cost += bubbled.output_cost_usd
        input_cost += child_input_cost
        output_cost += child_output_cost

        return AgentRunPayload(
            agent_run_id=self.agent_run_id,
            agent_name=self.agent_name,
            trace_id=self.trace_id,
            root_span_id=self.root_span_id,
            total_steps=len(self._steps),
            total_model_calls=model_calls,
            total_tool_calls=tool_calls,
            total_token_usage=TokenUsage(
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                total_tokens=all_tokens,
            ),
            total_cost=CostBreakdown(
                input_cost_usd=input_cost,
                output_cost_usd=output_cost,
                total_cost_usd=input_cost + output_cost,
            ),
            status=self.status,
            start_time_unix_nano=self.start_ns,
            end_time_unix_nano=finished_ns,
            duration_ms=elapsed_ms,
            termination_reason=self.termination_reason,
        )
918
+
919
+
920
class AgentRunContextManager:
    """Sync/async context manager handed out by :meth:`~spanforge._tracer.Tracer.agent_run`.

    Entering creates an :class:`AgentRunContext` and pushes it onto the run
    stack; exiting finalises the run, pops it again, reports its cost to an
    enclosing run (if there is one) and emits the agent-run event.
    """

    def __init__(self, agent_name: str) -> None:
        """Remember the agent name; the run itself is created on entry."""
        self._agent_name = agent_name
        self._ctx: AgentRunContext | None = None

    def __enter__(self) -> AgentRunContext:
        """Create the run context, push it onto the run stack and return it."""
        run = AgentRunContext(
            agent_name=self._agent_name,
            agent_run_id=_span_id(),
            trace_id=_trace_id(),
            root_span_id=_span_id(),
            start_ns=_now_ns(),
        )
        self._ctx = run
        # The stack is an immutable tuple: publish a new tuple with this run
        # appended and keep the token needed to undo the change on exit.
        self._run_token: contextvars.Token[tuple[AgentRunContext, ...]] = _run_stack_var.set(
            (*_run_stack(), run)
        )
        return run

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """Finalise the run, pop it from the stack and emit it.

        An in-flight exception is recorded on the run only while its status
        is still ``"ok"``.  Always returns ``False`` so the exception keeps
        propagating.
        """
        assert self._ctx is not None
        run = self._ctx

        if exc_val is not None and run.status == "ok":
            run.record_error(exc_val)
        run.end()

        # Put the run stack back to what it was before __enter__.
        _run_stack_var.reset(self._run_token)

        # With this run popped, whatever is now on top is the enclosing run;
        # hand it this run's total cost.
        enclosing = _run_stack()
        if enclosing:
            enclosing[-1].record_child_run_cost(run.to_agent_run_payload().total_cost)

        # Emission is best-effort: failures go to the stream module's error
        # handler, and are dropped if that module could not be imported.
        stream = None
        try:
            from spanforge import _stream as stream  # noqa: PLC0415
            stream.emit_agent_run(run)
        except Exception as exc:
            if stream is not None:
                stream._handle_export_error(exc)

        return False

    # ------------------------------------------------------------------
    # Async context manager protocol
    # ------------------------------------------------------------------

    async def __aenter__(self) -> AgentRunContext:
        """Async entry; delegates to ``__enter__``."""
        return self.__enter__()

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> bool:
        """Async exit; delegates to ``__exit__``."""
        return self.__exit__(exc_type, exc_val, exc_tb)
989
+
990
+
991
+ # ---------------------------------------------------------------------------
992
+ # Helper: model name → ModelInfo
993
+ # ---------------------------------------------------------------------------
994
+
995
+
996
def _resolve_model_info(model_name: str) -> ModelInfo:
    """Guess the provider for *model_name* and wrap both in a :class:`ModelInfo`.

    The lower-cased name is matched against known prefixes in a fixed order
    (``"claude"`` → Anthropic, ``"gemini"`` → Vertex AI, ``"command"`` →
    Cohere, ``"mistral"``/``"mixtral"`` → Mistral AI,
    ``"llama"``/``"phi"``/``"qwen"`` → Ollama).  Names matching none of them
    fall back to :attr:`GenAISystem.OPENAI`.  The returned ``name`` is the
    original, un-lowered string.
    """
    lowered = model_name.lower()
    # First matching row wins; each row pairs a tuple of prefixes with the
    # system reported for them.
    prefix_table = (
        (("claude",), GenAISystem.ANTHROPIC),
        (("gemini",), GenAISystem.VERTEX_AI),
        (("command",), GenAISystem.COHERE),
        (("mistral", "mixtral"), GenAISystem.MISTRAL_AI),
        (("llama", "phi", "qwen"), GenAISystem.OLLAMA),
    )
    system = next(
        (provider for prefixes, provider in prefix_table if lowered.startswith(prefixes)),
        GenAISystem.OPENAI,
    )
    return ModelInfo(system=system, name=model_name)