spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
@@ -0,0 +1,811 @@
1
+ """OTLP-compatible JSON exporter for spanforge events.
2
+
3
+ Produces OTLP/JSON payloads (spans *or* log records) that can be forwarded to
4
+ any OTLP collector (Datadog, Grafana Tempo, Honeycomb, Elastic, Splunk, …).
5
+
6
+ **No opentelemetry-sdk dependency** — this module builds the OTLP wire format
7
+ from the stdlib only. If you already have the OTel SDK installed you can pipe
8
+ the output through the SDK's exporters as a dict; the schema is 1-to-1.
9
+
10
+ Format selection
11
+ ----------------
12
+ * Event **with** ``trace_id`` → OTLP *span* (``resourceSpans``).
13
+ * Event **without** ``trace_id`` → OTLP *log record* (``resourceLogs``).
14
+
15
+ Performance
16
+ -----------
17
+ Serialisation of 500 events is well under 200 ms (target: < 200 ms) because
18
+ every field mapping is a pure Python dict operation with no I/O on the hot path.
19
+ Network I/O is isolated in :meth:`OTLPExporter._send` and runs in a thread-pool
20
+ executor so the event loop is never blocked.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import asyncio
26
+ import concurrent.futures
27
+ import contextvars
28
+ import hashlib
29
+ import ipaddress
30
+ import json
31
+ import socket
32
+ import urllib.error
33
+ import urllib.parse
34
+ import urllib.request
35
+ from dataclasses import dataclass, field
36
+ from datetime import datetime, timezone
37
+ from typing import TYPE_CHECKING, Any
38
+
39
+ from spanforge.exceptions import ExportError
40
+
41
+ if TYPE_CHECKING:
42
+ from collections.abc import Sequence
43
+
44
+ from spanforge.event import Event
45
+
46
+ __all__ = ["OTLPExporter", "ResourceAttributes", "extract_trace_context", "make_traceparent"]
47
+
48
+ # Scope name embedded in every OTLP payload (instrumentation scope).
49
+ _SCOPE_NAME = "spanforge"
50
+
51
+ # Hex-string lengths for W3C TraceContext IDs.
52
+ _TRACE_ID_HEX_LEN = 32
53
+ _SPAN_ID_HEX_LEN = 16
54
+
55
+ _FINISH_REASONS_KEY = "gen_ai.response.finish_reasons"
56
+ _TRACEPARENT_PARTS_COUNT = 4
57
+
58
+
59
+ def _is_private_ip_literal(host: str) -> bool:
60
+ """Return ``True`` if *host* is a private/loopback/link-local **literal** IP.
61
+
62
+ .. warning::
63
+ **SSRF limitation** — DNS hostnames are **not** resolved by this check.
64
+ A hostname such as ``"localhost"`` or ``"internal.corp"`` is *not*
65
+ blocked here. Only literal IPv4/IPv6 addresses are evaluated.
66
+ Use ``allow_private_endpoints=True`` in non-production environments when
67
+ targeting private endpoints by name.
68
+ """
69
+ try:
70
+ addr = ipaddress.ip_address(host)
71
+ except ValueError:
72
+ return False
73
+ return addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_multicast
74
+
75
+
76
+ def _validate_http_url(
77
+ url: str,
78
+ param_name: str = "url",
79
+ *,
80
+ allow_private_addresses: bool = False,
81
+ ) -> None:
82
+ """Raise *ValueError* if *url* is not a valid ``http://`` or ``https://`` URL."""
83
+ parsed = urllib.parse.urlparse(url)
84
+ if parsed.scheme not in {"http", "https"} or not parsed.netloc:
85
+ raise ValueError(f"{param_name} must be a valid http:// or https:// URL; got {url!r}")
86
+ if not allow_private_addresses:
87
+ host = parsed.hostname or ""
88
+ if _is_private_ip_literal(host):
89
+ raise ValueError(
90
+ f"{param_name} resolves to a private/loopback/link-local IP address "
91
+ f"({host!r}). Set allow_private_addresses=True to permit this."
92
+ )
93
+ # DNS-based SSRF check — best-effort; DNS failure is non-fatal.
94
+ if host and not _is_private_ip_literal(host):
95
+ try:
96
+ resolved = socket.gethostbyname(host)
97
+ addr = ipaddress.ip_address(resolved)
98
+ if addr.is_private or addr.is_loopback or addr.is_link_local:
99
+ raise ValueError(
100
+ f"{param_name} hostname {host!r} resolves to a private/loopback/"
101
+ f"link-local address ({resolved}). "
102
+ "Set allow_private_addresses=True to permit this."
103
+ )
104
+ except OSError: # DNS failure — allow through
105
+ pass
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Resource attributes
110
+ # ---------------------------------------------------------------------------
111
+
112
+
113
@dataclass(frozen=True)
class ResourceAttributes:
    """Immutable set of OTel resource attributes sent with every payload.

    Attributes:
        service_name: Emitted as the ``service.name`` resource attribute.
        deployment_environment: Emitted as ``deployment.environment.name``.
        extra: Further resource attributes, emitted after the two above in
            the mapping's iteration order.

    Example::

        res = ResourceAttributes(
            service_name="my-service",
            deployment_environment="staging",
            extra={"k8s.namespace": "default"},
        )
    """

    service_name: str
    deployment_environment: str = "production"
    extra: dict[str, str] = field(default_factory=dict)

    def to_otlp(self) -> list[dict[str, Any]]:
        """Render the resource as a fresh list of OTLP ``KeyValue`` dicts."""
        fixed_attrs: list[dict[str, Any]] = [
            _kv("service.name", self.service_name),
            # The ".name"-suffixed key is used in place of the older
            # ``deployment.environment`` key.
            _kv("deployment.environment.name", self.deployment_environment),
        ]
        extra_attrs = [_kv(name, value) for name, value in self.extra.items()]
        return fixed_attrs + extra_attrs
145
+
146
+
147
+ # ---------------------------------------------------------------------------
148
+ # OTLP wire-format helpers
149
+ # ---------------------------------------------------------------------------
150
+
151
+
152
def _kv(key: str, value: Any) -> dict[str, Any]:
    """Pair *key* with *value* wrapped as an OTLP ``AnyValue``.

    Returns:
        A ``{"key": ..., "value": ...}`` attribute dict.
    """
    wrapped_value = _otlp_value(value)
    return dict(key=key, value=wrapped_value)
155
+
156
+
157
+ def _otlp_value(v: Any) -> dict[str, Any]:
158
+ """Wrap a Python scalar in the appropriate OTLP ``AnyValue`` dict."""
159
+ if isinstance(v, bool):
160
+ return {"boolValue": v}
161
+ if isinstance(v, int):
162
+ # OTLP int64 is encoded as a JSON string to preserve precision.
163
+ return {"intValue": str(v)}
164
+ if isinstance(v, float):
165
+ return {"doubleValue": v}
166
+ return {"stringValue": str(v)}
167
+
168
+
169
+ def _ts_to_unix_nano(ts: str) -> int:
170
+ """Convert an ISO-8601 UTC timestamp string to nanoseconds since epoch.
171
+
172
+ Supports both ``Z`` and ``+00:00`` suffixes. Microsecond precision is
173
+ preserved; fractional nanoseconds are truncated.
174
+
175
+ Args:
176
+ ts: ISO-8601 UTC string, e.g. ``"2024-01-15T12:34:56.789012Z"``.
177
+
178
+ Returns:
179
+ Integer nanoseconds since the Unix epoch.
180
+ """
181
+ normalised = ts.replace("Z", "+00:00")
182
+ dt = datetime.fromisoformat(normalised)
183
+ if dt.tzinfo is None:
184
+ dt = dt.replace(tzinfo=timezone.utc)
185
+ epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
186
+ delta = dt - epoch
187
+ # total_seconds() gives float with microsecond resolution; scale to ns.
188
+ return int(delta.total_seconds() * 1_000_000_000)
189
+
190
+
191
+ def _derive_span_id(event_id: str) -> str:
192
+ """Derive a valid 16-hex-char span ID from a ULID event ID.
193
+
194
+ Used as a fallback when the event carries no explicit ``span_id``.
195
+ The derivation is deterministic so the same event always maps to the
196
+ same synthetic span ID.
197
+
198
+ Args:
199
+ event_id: A 26-character ULID string.
200
+
201
+ Returns:
202
+ 16-character lower-case hex string.
203
+ """
204
+ return hashlib.sha256(event_id.encode("utf-8")).hexdigest()[:16]
205
+
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # OpenTelemetry semantic convention helpers
209
+ # ---------------------------------------------------------------------------
210
+
211
+ # OTLP span StatusCode enum values (opentelemetry.proto.trace.v1.Status.StatusCode): UNSET = 0, OK = 1, ERROR = 2.
212
+ _STATUS_CODE_OK = 1
213
+ _STATUS_CODE_ERROR = 2
214
+
215
+
216
+ def _gen_ai_model_attrs(model: dict[str, Any], attrs: list[dict[str, Any]]) -> None:
217
+ """Append gen_ai.system / gen_ai.request.model attrs from model dict."""
218
+ provider = model.get("provider")
219
+ if provider:
220
+ attrs.append(_kv("gen_ai.system", str(provider)))
221
+ name = model.get("name")
222
+ if name:
223
+ attrs.append(_kv("gen_ai.request.model", str(name)))
224
+ version = model.get("version")
225
+ if version:
226
+ attrs.append(_kv("gen_ai.request.model_version", str(version)))
227
+
228
+
229
+ def _gen_ai_token_attrs(token_usage: dict[str, Any], attrs: list[dict[str, Any]]) -> None:
230
+ """Append gen_ai usage token attrs from token_usage dict."""
231
+ prompt_tokens = token_usage.get("prompt_tokens")
232
+ if prompt_tokens is not None:
233
+ attrs.append(_kv("gen_ai.usage.input_tokens", int(prompt_tokens)))
234
+ completion_tokens = token_usage.get("completion_tokens")
235
+ if completion_tokens is not None:
236
+ attrs.append(_kv("gen_ai.usage.output_tokens", int(completion_tokens)))
237
+
238
+
239
+ def _gen_ai_attributes(event: Event) -> list[dict[str, Any]]:
240
+ """Build ``gen_ai.*`` OpenTelemetry GenAI semantic convention attributes.
241
+
242
+ Maps model info, token usage, and operation metadata from the event payload
243
+ to the standard OTel GenAI semconv namespace (semconv 1.27+).
244
+
245
+ See: https://opentelemetry.io/docs/specs/semconv/gen-ai/
246
+
247
+ Args:
248
+ event: The event whose payload is inspected.
249
+
250
+ Returns:
251
+ A (possibly empty) list of OTLP ``KeyValue`` dicts.
252
+ """
253
+ attrs: list[dict[str, Any]] = []
254
+ payload = event.payload
255
+
256
+ span_name = payload.get("span_name")
257
+ if span_name:
258
+ attrs.append(_kv("gen_ai.operation.name", str(span_name)))
259
+
260
+ model = payload.get("model")
261
+ if isinstance(model, dict):
262
+ _gen_ai_model_attrs(model, attrs)
263
+
264
+ token_usage = payload.get("token_usage")
265
+ if isinstance(token_usage, dict):
266
+ _gen_ai_token_attrs(token_usage, attrs)
267
+
268
+ status = payload.get("status")
269
+ error = payload.get("error")
270
+ if status == "error" or error:
271
+ attrs.append(_kv(_FINISH_REASONS_KEY, "error"))
272
+ elif status == "timeout":
273
+ attrs.append(_kv(_FINISH_REASONS_KEY, "timeout"))
274
+ elif status == "ok":
275
+ attrs.append(_kv(_FINISH_REASONS_KEY, "stop"))
276
+
277
+ return attrs
278
+
279
+
280
def _map_span_status(event: Event) -> dict[str, Any]:
    """Translate the payload ``status`` into an OTLP span status dict.

    A status of ``"error"`` or ``"timeout"`` produces the error code; any
    other value (including a missing status, which defaults to ``"ok"``)
    produces the OK code.  For error outcomes the message is the stringified
    payload ``error`` when that is truthy, else ``"Operation timed out"`` for
    timeouts, else no message at all.

    Args:
        event: The event carrying a ``status`` and optional ``error`` field.

    Returns:
        An OTLP ``{code, [message]}`` status dict.
    """
    payload = event.payload
    outcome = payload.get("status", "ok")
    if outcome not in ("error", "timeout"):
        return {"code": _STATUS_CODE_OK}

    span_status: dict[str, Any] = {"code": _STATUS_CODE_ERROR}
    detail = payload.get("error")
    if detail:
        span_status["message"] = str(detail)
    elif outcome == "timeout":
        span_status["message"] = "Operation timed out"
    return span_status
304
+
305
+
306
+ def _compute_end_nano(start_nano: int, event: Event) -> int:
307
+ """Compute ``endTimeUnixNano`` from start time plus payload ``duration_ms``.
308
+
309
+ If ``duration_ms`` is absent or cannot be parsed, falls back to
310
+ ``start_nano`` (zero-duration span — should only happen for events without
311
+ timing information such as ``span.started`` events).
312
+
313
+ Args:
314
+ start_nano: Span start time in nanoseconds since Unix epoch.
315
+ event: Event that may carry a ``duration_ms`` payload field.
316
+
317
+ Returns:
318
+ End time in nanoseconds since Unix epoch.
319
+ """
320
+ duration_ms = event.payload.get("duration_ms")
321
+ if duration_ms is not None:
322
+ try:
323
+ return start_nano + int(float(duration_ms) * 1_000_000)
324
+ except (TypeError, ValueError):
325
+ pass
326
+ return start_nano
327
+
328
+
329
+ def _flatten_payload(
330
+ payload: dict[str, Any],
331
+ prefix: str = "llm.payload",
332
+ ) -> list[dict[str, Any]]:
333
+ """Recursively flatten a nested dict to OTLP attribute key-value pairs.
334
+
335
+ Nested keys are joined with ``"."`` (dot notation).
336
+
337
+ Args:
338
+ payload: The dict to flatten.
339
+ prefix: Key prefix for all emitted attributes.
340
+
341
+ Returns:
342
+ A list of OTLP ``KeyValue`` dicts.
343
+ """
344
+ result: list[dict[str, Any]] = []
345
+ for k, v in payload.items():
346
+ full_key = f"{prefix}.{k}"
347
+ if isinstance(v, dict):
348
+ result.extend(_flatten_payload(v, full_key))
349
+ else:
350
+ result.append(_kv(full_key, v))
351
+ return result
352
+
353
+
354
def _event_to_attributes(event: Event) -> list[dict[str, Any]]:
    """Assemble the complete OTLP attribute list for an event.

    Attributes are emitted in a fixed order: the four envelope fields, then
    whichever identity fields are set, the tags, whichever integrity fields
    are set, the flattened payload, and finally the ``gen_ai.*`` attributes.

    Args:
        event: The :class:`~spanforge.event.Event` to describe.

    Returns:
        A list of OTLP ``KeyValue`` dicts.
    """
    envelope_fields = (
        ("llm.schema_version", "schema_version"),
        ("llm.event_id", "event_id"),
        ("llm.event_type", "event_type"),
        ("llm.source", "source"),
    )
    identity_fields = (
        ("llm.org_id", "org_id"),
        ("llm.team_id", "team_id"),
        ("llm.actor_id", "actor_id"),
        ("llm.session_id", "session_id"),
    )
    # Integrity / audit chain fields.
    integrity_fields = (
        ("llm.checksum", "checksum"),
        ("llm.signature", "signature"),
        ("llm.prev_id", "prev_id"),
    )

    attrs: list[dict[str, Any]] = [
        _kv(attr_name, getattr(event, field_name))
        for attr_name, field_name in envelope_fields
    ]

    for attr_name, field_name in identity_fields:
        field_value = getattr(event, field_name)
        if field_value is not None:
            attrs.append(_kv(attr_name, field_value))

    tags = event.tags
    if tags is not None:
        attrs.extend(_kv(f"llm.tag.{tag_key}", tag_val) for tag_key, tag_val in tags.items())

    for attr_name, field_name in integrity_fields:
        field_value = getattr(event, field_name)
        if field_value is not None:
            attrs.append(_kv(attr_name, field_value))

    # Payload fields are flattened under the default "llm.payload" prefix.
    attrs.extend(_flatten_payload(dict(event.payload)))

    # The gen_ai.* attributes sit alongside the llm.* namespace so that
    # consumers of either naming scheme find what they expect.
    attrs.extend(_gen_ai_attributes(event))
    return attrs
398
+
399
+
400
+ # ---------------------------------------------------------------------------
401
+ # OTLPExporter
402
+ # ---------------------------------------------------------------------------
403
+
404
+
405
+ class OTLPExporter:
406
+ """Async exporter that serialises spanforge events to the OTLP/JSON format.
407
+
408
+ Events that carry a ``trace_id`` are emitted as **OTLP spans**
409
+ (``resourceSpans``). Events without a ``trace_id`` are emitted as **OTLP
410
+ log records** (``resourceLogs``).
411
+
412
+ HTTP transport uses :func:`urllib.request.urlopen` inside a thread-pool
413
+ executor so the async event loop is never blocked.
414
+
415
+ Args:
416
+ endpoint: Full OTLP HTTP URL, e.g.
417
+ ``"http://otel-collector:4318/v1/traces"``.
418
+ headers: Optional extra HTTP request headers (e.g. API keys).
419
+ resource_attrs: :class:`ResourceAttributes` attached to every payload.
420
+ timeout: HTTP request timeout in seconds (default 5.0).
421
+ batch_size: Maximum events per :meth:`export_batch` call (default
422
+ 500). Larger batches are split automatically.
423
+
424
+ Example::
425
+
426
+ exporter = OTLPExporter(
427
+ endpoint="http://localhost:4318/v1/traces", # NOSONAR
428
+ resource_attrs=ResourceAttributes(service_name="llm-trace"),
429
+ )
430
+ await exporter.export(event)
431
+ """
432
+
433
+ def __init__(
434
+ self,
435
+ endpoint: str,
436
+ *,
437
+ headers: dict[str, str] | None = None,
438
+ resource_attrs: ResourceAttributes | None = None,
439
+ timeout: float = 5.0,
440
+ batch_size: int = 500,
441
+ allow_private_addresses: bool = False,
442
+ max_workers: int = 4,
443
+ ) -> None:
444
+ if not endpoint:
445
+ raise ValueError("endpoint must be a non-empty string")
446
+ _validate_http_url(endpoint, "endpoint", allow_private_addresses=allow_private_addresses)
447
+ if timeout <= 0:
448
+ raise ValueError("timeout must be positive")
449
+ if batch_size < 1:
450
+ raise ValueError("batch_size must be >= 1")
451
+ if max_workers < 1:
452
+ raise ValueError("max_workers must be >= 1")
453
+ self._endpoint = endpoint
454
+ self._headers: dict[str, str] = dict(headers) if headers else {}
455
+ self._resource_attrs: ResourceAttributes = resource_attrs or ResourceAttributes(
456
+ service_name="spanforge"
457
+ )
458
+ self._timeout = timeout
459
+ self._batch_size = batch_size
460
+ self._executor = concurrent.futures.ThreadPoolExecutor(
461
+ max_workers=max_workers,
462
+ thread_name_prefix="spanforge-otlp",
463
+ )
464
+
465
+ # ------------------------------------------------------------------
466
+ # Sync mapping API (pure, no I/O — safe to call in hot loops)
467
+ # ------------------------------------------------------------------
468
+
469
+ def to_otlp_span(self, event: Event) -> dict[str, Any]:
470
+ """Map a single event to an OTLP span dict.
471
+
472
+ If the event has no ``span_id``, a deterministic synthetic ID is derived
473
+ from the ``event_id``. If the event has no ``trace_id``, a zero-filled
474
+ placeholder is used (``"00…0"``).
475
+
476
+ Args:
477
+ event: The :class:`~spanforge.event.Event` to map.
478
+
479
+ Returns:
480
+ An OTLP-compatible span dict.
481
+ """
482
+ ts_nano = _ts_to_unix_nano(event.timestamp)
483
+ end_nano = _compute_end_nano(ts_nano, event)
484
+ span_id = event.span_id or _derive_span_id(event.event_id)
485
+ trace_id = event.trace_id or ("0" * 32)
486
+
487
+ span: dict[str, Any] = {
488
+ "traceId": trace_id,
489
+ "spanId": span_id,
490
+ "name": event.event_type,
491
+ # SPAN_KIND_CLIENT (3) — LLM calls are outgoing client requests.
492
+ "kind": 3,
493
+ "startTimeUnixNano": str(ts_nano),
494
+ "endTimeUnixNano": str(end_nano),
495
+ "attributes": _event_to_attributes(event),
496
+ "status": _map_span_status(event),
497
+ # Bit 0 set = sampled (W3C TraceContext §7.1.2)
498
+ "traceFlags": 1,
499
+ }
500
+ if event.parent_span_id is not None:
501
+ span["parentSpanId"] = event.parent_span_id
502
+
503
+ return span
504
+
505
+ def to_otlp_log(self, event: Event) -> dict[str, Any]:
506
+ """Map a single event to an OTLP log record dict.
507
+
508
+ Args:
509
+ event: The :class:`~spanforge.event.Event` to map.
510
+
511
+ Returns:
512
+ An OTLP-compatible log record dict.
513
+ """
514
+ ts_nano = _ts_to_unix_nano(event.timestamp)
515
+
516
+ record: dict[str, Any] = {
517
+ "timeUnixNano": str(ts_nano),
518
+ "observedTimeUnixNano": str(ts_nano),
519
+ "severityNumber": 9, # SEVERITY_NUMBER_INFO
520
+ "severityText": "INFO",
521
+ "body": {"stringValue": event.event_type},
522
+ "attributes": _event_to_attributes(event),
523
+ }
524
+ # Include tracing context even for log records if present.
525
+ if event.trace_id is not None:
526
+ record["traceId"] = event.trace_id
527
+ if event.span_id is not None:
528
+ record["spanId"] = event.span_id
529
+
530
+ return record
531
+
532
+ # ------------------------------------------------------------------
533
+ # Async export API
534
+ # ------------------------------------------------------------------
535
+
536
+ async def export(self, event: Event) -> dict[str, Any]:
537
+ """Export a single event as an OTLP payload and HTTP POST it.
538
+
539
+ Span vs log selection is automatic: events with a ``trace_id`` become
540
+ spans; all others become log records.
541
+
542
+ Args:
543
+ event: The event to export.
544
+
545
+ Returns:
546
+ The OTLP span or log record dict that was sent.
547
+
548
+ Raises:
549
+ ExportError: If the HTTP request fails.
550
+ """
551
+ if event.trace_id is not None:
552
+ record = self.to_otlp_span(event)
553
+ payload = self._wrap_spans([record])
554
+ else:
555
+ record = self.to_otlp_log(event)
556
+ payload = self._wrap_logs([record])
557
+
558
+ await self._send(payload)
559
+ return record
560
+
561
+ async def export_batch(self, events: Sequence[Event]) -> list[dict[str, Any]]:
562
+ """Export a sequence of events, batching spans and logs separately.
563
+
564
+ Spans and log records are split into two HTTP requests so each request
565
+ targets the correct OTLP endpoint format.
566
+
567
+ Args:
568
+ events: Sequence of events (at most :attr:`batch_size` per call;
569
+ larger sequences should be chunked by the caller).
570
+
571
+ Returns:
572
+ List of OTLP record dicts (spans first, then log records, in
573
+ original insertion order within each group).
574
+
575
+ Raises:
576
+ ExportError: If any HTTP request fails.
577
+ """
578
+ spans: list[dict[str, Any]] = []
579
+ logs: list[dict[str, Any]] = []
580
+ # Preserve per-type insertion order for the returned list.
581
+ records: list[dict[str, Any]] = []
582
+
583
+ for event in events:
584
+ if event.trace_id is not None:
585
+ r = self.to_otlp_span(event)
586
+ spans.append(r)
587
+ else:
588
+ r = self.to_otlp_log(event)
589
+ logs.append(r)
590
+ records.append(r)
591
+
592
+ if spans:
593
+ for i in range(0, len(spans), self._batch_size):
594
+ await self._send(self._wrap_spans(spans[i : i + self._batch_size]))
595
+ if logs:
596
+ for i in range(0, len(logs), self._batch_size):
597
+ await self._send(self._wrap_logs(logs[i : i + self._batch_size]))
598
+
599
+ return records
600
+
601
+ # ------------------------------------------------------------------
602
+ # OTLP envelope helpers
603
+ # ------------------------------------------------------------------
604
+
605
+ def _wrap_spans(self, spans: list[dict[str, Any]]) -> dict[str, Any]:
606
+ """Wrap span records in a ``resourceSpans`` OTLP envelope."""
607
+ return {
608
+ "resourceSpans": [
609
+ {
610
+ "resource": {"attributes": self._resource_attrs.to_otlp()},
611
+ "scopeSpans": [
612
+ {
613
+ "scope": {"name": _SCOPE_NAME},
614
+ "spans": spans,
615
+ }
616
+ ],
617
+ }
618
+ ]
619
+ }
620
+
621
+ def _wrap_logs(self, logs: list[dict[str, Any]]) -> dict[str, Any]:
622
+ """Wrap log records in a ``resourceLogs`` OTLP envelope."""
623
+ return {
624
+ "resourceLogs": [
625
+ {
626
+ "resource": {"attributes": self._resource_attrs.to_otlp()},
627
+ "scopeLogs": [
628
+ {
629
+ "scope": {"name": _SCOPE_NAME},
630
+ "logRecords": logs,
631
+ }
632
+ ],
633
+ }
634
+ ]
635
+ }
636
+
637
+ # ------------------------------------------------------------------
638
+ # HTTP transport (executor-based, non-blocking)
639
+ # ------------------------------------------------------------------
640
+
641
+ async def _send(self, payload: dict[str, Any]) -> None:
642
+ """Serialise *payload* to JSON and POST it to :attr:`_endpoint`.
643
+
644
+ Runs in a thread-pool executor so the async event loop is not blocked
645
+ during network I/O.
646
+
647
+ Args:
648
+ payload: A fully-built OTLP envelope dict.
649
+
650
+ Raises:
651
+ ExportError: On HTTP 4xx/5xx or network errors.
652
+ EgressViolationError: If the endpoint is blocked by egress policy.
653
+ """
654
+ from spanforge.egress import check_egress
655
+
656
+ check_egress(self._endpoint, backend="otlp")
657
+ body = json.dumps(payload, separators=(",", ":")).encode("utf-8")
658
+ request_headers = {"Content-Type": "application/json", **self._headers}
659
+ endpoint = self._endpoint
660
+ timeout = self._timeout
661
+
662
+ def _do_request() -> None:
663
+ req = urllib.request.Request( # NOSONAR
664
+ url=endpoint,
665
+ data=body,
666
+ headers=request_headers,
667
+ method="POST",
668
+ )
669
+ try:
670
+ with urllib.request.urlopen(req, timeout=timeout) as resp: # nosec B310
671
+ resp.read()
672
+ except urllib.error.HTTPError as exc:
673
+ raise ExportError(
674
+ "otlp",
675
+ f"HTTP {exc.code}: {exc.reason}",
676
+ ) from exc
677
+ except OSError as exc:
678
+ raise ExportError("otlp", str(exc)) from exc
679
+
680
+ loop = asyncio.get_running_loop()
681
+ # Propagate contextvars into the executor thread so active span/tracer
682
+ # context is visible to any code running inside _do_request.
683
+ ctx = contextvars.copy_context()
684
+ await loop.run_in_executor(self._executor, ctx.run, _do_request)
685
+
686
+ # ------------------------------------------------------------------
687
+ # Repr
688
+ # ------------------------------------------------------------------
689
+
690
def __repr__(self) -> str:
    """Return a debug representation with URL credentials removed.

    Only the user-info component (``user:password@``) of the endpoint's
    network location is dropped; host and port are kept exactly as
    configured.  Working on the raw ``netloc`` instead of the derived
    ``hostname`` / ``port`` properties matters for two reasons:

    * ``hostname`` strips the brackets of IPv6 literals, so rebuilding the
      netloc from it would turn ``http://[::1]:4318`` into the invalid
      ``http://::1:4318``.
    * ``port`` raises ``ValueError`` for a non-numeric or out-of-range
      port, which would make ``repr()`` itself raise.

    Returns:
        A string of the form
        ``OTLPExporter(endpoint='<scrubbed url>', batch_size=<n>)``.
    """
    try:
        parsed = urllib.parse.urlparse(self._endpoint)
    except ValueError:
        # urlparse rejects some malformed bracketed hosts.  Never echo the
        # raw endpoint here: it may contain the credentials being scrubbed.
        shown = "<unparseable endpoint>"
    else:
        # Everything after the last "@" is host[:port]; rpartition returns
        # the whole netloc unchanged when no user-info is present.
        host_and_port = parsed.netloc.rpartition("@")[2]
        shown = urllib.parse.urlunparse(parsed._replace(netloc=host_and_port))
    return (
        f"OTLPExporter(endpoint={shown!r}, "
        f"batch_size={self._batch_size!r})"
    )
703
+
704
+
705
+ # ---------------------------------------------------------------------------
706
+ # W3C TraceContext utilities (W3C Recommendation "Trace Context")
707
+ # ---------------------------------------------------------------------------
708
+
709
+
710
def make_traceparent(
    trace_id: str,
    span_id: str,
    *,
    sampled: bool = True,
) -> str:
    """Build a W3C ``traceparent`` header value.

    Produces a version-``00`` ``traceparent`` string that can be injected into
    outgoing HTTP requests to propagate distributed trace context.

    Args:
        trace_id: 32 lowercase hex characters (OTel trace ID); must not be
            all zeros.
        span_id:  16 lowercase hex characters (current span ID); must not be
            all zeros.
        sampled:  Whether the trace is sampled.  Controls the ``sampled`` bit
            of the ``trace-flags`` field.  Defaults to ``True``.

    Returns:
        A ``traceparent`` header value, e.g.
        ``"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"``.

    Raises:
        ValueError: If ``trace_id`` or ``span_id`` do not match the required
            format, or consist only of zeros (which W3C Trace Context defines
            as an invalid value).

    Example::

        headers["traceparent"] = make_traceparent(
            event.trace_id, event.span_id
        )
    """
    hex_digits = frozenset("0123456789abcdef")

    def _check(label: str, value: str, expected_len: int) -> None:
        # Shape check first so the error message names the actual problem.
        if len(value) != expected_len or not hex_digits.issuperset(value):
            raise ValueError(
                f"{label} must be {expected_len} lowercase hex characters; got {value!r}"
            )
        # An all-zero ID is well-formed hex but invalid per the spec;
        # receivers are required to ignore such a header.
        if not value.strip("0"):
            raise ValueError(f"{label} must not be all zeros; got {value!r}")

    _check("trace_id", trace_id, _TRACE_ID_HEX_LEN)
    _check("span_id", span_id, _SPAN_ID_HEX_LEN)

    flags = "01" if sampled else "00"
    return f"00-{trace_id}-{span_id}-{flags}"
747
+
748
+
749
def extract_trace_context(
    headers: dict[str, str],
) -> dict[str, Any] | None:
    """Extract W3C TraceContext from a ``traceparent`` / ``tracestate`` header dict.

    Parses the incoming ``traceparent`` header (case-insensitive key lookup)
    and returns the extracted trace context.  Returns ``None`` if the header
    is absent or malformed.

    A header is treated as malformed when any of the following holds:

    * it does not consist of exactly four ``-``-separated fields;
    * the version field is not ``00`` (the only version supported here);
    * ``trace-id`` is not 32 lowercase hex characters, or is all zeros;
    * ``parent-id`` is not 16 lowercase hex characters, or is all zeros;
    * ``trace-flags`` is not exactly 2 lowercase hex characters.

    Args:
        headers: A dict of HTTP headers (keys are matched case-insensitively).

    Returns:
        A dict with keys ``trace_id``, ``span_id``, ``sampled`` (bool), and
        optionally ``tracestate`` (str) if that header is present and
        non-empty; or ``None`` if no valid ``traceparent`` was found.

    Example::

        ctx = extract_trace_context(request.headers)
        if ctx:
            event = Event(
                event_type=...,
                source=...,
                payload=...,
                trace_id=ctx["trace_id"],
                parent_span_id=ctx["span_id"],
            )
    """
    hex_digits = frozenset("0123456789abcdef")
    # trace-flags is a single byte encoded as two hex characters.
    flags_hex_len = 2

    def _is_lower_hex(value: str, expected_len: int) -> bool:
        return len(value) == expected_len and hex_digits.issuperset(value)

    # Case-insensitive header lookup.
    lower_headers = {k.lower(): v for k, v in headers.items()}
    traceparent = lower_headers.get("traceparent")
    if not traceparent:
        return None

    parts = traceparent.strip().split("-")
    if len(parts) != _TRACEPARENT_PARTS_COUNT:
        return None
    version, trace_id, parent_span_id, trace_flags_hex = parts

    # Only version 00 is supported (future versions may have more parts).
    if version != "00":
        return None
    if not _is_lower_hex(trace_id, _TRACE_ID_HEX_LEN):
        return None
    if not _is_lower_hex(parent_span_id, _SPAN_ID_HEX_LEN):
        return None
    # Validate the flags field explicitly: a bare int(x, 16) would also
    # accept "0x1", "+1", "1", "0_1" or uppercase digits.
    if not _is_lower_hex(trace_flags_hex, flags_hex_len):
        return None
    # All-zero IDs are well-formed hex but defined as invalid by the spec.
    if not trace_id.strip("0") or not parent_span_id.strip("0"):
        return None

    # Safe: trace_flags_hex is known to be two hex digits at this point.
    flags_int = int(trace_flags_hex, 16)

    result: dict[str, Any] = {
        "trace_id": trace_id,
        "span_id": parent_span_id,
        "sampled": bool(flags_int & 0x01),
    }
    tracestate = lower_headers.get("tracestate")
    if tracestate:
        result["tracestate"] = tracestate
    return result