spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,495 @@
1
+ """spanforge.export.datadog — Datadog trace/metric exporter.
2
+
3
+ Delivers SpanForge events to the **Datadog Agent** (trace intake on
4
+ ``/v0.3/traces``) and, when an API key is supplied, to the **Datadog Metrics
5
+ API** (``/api/v2/series``).
6
+
7
+ Transport
8
+ ---------
9
+ Uses :func:`urllib.request.urlopen` in a thread-pool executor so the event
10
+ loop is never blocked. No external dependencies are required — stdlib only.
11
+
12
+ Traces vs metrics
13
+ -----------------
14
+ * Every event that carries a ``trace_id`` is forwarded to the Agent's trace
15
+ intake as a Datadog APM span.
16
+ * Numeric fields enumerated in :data:`_METRIC_FIELDS` are forwarded to the
17
+ Datadog Metrics API (requires ``api_key``).
18
+
19
+ Usage::
20
+
21
+ from spanforge.export.datadog import DatadogExporter
22
+
23
+ exporter = DatadogExporter(
24
+ service="my-llm-app",
25
+ env="production",
26
+ api_key="dd-api-key",
27
+ )
28
+ await exporter.export(event)
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import asyncio
34
+ import ipaddress
35
+ import json
36
+ import secrets
37
+ import socket
38
+ import urllib.error
39
+ import urllib.parse
40
+ import urllib.request
41
+ from dataclasses import dataclass, field
42
+ from datetime import datetime, timezone
43
+ from typing import TYPE_CHECKING, Any
44
+
45
+ from spanforge.exceptions import ExportError
46
+
47
+ if TYPE_CHECKING:
48
+ from collections.abc import Sequence
49
+
50
+ from spanforge.event import Event
51
+
52
+ __all__ = [
53
+ "_METRIC_FIELDS",
54
+ "DatadogExporter",
55
+ "DatadogResourceAttributes",
56
+ ]
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # Metric fields extracted from event payloads
60
+ # ---------------------------------------------------------------------------
61
+
62
+ #: Payload keys that are surfaced as Datadog custom metrics when numeric.
63
+ _METRIC_FIELDS: frozenset[str] = frozenset(
64
+ {
65
+ "cost_usd",
66
+ "token_count",
67
+ "latency_ms",
68
+ "duration_ms",
69
+ "prompt_tokens",
70
+ "completion_tokens",
71
+ "total_tokens",
72
+ "input_tokens",
73
+ "output_tokens",
74
+ "cached_tokens",
75
+ "reasoning_tokens",
76
+ }
77
+ )
78
+
79
+ # ---------------------------------------------------------------------------
80
+ # Resource attributes
81
+ # ---------------------------------------------------------------------------
82
+
83
+
84
@dataclass(frozen=True)
class DatadogResourceAttributes:
    """Resource-level metadata rendered as Datadog ``key:value`` tags.

    Args:
        service: Value for the ``service`` tag.
        env:     Value for the ``env`` tag.
        version: Value for the ``version`` tag (default ``"0.0.0"``).
        extra:   Further tag names mapped to their values; emitted after the
                 three standard tags, in insertion order.
    """

    service: str
    env: str
    version: str = "0.0.0"
    extra: dict[str, str] = field(default_factory=dict)

    def to_tags(self) -> list[str]:
        """Return every tag as a ``"key:value"`` string, standard tags first."""
        standard = (
            ("service", self.service),
            ("env", self.env),
            ("version", self.version),
        )
        # Pairs are concatenated rather than merged into one dict so that an
        # ``extra`` entry sharing a standard name yields a second tag instead
        # of replacing the first.
        return [f"{name}:{value}" for name, value in (*standard, *self.extra.items())]
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Validation helpers
114
+ # ---------------------------------------------------------------------------
115
+
116
+
117
+ def _is_private_ip_literal(host: str) -> bool:
118
+ try:
119
+ addr = ipaddress.ip_address(host)
120
+ except ValueError:
121
+ return False
122
+ return addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_multicast
123
+
124
+
125
+ def _validate_http_url(url: str, param_name: str = "url", *, allow_private_addresses: bool = False) -> None: # noqa: E501
126
+ parsed = urllib.parse.urlparse(url)
127
+ if parsed.scheme not in {"http", "https"} or not parsed.netloc:
128
+ raise ValueError(
129
+ f"{param_name} must be a valid http:// or https:// URL; got {url!r}"
130
+ )
131
+ if not allow_private_addresses:
132
+ host = parsed.hostname or ""
133
+ if _is_private_ip_literal(host):
134
+ raise ValueError(
135
+ f"{param_name} resolves to a private/loopback/link-local IP address "
136
+ f"({host!r}). Set allow_private_addresses=True to permit this."
137
+ )
138
+ # DNS-based SSRF check — best-effort; DNS failure is non-fatal.
139
+ if host and not _is_private_ip_literal(host):
140
+ try:
141
+ resolved = socket.gethostbyname(host)
142
+ addr = ipaddress.ip_address(resolved)
143
+ if addr.is_private or addr.is_loopback or addr.is_link_local:
144
+ raise ValueError(
145
+ f"{param_name} hostname {host!r} resolves to a private/loopback/"
146
+ f"link-local address ({resolved}). "
147
+ "Set allow_private_addresses=True to permit this."
148
+ )
149
+ except OSError: # DNS failure — allow through
150
+ pass
151
+
152
+
153
+ def _validate_dd_site(dd_site: str) -> None:
154
+ """Raise *ValueError* if *dd_site* is not a plain hostname (no scheme, no spaces, has a dot)."""
155
+ if not dd_site:
156
+ raise ValueError("dd_site must be a non-empty hostname (e.g. 'datadoghq.com'), got empty string") # noqa: E501
157
+ if "/" in dd_site:
158
+ raise ValueError(
159
+ f"dd_site must be a plain hostname without a URL scheme or path; got {dd_site!r}"
160
+ )
161
+ if " " in dd_site:
162
+ raise ValueError(
163
+ f"dd_site must not contain spaces; got {dd_site!r}"
164
+ )
165
+ if "." not in dd_site:
166
+ raise ValueError(
167
+ f"dd_site must be a fully-qualified hostname with at least one dot; got {dd_site!r}"
168
+ )
169
+
170
+
171
+ # ---------------------------------------------------------------------------
172
+ # Timestamp helpers
173
+ # ---------------------------------------------------------------------------
174
+
175
+
176
+ def _iso_to_epoch_ns(ts: str) -> int:
177
+ """Convert an ISO-8601 timestamp string to nanoseconds since the Unix epoch."""
178
+ try:
179
+ dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
180
+ if dt.tzinfo is None:
181
+ dt = dt.replace(tzinfo=timezone.utc)
182
+ return int(dt.timestamp() * 1_000_000_000)
183
+ except ValueError as exc:
184
+ raise ExportError(
185
+ "datadog",
186
+ f"cannot parse event timestamp {ts!r}: {exc}",
187
+ ) from exc
188
+
189
+
190
def _iso_to_epoch_us(ts: str) -> int:
    """Convert an ISO-8601 timestamp string to whole microseconds since the Unix epoch.

    Delegates parsing to :func:`_iso_to_epoch_ns` and floor-divides the
    nanosecond result by 1000.
    """
    nanoseconds = _iso_to_epoch_ns(ts)
    return nanoseconds // 1_000
193
+
194
+
195
+ # ---------------------------------------------------------------------------
196
+ # Span helpers
197
+ # ---------------------------------------------------------------------------
198
+
199
+
200
+ def _make_span_id() -> int:
201
+ """Generate a random 64-bit span ID as an unsigned integer."""
202
+ return secrets.randbits(64)
203
+
204
+
205
+ def _trace_id_to_int(trace_id: str | None) -> int:
206
+ """Convert a hex trace-id string to an unsigned 64-bit integer (low 64 bits)."""
207
+ if not trace_id:
208
+ return _make_span_id()
209
+ try:
210
+ # Datadog uses 64-bit trace IDs; take the low 64 bits
211
+ return int(trace_id[-16:], 16)
212
+ except (ValueError, TypeError):
213
+ return _make_span_id()
214
+
215
+
216
+ def _span_id_to_int(span_id: str | None) -> int:
217
+ """Convert a hex span-id string to an unsigned 64-bit integer."""
218
+ if not span_id:
219
+ return _make_span_id()
220
+ try:
221
+ return int(span_id[-16:], 16)
222
+ except (ValueError, TypeError):
223
+ return _make_span_id()
224
+
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # Main exporter
228
+ # ---------------------------------------------------------------------------
229
+
230
+
231
class DatadogExporter:
    """Async exporter that sends SpanForge events to Datadog.

    Events with a ``trace_id`` are forwarded to the Datadog Agent as APM
    spans.  Numeric fields listed in :data:`_METRIC_FIELDS` are sent as
    custom metrics (requires ``api_key``).

    Args:
        service:    Datadog ``service`` tag.
        env:        Datadog ``env`` tag.
        agent_url:  Datadog Agent base URL (default ``"http://localhost:8126"``).
        api_key:    Datadog API key for the Metrics API (optional).
        dd_site:    Datadog site hostname used for Metrics API
                    (e.g. ``"datadoghq.com"``).  When omitted,
                    ``"datadoghq.com"`` is used.
        timeout:    Per-request timeout in seconds (default 10.0).
        allow_private_addresses:
                    When ``False`` (default) a custom ``agent_url`` that is,
                    or resolves to, a private/loopback/link-local address is
                    rejected.  The built-in default ``agent_url`` is always
                    accepted.

    Raises:
        ValueError: If any constructor argument fails validation.
    """

    #: Address of a locally running Agent; also the ``agent_url`` default.
    _DEFAULT_AGENT_URL = "http://localhost:8126"  # NOSONAR

    def __init__(  # noqa: PLR0913
        self,
        service: str,
        env: str = "production",
        *,
        agent_url: str = _DEFAULT_AGENT_URL,
        api_key: str | None = None,
        dd_site: str | None = None,
        timeout: float = 10.0,
        allow_private_addresses: bool = False,
    ) -> None:
        if not service:
            raise ValueError("service must be a non-empty string")
        if timeout <= 0:
            raise ValueError("timeout must be positive")
        # "localhost" resolves to a loopback address, so subjecting the
        # built-in default to the private-address check would make
        # ``DatadogExporter("svc")`` raise. The check exists for
        # caller-supplied URLs, so the default is exempt from it.
        private_ok = allow_private_addresses or agent_url == self._DEFAULT_AGENT_URL
        _validate_http_url(agent_url, "agent_url", allow_private_addresses=private_ok)
        if dd_site is not None:
            _validate_dd_site(dd_site)

        self._service = service
        self._env = env
        self._agent_url = agent_url.rstrip("/")
        self._api_key: str | None = api_key
        self._dd_site: str | None = dd_site
        self._timeout = timeout
        self._resource = DatadogResourceAttributes(service=service, env=env)

    # ------------------------------------------------------------------
    # Public conversion API
    # ------------------------------------------------------------------

    def to_dd_span(self, event: Event) -> dict[str, Any]:
        """Convert a SpanForge :class:`~spanforge.event.Event` to a Datadog APM span dict.

        Args:
            event: The event to convert.

        Returns:
            A dict compatible with the Datadog Agent v0.3/traces payload.

        Raises:
            ExportError: If the event timestamp cannot be parsed.
        """
        start_ns = _iso_to_epoch_ns(event.timestamp)
        duration_ns = self._duration_ns(event.payload.get("duration_ms", 0))

        trace_id = _trace_id_to_int(event.trace_id)
        span_id = _span_id_to_int(event.span_id)

        meta: dict[str, str] = {
            "llm.source": event.source,
            "llm.event_type": str(event.event_type),
        }
        if event.org_id:
            meta["llm.org_id"] = event.org_id
        if event.team_id:
            meta["llm.team_id"] = event.team_id
        if event.actor_id:
            meta["llm.actor_id"] = event.actor_id
        if event.session_id:
            meta["llm.session_id"] = event.session_id

        # Surface tags — Tags is dict-like, use .get() not getattr
        if event.tags:
            for tag_field in ("env", "model", "region", "version"):
                val = event.tags.get(tag_field, None)
                if val:
                    meta[f"llm.tag.{tag_field}"] = str(val)

        # Flatten top-level payload string fields into meta
        for k, v in event.payload.items():
            if isinstance(v, str):
                meta[f"llm.{k}"] = v

        return {
            "name": str(event.event_type),
            "service": self._service,
            "resource": str(event.event_type),
            "type": "custom",
            "trace_id": trace_id,
            "span_id": span_id,
            "start": start_ns,
            "duration": duration_ns,
            "meta": meta,
            "error": 0,
        }

    def to_dd_metric_series(self, event: Event) -> list[dict[str, Any]]:
        """Extract numeric payload fields as Datadog metric series entries.

        Only fields listed in :data:`_METRIC_FIELDS` with finite, non-bool
        numeric values are emitted.  Returns an empty list if none qualify.

        Args:
            event: The event to inspect.

        Returns:
            A list of Datadog metric series dicts (may be empty).

        Raises:
            ExportError: If the event timestamp cannot be parsed.
        """
        series: list[dict[str, Any]] = []
        ts_sec = _iso_to_epoch_ns(event.timestamp) // 1_000_000_000
        tags = list(self._resource.to_tags())
        if event.org_id:
            tags.append(f"org:{event.org_id}")

        for key, value in event.payload.items():
            if key not in _METRIC_FIELDS:
                continue
            # Skip booleans — they satisfy isinstance(v, (int, float)) on Python
            if isinstance(value, bool):
                continue
            if not isinstance(value, (int, float)):
                continue
            try:
                point_value = float(value)
            except OverflowError:  # int too large for a float
                continue
            # NaN/±inf are serialised by json.dumps as bare NaN/Infinity,
            # which is not valid JSON and would fail the whole request.
            if point_value != point_value or abs(point_value) == float("inf"):
                continue
            series.append(
                {
                    "metric": f"llm.{key}",
                    # NOTE(review): 0 looks like the "unspecified" metric type
                    # in the v2 series API — confirm gauge is not intended.
                    "type": 0,
                    "points": [{"timestamp": ts_sec, "value": point_value}],
                    # Own copy so callers can amend one entry's tags safely.
                    "tags": list(tags),
                }
            )
        return series

    async def export(self, event: Event) -> None:
        """Export a single event to Datadog.

        Sends as an APM trace span when ``event.trace_id`` is set. Additionally
        sends any numeric metric fields to the Metrics API if ``api_key`` is set.
        Both requests, when present, run concurrently.

        Args:
            event: The event to export.

        Raises:
            ExportError: On HTTP or network errors.
        """
        # Convert before creating any coroutine so that a conversion error
        # cannot leave an un-awaited coroutine behind.
        metric_series = self.to_dd_metric_series(event) if self._api_key else []

        pending = []
        if event.trace_id:
            pending.append(self._send_traces([event]))
        if metric_series:
            pending.append(self._send_metrics(metric_series))

        if pending:
            await asyncio.gather(*pending)

    async def export_batch(self, events: Sequence[Event]) -> None:
        """Export multiple events to Datadog in parallel.

        All events carrying a ``trace_id`` go out in one trace request and, if
        ``api_key`` is set, all metric series in one metrics request; the two
        requests run concurrently.

        Args:
            events: Sequence of events to deliver.

        Raises:
            ExportError: On HTTP or network errors.
        """
        if not events:
            return

        trace_events = [e for e in events if e.trace_id]
        all_series: list[dict[str, Any]] = []
        if self._api_key:
            for event in events:
                all_series.extend(self.to_dd_metric_series(event))

        pending = []
        if trace_events:
            pending.append(self._send_traces(trace_events))
        if all_series:
            pending.append(self._send_metrics(all_series))

        if pending:
            await asyncio.gather(*pending)

    # ------------------------------------------------------------------
    # Internal conversion helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _duration_ns(raw: Any) -> int:
        """Convert a ``duration_ms`` payload value to whole, non-negative nanoseconds.

        Payload content is not type-checked upstream of this method, so
        anything that is not a usable number (``None``, strings, booleans,
        NaN, infinity, ...) is treated as "no duration" and yields ``0``.
        """
        if raw is None or isinstance(raw, (bool, str, bytes)):
            return 0
        try:
            nanos = int(float(raw) * 1_000_000)
        except (TypeError, ValueError, OverflowError):
            return 0
        return max(0, nanos)

    @staticmethod
    def _group_spans_by_trace(spans: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
        """Partition *spans* into one list per ``trace_id``, keeping first-seen order."""
        grouped: dict[Any, list[dict[str, Any]]] = {}
        for span in spans:
            grouped.setdefault(span["trace_id"], []).append(span)
        return list(grouped.values())

    # ------------------------------------------------------------------
    # Internal HTTP helpers
    # ------------------------------------------------------------------

    async def _send_traces(self, events: Sequence[Event]) -> None:
        """Send *events* to the Datadog Agent trace intake.

        Args:
            events: Events to convert to APM spans and send.

        Raises:
            ExportError: On HTTP or network errors.
        """
        spans = [self.to_dd_span(e) for e in events]
        # Agent expects: [[span, span, ...]] — list of traces, each trace is a
        # list of spans. A batch may mix events from several traces, so spans
        # are grouped per trace ID rather than sent as one combined trace.
        payload = json.dumps(self._group_spans_by_trace(spans)).encode("utf-8")

        url = f"{self._agent_url}/v0.3/traces"
        headers = {
            "Content-Type": "application/json",
            "Datadog-Meta-Lang": "python",
        }

        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._do_post, url, payload, headers, "datadog-traces")

    async def _send_metrics(self, series: list[dict[str, Any]]) -> None:
        """Send *series* to the Datadog Metrics API.

        Args:
            series: List of metric series dicts.

        Raises:
            ExportError: On HTTP or network errors.
        """
        dd_site = self._dd_site or "datadoghq.com"
        url = f"https://api.{dd_site}/api/v2/series"
        payload = json.dumps({"series": series}).encode("utf-8")
        headers = {
            "Content-Type": "application/json",
            "DD-API-KEY": self._api_key or "",
        }
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, self._do_post, url, payload, headers, "datadog-metrics")

    def _do_post(self, url: str, body: bytes, headers: dict[str, str], context: str) -> None:
        """Perform a synchronous HTTP POST (called in executor).

        Args:
            url:     Target URL.
            body:    Request body bytes.
            headers: HTTP headers.
            context: Human-readable context for error messages.

        Raises:
            ExportError: On HTTP or network failure.
            EgressViolationError: If the endpoint is blocked by egress policy
                (raised by ``check_egress``).
        """
        from spanforge.egress import check_egress  # noqa: PLC0415

        check_egress(url, backend="datadog")

        req = urllib.request.Request(url=url, data=body, headers=headers, method="POST")  # noqa: S310  # NOSONAR
        try:
            with urllib.request.urlopen(req, timeout=self._timeout) as resp:  # noqa: S310  # NOSONAR
                # Drain the response so the connection is released cleanly.
                resp.read()
        except urllib.error.HTTPError as exc:
            raise ExportError(
                "datadog", f"HTTP {exc.code} from {url}: {exc.reason}"
            ) from exc
        except OSError as exc:
            raise ExportError("datadog", f"network error posting to {url}: {exc}") from exc

    # ------------------------------------------------------------------
    # dunder
    # ------------------------------------------------------------------

    def __repr__(self) -> str:
        # The API key is deliberately left out of the representation.
        return (
            f"DatadogExporter(service={self._service!r}, env={self._env!r}, "
            f"agent_url={self._agent_url!r})"
        )