spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/event.py ADDED
@@ -0,0 +1,1052 @@
1
+ """Core event envelope for spanforge v0.1.
2
+
3
+ Every event emitted by every tool in the LLM Developer Toolkit must conform to
4
+ the :class:`Event` class defined here. This is the canonical Python
5
+ representation of the JSON event envelope specified in the Enterprise Product
6
+ Specification §3.1.
7
+
8
+ Design goals
9
+ ------------
10
+ * **Zero external dependencies** — only :mod:`datetime`, :mod:`json`,
11
+ :mod:`hashlib`, and :mod:`re` from the standard library.
12
+ * **``__slots__``** on all hot-path classes for minimal heap allocation.
13
+ * **Deterministic serialisation** — the same :class:`Event` always produces
14
+ the same JSON string; critical for HMAC signing.
15
+ * **Typed validation** — every validation failure is a
16
+ :class:`~spanforge.exceptions.SchemaValidationError` with the field name,
17
+ received value, and a clear reason; never a bare :exc:`ValueError`.
18
+ * **Immutability after creation** — envelope fields are read-only via
19
+ properties; mutation is limited to the ``sign()`` method (Phase 3) which sets
20
+ ``checksum``, ``signature``, and ``prev_id``.
21
+
22
+ Serialisation contract
23
+ ----------------------
24
+ ``Event.to_json()`` produces canonical JSON with:
25
+
26
+ * Keys sorted alphabetically at every nesting level.
27
+ * ``None`` values **omitted** (reduces wire size; missing key == ``null``).
28
+ * :class:`datetime.datetime` values formatted as ``"YYYY-MM-DDTHH:MM:SS.ffffffZ"``.
29
+ * :class:`~spanforge.types.EventType` values serialised as their string value.
30
+ * :class:`Tags` serialised as a JSON object with sorted string keys.
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ import datetime
36
+ import hashlib
37
+ import json
38
+ import re
39
+ import sys
40
+ from types import MappingProxyType
41
+ from typing import TYPE_CHECKING, Any, Final
42
+
43
+ from spanforge.exceptions import (
44
+ DeserializationError,
45
+ EventTypeError,
46
+ SchemaValidationError,
47
+ SerializationError,
48
+ )
49
+ from spanforge.types import _EVENT_TYPE_RE, EventType, is_registered, validate_custom
50
+ from spanforge.ulid import generate as _generate_ulid
51
+ from spanforge.ulid import validate as _validate_ulid
52
+
53
+ if TYPE_CHECKING:
54
+ from collections.abc import ItemsView, KeysView, Mapping, ValuesView
55
+
56
+ __all__ = ["SCHEMA_VERSION", "Event", "Tags"]
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # Constants
60
+ # ---------------------------------------------------------------------------
61
+
62
#: Schema version used as the default ``schema_version`` of new events.
SCHEMA_VERSION: Final[str] = "2.0"

#: Accepted schema versions for backward-compatibility (RFC-0001 §15.5).
_ACCEPTED_SCHEMA_VERSIONS: Final[frozenset[str]] = frozenset({"1.0", "2.0"})

# NOTE(review): presumably the shared "reason" text for type-check failures;
# its call sites are not visible in this part of the file.
_MUST_BE_STRING: Final[str] = "must be a string"

#: ``service-name@semver`` — e.g. ``my-agent@1.2.0`` or ``MyAgent@1.0.0``
#: RFC-0001 §5.1: first char letter, then letters/digits/._- ; ``@`` ; semver
#: An optional suffix introduced by ``.`` or ``-`` (letters, digits and dots)
#: may follow the three numeric components.
_SOURCE_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^[a-zA-Z][a-zA-Z0-9._\-]*@\d+\.\d+\.\d+(?:[.\-][a-zA-Z0-9.]+)?$"
)
#: ISO-8601 UTC datetime — EXACTLY 6 decimal places (RFC-0001 §6.1)
#: The trailing ``Z`` is mandatory; numeric UTC offsets do not match.
_TIMESTAMP_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z$"
)
#: Schema version — accepts major.minor or major.minor.patch (+ optional prerelease)
_SEMVER_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^\d+\.\d+(?:\.\d+)?(?:[.-][a-zA-Z0-9.]+)?$"
)
#: Trace ID — exactly 32 lowercase hex characters
_TRACE_ID_PATTERN: Final[re.Pattern[str]] = re.compile(r"^[0-9a-f]{32}$")
#: Span ID — exactly 16 lowercase hex characters
_SPAN_ID_PATTERN: Final[re.Pattern[str]] = re.compile(r"^[0-9a-f]{16}$")
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Tags
90
+ # ---------------------------------------------------------------------------
91
+
92
+
93
class Tags:
    """Immutable key-value tag container for :class:`Event`.

    Tags are arbitrary string key→value pairs that enrich an event with
    contextual metadata (e.g. ``env``, ``model``, ``region``).

    All keys and values must be non-empty strings.  Keys are stored in sorted
    order, and attribute assignment is rejected after construction to prevent
    accidental mutation of a live event.

    Example::

        tags = Tags(env="production", model="gpt-4o", region="us-east-1")
        tags["env"]          # "production"
        "model" in tags      # True
        dict(tags)           # {"env": "production", "model": "gpt-4o", ...}
    """

    __slots__ = ("_data",)

    # ``self`` is positional-only so that a tag literally named "self" can be
    # supplied, e.g. ``Tags(**{"self": "x"})`` when tags come from parsed JSON.
    # Without the ``/`` that call fails with "got multiple values for
    # argument 'self'".
    def __init__(self, /, **kwargs: str) -> None:
        """Create a new :class:`Tags` instance.

        Args:
            **kwargs: Arbitrary string key=value pairs.  Any key is allowed,
                including ``self``.

        Raises:
            SchemaValidationError: If any key or value is not a non-empty string.
        """
        for key, value in kwargs.items():
            if not isinstance(key, str) or not key:
                raise SchemaValidationError(
                    field=f"tags.{key!r}",
                    received=key,
                    reason="tag key must be a non-empty string",
                )
            if not isinstance(value, str) or not value:
                raise SchemaValidationError(
                    field=f"tags.{key}",
                    received=value,
                    reason="tag value must be a non-empty string",
                )
        # Store as a sorted snapshot.  object.__setattr__ is required because
        # this class's own __setattr__ always raises.
        object.__setattr__(self, "_data", dict(sorted(kwargs.items())))

    # ------------------------------------------------------------------
    # Read-only mapping interface
    # ------------------------------------------------------------------

    def __getitem__(self, key: str) -> str:
        return self._data[key]  # type: ignore[index]

    def __contains__(self, key: object) -> bool:
        return key in self._data

    def __iter__(self):  # type: ignore[override]  # noqa: ANN204
        return iter(self._data)

    def __len__(self) -> int:
        return len(self._data)

    def __setattr__(self, name: str, value: object) -> None:
        raise AttributeError("Tags is immutable; create a new instance instead")

    def __eq__(self, other: object) -> bool:
        # Compare by content against another Tags or a plain dict; defer to
        # the other operand for anything else.
        if isinstance(other, Tags):
            return self._data == other._data
        if isinstance(other, dict):
            return self._data == other
        return NotImplemented

    __hash__: None = None  # Tags is unhashable (mutable-equivalent semantics)

    def __repr__(self) -> str:
        kv = ", ".join(f"{k}={v!r}" for k, v in self._data.items())
        return f"Tags({kv})"

    def get(self, key: str, default: str | None = None) -> str | None:
        """Return the value for *key*, or *default* if not present."""
        return self._data.get(key, default)

    def keys(self) -> KeysView[str]:  # type: ignore[override]
        """Return tag keys."""
        return self._data.keys()

    def values(self) -> ValuesView[str]:  # type: ignore[override]
        """Return tag values."""
        return self._data.values()

    def items(self) -> ItemsView[str, str]:  # type: ignore[override]
        """Return (key, value) pairs."""
        return self._data.items()

    def to_dict(self) -> dict[str, str]:
        """Return a plain :class:`dict` copy of the tags."""
        return dict(self._data)
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # Event
192
+ # ---------------------------------------------------------------------------
193
+
194
+
195
+ class Event:
196
+ """The canonical event envelope for the LLM Developer Toolkit.
197
+
198
+ Every tool in the ecosystem creates events that conform to this class.
199
+ The envelope is designed to map cleanly to OTLP spans/log records (Phase 4)
200
+ and to carry optional HMAC signing for audit integrity (Phase 3).
201
+
202
+ Quick start
203
+ -----------
204
+ ::
205
+
206
+ from spanforge import Event, EventType, Tags
207
+
208
+ event = Event(
209
+ event_type=EventType.TRACE_SPAN_COMPLETED,
210
+ source="llm-trace@0.3.1",
211
+ payload={"span_name": "run_agent", "status": "ok"},
212
+ tags=Tags(env="production", model="gpt-4o"),
213
+ )
214
+ event.validate()
215
+ json_str = event.to_json()
216
+
217
+ Required fields
218
+ ---------------
219
+ * ``schema_version`` — defaults to :data:`SCHEMA_VERSION` (currently ``"2.0"``)
220
+ * ``event_id`` — auto-generated ULID if not supplied
221
+ * ``event_type`` — namespaced string or :class:`~spanforge.types.EventType`
222
+ * ``timestamp`` — UTC ISO-8601; auto-generated if not supplied
223
+ * ``source`` — ``"tool-name@semver"``
224
+ * ``payload`` — tool-specific data (non-empty dict)
225
+
226
+ All other fields are optional.
227
+
228
+ Thread safety
229
+ -------------
230
+ :class:`Event` instances are **not** thread-safe for concurrent mutation.
231
+ Create separate instances per thread/task.
232
+ """
233
+
234
+ __slots__ = (
235
+ "_actor_id",
236
+ # Integrity (mutated by sign() in Phase 3)
237
+ "_checksum",
238
+ "_event_id",
239
+ "_event_type",
240
+ # Context
241
+ "_org_id",
242
+ "_parent_span_id",
243
+ "_payload",
244
+ "_prev_id",
245
+ "_schema_version",
246
+ "_session_id",
247
+ "_signature",
248
+ "_source",
249
+ "_span_id",
250
+ # Tags
251
+ "_tags",
252
+ "_team_id",
253
+ "_timestamp",
254
+ # Tracing
255
+ "_trace_id",
256
+ # GA-05-D: Unknown fields preserved during deserialization
257
+ "_unknown_fields",
258
+ )
259
+
260
def __init__(  # noqa: PLR0913  # NOSONAR
    self,
    *,
    event_type: str | EventType,
    source: str,
    payload: dict[str, Any],
    schema_version: str = SCHEMA_VERSION,
    event_id: str | None = None,
    timestamp: str | None = None,
    trace_id: str | None = None,
    span_id: str | None = None,
    parent_span_id: str | None = None,
    org_id: str | None = None,
    team_id: str | None = None,
    actor_id: str | None = None,
    session_id: str | None = None,
    tags: Tags | None = None,
    checksum: str | None = None,
    signature: str | None = None,
    prev_id: str | None = None,
) -> None:
    """Create a new :class:`Event`.

    The constructor only stores its arguments; it performs no validation.
    Call :meth:`validate` afterwards to check the envelope.

    Auto-generated fields
    ---------------------
    * ``event_id`` — a new ULID is generated if not provided.
    * ``timestamp`` — current UTC time is used if not provided.

    Args:
        event_type: Namespaced event type (string or :class:`EventType`).
            ``EventType`` members are stored as their ``.value``; any other
            object is stored as ``str(event_type)``.
        source: Emitting tool in ``"name@semver"`` format.
        payload: Tool-specific event data (non-empty dict).  A ``dict`` or
            ``MappingProxyType`` is shallow-copied; when *checksum* is
            given the copy is additionally wrapped in a read-only
            ``MappingProxyType``.  Any other object is stored unchanged so
            that :meth:`validate` can report it.
        schema_version: Schema version string.  Defaults to
            :data:`SCHEMA_VERSION`.
        event_id: ULID.  Auto-generated if omitted.
        timestamp: UTC ISO-8601 string.  Set to the current UTC time if
            omitted.
        trace_id: 32-hex-char OpenTelemetry trace ID.
        span_id: 16-hex-char OpenTelemetry span ID.
        parent_span_id: 16-hex-char parent span ID.
        org_id: Organisation identifier.
        team_id: Team identifier.
        actor_id: User or service-account identifier.
        session_id: Session identifier grouping related events.
        tags: :class:`Tags` instance with string metadata.
        checksum: SHA-256 payload checksum (set by ``sign()``).
        signature: HMAC-SHA256 chain signature (set by ``sign()``).
        prev_id: ULID of previous event in audit chain (set by ``sign()``).
    """
    # --- Required fields -------------------------------------------
    object.__setattr__(self, "_schema_version", schema_version)
    object.__setattr__(
        self, "_event_id", event_id if event_id is not None else _generate_ulid()
    )
    # .value gives the canonical string for EventType members; str() is
    # unreliable across Python versions for mixed str+Enum types.
    _et_value: str = (
        event_type.value
        if isinstance(event_type, EventType)
        else str(event_type)
    )
    object.__setattr__(self, "_event_type", _et_value)
    object.__setattr__(
        self,
        "_timestamp",
        timestamp if timestamp is not None else _utcnow_iso(),
    )
    object.__setattr__(self, "_source", source)

    # Always take a private shallow snapshot of mapping payloads so that
    # later changes to the caller's dict (or to the dict behind a
    # caller-supplied MappingProxyType) cannot alter this event.  Signed
    # events (checksum set) additionally expose the snapshot only through
    # a read-only proxy, so top-level mutation raises TypeError.
    if isinstance(payload, (dict, MappingProxyType)):
        snapshot = dict(payload)
        object.__setattr__(
            self,
            "_payload",
            MappingProxyType(snapshot) if checksum is not None else snapshot,
        )
    else:
        # Non-mapping payload: store as-is so validate() can report the
        # type error cleanly.
        object.__setattr__(self, "_payload", payload)

    # --- Tracing ---------------------------------------------------
    object.__setattr__(self, "_trace_id", trace_id)
    object.__setattr__(self, "_span_id", span_id)
    object.__setattr__(self, "_parent_span_id", parent_span_id)

    # --- Context ---------------------------------------------------
    object.__setattr__(self, "_org_id", org_id)
    object.__setattr__(self, "_team_id", team_id)
    object.__setattr__(self, "_actor_id", actor_id)
    object.__setattr__(self, "_session_id", session_id)

    # --- Tags / Integrity ------------------------------------------
    object.__setattr__(self, "_tags", tags)
    object.__setattr__(self, "_checksum", checksum)
    object.__setattr__(self, "_signature", signature)
    object.__setattr__(self, "_prev_id", prev_id)

    # --- Unknown fields (GA-05-D: forward-compat) ---------------------
    # Starts empty; from_dict() replaces it when extra keys are present.
    object.__setattr__(self, "_unknown_fields", {})
368
+
369
+ # ------------------------------------------------------------------
370
+ # Read-only properties
371
+ # ------------------------------------------------------------------
372
+
373
@property
def schema_version(self) -> str:
    """Schema version string as stored at construction (e.g. ``"2.0"``)."""
    return self._schema_version  # type: ignore[return-value]

@property
def event_id(self) -> str:
    """ULID event identifier (auto-generated when not supplied)."""
    return self._event_id  # type: ignore[return-value]

@property
def event_type(self) -> str:
    """Namespaced event type string.

    :class:`EventType` members passed to the constructor are stored as
    their ``.value``, so this is always a plain string.
    """
    return self._event_type  # type: ignore[return-value]

@property
def timestamp(self) -> str:
    """UTC ISO-8601 timestamp string."""
    return self._timestamp  # type: ignore[return-value]

@property
def source(self) -> str:
    """Emitting tool in ``"name@semver"`` format."""
    return self._source  # type: ignore[return-value]
397
+
398
+ @property
399
+ def payload(self) -> Mapping[str, Any]:
400
+ """Tool-specific event payload.
401
+
402
+ Returns a read-only :class:`~types.MappingProxyType` view.
403
+ For signed events (where ``checksum`` is set) the internal store is
404
+ already a ``MappingProxyType``; any attempt to mutate via ``dict``
405
+ sub-access at the top level raises :exc:`TypeError` immediately.
406
+ """
407
+ p = self._payload
408
+ if isinstance(p, MappingProxyType):
409
+ return p # already frozen — return directly, no double-wrap
410
+ return MappingProxyType(p) # type: ignore[return-value]
411
+
412
@property
def trace_id(self) -> str | None:
    """32-hex-char OpenTelemetry trace ID, or ``None`` if not set."""
    return self._trace_id  # type: ignore[return-value]

@property
def span_id(self) -> str | None:
    """16-hex-char OpenTelemetry span ID, or ``None`` if not set."""
    return self._span_id  # type: ignore[return-value]

@property
def parent_span_id(self) -> str | None:
    """16-hex-char parent span ID, or ``None`` if not set."""
    return self._parent_span_id  # type: ignore[return-value]

@property
def org_id(self) -> str | None:
    """Organisation identifier."""
    return self._org_id  # type: ignore[return-value]

@property
def team_id(self) -> str | None:
    """Team identifier."""
    return self._team_id  # type: ignore[return-value]

@property
def actor_id(self) -> str | None:
    """User or service-account identifier."""
    return self._actor_id  # type: ignore[return-value]

@property
def session_id(self) -> str | None:
    """Session identifier grouping related events."""
    return self._session_id  # type: ignore[return-value]

@property
def tags(self) -> Tags | None:
    """Metadata tags, or ``None`` when the event carries none."""
    return self._tags  # type: ignore[return-value]

@property
def checksum(self) -> str | None:
    """SHA-256 payload checksum.  Set by ``sign()``."""
    return self._checksum  # type: ignore[return-value]

@property
def signature(self) -> str | None:
    """HMAC-SHA256 chain signature.  Set by ``sign()``."""
    return self._signature  # type: ignore[return-value]

@property
def unknown_fields(self) -> dict[str, Any]:
    """Fields present during deserialization that are not part of the known schema.

    Returns a shallow copy to prevent mutation of the internal store.
    Empty for events that were not built by :meth:`from_dict` with extra keys.
    """
    return dict(self._unknown_fields)  # type: ignore[arg-type]

@property
def prev_id(self) -> str | None:
    """ULID of the preceding event in the audit chain.  Set by ``sign()``."""
    return self._prev_id  # type: ignore[return-value]
474
+
475
+ # ------------------------------------------------------------------
476
+ # Equality & representation
477
+ # ------------------------------------------------------------------
478
+
479
+ def __eq__(self, other: object) -> bool:
480
+ if not isinstance(other, Event):
481
+ return NotImplemented
482
+ return self._event_id == other._event_id
483
+
484
+ def __hash__(self) -> int:
485
+ """Hash by event_id (ULID) — enables set/dict membership."""
486
+ return hash(self._event_id)
487
+
488
+ def __repr__(self) -> str:
489
+ return (
490
+ f"Event(event_id={self._event_id!r}, "
491
+ f"event_type={self._event_type!r}, "
492
+ f"source={self._source!r})"
493
+ )
494
+
495
+ # ------------------------------------------------------------------
496
+ # Validation
497
+ # ------------------------------------------------------------------
498
+
499
def validate(self) -> None:
    """Validate the envelope fields against the schema specification.

    Checks run in a fixed order and stop at the first failure:
    the six required fields (``schema_version``, ``event_id``,
    ``event_type``, ``timestamp``, ``source``, ``payload``), then the
    optional tracing IDs, the optional context IDs, and finally
    ``prev_id``.  Optional fields are only checked when they are not
    ``None``.

    Raises:
        SchemaValidationError: On the first field that fails validation.
            ``exc.field`` names the failing field;
            ``exc.reason`` explains the constraint.

    Example::

        event.validate()   # raises SchemaValidationError if invalid
    """
    # Required fields: (validator, stored value), in reporting order.
    required_checks = (
        (_validate_schema_version, self._schema_version),
        (_validate_event_id, self._event_id),
        (_validate_event_type, self._event_type),
        (_validate_timestamp, self._timestamp),
        (_validate_source, self._source),
        (_validate_payload, self._payload),
    )
    for check, stored in required_checks:
        check(stored)  # type: ignore[arg-type]

    # Optional tracing fields: (field name, stored value, hex length).
    hex_fields = (
        ("trace_id", self._trace_id, 32),
        ("span_id", self._span_id, 16),
        ("parent_span_id", self._parent_span_id, 16),
    )
    for label, stored, width in hex_fields:
        if stored is not None:
            _validate_hex_id(label, stored, width)  # type: ignore[arg-type]

    # Optional context fields.
    context_fields = (
        ("org_id", self._org_id),
        ("team_id", self._team_id),
        ("actor_id", self._actor_id),
        ("session_id", self._session_id),
    )
    for label, stored in context_fields:
        if stored is not None:
            _validate_string_id(label, stored)  # type: ignore[arg-type]

    # Optional integrity field.
    previous = self._prev_id
    if previous is not None:
        _validate_ulid_field("prev_id", previous)  # type: ignore[arg-type]
543
+
544
+ # ------------------------------------------------------------------
545
+ # Serialisation
546
+ # ------------------------------------------------------------------
547
+
548
+ def to_dict(self, *, omit_none: bool = True) -> dict[str, Any]:
549
+ """Return a plain :class:`dict` representation.
550
+
551
+ The dictionary uses the same field names as the JSON wire format.
552
+ Suitable for passing to logging frameworks or other serialisation
553
+ layers.
554
+
555
+ Args:
556
+ omit_none: When ``True`` (default), fields with ``None`` values are
557
+ excluded. Set to ``False`` to include explicit ``null`` values.
558
+
559
+ Returns:
560
+ An ordered dict with string keys and JSON-serialisable values.
561
+ """
562
+ raw: dict[str, Any] = {
563
+ "schema_version": self._schema_version,
564
+ "event_id": self._event_id,
565
+ "event_type": self._event_type,
566
+ "timestamp": self._timestamp,
567
+ "source": self._source,
568
+ "payload": dict(self._payload) if isinstance(self._payload, (dict, MappingProxyType)) else self._payload,
569
+ "trace_id": self._trace_id,
570
+ "span_id": self._span_id,
571
+ "parent_span_id": self._parent_span_id,
572
+ "org_id": self._org_id,
573
+ "team_id": self._team_id,
574
+ "actor_id": self._actor_id,
575
+ "session_id": self._session_id,
576
+ "tags": self._tags.to_dict() if self._tags is not None else None,
577
+ "checksum": self._checksum,
578
+ "signature": self._signature,
579
+ "prev_id": self._prev_id,
580
+ }
581
+ # GA-05-D: round-trip unknown fields
582
+ if self._unknown_fields: # type: ignore[truthy-bool]
583
+ raw.update(self._unknown_fields) # type: ignore[arg-type]
584
+ if omit_none:
585
+ return {k: v for k, v in raw.items() if v is not None}
586
+ return raw
587
+
588
def to_json(self) -> str:
    """Serialise to a canonical, deterministic JSON string.

    The output is ``json.dumps`` of :meth:`to_dict` with:

    * keys sorted alphabetically at every nesting level,
    * ``None`` values omitted (not serialised as ``null``),
    * compact separators — no whitespace,
    * non-ASCII characters emitted as-is rather than ``\\u``-escaped.

    Returns:
        A compact, canonical JSON string.

    Raises:
        SerializationError: If the payload contains a value that cannot
            be serialised to JSON.

    Example::

        json_str = event.to_json()
        assert json_str == event.to_json()  # deterministic
    """
    dump_options = {
        "sort_keys": True,
        "separators": (",", ":"),
        "default": _json_default,
        "ensure_ascii": False,
    }
    try:
        encoded = json.dumps(self.to_dict(), **dump_options)
    except (TypeError, ValueError, OverflowError) as exc:
        raise SerializationError(
            event_id=self._event_id,  # type: ignore[arg-type]
            reason=f"payload contains non-serialisable value: {exc}",
        ) from exc
    return encoded
624
+
625
def payload_checksum(self) -> str:
    """Compute SHA-256 of the canonical JSON of the payload.

    The payload is serialised with sorted keys, compact separators and
    unescaped non-ASCII characters, then hashed as UTF-8 bytes.

    Returns:
        A hex-encoded SHA-256 digest prefixed with ``"sha256:"``.
    """
    canonical_text = json.dumps(
        self._payload,
        sort_keys=True,
        separators=(",", ":"),
        default=_json_default,
        ensure_ascii=False,
    )
    hasher = hashlib.sha256()
    hasher.update(canonical_text.encode("utf-8"))
    return "sha256:" + hasher.hexdigest()
643
+
644
+ # ------------------------------------------------------------------
645
+ # Deserialisation
646
+ # ------------------------------------------------------------------
647
+
648
@classmethod
def from_dict(
    cls,
    data: dict[str, Any],
    *,
    max_size_bytes: int = 1_048_576,
    max_payload_depth: int = 10,
    max_tags: int = 50,
    source_hint: str = "<dict>",
) -> Event:
    """Construct an :class:`Event` from a plain dictionary.

    The dictionary shape matches the output of :meth:`to_dict`.  Keys that
    are not part of the known envelope are preserved and exposed through
    :attr:`unknown_fields`.

    Args:
        data: Dictionary with event fields.
        max_size_bytes: Maximum serialised size in UTF-8 bytes (RFC §19.4),
            measured on compact JSON without ASCII escaping.
            Defaults to 1 MiB.  Pass 0 to disable.
        max_payload_depth: Maximum nesting depth of the payload object
            (RFC §19.4).  Defaults to 10.  Pass 0 to disable.
        max_tags: Maximum number of tag keys allowed (RFC §19.4).
            Defaults to 50.  Pass 0 to disable.
        source_hint: Short label for error messages (e.g. a filename).

    Returns:
        A new :class:`Event` instance (not yet validated).

    Raises:
        DeserializationError: If a required field is missing or has an
            unexpected type, if ``tags`` is not a string-keyed mapping,
            or if any DoS limit is exceeded.
        SchemaValidationError: Propagated from :class:`Tags` when a tag
            key or value is not a non-empty string.

    Example::

        event = Event.from_dict(json.loads(raw_json))
        event.validate()
    """
    _require_dict(data, source_hint)

    # RFC §19.4 — DoS guards
    if max_size_bytes > 0:
        try:
            # Measure real UTF-8 bytes (no \uXXXX escaping) so the limit
            # means the same thing as the UTF-8 byte count from_json applies
            # to the raw string; escaped output would over-count non-ASCII
            # text.  "surrogatepass" keeps lone surrogates from raising and
            # thereby skipping the guard.
            _encoded = json.dumps(
                data, separators=(",", ":"), ensure_ascii=False
            ).encode("utf-8", "surrogatepass")
        except (TypeError, ValueError):
            # Best effort: input that cannot be serialised cannot be
            # measured, so the size guard is skipped for it.
            _encoded = b""
        if len(_encoded) > max_size_bytes:
            raise DeserializationError(
                reason=(
                    f"event exceeds max_size_bytes limit of {max_size_bytes} "
                    f"(got {len(_encoded)} bytes)"
                ),
                source_hint=source_hint,
            )

    tags_raw = data.get("tags")
    if max_tags > 0 and isinstance(tags_raw, dict) and len(tags_raw) > max_tags:
        raise DeserializationError(
            reason=(
                f"event has {len(tags_raw)} tags, exceeding max_tags={max_tags} "
                "(RFC §19.4)"
            ),
            source_hint=source_hint,
        )

    if max_payload_depth > 0:
        payload_raw = data.get("payload")
        if payload_raw is not None:
            _check_nesting_depth(payload_raw, max_payload_depth, source_hint)

    tags: Tags | None = None
    if tags_raw is not None:
        try:
            tags = Tags(**dict(tags_raw.items()))
        except (AttributeError, TypeError) as exc:
            # AttributeError: tags is not a mapping (no .items()).
            # TypeError: keys cannot be passed as keyword arguments.
            raise DeserializationError(
                reason=f"unexpected structure: {exc}",
                source_hint=source_hint,
            ) from exc

    try:
        _KNOWN_KEYS = {
            "schema_version", "event_id", "event_type", "timestamp",
            "source", "payload", "trace_id", "span_id",
            "parent_span_id", "org_id", "team_id", "actor_id",
            "session_id", "tags", "checksum", "signature", "prev_id",
        }
        _extra = {k: v for k, v in data.items() if k not in _KNOWN_KEYS}

        evt = cls(
            schema_version=_require_str(data, "schema_version", source_hint),
            event_id=_require_str(data, "event_id", source_hint),
            event_type=_require_str(data, "event_type", source_hint),
            timestamp=_require_str(data, "timestamp", source_hint),
            source=_require_str(data, "source", source_hint),
            payload=_require_dict_field(data, "payload", source_hint),
            trace_id=data.get("trace_id"),
            span_id=data.get("span_id"),
            parent_span_id=data.get("parent_span_id"),
            org_id=data.get("org_id"),
            team_id=data.get("team_id"),
            actor_id=data.get("actor_id"),
            session_id=data.get("session_id"),
            tags=tags,
            checksum=data.get("checksum"),
            signature=data.get("signature"),
            prev_id=data.get("prev_id"),
        )
        if _extra:
            # GA-05-D: keep unrecognised keys so to_dict() can round-trip them.
            object.__setattr__(evt, "_unknown_fields", _extra)
        return evt
    except (KeyError, AttributeError) as exc:
        raise DeserializationError(
            reason=f"unexpected structure: {exc}",
            source_hint=source_hint,
        ) from exc
760
+ # Note: from_json delegates to from_dict, which handles _unknown_fields.
761
+
762
@classmethod
def from_json(
    cls,
    json_str: str,
    *,
    max_size_bytes: int = 1_048_576,
    max_payload_depth: int = 10,
    max_tags: int = 50,
    source_hint: str = "<json>",
) -> Event:
    """Construct an :class:`Event` from a JSON string.

    Args:
        json_str: A JSON string in the format produced by :meth:`to_json`.
        max_size_bytes: Maximum string size in UTF-8 bytes (RFC §19.4).
            Defaults to 1 MiB. Pass 0 to disable.
        max_payload_depth: Maximum nesting depth forwarded to :meth:`from_dict`.
        max_tags: Maximum number of tag keys forwarded to :meth:`from_dict`.
        source_hint: Short label for error messages.

    Returns:
        A new :class:`Event` instance (not yet validated).

    Raises:
        DeserializationError: If *json_str* is not valid JSON, is nested too
            deeply for the JSON parser to decode, is missing required
            fields, or exceeds any DoS limit.

    Example::

        event = Event.from_json(raw_json_str)
        event.validate()
    """
    # RFC §19.4 — byte-length check before parsing to prevent parse-bomb attacks.
    if max_size_bytes > 0:
        # Encode once; the size is needed for both the check and the message.
        size_bytes = len(json_str.encode())
        if size_bytes > max_size_bytes:
            raise DeserializationError(
                reason=(
                    f"JSON string exceeds max_size_bytes limit of {max_size_bytes} "
                    f"(got {size_bytes} bytes)"
                ),
                source_hint=source_hint,
            )
    try:
        data: dict[str, Any] = json.loads(json_str)
    except json.JSONDecodeError as exc:
        raise DeserializationError(
            reason=f"invalid JSON: {exc}",
            source_hint=source_hint,
        ) from exc
    except RecursionError as exc:
        # A small but pathologically nested document (e.g. thousands of "[")
        # overflows the parser's recursion limit before the depth guard in
        # from_dict can run; report it through the documented error type.
        raise DeserializationError(
            reason="invalid JSON: nesting is too deep to parse (RFC §19.4)",
            source_hint=source_hint,
        ) from exc
    return cls.from_dict(
        data,
        max_size_bytes=0,  # already checked above
        max_payload_depth=max_payload_depth,
        max_tags=max_tags,
        source_hint=source_hint,
    )
817
+
818
+
819
+ # ---------------------------------------------------------------------------
820
+ # Validation helpers (module-private)
821
+ # ---------------------------------------------------------------------------
822
+
823
+
824
+ def _check_nesting_depth(
825
+ obj: Any,
826
+ max_depth: int,
827
+ source_hint: str,
828
+ _current: int = 0,
829
+ ) -> None:
830
+ """Recursively check that *obj* does not exceed *max_depth* nesting levels.
831
+
832
+ Raises :exc:`~spanforge.exceptions.DeserializationError` if the depth
833
+ limit is exceeded. This guards against deeply nested JSON that could
834
+ cause stack overflows or excessive CPU use (RFC §19.4).
835
+ """
836
+ if _current >= max_depth:
837
+ raise DeserializationError(
838
+ reason=(
839
+ f"payload exceeds max nesting depth of {max_depth} levels "
840
+ "(RFC §19.4)"
841
+ ),
842
+ source_hint=source_hint,
843
+ )
844
+ if isinstance(obj, dict):
845
+ for v in obj.values():
846
+ _check_nesting_depth(v, max_depth, source_hint, _current + 1)
847
+ elif isinstance(obj, list):
848
+ for item in obj:
849
+ _check_nesting_depth(item, max_depth, source_hint, _current + 1)
850
+
851
+
852
def _validate_schema_version(value: str) -> None:
    """Raise SchemaValidationError unless *value* is an accepted version string.

    *value* must be a ``str`` and a member of ``_ACCEPTED_SCHEMA_VERSIONS``.
    """
    if not isinstance(value, str):
        raise SchemaValidationError("schema_version", value, _MUST_BE_STRING)
    if value in _ACCEPTED_SCHEMA_VERSIONS:
        return
    accepted = sorted(_ACCEPTED_SCHEMA_VERSIONS)
    raise SchemaValidationError(
        "schema_version",
        value,
        f"must be one of {accepted!r} (RFC-0001 §15.5)",
    )
863
+
864
+
865
def _validate_event_id(value: str) -> None:
    """Raise SchemaValidationError unless *value* is a str accepted by ``_validate_ulid``."""
    if not isinstance(value, str):
        raise SchemaValidationError("event_id", value, _MUST_BE_STRING)
    if _validate_ulid(value):
        return
    raise SchemaValidationError(
        "event_id",
        value,
        "must be a valid 26-character ULID (Crockford Base32)",
    )
874
+
875
+
876
def _validate_event_type(value: str) -> None:
    """Check the shape and registration status of an event type string.

    Raises SchemaValidationError when *value* is not a ``str``, does not
    match ``_EVENT_TYPE_RE``, or is unregistered and rejected by
    ``validate_custom`` (whose ``EventTypeError`` message is re-used).
    """
    if not isinstance(value, str):
        raise SchemaValidationError("event_type", value, _MUST_BE_STRING)

    if not _EVENT_TYPE_RE.match(value):
        raise SchemaValidationError(
            "event_type",
            value,
            "must match 'llm.<ns>.<entity>.<action>' or 'x.<company>.<…>'",
        )

    # Registered types need no further checks.
    if is_registered(value):
        return

    try:
        validate_custom(value)
    except EventTypeError as exc:
        raise SchemaValidationError("event_type", value, str(exc)) from exc
894
+
895
+
896
def _validate_timestamp(value: str) -> None:
    """Check that *value* is a well-formed timestamp string.

    Raises SchemaValidationError when *value* is not a ``str``, does not
    match ``_TIMESTAMP_PATTERN``, or cannot be parsed by
    ``_parse_timestamp`` (i.e. it is shaped correctly but is not a real
    date/time).
    """
    if not isinstance(value, str):
        raise SchemaValidationError("timestamp", value, _MUST_BE_STRING)

    if not _TIMESTAMP_PATTERN.match(value):
        raise SchemaValidationError(
            "timestamp",
            value,
            "must be UTC ISO-8601 format: 'YYYY-MM-DDTHH:MM:SS[.ffffff]Z'",
        )

    # The pattern only checks the shape; parsing confirms the calendar values.
    try:
        _parse_timestamp(value)
    except ValueError as exc:
        detail = f"not a valid date/time: {exc}"
        raise SchemaValidationError("timestamp", value, detail) from exc
912
+
913
+
914
def _validate_source(value: str) -> None:
    """Raise SchemaValidationError unless *value* is a str matching ``_SOURCE_PATTERN``."""
    if not isinstance(value, str):
        raise SchemaValidationError("source", value, _MUST_BE_STRING)
    if _SOURCE_PATTERN.match(value):
        return
    raise SchemaValidationError(
        "source",
        value,
        "must match 'tool-name@semver', e.g. 'llm-trace@0.3.1'",
    )
923
+
924
+
925
+ def _validate_payload(value: object) -> None:
926
+ if not isinstance(value, (dict, MappingProxyType)):
927
+ raise SchemaValidationError(
928
+ "payload", value, "must be a non-empty dict"
929
+ )
930
+ if not value:
931
+ raise SchemaValidationError(
932
+ "payload", value, "must be a non-empty dict (empty dict is not allowed)"
933
+ )
934
+
935
+
936
def _validate_hex_id(field: str, value: str, expected_len: int) -> None:
    """Raise SchemaValidationError unless *value* is a str matching the ID pattern.

    ``_TRACE_ID_PATTERN`` is used when *expected_len* is 32; every other
    length is checked against ``_SPAN_ID_PATTERN``.  *field* names the
    attribute in the raised error.
    """
    if not isinstance(value, str):
        raise SchemaValidationError(field, value, _MUST_BE_STRING)

    if expected_len == 32:  # noqa: PLR2004
        pattern = _TRACE_ID_PATTERN
    else:
        pattern = _SPAN_ID_PATTERN

    if pattern.match(value):
        return
    raise SchemaValidationError(
        field,
        value,
        f"must be exactly {expected_len} lowercase hex characters",
    )
946
+
947
+
948
+ def _validate_string_id(field: str, value: str) -> None:
949
+ if not isinstance(value, str):
950
+ raise SchemaValidationError(field, value, _MUST_BE_STRING)
951
+ if not value:
952
+ raise SchemaValidationError(
953
+ field, value, "must be a non-empty string"
954
+ )
955
+
956
+
957
def _validate_ulid_field(field: str, value: str) -> None:
    """Raise SchemaValidationError unless *value* is a str accepted by ``_validate_ulid``.

    *field* names the attribute in the raised error.
    """
    if not isinstance(value, str):
        raise SchemaValidationError(field, value, _MUST_BE_STRING)
    if _validate_ulid(value):
        return
    raise SchemaValidationError(field, value, "must be a valid 26-character ULID")
964
+
965
+
966
+ # ---------------------------------------------------------------------------
967
+ # Deserialisation helpers (module-private)
968
+ # ---------------------------------------------------------------------------
969
+
970
+
971
+ def _require_dict(data: object, source_hint: str) -> None:
972
+ if not isinstance(data, dict):
973
+ raise DeserializationError(
974
+ reason=f"expected a JSON object at top level, got {type(data).__name__}",
975
+ source_hint=source_hint,
976
+ )
977
+
978
+
979
+ def _require_str(data: dict[str, Any], key: str, source_hint: str) -> str:
980
+ value = data.get(key)
981
+ if value is None:
982
+ raise DeserializationError(
983
+ reason=f"required field '{key}' is missing",
984
+ source_hint=source_hint,
985
+ )
986
+ if not isinstance(value, str):
987
+ raise DeserializationError(
988
+ reason=f"field '{key}' must be a string, got {type(value).__name__}",
989
+ source_hint=source_hint,
990
+ )
991
+ return value
992
+
993
+
994
+ def _require_dict_field(
995
+ data: dict[str, Any], key: str, source_hint: str
996
+ ) -> dict[str, Any]:
997
+ value = data.get(key)
998
+ if value is None:
999
+ raise DeserializationError(
1000
+ reason=f"required field '{key}' is missing",
1001
+ source_hint=source_hint,
1002
+ )
1003
+ if not isinstance(value, dict):
1004
+ raise DeserializationError(
1005
+ reason=f"field '{key}' must be an object, got {type(value).__name__}",
1006
+ source_hint=source_hint,
1007
+ )
1008
+ return value # type: ignore[return-value]
1009
+
1010
+
1011
+ # ---------------------------------------------------------------------------
1012
+ # Serialisation helpers (module-private)
1013
+ # ---------------------------------------------------------------------------
1014
+
1015
+
1016
def _json_default(obj: object) -> object:
    """Convert types the JSON encoder cannot handle natively.

    ``datetime.datetime`` values are formatted via ``_datetime_to_iso`` and
    ``EventType`` members are reduced to their ``.value``; any other type
    raises ``TypeError``.
    """
    if isinstance(obj, datetime.datetime):
        encoded: object = _datetime_to_iso(obj)
    elif isinstance(obj, EventType):
        encoded = obj.value
    else:
        type_name = type(obj).__name__
        raise TypeError(f"Object of type {type_name!r} is not JSON serialisable")
    return encoded
1023
+
1024
+
1025
def _utcnow_iso() -> str:
    """Return the current UTC time formatted by ``_datetime_to_iso``."""
    return _datetime_to_iso(datetime.datetime.now(tz=datetime.timezone.utc))
1029
+
1030
+
1031
+ def _datetime_to_iso(dt: datetime.datetime) -> str:
1032
+ """Format a :class:`datetime.datetime` as ``'YYYY-MM-DDTHH:MM:SS.ffffffZ'``."""
1033
+ if dt.tzinfo is None:
1034
+ # Assume UTC if naive
1035
+ dt = dt.replace(tzinfo=datetime.timezone.utc)
1036
+ # Normalise to UTC
1037
+ dt_utc = dt.astimezone(datetime.timezone.utc)
1038
+ return dt_utc.strftime("%Y-%m-%dT%H:%M:%S.%f") + "Z"
1039
+
1040
+
1041
+ def _parse_timestamp(value: str) -> datetime.datetime:
1042
+ """Parse an ISO-8601 UTC timestamp string."""
1043
+ # Python < 3.11 does not support fromisoformat with trailing 'Z'
1044
+ if value.endswith("Z"):
1045
+ value = value[:-1] + "+00:00"
1046
+ if sys.version_info >= (3, 11):
1047
+ return datetime.datetime.fromisoformat(value)
1048
+ # Fallback for Python 3.9 / 3.10 # pragma: no cover
1049
+ try: # pragma: no cover
1050
+ return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f+00:00") # pragma: no cover
1051
+ except ValueError: # pragma: no cover
1052
+ return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S+00:00") # pragma: no cover