spanforge 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +695 -0
- spanforge/_batch_exporter.py +322 -0
- spanforge/_cli.py +3081 -0
- spanforge/_hooks.py +340 -0
- spanforge/_server.py +953 -0
- spanforge/_span.py +1015 -0
- spanforge/_store.py +287 -0
- spanforge/_stream.py +654 -0
- spanforge/_trace.py +334 -0
- spanforge/_tracer.py +253 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +464 -0
- spanforge/auto.py +181 -0
- spanforge/baseline.py +336 -0
- spanforge/config.py +460 -0
- spanforge/consent.py +227 -0
- spanforge/consumer.py +379 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1060 -0
- spanforge/cost.py +597 -0
- spanforge/debug.py +514 -0
- spanforge/drift.py +488 -0
- spanforge/egress.py +63 -0
- spanforge/eval.py +575 -0
- spanforge/event.py +1052 -0
- spanforge/exceptions.py +246 -0
- spanforge/explain.py +181 -0
- spanforge/export/__init__.py +50 -0
- spanforge/export/append_only.py +342 -0
- spanforge/export/cloud.py +349 -0
- spanforge/export/datadog.py +495 -0
- spanforge/export/grafana.py +331 -0
- spanforge/export/jsonl.py +198 -0
- spanforge/export/otel_bridge.py +291 -0
- spanforge/export/otlp.py +817 -0
- spanforge/export/otlp_bridge.py +231 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/webhook.py +302 -0
- spanforge/exporters/__init__.py +29 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/hitl.py +297 -0
- spanforge/inspect.py +429 -0
- spanforge/integrations/__init__.py +39 -0
- spanforge/integrations/_pricing.py +277 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/bedrock.py +306 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +349 -0
- spanforge/integrations/groq.py +444 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/llamaindex.py +370 -0
- spanforge/integrations/ollama.py +286 -0
- spanforge/integrations/openai.py +370 -0
- spanforge/integrations/together.py +485 -0
- spanforge/metrics.py +393 -0
- spanforge/metrics_export.py +342 -0
- spanforge/migrate.py +278 -0
- spanforge/model_registry.py +282 -0
- spanforge/models.py +407 -0
- spanforge/namespaces/__init__.py +215 -0
- spanforge/namespaces/audit.py +253 -0
- spanforge/namespaces/cache.py +209 -0
- spanforge/namespaces/chain.py +74 -0
- spanforge/namespaces/confidence.py +69 -0
- spanforge/namespaces/consent.py +85 -0
- spanforge/namespaces/cost.py +175 -0
- spanforge/namespaces/decision.py +135 -0
- spanforge/namespaces/diff.py +146 -0
- spanforge/namespaces/drift.py +79 -0
- spanforge/namespaces/eval_.py +232 -0
- spanforge/namespaces/fence.py +180 -0
- spanforge/namespaces/guard.py +104 -0
- spanforge/namespaces/hitl.py +92 -0
- spanforge/namespaces/latency.py +69 -0
- spanforge/namespaces/prompt.py +185 -0
- spanforge/namespaces/redact.py +172 -0
- spanforge/namespaces/template.py +197 -0
- spanforge/namespaces/tool_call.py +76 -0
- spanforge/namespaces/trace.py +1006 -0
- spanforge/normalizer.py +183 -0
- spanforge/presidio_backend.py +149 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +415 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +780 -0
- spanforge/sampling.py +500 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/signing.py +1152 -0
- spanforge/stream.py +559 -0
- spanforge/testing.py +376 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +304 -0
- spanforge/validate.py +383 -0
- spanforge-2.0.0.dist-info/METADATA +1777 -0
- spanforge-2.0.0.dist-info/RECORD +101 -0
- spanforge-2.0.0.dist-info/WHEEL +4 -0
- spanforge-2.0.0.dist-info/entry_points.txt +5 -0
- spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/event.py
ADDED
|
@@ -0,0 +1,1052 @@
|
|
|
1
|
+
"""Core event envelope for spanforge v0.1.
|
|
2
|
+
|
|
3
|
+
Every event emitted by every tool in the LLM Developer Toolkit must conform to
|
|
4
|
+
the :class:`Event` class defined here. This is the canonical Python
|
|
5
|
+
representation of the JSON event envelope specified in the Enterprise Product
|
|
6
|
+
Specification §3.1.
|
|
7
|
+
|
|
8
|
+
Design goals
|
|
9
|
+
------------
|
|
10
|
+
* **Zero external dependencies** — only :mod:`datetime`, :mod:`json`,
|
|
11
|
+
:mod:`hashlib`, :mod:`re`, :mod:`sys`, :mod:`types`, and :mod:`typing` from the standard library.
|
|
12
|
+
* **``__slots__``** on all hot-path classes for minimal heap allocation.
|
|
13
|
+
* **Deterministic serialisation** — the same :class:`Event` always produces
|
|
14
|
+
the same JSON string; critical for HMAC signing.
|
|
15
|
+
* **Typed validation** — every validation failure is a
|
|
16
|
+
:class:`~spanforge.exceptions.SchemaValidationError` with the field name,
|
|
17
|
+
received value, and a clear reason; never a bare :exc:`ValueError`.
|
|
18
|
+
* **Immutability after creation** — envelope fields are read-only via
|
|
19
|
+
properties; mutation is limited to the ``sign()`` method (Phase 3) which sets
|
|
20
|
+
``checksum``, ``signature``, and ``prev_id``.
|
|
21
|
+
|
|
22
|
+
Serialisation contract
|
|
23
|
+
----------------------
|
|
24
|
+
``Event.to_json()`` produces canonical JSON with:
|
|
25
|
+
|
|
26
|
+
* Keys sorted alphabetically at every nesting level.
|
|
27
|
+
* ``None`` values **omitted** (reduces wire size; missing key == ``null``).
|
|
28
|
+
* :class:`datetime.datetime` values formatted as ``"YYYY-MM-DDTHH:MM:SS.ffffffZ"``.
|
|
29
|
+
* :class:`~spanforge.types.EventType` values serialised as their string value.
|
|
30
|
+
* :class:`Tags` serialised as a JSON object with sorted string keys.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import datetime
|
|
36
|
+
import hashlib
|
|
37
|
+
import json
|
|
38
|
+
import re
|
|
39
|
+
import sys
|
|
40
|
+
from types import MappingProxyType
|
|
41
|
+
from typing import TYPE_CHECKING, Any, Final
|
|
42
|
+
|
|
43
|
+
from spanforge.exceptions import (
|
|
44
|
+
DeserializationError,
|
|
45
|
+
EventTypeError,
|
|
46
|
+
SchemaValidationError,
|
|
47
|
+
SerializationError,
|
|
48
|
+
)
|
|
49
|
+
from spanforge.types import _EVENT_TYPE_RE, EventType, is_registered, validate_custom
|
|
50
|
+
from spanforge.ulid import generate as _generate_ulid
|
|
51
|
+
from spanforge.ulid import validate as _validate_ulid
|
|
52
|
+
|
|
53
|
+
if TYPE_CHECKING:
|
|
54
|
+
from collections.abc import ItemsView, KeysView, Mapping, ValuesView
|
|
55
|
+
|
|
56
|
+
__all__ = ["SCHEMA_VERSION", "Event", "Tags"]
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Constants
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
#: Schema version used as the default ``schema_version`` of new events.
SCHEMA_VERSION: Final[str] = "2.0"

#: Accepted schema versions for backward-compatibility (RFC-0001 §15.5).
_ACCEPTED_SCHEMA_VERSIONS: Final[frozenset[str]] = frozenset({"1.0", "2.0"})

#: Shared "wrong type" reason text.
#: NOTE(review): presumably consumed by the validators later in this module — confirm.
_MUST_BE_STRING: Final[str] = "must be a string"

#: ``service-name@semver`` — e.g. ``my-agent@1.2.0`` or ``MyAgent@1.0.0``
#: RFC-0001 §5.1: first char letter, then letters/digits/._- ; ``@`` ; semver.
#: The trailing optional group admits a ``.``/``-`` separated suffix such as
#: ``-rc.1`` after the three numeric components.
_SOURCE_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^[a-zA-Z][a-zA-Z0-9._\-]*@\d+\.\d+\.\d+(?:[.\-][a-zA-Z0-9.]+)?$"
)
#: ISO-8601 UTC datetime — EXACTLY 6 decimal places (RFC-0001 §6.1).
#: Only the shape is checked; the digits are not range-validated by the regex.
_TIMESTAMP_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z$"
)
#: Schema version — accepts major.minor or major.minor.patch (+ optional prerelease)
_SEMVER_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"^\d+\.\d+(?:\.\d+)?(?:[.-][a-zA-Z0-9.]+)?$"
)
#: Trace ID — exactly 32 lowercase hex characters
_TRACE_ID_PATTERN: Final[re.Pattern[str]] = re.compile(r"^[0-9a-f]{32}$")
#: Span ID — exactly 16 lowercase hex characters
_SPAN_ID_PATTERN: Final[re.Pattern[str]] = re.compile(r"^[0-9a-f]{16}$")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# Tags
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class Tags:
    """Immutable key-value tag container for :class:`Event`.

    Tags are arbitrary string key→value pairs that enrich an event with
    contextual metadata (e.g. ``env``, ``model``, ``region``).

    All keys and values must be non-empty strings.  The container is
    immutable after construction to prevent accidental mutation of a live
    event: attribute assignment and attribute deletion both raise
    :exc:`AttributeError`.

    Instances can be duplicated with :func:`copy.copy` /
    :func:`copy.deepcopy` and round-tripped through :mod:`pickle`.

    Example::

        tags = Tags(env="production", model="gpt-4o", region="us-east-1")
        tags["env"]          # "production"
        "model" in tags      # True
        dict(tags)           # {"env": "production", "model": "gpt-4o", ...}
    """

    __slots__ = ("_data",)

    def __init__(self, **kwargs: str) -> None:
        """Create a new :class:`Tags` instance.

        Args:
            **kwargs: Arbitrary string key=value pairs.

        Raises:
            SchemaValidationError: If any key or value is not a non-empty string.
        """
        for key, value in kwargs.items():
            if not isinstance(key, str) or not key:
                raise SchemaValidationError(
                    field=f"tags.{key!r}",
                    received=key,
                    reason="tag key must be a non-empty string",
                )
            if not isinstance(value, str) or not value:
                raise SchemaValidationError(
                    field=f"tags.{key}",
                    received=value,
                    reason="tag value must be a non-empty string",
                )
        # Store as a sorted snapshot.  object.__setattr__ is required because
        # this class's own __setattr__ always raises.
        object.__setattr__(self, "_data", dict(sorted(kwargs.items())))

    # ------------------------------------------------------------------
    # Read-only mapping interface
    # ------------------------------------------------------------------

    def __getitem__(self, key: str) -> str:
        return self._data[key]  # type: ignore[index]

    def __contains__(self, key: object) -> bool:
        return key in self._data

    def __iter__(self):  # type: ignore[override]  # noqa: ANN204
        return iter(self._data)

    def __len__(self) -> int:
        return len(self._data)

    def __setattr__(self, name: str, value: object) -> None:
        raise AttributeError("Tags is immutable; create a new instance instead")

    def __delattr__(self, name: str) -> None:
        # Without this, ``del tags._data`` would silently empty the slot and
        # break every subsequent read on the "immutable" container.
        raise AttributeError("Tags is immutable; create a new instance instead")

    # ------------------------------------------------------------------
    # copy / pickle support
    # ------------------------------------------------------------------

    def __getstate__(self) -> dict[str, dict[str, str]]:
        """Return the state used by :mod:`copy` and :mod:`pickle`.

        The wrapper dict always contains the ``"_data"`` key, so it is truthy
        even for an empty :class:`Tags`; a falsy state would make pickle skip
        :meth:`__setstate__` and leave the slot unset.
        """
        return {"_data": dict(self._data)}

    def __setstate__(self, state: object) -> None:
        """Restore state without going through the blocking ``__setattr__``.

        The default slot-restoration path calls ``setattr`` and therefore
        fails on this class; writing through ``object.__setattr__`` avoids it.
        """
        # Default protocol-2+ state for a slotted class is
        # ``(dict_state, slots_state)``; accept it so pickles produced before
        # __getstate__ existed can still be loaded.
        if isinstance(state, tuple):
            state = state[1]
        data = state["_data"]  # type: ignore[index]
        object.__setattr__(self, "_data", dict(sorted(data.items())))

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Tags):
            return self._data == other._data
        if isinstance(other, dict):
            return self._data == other
        return NotImplemented

    __hash__: None = None  # Tags is unhashable (mutable-equivalent semantics)

    def __repr__(self) -> str:
        kv = ", ".join(f"{k}={v!r}" for k, v in self._data.items())
        return f"Tags({kv})"

    def get(self, key: str, default: str | None = None) -> str | None:
        """Return the value for *key*, or *default* if not present."""
        return self._data.get(key, default)

    def keys(self) -> KeysView[str]:  # type: ignore[override]
        """Return tag keys."""
        return self._data.keys()

    def values(self) -> ValuesView[str]:  # type: ignore[override]
        """Return tag values."""
        return self._data.values()

    def items(self) -> ItemsView[str, str]:  # type: ignore[override]
        """Return (key, value) pairs."""
        return self._data.items()

    def to_dict(self) -> dict[str, str]:
        """Return a plain :class:`dict` copy of the tags."""
        return dict(self._data)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
# Event
|
|
192
|
+
# ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class Event:
|
|
196
|
+
"""The canonical event envelope for the LLM Developer Toolkit.
|
|
197
|
+
|
|
198
|
+
Every tool in the ecosystem creates events that conform to this class.
|
|
199
|
+
The envelope is designed to map cleanly to OTLP spans/log records (Phase 4)
|
|
200
|
+
and to carry optional HMAC signing for audit integrity (Phase 3).
|
|
201
|
+
|
|
202
|
+
Quick start
|
|
203
|
+
-----------
|
|
204
|
+
::
|
|
205
|
+
|
|
206
|
+
from spanforge import Event, EventType, Tags
|
|
207
|
+
|
|
208
|
+
event = Event(
|
|
209
|
+
event_type=EventType.TRACE_SPAN_COMPLETED,
|
|
210
|
+
source="llm-trace@0.3.1",
|
|
211
|
+
payload={"span_name": "run_agent", "status": "ok"},
|
|
212
|
+
tags=Tags(env="production", model="gpt-4o"),
|
|
213
|
+
)
|
|
214
|
+
event.validate()
|
|
215
|
+
json_str = event.to_json()
|
|
216
|
+
|
|
217
|
+
Required fields
|
|
218
|
+
---------------
|
|
219
|
+
* ``schema_version`` — automatically set to :data:`SCHEMA_VERSION` (currently ``"2.0"``)
|
|
220
|
+
* ``event_id`` — auto-generated ULID if not supplied
|
|
221
|
+
* ``event_type`` — namespaced string or :class:`~spanforge.types.EventType`
|
|
222
|
+
* ``timestamp`` — UTC ISO-8601; auto-generated if not supplied
|
|
223
|
+
* ``source`` — ``"tool-name@semver"``
|
|
224
|
+
* ``payload`` — tool-specific data (non-empty dict)
|
|
225
|
+
|
|
226
|
+
All other fields are optional.
|
|
227
|
+
|
|
228
|
+
Thread safety
|
|
229
|
+
-------------
|
|
230
|
+
:class:`Event` instances are **not** thread-safe for concurrent mutation.
|
|
231
|
+
Create separate instances per thread/task.
|
|
232
|
+
"""
|
|
233
|
+
|
|
234
|
+
__slots__ = (
|
|
235
|
+
"_actor_id",
|
|
236
|
+
# Integrity (mutated by sign() in Phase 3)
|
|
237
|
+
"_checksum",
|
|
238
|
+
"_event_id",
|
|
239
|
+
"_event_type",
|
|
240
|
+
# Context
|
|
241
|
+
"_org_id",
|
|
242
|
+
"_parent_span_id",
|
|
243
|
+
"_payload",
|
|
244
|
+
"_prev_id",
|
|
245
|
+
"_schema_version",
|
|
246
|
+
"_session_id",
|
|
247
|
+
"_signature",
|
|
248
|
+
"_source",
|
|
249
|
+
"_span_id",
|
|
250
|
+
# Tags
|
|
251
|
+
"_tags",
|
|
252
|
+
"_team_id",
|
|
253
|
+
"_timestamp",
|
|
254
|
+
# Tracing
|
|
255
|
+
"_trace_id",
|
|
256
|
+
# GA-05-D: Unknown fields preserved during deserialization
|
|
257
|
+
"_unknown_fields",
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
def __init__(  # noqa: PLR0913  # NOSONAR
    self,
    *,
    event_type: str | EventType,
    source: str,
    payload: dict[str, Any],
    schema_version: str = SCHEMA_VERSION,
    event_id: str | None = None,
    timestamp: str | None = None,
    trace_id: str | None = None,
    span_id: str | None = None,
    parent_span_id: str | None = None,
    org_id: str | None = None,
    team_id: str | None = None,
    actor_id: str | None = None,
    session_id: str | None = None,
    tags: Tags | None = None,
    checksum: str | None = None,
    signature: str | None = None,
    prev_id: str | None = None,
) -> None:
    """Create a new :class:`Event`.

    The constructor only stores the supplied values; it performs no
    validation.  Call :meth:`validate` afterwards to check the envelope.

    Auto-generated fields
    ---------------------
    * ``event_id`` — a new ULID is generated if not provided.
    * ``timestamp`` — current UTC time is used if not provided.

    Args:
        event_type: Namespaced event type (string or :class:`EventType`).
            :class:`EventType` members are stored as their ``.value``.
        source: Emitting tool in ``"name@semver"`` format.
        payload: Tool-specific event data (non-empty dict).  ``dict`` and
            ``MappingProxyType`` payloads are shallow-copied; anything else
            is stored untouched so that :meth:`validate` can report it.
        schema_version: Schema version string.  Defaults to
            :data:`SCHEMA_VERSION`.
        event_id: ULID.  Auto-generated if omitted.
        timestamp: UTC ISO-8601 string.  Set to the current UTC time if omitted.
        trace_id: 32-hex-char OpenTelemetry trace ID.
        span_id: 16-hex-char OpenTelemetry span ID.
        parent_span_id: 16-hex-char parent span ID.
        org_id: Organisation identifier.
        team_id: Team identifier.
        actor_id: User or service-account identifier.
        session_id: Session identifier grouping related events.
        tags: :class:`Tags` instance with string metadata.
        checksum: SHA-256 payload checksum (set by ``sign()``).  When
            given, the stored payload is frozen behind a read-only proxy.
        signature: HMAC-SHA256 chain signature (set by ``sign()``).
        prev_id: ULID of previous event in audit chain (set by ``sign()``).
    """
    # --- Required fields -------------------------------------------
    object.__setattr__(self, "_schema_version", schema_version)
    object.__setattr__(
        self, "_event_id", event_id if event_id is not None else _generate_ulid()
    )
    # .value gives the canonical string for EventType members; str() is
    # unreliable across Python versions for mixed str+Enum types.
    _et_value: str = (
        event_type.value
        if isinstance(event_type, EventType)
        else str(event_type)
    )
    object.__setattr__(self, "_event_type", _et_value)
    object.__setattr__(
        self,
        "_timestamp",
        timestamp if timestamp is not None else _utcnow_iso(),
    )
    object.__setattr__(self, "_source", source)

    # Always take a private shallow copy of mapping payloads so that later
    # mutation of the caller's object cannot change this event.  This also
    # covers a MappingProxyType argument: a proxy is only a read-only *view*,
    # so storing it directly would still alias the caller's underlying dict
    # and defeat the freeze for signed events.
    if isinstance(payload, (dict, MappingProxyType)):
        snapshot = dict(payload)
        # Signed events (checksum set) are frozen so that any top-level
        # mutation attempt raises TypeError immediately.
        frozen_or_plain = (
            MappingProxyType(snapshot) if checksum is not None else snapshot
        )
        object.__setattr__(self, "_payload", frozen_or_plain)
    else:
        # Non-dict payload: store as-is; validate() will raise SchemaValidationError
        object.__setattr__(self, "_payload", payload)

    # --- Tracing ---------------------------------------------------
    object.__setattr__(self, "_trace_id", trace_id)
    object.__setattr__(self, "_span_id", span_id)
    object.__setattr__(self, "_parent_span_id", parent_span_id)

    # --- Context ---------------------------------------------------
    object.__setattr__(self, "_org_id", org_id)
    object.__setattr__(self, "_team_id", team_id)
    object.__setattr__(self, "_actor_id", actor_id)
    object.__setattr__(self, "_session_id", session_id)

    # --- Tags / Integrity ------------------------------------------
    object.__setattr__(self, "_tags", tags)
    object.__setattr__(self, "_checksum", checksum)
    object.__setattr__(self, "_signature", signature)
    object.__setattr__(self, "_prev_id", prev_id)

    # --- Unknown fields (GA-05-D: forward-compat) ---------------------
    object.__setattr__(self, "_unknown_fields", {})
|
|
368
|
+
|
|
369
|
+
# ------------------------------------------------------------------
|
|
370
|
+
# Read-only properties
|
|
371
|
+
# ------------------------------------------------------------------
|
|
372
|
+
|
|
373
|
+
@property
|
|
374
|
+
def schema_version(self) -> str:
|
|
375
|
+
"""Schema version string (SemVer)."""
|
|
376
|
+
return self._schema_version # type: ignore[return-value]
|
|
377
|
+
|
|
378
|
+
@property
|
|
379
|
+
def event_id(self) -> str:
|
|
380
|
+
"""ULID event identifier."""
|
|
381
|
+
return self._event_id # type: ignore[return-value]
|
|
382
|
+
|
|
383
|
+
@property
|
|
384
|
+
def event_type(self) -> str:
|
|
385
|
+
"""Namespaced event type string."""
|
|
386
|
+
return self._event_type # type: ignore[return-value]
|
|
387
|
+
|
|
388
|
+
@property
|
|
389
|
+
def timestamp(self) -> str:
|
|
390
|
+
"""UTC ISO-8601 timestamp string."""
|
|
391
|
+
return self._timestamp # type: ignore[return-value]
|
|
392
|
+
|
|
393
|
+
@property
|
|
394
|
+
def source(self) -> str:
|
|
395
|
+
"""Emitting tool in ``"name@semver"`` format."""
|
|
396
|
+
return self._source # type: ignore[return-value]
|
|
397
|
+
|
|
398
|
+
@property
|
|
399
|
+
def payload(self) -> Mapping[str, Any]:
|
|
400
|
+
"""Tool-specific event payload.
|
|
401
|
+
|
|
402
|
+
Returns a read-only :class:`~types.MappingProxyType` view.
|
|
403
|
+
For signed events (where ``checksum`` is set) the internal store is
|
|
404
|
+
already a ``MappingProxyType``; any attempt to mutate via ``dict``
|
|
405
|
+
sub-access at the top level raises :exc:`TypeError` immediately.
|
|
406
|
+
"""
|
|
407
|
+
p = self._payload
|
|
408
|
+
if isinstance(p, MappingProxyType):
|
|
409
|
+
return p # already frozen — return directly, no double-wrap
|
|
410
|
+
return MappingProxyType(p) # type: ignore[return-value]
|
|
411
|
+
|
|
412
|
+
@property
|
|
413
|
+
def trace_id(self) -> str | None:
|
|
414
|
+
"""32-hex-char OpenTelemetry trace ID."""
|
|
415
|
+
return self._trace_id # type: ignore[return-value]
|
|
416
|
+
|
|
417
|
+
@property
|
|
418
|
+
def span_id(self) -> str | None:
|
|
419
|
+
"""16-hex-char OpenTelemetry span ID."""
|
|
420
|
+
return self._span_id # type: ignore[return-value]
|
|
421
|
+
|
|
422
|
+
@property
|
|
423
|
+
def parent_span_id(self) -> str | None:
|
|
424
|
+
"""16-hex-char parent span ID."""
|
|
425
|
+
return self._parent_span_id # type: ignore[return-value]
|
|
426
|
+
|
|
427
|
+
@property
|
|
428
|
+
def org_id(self) -> str | None:
|
|
429
|
+
"""Organisation identifier."""
|
|
430
|
+
return self._org_id # type: ignore[return-value]
|
|
431
|
+
|
|
432
|
+
@property
|
|
433
|
+
def team_id(self) -> str | None:
|
|
434
|
+
"""Team identifier."""
|
|
435
|
+
return self._team_id # type: ignore[return-value]
|
|
436
|
+
|
|
437
|
+
@property
|
|
438
|
+
def actor_id(self) -> str | None:
|
|
439
|
+
"""User or service-account identifier."""
|
|
440
|
+
return self._actor_id # type: ignore[return-value]
|
|
441
|
+
|
|
442
|
+
@property
|
|
443
|
+
def session_id(self) -> str | None:
|
|
444
|
+
"""Session identifier grouping related events."""
|
|
445
|
+
return self._session_id # type: ignore[return-value]
|
|
446
|
+
|
|
447
|
+
@property
|
|
448
|
+
def tags(self) -> Tags | None:
|
|
449
|
+
"""Metadata tags."""
|
|
450
|
+
return self._tags # type: ignore[return-value]
|
|
451
|
+
|
|
452
|
+
@property
|
|
453
|
+
def checksum(self) -> str | None:
|
|
454
|
+
"""SHA-256 payload checksum. Set by ``sign()``."""
|
|
455
|
+
return self._checksum # type: ignore[return-value]
|
|
456
|
+
|
|
457
|
+
@property
|
|
458
|
+
def signature(self) -> str | None:
|
|
459
|
+
"""HMAC-SHA256 chain signature. Set by ``sign()``."""
|
|
460
|
+
return self._signature # type: ignore[return-value]
|
|
461
|
+
|
|
462
|
+
@property
|
|
463
|
+
def unknown_fields(self) -> dict[str, Any]:
|
|
464
|
+
"""Fields present during deserialization that are not part of the known schema.
|
|
465
|
+
|
|
466
|
+
Returns a shallow copy to prevent mutation of the internal store.
|
|
467
|
+
"""
|
|
468
|
+
return dict(self._unknown_fields) # type: ignore[arg-type]
|
|
469
|
+
|
|
470
|
+
@property
|
|
471
|
+
def prev_id(self) -> str | None:
|
|
472
|
+
"""ULID of the preceding event in the audit chain. Set by ``sign()``."""
|
|
473
|
+
return self._prev_id # type: ignore[return-value]
|
|
474
|
+
|
|
475
|
+
# ------------------------------------------------------------------
|
|
476
|
+
# Equality & representation
|
|
477
|
+
# ------------------------------------------------------------------
|
|
478
|
+
|
|
479
|
+
def __eq__(self, other: object) -> bool:
|
|
480
|
+
if not isinstance(other, Event):
|
|
481
|
+
return NotImplemented
|
|
482
|
+
return self._event_id == other._event_id
|
|
483
|
+
|
|
484
|
+
def __hash__(self) -> int:
|
|
485
|
+
"""Hash by event_id (ULID) — enables set/dict membership."""
|
|
486
|
+
return hash(self._event_id)
|
|
487
|
+
|
|
488
|
+
def __repr__(self) -> str:
|
|
489
|
+
return (
|
|
490
|
+
f"Event(event_id={self._event_id!r}, "
|
|
491
|
+
f"event_type={self._event_type!r}, "
|
|
492
|
+
f"source={self._source!r})"
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
# ------------------------------------------------------------------
|
|
496
|
+
# Validation
|
|
497
|
+
# ------------------------------------------------------------------
|
|
498
|
+
|
|
499
|
+
def validate(self) -> None:
    """Check the envelope fields, stopping at the first failure.

    The required fields (schema version, event id, event type, timestamp,
    source, payload) are always checked, in that order.  Optional tracing,
    context and ``prev_id`` fields are checked only when they are not
    ``None``.  All checks are delegated to the module's ``_validate_*``
    helpers.

    Raises:
        SchemaValidationError: On the first field that fails validation.
            ``exc.field`` names the failing field;
            ``exc.reason`` explains the constraint.

    Example::

        event.validate()   # raises SchemaValidationError if invalid
    """
    # Required envelope fields.
    _validate_schema_version(self._schema_version)  # type: ignore[arg-type]
    _validate_event_id(self._event_id)  # type: ignore[arg-type]
    _validate_event_type(self._event_type)  # type: ignore[arg-type]
    _validate_timestamp(self._timestamp)  # type: ignore[arg-type]
    _validate_source(self._source)  # type: ignore[arg-type]
    _validate_payload(self._payload)  # type: ignore[arg-type]

    # Optional tracing fields: (field name, value, expected hex length).
    hex_checks = (
        ("trace_id", self._trace_id, 32),
        ("span_id", self._span_id, 16),
        ("parent_span_id", self._parent_span_id, 16),
    )
    for hex_name, hex_value, hex_len in hex_checks:
        if hex_value is None:
            continue
        _validate_hex_id(hex_name, hex_value, hex_len)  # type: ignore[arg-type]

    # Optional context fields.
    context_checks = (
        ("org_id", self._org_id),
        ("team_id", self._team_id),
        ("actor_id", self._actor_id),
        ("session_id", self._session_id),
    )
    for ctx_name, ctx_value in context_checks:
        if ctx_value is None:
            continue
        _validate_string_id(ctx_name, ctx_value)  # type: ignore[arg-type]

    # Optional integrity field.
    previous = self._prev_id
    if previous is not None:
        _validate_ulid_field("prev_id", previous)  # type: ignore[arg-type]
|
|
543
|
+
|
|
544
|
+
# ------------------------------------------------------------------
|
|
545
|
+
# Serialisation
|
|
546
|
+
# ------------------------------------------------------------------
|
|
547
|
+
|
|
548
|
+
def to_dict(self, *, omit_none: bool = True) -> dict[str, Any]:
|
|
549
|
+
"""Return a plain :class:`dict` representation.
|
|
550
|
+
|
|
551
|
+
The dictionary uses the same field names as the JSON wire format.
|
|
552
|
+
Suitable for passing to logging frameworks or other serialisation
|
|
553
|
+
layers.
|
|
554
|
+
|
|
555
|
+
Args:
|
|
556
|
+
omit_none: When ``True`` (default), fields with ``None`` values are
|
|
557
|
+
excluded. Set to ``False`` to include explicit ``null`` values.
|
|
558
|
+
|
|
559
|
+
Returns:
|
|
560
|
+
An ordered dict with string keys and JSON-serialisable values.
|
|
561
|
+
"""
|
|
562
|
+
raw: dict[str, Any] = {
|
|
563
|
+
"schema_version": self._schema_version,
|
|
564
|
+
"event_id": self._event_id,
|
|
565
|
+
"event_type": self._event_type,
|
|
566
|
+
"timestamp": self._timestamp,
|
|
567
|
+
"source": self._source,
|
|
568
|
+
"payload": dict(self._payload) if isinstance(self._payload, (dict, MappingProxyType)) else self._payload,
|
|
569
|
+
"trace_id": self._trace_id,
|
|
570
|
+
"span_id": self._span_id,
|
|
571
|
+
"parent_span_id": self._parent_span_id,
|
|
572
|
+
"org_id": self._org_id,
|
|
573
|
+
"team_id": self._team_id,
|
|
574
|
+
"actor_id": self._actor_id,
|
|
575
|
+
"session_id": self._session_id,
|
|
576
|
+
"tags": self._tags.to_dict() if self._tags is not None else None,
|
|
577
|
+
"checksum": self._checksum,
|
|
578
|
+
"signature": self._signature,
|
|
579
|
+
"prev_id": self._prev_id,
|
|
580
|
+
}
|
|
581
|
+
# GA-05-D: round-trip unknown fields
|
|
582
|
+
if self._unknown_fields: # type: ignore[truthy-bool]
|
|
583
|
+
raw.update(self._unknown_fields) # type: ignore[arg-type]
|
|
584
|
+
if omit_none:
|
|
585
|
+
return {k: v for k, v in raw.items() if v is not None}
|
|
586
|
+
return raw
|
|
587
|
+
|
|
588
|
+
def to_json(self) -> str:
    """Serialise the event to a compact JSON string with sorted keys.

    Properties
    ----------
    * Built from :meth:`to_dict` with its default ``omit_none=True``, so
      top-level envelope fields that are ``None`` are omitted.
    * Keys are sorted alphabetically at every nesting level
      (``sort_keys=True``).
    * Uses compact separators — no whitespace.
    * Non-ASCII characters are emitted as-is (``ensure_ascii=False``).
    * Values the encoder cannot handle natively go through the module's
      ``_json_default`` hook.

    Returns:
        A compact JSON string.

    Raises:
        SerializationError: If encoding raises :exc:`TypeError`,
            :exc:`ValueError` or :exc:`OverflowError`.

    Example::

        json_str = event.to_json()
        assert json_str == event.to_json()  # deterministic
    """
    dump_options: dict[str, Any] = {
        "sort_keys": True,
        "separators": (",", ":"),
        "default": _json_default,
        "ensure_ascii": False,
    }
    try:
        encoded = json.dumps(self.to_dict(), **dump_options)
    except (TypeError, ValueError, OverflowError) as exc:
        raise SerializationError(
            event_id=self._event_id,  # type: ignore[arg-type]
            reason=f"payload contains non-serialisable value: {exc}",
        ) from exc
    return encoded
|
|
624
|
+
|
|
625
|
+
def payload_checksum(self) -> str:
    """Return the SHA-256 digest of the payload's canonical JSON form.

    ``self._payload`` is dumped with sorted keys, compact separators,
    non-ASCII characters kept as-is and ``_json_default`` as the fallback
    encoder; the resulting text is UTF-8 encoded and hashed.

    Unlike :meth:`to_json`, encoder errors are not wrapped here: any
    exception raised by ``json.dumps`` propagates unchanged.

    Returns:
        The hex-encoded SHA-256 digest prefixed with ``"sha256:"``.
    """
    encoded_payload = json.dumps(
        self._payload,
        sort_keys=True,
        separators=(",", ":"),
        default=_json_default,
        ensure_ascii=False,
    ).encode("utf-8")
    return "sha256:" + hashlib.sha256(encoded_payload).hexdigest()
|
|
643
|
+
|
|
644
|
+
# ------------------------------------------------------------------
|
|
645
|
+
# Deserialisation
|
|
646
|
+
# ------------------------------------------------------------------
|
|
647
|
+
|
|
648
|
+
@classmethod
def from_dict(
    cls,
    data: dict[str, Any],
    *,
    max_size_bytes: int = 1_048_576,
    max_payload_depth: int = 10,
    max_tags: int = 50,
    source_hint: str = "<dict>",
) -> Event:
    """Construct an :class:`Event` from a plain dictionary.

    The dictionary shape matches the output of :meth:`to_dict`. Keys that
    are not part of the known envelope are kept on the instance as
    ``_unknown_fields`` so they survive a round trip.

    Args:
        data: Dictionary with event fields.
        max_size_bytes: Maximum serialised size in bytes (RFC §19.4),
            measured on ``json.dumps(data)`` with compact separators.
            Defaults to 1 MiB.  Pass 0 to disable.  If *data* cannot be
            serialised (``TypeError`` / ``ValueError``) the size check is
            skipped.
        max_payload_depth: Maximum nesting depth of the payload object
            (RFC §19.4).  Defaults to 10.  Pass 0 to disable.
        max_tags: Maximum number of tag keys allowed (RFC §19.4).
            Defaults to 50.  Pass 0 to disable.
        source_hint: Short label for error messages (e.g. a filename).

    Returns:
        A new :class:`Event` instance (not yet validated).

    Raises:
        DeserializationError: If a required field is missing or has an
            unexpected type, or if any DoS limit is exceeded.  This
            includes input nested so deeply that it cannot even be
            serialised for the size check.

    Example::

        event = Event.from_dict(json.loads(raw_json))
        event.validate()
    """
    _require_dict(data, source_hint)

    # RFC §19.4 — DoS guards
    if max_size_bytes > 0:
        try:
            _encoded = json.dumps(data, separators=(",", ":")).encode()
        except RecursionError as exc:
            # The size guard runs before the depth guard, so pathologically
            # nested input would otherwise escape as a raw RecursionError.
            raise DeserializationError(
                reason="event is nested too deeply to be serialised (RFC §19.4)",
                source_hint=source_hint,
            ) from exc
        except (TypeError, ValueError):
            # Best effort: values json cannot encode natively make the
            # size unmeasurable, so the check is skipped.
            _encoded = b""
        if len(_encoded) > max_size_bytes:
            raise DeserializationError(
                reason=(
                    f"event exceeds max_size_bytes limit of {max_size_bytes} "
                    f"(got {len(_encoded)} bytes)"
                ),
                source_hint=source_hint,
            )

    tags_raw = data.get("tags")

    if max_tags > 0:
        if isinstance(tags_raw, dict) and len(tags_raw) > max_tags:
            raise DeserializationError(
                reason=(
                    f"event has {len(tags_raw)} tags, exceeding max_tags={max_tags} "
                    "(RFC §19.4)"
                ),
                source_hint=source_hint,
            )

    if max_payload_depth > 0:
        payload_raw = data.get("payload")
        if payload_raw is not None:
            _check_nesting_depth(payload_raw, max_payload_depth, source_hint)

    try:
        # A non-mapping ``tags`` value has no ``.items()``; the resulting
        # AttributeError is converted to DeserializationError below.
        tags: Tags | None = (
            Tags(**dict(tags_raw.items()))
            if tags_raw is not None
            else None
        )

        _KNOWN_KEYS = {
            "schema_version", "event_id", "event_type", "timestamp",
            "source", "payload", "trace_id", "span_id",
            "parent_span_id", "org_id", "team_id", "actor_id",
            "session_id", "tags", "checksum", "signature", "prev_id",
        }
        _extra = {k: v for k, v in data.items() if k not in _KNOWN_KEYS}

        evt = cls(
            schema_version=_require_str(data, "schema_version", source_hint),
            event_id=_require_str(data, "event_id", source_hint),
            event_type=_require_str(data, "event_type", source_hint),
            timestamp=_require_str(data, "timestamp", source_hint),
            source=_require_str(data, "source", source_hint),
            payload=_require_dict_field(data, "payload", source_hint),
            trace_id=data.get("trace_id"),
            span_id=data.get("span_id"),
            parent_span_id=data.get("parent_span_id"),
            org_id=data.get("org_id"),
            team_id=data.get("team_id"),
            actor_id=data.get("actor_id"),
            session_id=data.get("session_id"),
            tags=tags,
            checksum=data.get("checksum"),
            signature=data.get("signature"),
            prev_id=data.get("prev_id"),
        )
        if _extra:
            # object.__setattr__ is used rather than plain assignment to
            # attach the unknown fields to the new instance.
            object.__setattr__(evt, "_unknown_fields", _extra)
        return evt
    except (KeyError, AttributeError) as exc:
        raise DeserializationError(
            reason=f"unexpected structure: {exc}",
            source_hint=source_hint,
        ) from exc
|
|
760
|
+
# Note: from_json delegates to from_dict, which handles _unknown_fields.
|
|
761
|
+
|
|
762
|
+
@classmethod
def from_json(
    cls,
    json_str: str,
    *,
    max_size_bytes: int = 1_048_576,
    max_payload_depth: int = 10,
    max_tags: int = 50,
    source_hint: str = "<json>",
) -> Event:
    """Construct an :class:`Event` from a JSON string.

    Args:
        json_str: A JSON string in the format produced by :meth:`to_json`.
        max_size_bytes: Maximum string size in UTF-8 bytes (RFC §19.4).
            Defaults to 1 MiB.  Pass 0 to disable.
        max_payload_depth: Maximum nesting depth forwarded to :meth:`from_dict`.
        max_tags: Maximum number of tag keys forwarded to :meth:`from_dict`.
        source_hint: Short label for error messages.

    Returns:
        A new :class:`Event` instance (not yet validated).

    Raises:
        DeserializationError: If *json_str* is not valid JSON, is nested
            too deeply for the parser, is missing required fields, or
            exceeds any DoS limit.

    Example::

        event = Event.from_json(raw_json_str)
        event.validate()
    """
    # RFC §19.4 — byte-length check before parsing to prevent parse-bomb attacks.
    if max_size_bytes > 0:
        # Encode once; the length is needed for both the test and the message.
        encoded_size = len(json_str.encode())
        if encoded_size > max_size_bytes:
            raise DeserializationError(
                reason=(
                    f"JSON string exceeds max_size_bytes limit of {max_size_bytes} "
                    f"(got {encoded_size} bytes)"
                ),
                source_hint=source_hint,
            )
    try:
        data: dict[str, Any] = json.loads(json_str)
    except json.JSONDecodeError as exc:
        raise DeserializationError(
            reason=f"invalid JSON: {exc}",
            source_hint=source_hint,
        ) from exc
    except RecursionError as exc:
        # A document well inside the byte limit can still nest deeply
        # enough to exhaust the parser's recursion budget; report it as a
        # deserialisation failure instead of leaking RecursionError.
        raise DeserializationError(
            reason="JSON is nested too deeply to parse (RFC §19.4)",
            source_hint=source_hint,
        ) from exc
    return cls.from_dict(
        data,
        max_size_bytes=0,  # already checked above
        max_payload_depth=max_payload_depth,
        max_tags=max_tags,
        source_hint=source_hint,
    )
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
# ---------------------------------------------------------------------------
|
|
820
|
+
# Validation helpers (module-private)
|
|
821
|
+
# ---------------------------------------------------------------------------
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def _check_nesting_depth(
|
|
825
|
+
obj: Any,
|
|
826
|
+
max_depth: int,
|
|
827
|
+
source_hint: str,
|
|
828
|
+
_current: int = 0,
|
|
829
|
+
) -> None:
|
|
830
|
+
"""Recursively check that *obj* does not exceed *max_depth* nesting levels.
|
|
831
|
+
|
|
832
|
+
Raises :exc:`~spanforge.exceptions.DeserializationError` if the depth
|
|
833
|
+
limit is exceeded. This guards against deeply nested JSON that could
|
|
834
|
+
cause stack overflows or excessive CPU use (RFC §19.4).
|
|
835
|
+
"""
|
|
836
|
+
if _current >= max_depth:
|
|
837
|
+
raise DeserializationError(
|
|
838
|
+
reason=(
|
|
839
|
+
f"payload exceeds max nesting depth of {max_depth} levels "
|
|
840
|
+
"(RFC §19.4)"
|
|
841
|
+
),
|
|
842
|
+
source_hint=source_hint,
|
|
843
|
+
)
|
|
844
|
+
if isinstance(obj, dict):
|
|
845
|
+
for v in obj.values():
|
|
846
|
+
_check_nesting_depth(v, max_depth, source_hint, _current + 1)
|
|
847
|
+
elif isinstance(obj, list):
|
|
848
|
+
for item in obj:
|
|
849
|
+
_check_nesting_depth(item, max_depth, source_hint, _current + 1)
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
def _validate_schema_version(value: str) -> None:
    """Check that *value* is a string listed in ``_ACCEPTED_SCHEMA_VERSIONS``.

    Raises:
        SchemaValidationError: If *value* is not a ``str`` or is not one of
            the accepted schema versions.
    """
    if isinstance(value, str):
        if value in _ACCEPTED_SCHEMA_VERSIONS:
            return
        accepted = sorted(_ACCEPTED_SCHEMA_VERSIONS)
        raise SchemaValidationError(
            "schema_version",
            value,
            f"must be one of {accepted!r} (RFC-0001 §15.5)",
        )
    raise SchemaValidationError("schema_version", value, _MUST_BE_STRING)
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def _validate_event_id(value: str) -> None:
    """Check that *value* is a string accepted by ``_validate_ulid``.

    Raises:
        SchemaValidationError: If *value* is not a ``str`` or
            ``_validate_ulid`` rejects it.
    """
    if isinstance(value, str):
        if _validate_ulid(value):
            return
        raise SchemaValidationError(
            "event_id",
            value,
            "must be a valid 26-character ULID (Crockford Base32)",
        )
    raise SchemaValidationError("event_id", value, _MUST_BE_STRING)
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
def _validate_event_type(value: str) -> None:
    """Check the shape and registration of an event type string.

    The value must be a ``str`` matching ``_EVENT_TYPE_RE``.  Values for
    which ``is_registered`` returns false are additionally passed through
    ``validate_custom``.

    Raises:
        SchemaValidationError: If *value* is not a ``str``, does not match
            the pattern, or ``validate_custom`` raises ``EventTypeError``
            (which is chained as the cause).
    """
    if not isinstance(value, str):
        raise SchemaValidationError("event_type", value, _MUST_BE_STRING)

    if not _EVENT_TYPE_RE.match(value):
        raise SchemaValidationError(
            "event_type",
            value,
            "must match 'llm.<ns>.<entity>.<action>' or 'x.<company>.<…>'",
        )

    if is_registered(value):
        return

    # Not a registered type: it has to pass the custom-type rules.
    try:
        validate_custom(value)
    except EventTypeError as exc:
        raise SchemaValidationError("event_type", value, str(exc)) from exc
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
def _validate_timestamp(value: str) -> None:
    """Check that *value* is a well-formed, real UTC timestamp string.

    The value must be a ``str`` matching ``_TIMESTAMP_PATTERN`` and must
    also be parseable by ``_parse_timestamp``, which rejects strings that
    have the right shape but name an impossible date or time.

    Raises:
        SchemaValidationError: If *value* is not a ``str``, does not match
            the pattern, or cannot be parsed (the ``ValueError`` is chained
            as the cause).
    """
    if not isinstance(value, str):
        raise SchemaValidationError("timestamp", value, _MUST_BE_STRING)

    shape_ok = _TIMESTAMP_PATTERN.match(value)
    if not shape_ok:
        raise SchemaValidationError(
            "timestamp",
            value,
            "must be UTC ISO-8601 format: 'YYYY-MM-DDTHH:MM:SS[.ffffff]Z'",
        )

    # The pattern only checks the shape; parsing confirms it is a real date/time.
    try:
        _parse_timestamp(value)
    except ValueError as exc:
        detail = f"not a valid date/time: {exc}"
        raise SchemaValidationError("timestamp", value, detail) from exc
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
def _validate_source(value: str) -> None:
    """Check that *value* is a string matching ``_SOURCE_PATTERN``.

    Raises:
        SchemaValidationError: If *value* is not a ``str`` or does not
            match the pattern.
    """
    if isinstance(value, str):
        if _SOURCE_PATTERN.match(value):
            return
        raise SchemaValidationError(
            "source",
            value,
            "must match 'tool-name@semver', e.g. 'llm-trace@0.3.1'",
        )
    raise SchemaValidationError("source", value, _MUST_BE_STRING)
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def _validate_payload(value: object) -> None:
|
|
926
|
+
if not isinstance(value, (dict, MappingProxyType)):
|
|
927
|
+
raise SchemaValidationError(
|
|
928
|
+
"payload", value, "must be a non-empty dict"
|
|
929
|
+
)
|
|
930
|
+
if not value:
|
|
931
|
+
raise SchemaValidationError(
|
|
932
|
+
"payload", value, "must be a non-empty dict (empty dict is not allowed)"
|
|
933
|
+
)
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
def _validate_hex_id(field: str, value: str, expected_len: int) -> None:
    """Check that *value* is a string matching the hex-ID pattern for its length.

    ``_TRACE_ID_PATTERN`` is used when *expected_len* is 32; every other
    length is checked against ``_SPAN_ID_PATTERN``.

    Args:
        field: Field name reported in the error.
        value: The candidate identifier.
        expected_len: Expected number of hex characters; also quoted in
            the error message.

    Raises:
        SchemaValidationError: If *value* is not a ``str`` or does not
            match the selected pattern.
    """
    if not isinstance(value, str):
        raise SchemaValidationError(field, value, _MUST_BE_STRING)

    if expected_len == 32:  # noqa: PLR2004
        pattern = _TRACE_ID_PATTERN
    else:
        pattern = _SPAN_ID_PATTERN

    if pattern.match(value):
        return
    raise SchemaValidationError(
        field,
        value,
        f"must be exactly {expected_len} lowercase hex characters",
    )
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
def _validate_string_id(field: str, value: str) -> None:
|
|
949
|
+
if not isinstance(value, str):
|
|
950
|
+
raise SchemaValidationError(field, value, _MUST_BE_STRING)
|
|
951
|
+
if not value:
|
|
952
|
+
raise SchemaValidationError(
|
|
953
|
+
field, value, "must be a non-empty string"
|
|
954
|
+
)
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
def _validate_ulid_field(field: str, value: str) -> None:
    """Check that *value* is a string accepted by ``_validate_ulid``.

    Args:
        field: Field name reported in the error.
        value: The candidate ULID.

    Raises:
        SchemaValidationError: If *value* is not a ``str`` or
            ``_validate_ulid`` rejects it.
    """
    if isinstance(value, str):
        if _validate_ulid(value):
            return
        raise SchemaValidationError(
            field,
            value,
            "must be a valid 26-character ULID",
        )
    raise SchemaValidationError(field, value, _MUST_BE_STRING)
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
# ---------------------------------------------------------------------------
|
|
967
|
+
# Deserialisation helpers (module-private)
|
|
968
|
+
# ---------------------------------------------------------------------------
|
|
969
|
+
|
|
970
|
+
|
|
971
|
+
def _require_dict(data: object, source_hint: str) -> None:
|
|
972
|
+
if not isinstance(data, dict):
|
|
973
|
+
raise DeserializationError(
|
|
974
|
+
reason=f"expected a JSON object at top level, got {type(data).__name__}",
|
|
975
|
+
source_hint=source_hint,
|
|
976
|
+
)
|
|
977
|
+
|
|
978
|
+
|
|
979
|
+
def _require_str(data: dict[str, Any], key: str, source_hint: str) -> str:
|
|
980
|
+
value = data.get(key)
|
|
981
|
+
if value is None:
|
|
982
|
+
raise DeserializationError(
|
|
983
|
+
reason=f"required field '{key}' is missing",
|
|
984
|
+
source_hint=source_hint,
|
|
985
|
+
)
|
|
986
|
+
if not isinstance(value, str):
|
|
987
|
+
raise DeserializationError(
|
|
988
|
+
reason=f"field '{key}' must be a string, got {type(value).__name__}",
|
|
989
|
+
source_hint=source_hint,
|
|
990
|
+
)
|
|
991
|
+
return value
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
def _require_dict_field(
|
|
995
|
+
data: dict[str, Any], key: str, source_hint: str
|
|
996
|
+
) -> dict[str, Any]:
|
|
997
|
+
value = data.get(key)
|
|
998
|
+
if value is None:
|
|
999
|
+
raise DeserializationError(
|
|
1000
|
+
reason=f"required field '{key}' is missing",
|
|
1001
|
+
source_hint=source_hint,
|
|
1002
|
+
)
|
|
1003
|
+
if not isinstance(value, dict):
|
|
1004
|
+
raise DeserializationError(
|
|
1005
|
+
reason=f"field '{key}' must be an object, got {type(value).__name__}",
|
|
1006
|
+
source_hint=source_hint,
|
|
1007
|
+
)
|
|
1008
|
+
return value # type: ignore[return-value]
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
# ---------------------------------------------------------------------------
|
|
1012
|
+
# Serialisation helpers (module-private)
|
|
1013
|
+
# ---------------------------------------------------------------------------
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
def _json_default(obj: object) -> object:
    """Fallback encoder passed to ``json.dumps`` for non-standard types.

    ``datetime.datetime`` values are rendered through ``_datetime_to_iso``
    and ``EventType`` members are replaced by their ``.value``.

    Raises:
        TypeError: For any other type, naming the offending type.
    """
    if isinstance(obj, datetime.datetime):
        converted: object = _datetime_to_iso(obj)
    elif isinstance(obj, EventType):
        converted = obj.value
    else:
        type_name = type(obj).__name__
        raise TypeError(f"Object of type {type_name!r} is not JSON serialisable")
    return converted
|
|
1023
|
+
|
|
1024
|
+
|
|
1025
|
+
def _utcnow_iso() -> str:
    """Return the current UTC time formatted by ``_datetime_to_iso``."""
    return _datetime_to_iso(datetime.datetime.now(tz=datetime.timezone.utc))
|
|
1029
|
+
|
|
1030
|
+
|
|
1031
|
+
def _datetime_to_iso(dt: datetime.datetime) -> str:
|
|
1032
|
+
"""Format a :class:`datetime.datetime` as ``'YYYY-MM-DDTHH:MM:SS.ffffffZ'``."""
|
|
1033
|
+
if dt.tzinfo is None:
|
|
1034
|
+
# Assume UTC if naive
|
|
1035
|
+
dt = dt.replace(tzinfo=datetime.timezone.utc)
|
|
1036
|
+
# Normalise to UTC
|
|
1037
|
+
dt_utc = dt.astimezone(datetime.timezone.utc)
|
|
1038
|
+
return dt_utc.strftime("%Y-%m-%dT%H:%M:%S.%f") + "Z"
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
def _parse_timestamp(value: str) -> datetime.datetime:
|
|
1042
|
+
"""Parse an ISO-8601 UTC timestamp string."""
|
|
1043
|
+
# Python < 3.11 does not support fromisoformat with trailing 'Z'
|
|
1044
|
+
if value.endswith("Z"):
|
|
1045
|
+
value = value[:-1] + "+00:00"
|
|
1046
|
+
if sys.version_info >= (3, 11):
|
|
1047
|
+
return datetime.datetime.fromisoformat(value)
|
|
1048
|
+
# Fallback for Python 3.9 / 3.10 # pragma: no cover
|
|
1049
|
+
try: # pragma: no cover
|
|
1050
|
+
return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f+00:00") # pragma: no cover
|
|
1051
|
+
except ValueError: # pragma: no cover
|
|
1052
|
+
return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S+00:00") # pragma: no cover
|