spanforge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +815 -0
- spanforge/_ansi.py +93 -0
- spanforge/_batch_exporter.py +409 -0
- spanforge/_cli.py +2094 -0
- spanforge/_cli_audit.py +639 -0
- spanforge/_cli_compliance.py +711 -0
- spanforge/_cli_cost.py +243 -0
- spanforge/_cli_ops.py +791 -0
- spanforge/_cli_phase11.py +356 -0
- spanforge/_hooks.py +337 -0
- spanforge/_server.py +1708 -0
- spanforge/_span.py +1036 -0
- spanforge/_store.py +288 -0
- spanforge/_stream.py +664 -0
- spanforge/_trace.py +335 -0
- spanforge/_tracer.py +254 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +469 -0
- spanforge/auto.py +464 -0
- spanforge/baseline.py +335 -0
- spanforge/cache.py +635 -0
- spanforge/compliance.py +325 -0
- spanforge/config.py +532 -0
- spanforge/consent.py +228 -0
- spanforge/consumer.py +377 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1254 -0
- spanforge/cost.py +600 -0
- spanforge/debug.py +548 -0
- spanforge/deprecations.py +205 -0
- spanforge/drift.py +482 -0
- spanforge/egress.py +58 -0
- spanforge/eval.py +648 -0
- spanforge/event.py +1064 -0
- spanforge/exceptions.py +240 -0
- spanforge/explain.py +178 -0
- spanforge/export/__init__.py +69 -0
- spanforge/export/append_only.py +337 -0
- spanforge/export/cloud.py +357 -0
- spanforge/export/datadog.py +497 -0
- spanforge/export/grafana.py +320 -0
- spanforge/export/jsonl.py +195 -0
- spanforge/export/openinference.py +158 -0
- spanforge/export/otel_bridge.py +294 -0
- spanforge/export/otlp.py +811 -0
- spanforge/export/otlp_bridge.py +233 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/siem_schema.py +98 -0
- spanforge/export/siem_splunk.py +264 -0
- spanforge/export/siem_syslog.py +212 -0
- spanforge/export/webhook.py +299 -0
- spanforge/exporters/__init__.py +30 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/exporters/sqlite.py +142 -0
- spanforge/gate.py +1150 -0
- spanforge/governance.py +181 -0
- spanforge/hitl.py +295 -0
- spanforge/http.py +187 -0
- spanforge/inspect.py +427 -0
- spanforge/integrations/__init__.py +45 -0
- spanforge/integrations/_pricing.py +280 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/azure_openai.py +133 -0
- spanforge/integrations/bedrock.py +292 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +351 -0
- spanforge/integrations/groq.py +442 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/langgraph.py +306 -0
- spanforge/integrations/llamaindex.py +373 -0
- spanforge/integrations/ollama.py +287 -0
- spanforge/integrations/openai.py +368 -0
- spanforge/integrations/together.py +483 -0
- spanforge/io.py +214 -0
- spanforge/lint.py +322 -0
- spanforge/metrics.py +417 -0
- spanforge/metrics_export.py +343 -0
- spanforge/migrate.py +402 -0
- spanforge/model_registry.py +278 -0
- spanforge/models.py +389 -0
- spanforge/namespaces/__init__.py +254 -0
- spanforge/namespaces/audit.py +256 -0
- spanforge/namespaces/cache.py +237 -0
- spanforge/namespaces/chain.py +77 -0
- spanforge/namespaces/confidence.py +72 -0
- spanforge/namespaces/consent.py +92 -0
- spanforge/namespaces/cost.py +179 -0
- spanforge/namespaces/decision.py +143 -0
- spanforge/namespaces/diff.py +157 -0
- spanforge/namespaces/drift.py +80 -0
- spanforge/namespaces/eval_.py +251 -0
- spanforge/namespaces/feedback.py +241 -0
- spanforge/namespaces/fence.py +193 -0
- spanforge/namespaces/guard.py +105 -0
- spanforge/namespaces/hitl.py +91 -0
- spanforge/namespaces/latency.py +72 -0
- spanforge/namespaces/prompt.py +190 -0
- spanforge/namespaces/redact.py +173 -0
- spanforge/namespaces/retrieval.py +379 -0
- spanforge/namespaces/runtime_governance.py +494 -0
- spanforge/namespaces/template.py +208 -0
- spanforge/namespaces/tool_call.py +77 -0
- spanforge/namespaces/trace.py +1029 -0
- spanforge/normalizer.py +171 -0
- spanforge/plugins.py +82 -0
- spanforge/presidio_backend.py +349 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +418 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +914 -0
- spanforge/regression.py +192 -0
- spanforge/runtime_policy.py +159 -0
- spanforge/sampling.py +511 -0
- spanforge/schema.py +183 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/sdk/__init__.py +625 -0
- spanforge/sdk/_base.py +584 -0
- spanforge/sdk/_base.pyi +71 -0
- spanforge/sdk/_exceptions.py +1096 -0
- spanforge/sdk/_types.py +2184 -0
- spanforge/sdk/alert.py +1514 -0
- spanforge/sdk/alert.pyi +56 -0
- spanforge/sdk/audit.py +1196 -0
- spanforge/sdk/audit.pyi +67 -0
- spanforge/sdk/cec.py +1215 -0
- spanforge/sdk/cec.pyi +37 -0
- spanforge/sdk/config.py +641 -0
- spanforge/sdk/config.pyi +55 -0
- spanforge/sdk/enterprise.py +714 -0
- spanforge/sdk/enterprise.pyi +79 -0
- spanforge/sdk/explain.py +170 -0
- spanforge/sdk/fallback.py +432 -0
- spanforge/sdk/feedback.py +351 -0
- spanforge/sdk/gate.py +874 -0
- spanforge/sdk/gate.pyi +51 -0
- spanforge/sdk/identity.py +2114 -0
- spanforge/sdk/identity.pyi +47 -0
- spanforge/sdk/lineage.py +175 -0
- spanforge/sdk/observe.py +1065 -0
- spanforge/sdk/observe.pyi +50 -0
- spanforge/sdk/operator.py +338 -0
- spanforge/sdk/pii.py +1473 -0
- spanforge/sdk/pii.pyi +119 -0
- spanforge/sdk/pipelines.py +458 -0
- spanforge/sdk/pipelines.pyi +39 -0
- spanforge/sdk/policy.py +930 -0
- spanforge/sdk/rag.py +594 -0
- spanforge/sdk/rbac.py +280 -0
- spanforge/sdk/registry.py +430 -0
- spanforge/sdk/registry.pyi +46 -0
- spanforge/sdk/scope.py +279 -0
- spanforge/sdk/secrets.py +293 -0
- spanforge/sdk/secrets.pyi +25 -0
- spanforge/sdk/security.py +560 -0
- spanforge/sdk/security.pyi +57 -0
- spanforge/sdk/trust.py +472 -0
- spanforge/sdk/trust.pyi +41 -0
- spanforge/secrets.py +799 -0
- spanforge/signing.py +1179 -0
- spanforge/stats.py +100 -0
- spanforge/stream.py +560 -0
- spanforge/testing.py +378 -0
- spanforge/testing_mocks.py +1052 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +300 -0
- spanforge/validate.py +379 -0
- spanforge-1.0.0.dist-info/METADATA +1509 -0
- spanforge-1.0.0.dist-info/RECORD +174 -0
- spanforge-1.0.0.dist-info/WHEEL +4 -0
- spanforge-1.0.0.dist-info/entry_points.txt +5 -0
- spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/migrate.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"""Schema migration utilities for spanforge events.
|
|
2
|
+
|
|
3
|
+
Provides forward-only migration functions to convert events from older schema
|
|
4
|
+
versions to the current version. Migrations are idempotent — migrating an
|
|
5
|
+
event that is already at the target version returns it unchanged.
|
|
6
|
+
|
|
7
|
+
Usage
|
|
8
|
+
-----
|
|
9
|
+
::
|
|
10
|
+
|
|
11
|
+
from spanforge.migrate import v1_to_v2, migrate_file
|
|
12
|
+
|
|
13
|
+
# Single event
|
|
14
|
+
v2_event = v1_to_v2(v1_event)
|
|
15
|
+
|
|
16
|
+
# Bulk file migration
|
|
17
|
+
stats = migrate_file("audit.jsonl", output="audit_v2.jsonl")
|
|
18
|
+
print(f"Migrated {stats.migrated} events")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import hashlib
|
|
24
|
+
import json
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"MigrationStats",
|
|
31
|
+
"migrate_file",
|
|
32
|
+
"migrate_from_langsmith",
|
|
33
|
+
"v1_to_v2",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class MigrationStats:
    """Immutable summary of one bulk migration run.

    Attributes:
        total: Total events processed.
        migrated: Events that were upgraded to a new schema version.
        skipped: Events already at the target version (not modified).
        errors: Events that could not be parsed or migrated.
        warnings: Non-fatal warnings encountered during migration.
        output_path: Path where the migrated events were written.
        transformed_fields: Mapping of field names to the count of events
            where that field was transformed.
    """

    # Required counters; their positional order is part of the constructor.
    total: int
    migrated: int
    skipped: int
    errors: int

    # Optional details; each instance gets its own fresh list / dict.
    warnings: list[str] = field(default_factory=list)
    output_path: str = ""
    transformed_fields: dict[str, int] = field(default_factory=dict)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _rehash_md5_to_sha256(checksum: str | None, payload: dict[str, Any]) -> str | None:
|
|
62
|
+
"""If *checksum* starts with ``md5:``, recompute as ``sha256:``."""
|
|
63
|
+
if checksum and checksum.startswith("md5:"):
|
|
64
|
+
canonical = json.dumps(
|
|
65
|
+
payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
|
|
66
|
+
).encode("utf-8")
|
|
67
|
+
return f"sha256:{hashlib.sha256(canonical).hexdigest()}"
|
|
68
|
+
return checksum
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _coerce_tag_values(tags: Any) -> dict[str, str]:
    """Return *tags* as a plain ``dict`` with string keys and string values.

    * A ``spanforge.event.Tags`` instance is converted via its ``to_dict()``.
    * A ``dict`` has every key and value passed through ``str``.
    * Anything else produces an empty dict.
    """
    from spanforge.event import Tags as _Tags

    if isinstance(tags, _Tags):
        return tags.to_dict()
    if isinstance(tags, dict):
        return {str(key): str(value) for key, value in tags.items()}
    return {}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def v1_to_v2(event: Any) -> Any:
    """Migrate a single event from schema version 1.0 to 2.0.

    Changes applied:
        * ``schema_version`` is set to ``"2.0"``.
        * For ``dict`` events, missing ``org_id`` / ``team_id`` keys are added
          with the value ``None`` (they were not required in v1).
        * Payload key ``model`` is renamed to ``model_id`` unless ``model_id``
          is already present.
        * ``tags`` becomes an empty mapping if missing (or, for dicts, if it
          is not a ``dict``); all keys and values are coerced to strings.
        * A ``checksum`` starting with ``md5:`` is recomputed as ``sha256:``
          over the (already renamed) payload.

    An event whose ``schema_version`` is exactly ``"2.0"`` is returned as-is
    (the very same object), which makes repeated calls idempotent.  For any
    other input a new object is built; neither the input event nor its nested
    ``payload`` is modified.

    Args:
        event: Either an :class:`~spanforge.event.Event` instance or a plain
            ``dict`` (as loaded from JSONL).

    Returns:
        The migrated event (same type as input).

    Raises:
        TypeError: If *event* is neither an ``Event`` nor a ``dict``.
    """
    from spanforge.event import Event, Tags

    if isinstance(event, Event):
        if event.schema_version == "2.0":
            return event
        # Copy so the rename below never touches the source event's payload.
        payload = dict(event.payload)
        # Normalise model -> model_id
        if "model" in payload and "model_id" not in payload:
            payload["model_id"] = payload.pop("model")
        # Re-hash md5 checksum over the normalised payload
        checksum = _rehash_md5_to_sha256(event.checksum, payload)
        # Coerce tag values to strings
        raw_tags = _coerce_tag_values(event.tags) if event.tags else {}
        return Event(
            schema_version="2.0",
            event_id=event.event_id,
            event_type=event.event_type,
            timestamp=event.timestamp,
            source=event.source,
            payload=payload,
            trace_id=event.trace_id,
            span_id=event.span_id,
            parent_span_id=event.parent_span_id,
            org_id=event.org_id,
            team_id=event.team_id,
            actor_id=event.actor_id,
            session_id=event.session_id,
            tags=Tags(**raw_tags),
            checksum=checksum,
            signature=event.signature,
            prev_id=event.prev_id,
        )

    # Dict-based migration (e.g. raw JSONL parsing)
    if isinstance(event, dict):
        if event.get("schema_version") == "2.0":
            return event
        d = dict(event)  # shallow copy: top-level keys are safe to rewrite
        d["schema_version"] = "2.0"
        d.setdefault("org_id", None)
        d.setdefault("team_id", None)

        # Coerce tag keys/values to strings; anything that is not a dict is
        # replaced by an empty mapping.
        dict_tags: Any = d.get("tags")
        if isinstance(dict_tags, dict):
            d["tags"] = {str(k): str(v) for k, v in dict_tags.items()}
        else:
            d["tags"] = {}

        payload = d.get("payload", {})
        if isinstance(payload, dict):
            if "model" in payload and "model_id" not in payload:
                # The shallow copy above still shares the nested payload with
                # the caller, so rename on a private copy instead of in place.
                payload = dict(payload)
                payload["model_id"] = payload.pop("model")
                d["payload"] = payload
            # ``checksum`` may be absent, null or a non-string in raw JSON;
            # only a string can carry the ``md5:`` prefix.
            checksum = d.get("checksum")
            if isinstance(checksum, str):
                d["checksum"] = _rehash_md5_to_sha256(checksum, payload)
        return d

    raise TypeError(f"Cannot migrate object of type {type(event).__name__}")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def migrate_file(
    input_path: str | Path,
    *,
    output: str | Path | None = None,
    org_secret: str | None = None,
    target_version: str = "2.0",
    dry_run: bool = False,
) -> MigrationStats:
    """Migrate all events in a JSONL file from v1 to v2.

    The input is read line by line; blank lines are ignored.  Every JSON
    object whose ``schema_version`` differs from *target_version* is passed
    through :func:`v1_to_v2`.  Lines that are not valid JSON, are not JSON
    objects, or fail to migrate are counted in ``errors``, described in
    ``warnings`` together with their line number, and copied to the output
    unchanged so that no input line is dropped.

    Args:
        input_path: Path to the source JSONL file.
        output: Output file path (default: ``<stem>_v2<suffix>`` next to the
            input file).
        org_secret: When provided (and not *dry_run*), every output line is
            parsed back into an ``Event`` and re-signed as a chain with
            ``spanforge.signing.sign``.  Lines that cannot be re-signed are
            written unsigned and reported in ``warnings``.
        target_version: Lines already carrying this ``schema_version`` are
            counted as skipped and copied through untouched (default
            ``"2.0"``).  Note that the migration itself is performed by
            :func:`v1_to_v2`, which always produces version ``"2.0"``.
        dry_run: When ``True``, report stats without writing output.

    Returns:
        A :class:`MigrationStats` summarising the operation.

    Raises:
        OSError: If the input file cannot be read or the output file cannot
            be written.
    """
    src = Path(input_path)
    dst = src.with_name(f"{src.stem}_v2{src.suffix}") if output is None else Path(output)

    total = 0
    migrated = 0
    skipped = 0
    errors = 0
    warnings: list[str] = []
    transformed_fields: dict[str, int] = {}
    out_lines: list[str] = []

    def _count(field_name: str) -> None:
        # Bump the per-field transformation counter.
        transformed_fields[field_name] = transformed_fields.get(field_name, 0) + 1

    with src.open("r", encoding="utf-8") as fin:
        for line_no, raw in enumerate(fin, 1):
            line = raw.strip()
            if not line:
                continue
            total += 1

            try:
                data = json.loads(line)
            except json.JSONDecodeError as exc:
                errors += 1
                warnings.append(f"line {line_no}: invalid JSON ({exc.msg})")
                out_lines.append(line + "\n")
                continue

            # Source format validation
            if not isinstance(data, dict):
                errors += 1
                warnings.append(f"line {line_no}: not a JSON object")
                out_lines.append(line + "\n")
                continue

            if data.get("schema_version") == target_version:
                skipped += 1
                out_lines.append(line + "\n")
                continue

            # Work out which transformations v1_to_v2 will apply.  These are
            # pure look-ups that cannot raise, so they sit outside the try.
            payload = data.get("payload", {})
            payload_is_dict = isinstance(payload, dict)
            renames_model = (
                payload_is_dict and "model" in payload and "model_id" not in payload
            )
            checksum = data.get("checksum")  # may be absent, null or non-str
            rehashes = (
                payload_is_dict
                and isinstance(checksum, str)
                and checksum.startswith("md5:")
            )
            raw_tags = data.get("tags", {})
            coerces_tags = isinstance(raw_tags, dict) and any(
                not isinstance(v, str) for v in raw_tags.values()
            )

            try:
                migrated_data = v1_to_v2(data)
                serialised = json.dumps(
                    migrated_data, separators=(",", ":"), ensure_ascii=False
                )
            except Exception as exc:  # NOSONAR - keep going, report the line
                errors += 1
                warnings.append(
                    f"line {line_no}: migration failed ({type(exc).__name__}: {exc})"
                )
                out_lines.append(line + "\n")
                continue

            # Only count transformations for events that really were migrated.
            if renames_model:
                _count("payload.model→model_id")
            if rehashes:
                _count("checksum.md5→sha256")
            if coerces_tags:
                _count("tags.value_coercion")
            out_lines.append(serialised + "\n")
            migrated += 1

    # Re-sign if org_secret provided
    if org_secret and not dry_run:
        from spanforge.event import Event
        from spanforge.signing import sign as _sign

        signed_lines: list[str] = []
        prev_event = None
        for entry_no, out_line in enumerate(out_lines, 1):
            candidate = out_line.strip()
            if not candidate:
                continue
            try:
                evt = Event.from_dict(json.loads(candidate))
                signed_evt = _sign(evt, org_secret, prev_event=prev_event)
                signed_json = signed_evt.to_json()
            except Exception as exc:  # NOSONAR - best effort, but not silent
                warnings.append(
                    f"entry {entry_no}: could not re-sign "
                    f"({type(exc).__name__}); written unsigned"
                )
                signed_lines.append(candidate + "\n")
                continue
            # Advance the chain only once the signed event is really emitted.
            prev_event = signed_evt
            signed_lines.append(signed_json + "\n")
        out_lines = signed_lines

    if not dry_run:
        with dst.open("w", encoding="utf-8") as fout:
            fout.writelines(out_lines)

    return MigrationStats(
        total=total,
        migrated=migrated,
        skipped=skipped,
        errors=errors,
        warnings=warnings,
        output_path=str(dst),
        transformed_fields=transformed_fields,
    )
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
# ---------------------------------------------------------------------------
|
|
292
|
+
# LangSmith migration (F-27)
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
|
|
295
|
+
_LANGSMITH_RUN_TYPE_MAP: dict[str, str] = {
|
|
296
|
+
"llm": "llm.trace.span.completed",
|
|
297
|
+
"tool": "llm.tool.call.completed",
|
|
298
|
+
"retriever": "llm.tool.call.completed",
|
|
299
|
+
"chain": "llm.chain.completed",
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def migrate_from_langsmith(
    runs: list[dict[str, Any]],
    *,
    source: str = "langsmith-import",
) -> list[dict[str, Any]]:
    """Convert a list of LangSmith run dicts to SpanForge v2 event dicts.

    Supports both the *JSON array* and *JSONL* line shapes that LangSmith
    produces when you export a project.  The function performs the run-type →
    ``event_type`` mapping documented in ADR-006 and returns a ready-to-use
    list of SpanForge v2 event dicts suitable for writing as JSONL or passing
    directly to :class:`~spanforge.sdk.audit.SFAuditClient`.

    Per run, the resulting event carries:

    * a freshly generated ULID as ``event_id``;
    * ``event_type`` looked up from the run's ``run_type`` (unknown types
      fall back to ``"llm.trace.span.completed"``);
    * a payload with span name, run type, status, optional token usage,
      optional start/end times, the *key names* of inputs/outputs (never
      their content) and an error string truncated to 500 characters;
    * ``tags`` holding the LangSmith run / trace / parent identifiers as
      strings (empty string when unknown).

    Args:
        runs: List of LangSmith run dicts (as loaded from a ``.json`` or
            ``.jsonl`` export).
        source: ``source`` label stamped on every output event.  Defaults to
            ``"langsmith-import"``.

    Returns:
        A list of SpanForge v2 event dicts (one per input run).

    Example::

        import json
        from spanforge.migrate import migrate_from_langsmith

        with open("project_export.json") as fh:
            runs = json.load(fh)

        events = migrate_from_langsmith(runs, source="my-project")
    """
    import datetime as _datetime

    from spanforge.ulid import generate as _ulid_generate

    def _key_names(value: Any, fallback: str) -> list[str]:
        # Only key names are recorded; raw input/output content is not stored.
        return list(value.keys()) if isinstance(value, dict) else [fallback]

    events: list[dict[str, Any]] = []
    for run in runs:
        run_type = run.get("run_type", "chain")
        run_name = run.get("name", "unknown")
        # Generate a replacement id only when the run has none; this also
        # covers an explicit ``"id": null`` in the export.
        run_id = run.get("id") or _ulid_generate()

        event_type = _LANGSMITH_RUN_TYPE_MAP.get(run_type, "llm.trace.span.completed")

        payload: dict[str, Any] = {
            "span_name": run_name,
            "run_type": run_type,
            "status": run.get("status", "ok"),
        }

        # Token usage
        total_tok = run.get("total_tokens") or 0
        prompt_tok = run.get("prompt_tokens") or 0
        completion_tok = run.get("completion_tokens") or 0
        if total_tok or prompt_tok or completion_tok:
            payload["token_usage"] = {
                "input_tokens": prompt_tok,
                "output_tokens": completion_tok,
                "total_tokens": total_tok or (prompt_tok + completion_tok),
            }

        # Timing
        start_time = run.get("start_time")
        if start_time:
            payload["start_time"] = start_time
        if run.get("end_time"):
            payload["end_time"] = run["end_time"]

        # Inputs / outputs (key names only — no raw content stored)
        if run.get("inputs"):
            payload["input_keys"] = _key_names(run["inputs"], "input")
        if run.get("outputs"):
            payload["output_keys"] = _key_names(run["outputs"], "output")

        # Error info (truncated to 500 chars for safety)
        if run.get("error"):
            payload["error"] = str(run["error"])[:500]

        trace_id = run.get("trace_id") or run.get("session_id") or ""
        parent_id = run.get("parent_run_id") or ""

        # Fall back to "now" as an ISO-8601 UTC string so the timestamp has
        # the same string form as a LangSmith ``start_time`` instead of a
        # float epoch.
        # NOTE(review): confirm this format against the v2 event schema.
        timestamp = start_time or _datetime.datetime.now(
            _datetime.timezone.utc
        ).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

        event: dict[str, Any] = {
            "event_id": _ulid_generate(),
            "event_type": event_type,
            "source": source,
            "schema_version": "2.0",
            "timestamp": timestamp,
            "payload": payload,
            "tags": {
                "langsmith_run_id": str(run_id),
                "langsmith_trace_id": str(trace_id) if trace_id else "",
                "langsmith_parent_id": str(parent_id) if parent_id else "",
            },
        }
        events.append(event)

    return events
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""spanforge.model_registry — Model lifecycle tracking for AI compliance.
|
|
2
|
+
|
|
3
|
+
Provides a thread-safe in-memory registry of ML/AI models with lifecycle
|
|
4
|
+
transitions (active → deprecated → retired). Each mutation emits an
|
|
5
|
+
auditable event into the HMAC chain.
|
|
6
|
+
|
|
7
|
+
Emits ``model_registry.registered``, ``model_registry.deprecated``,
|
|
8
|
+
``model_registry.retired`` events via :func:`emit_rfc_event`.
|
|
9
|
+
|
|
10
|
+
Usage::
|
|
11
|
+
|
|
12
|
+
from spanforge.model_registry import ModelRegistry, ModelRegistryEntry
|
|
13
|
+
|
|
14
|
+
registry = ModelRegistry()
|
|
15
|
+
entry = registry.register(
|
|
16
|
+
model_id="gpt-4o-2024-05",
|
|
17
|
+
name="GPT-4o",
|
|
18
|
+
version="2024-05",
|
|
19
|
+
risk_tier="high",
|
|
20
|
+
owner="platform-team",
|
|
21
|
+
purpose="customer support agent",
|
|
22
|
+
)
|
|
23
|
+
registry.deprecate("gpt-4o-2024-05", reason="Replaced by gpt-4o-2024-08")
|
|
24
|
+
registry.retire("gpt-4o-2024-05")
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import contextlib
|
|
30
|
+
import json
|
|
31
|
+
import threading
|
|
32
|
+
from dataclasses import asdict, dataclass, field
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Any, Literal
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"ModelRegistry",
|
|
38
|
+
"ModelRegistryEntry",
|
|
39
|
+
"deprecate_model",
|
|
40
|
+
"get_model",
|
|
41
|
+
"list_models",
|
|
42
|
+
"register_model",
|
|
43
|
+
"retire_model",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
# Closed sets of accepted values, used by ModelRegistryEntry validation.
_VALID_RISK_TIERS = frozenset(("low", "medium", "high", "critical"))
_VALID_STATUSES = frozenset(("active", "deprecated", "retired"))


@dataclass
class ModelRegistryEntry:
    """Record describing one model tracked for compliance purposes.

    All of ``model_id``, ``name``, ``version``, ``owner`` and ``purpose`` must
    be non-empty; ``risk_tier`` and ``status`` must be one of the accepted
    values.  Violations raise :class:`ValueError` on construction.
    """

    model_id: str
    name: str
    version: str
    risk_tier: Literal["low", "medium", "high", "critical"]
    owner: str
    purpose: str
    status: Literal["active", "deprecated", "retired"] = "active"
    deployment_date: str | None = None
    decommission_date: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate the freshly initialised fields, in declaration order."""
        for attr in ("model_id", "name", "version"):
            if not getattr(self, attr):
                raise ValueError(f"ModelRegistryEntry.{attr} must be non-empty")

        if self.risk_tier not in _VALID_RISK_TIERS:
            allowed_tiers = sorted(_VALID_RISK_TIERS)
            raise ValueError(f"ModelRegistryEntry.risk_tier must be one of {allowed_tiers}")

        for attr in ("owner", "purpose"):
            if not getattr(self, attr):
                raise ValueError(f"ModelRegistryEntry.{attr} must be non-empty")

        if self.status not in _VALID_STATUSES:
            allowed_statuses = sorted(_VALID_STATUSES)
            raise ValueError(f"ModelRegistryEntry.status must be one of {allowed_statuses}")

    def to_dict(self) -> dict[str, Any]:
        """Return the entry as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ModelRegistryEntry:
        """Build an entry from *data*, ignoring keys that are not fields."""
        known = cls.__dataclass_fields__
        kwargs = {key: value for key, value in data.items() if key in known}
        return cls(**kwargs)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ModelRegistry:
    """In-memory model registry with lifecycle transitions.

    Entries move through ``active`` → ``deprecated`` → ``retired``.  Access to
    the internal ``model_id → entry`` mapping is guarded by a lock.  When
    *auto_emit* is enabled, every mutation attempts to emit an audit event
    (best effort — emission failures never propagate to the caller).
    Optionally, the registry can persist to/from a JSON file.
    """

    def __init__(self, *, auto_emit: bool = True) -> None:
        """Create an empty registry.

        Args:
            auto_emit: Emit an audit event after each successful
                ``register`` / ``deprecate`` / ``retire`` call.
        """
        self._lock = threading.Lock()
        self._models: dict[str, ModelRegistryEntry] = {}
        self._auto_emit = auto_emit

    def register(
        self,
        model_id: str,
        name: str,
        version: str,
        risk_tier: Literal["low", "medium", "high", "critical"],
        owner: str,
        purpose: str,
        *,
        deployment_date: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> ModelRegistryEntry:
        """Register a new model and emit ``model_registry.registered``.

        Args:
            model_id: Unique identifier of the model within this registry.
            name: Model name.
            version: Model version label.
            risk_tier: One of ``low``, ``medium``, ``high``, ``critical``.
            owner: Owner of the model.
            purpose: What the model is used for.
            deployment_date: Deployment timestamp; defaults to the current
                UTC time when omitted or empty.
            metadata: Optional extra data.  It is copied, so later registry
                mutations never alter the caller's dict.

        Returns:
            The newly stored entry (status ``active``).

        Raises:
            ValueError: If entry validation fails or *model_id* is already
                present in the registry.
        """
        entry = ModelRegistryEntry(
            model_id=model_id,
            name=name,
            version=version,
            risk_tier=risk_tier,
            owner=owner,
            purpose=purpose,
            status="active",
            deployment_date=deployment_date or self._now(),
            # Copy: deprecate() writes into entry.metadata and must not leak
            # that change back into the dict the caller passed in.
            metadata=dict(metadata) if metadata else {},
        )
        with self._lock:
            if model_id in self._models:
                # NOTE(review): retire() keeps the entry in the registry, so
                # a retired model_id still ends up here; the advice in the
                # second sentence may need revisiting.
                raise ValueError(
                    f"Model {model_id!r} already registered. "
                    "Use a unique model_id or retire the existing entry first."
                )
            self._models[model_id] = entry

        if self._auto_emit:
            self._emit("registered", entry)
        return entry

    def deprecate(self, model_id: str, *, reason: str = "") -> ModelRegistryEntry:
        """Mark a model as deprecated and emit ``model_registry.deprecated``.

        A non-empty *reason* is stored under ``metadata["deprecation_reason"]``.

        Raises:
            KeyError: If *model_id* is not in the registry.
            ValueError: If the model is already retired.
        """
        with self._lock:
            entry = self._models.get(model_id)
            if entry is None:
                raise KeyError(f"Model {model_id!r} not found in registry")
            if entry.status == "retired":
                raise ValueError(f"Model {model_id!r} is already retired")
            entry.status = "deprecated"
            if reason:
                entry.metadata["deprecation_reason"] = reason

        if self._auto_emit:
            self._emit("deprecated", entry)
        return entry

    def retire(self, model_id: str) -> ModelRegistryEntry:
        """Move a model to retired status and emit ``model_registry.retired``.

        The decommission date is stamped with the current UTC time.  Calling
        ``retire`` again on an already retired model keeps the date recorded
        the first time instead of overwriting it.

        Raises:
            KeyError: If *model_id* is not in the registry.
        """
        with self._lock:
            entry = self._models.get(model_id)
            if entry is None:
                raise KeyError(f"Model {model_id!r} not found in registry")
            already_retired = entry.status == "retired"
            entry.status = "retired"
            # Preserve the original decommission timestamp on repeated calls.
            if not already_retired or entry.decommission_date is None:
                entry.decommission_date = self._now()

        if self._auto_emit:
            self._emit("retired", entry)
        return entry

    def get(self, model_id: str) -> ModelRegistryEntry | None:
        """Look up a model entry by ID; returns ``None`` when unknown."""
        with self._lock:
            return self._models.get(model_id)

    def list_all(self) -> list[ModelRegistryEntry]:
        """Return all registered models as a new list."""
        with self._lock:
            return list(self._models.values())

    def list_active(self) -> list[ModelRegistryEntry]:
        """Return only models with ``status == 'active'``."""
        with self._lock:
            return [m for m in self._models.values() if m.status == "active"]

    def clear(self) -> None:
        """Remove all entries (for testing)."""
        with self._lock:
            self._models.clear()

    # -----------------------------------------------------------------------
    # Persistence
    # -----------------------------------------------------------------------

    def save(self, path: str | Path) -> None:
        """Persist registry to a JSON file, creating parent directories.

        Values that are not JSON-serialisable are written via ``str``.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Snapshot under the lock; do the file write outside of it.
        with self._lock:
            data = [e.to_dict() for e in self._models.values()]
        path.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")

    def load(self, path: str | Path) -> None:
        """Load registry from a JSON file, replacing current entries.

        The file is parsed and validated completely before the current
        entries are swapped out, so a failing load leaves the registry as it
        was.
        """
        path = Path(path)
        raw = json.loads(path.read_text(encoding="utf-8"))
        entries = [ModelRegistryEntry.from_dict(d) for d in raw]
        with self._lock:
            self._models = {e.model_id: e for e in entries}

    # -----------------------------------------------------------------------
    # Internal helpers
    # -----------------------------------------------------------------------

    @staticmethod
    def _now() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``."""
        import datetime

        return datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    @staticmethod
    def _emit(action: str, entry: ModelRegistryEntry) -> None:
        """Emit a model registry event into the HMAC audit chain.

        Best effort: an unavailable emitter (``ImportError``), an unknown
        *action*, or any exception raised while emitting is ignored.
        """
        try:
            from spanforge._stream import emit_rfc_event
            from spanforge.types import EventType

            _action_to_event = {
                "registered": EventType.MODEL_REGISTERED,
                "deprecated": EventType.MODEL_DEPRECATED,
                "retired": EventType.MODEL_RETIRED,
            }
            et = _action_to_event.get(action)
            if et is None:
                return
            # Auditing must never break the registry operation itself.
            with contextlib.suppress(Exception):
                emit_rfc_event(et, entry.to_dict())
        except ImportError:
            pass
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
# Module-level singleton & convenience functions
|
|
243
|
+
# ---------------------------------------------------------------------------
|
|
244
|
+
|
|
245
|
+
# Shared, process-wide registry backing the module-level helper functions.
_registry = ModelRegistry()


def register_model(
    model_id: str,
    name: str,
    version: str,
    risk_tier: Literal["low", "medium", "high", "critical"],
    owner: str,
    purpose: str,
    **kwargs: Any,
) -> ModelRegistryEntry:
    """Register a model in the shared module-level :class:`ModelRegistry`.

    Extra keyword arguments are forwarded unchanged to
    :meth:`ModelRegistry.register`.
    """
    return _registry.register(
        model_id=model_id,
        name=name,
        version=version,
        risk_tier=risk_tier,
        owner=owner,
        purpose=purpose,
        **kwargs,
    )
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def deprecate_model(model_id: str, **kwargs: Any) -> ModelRegistryEntry:
    """Deprecate *model_id* in the shared module-level :class:`ModelRegistry`.

    Keyword arguments are forwarded unchanged to
    :meth:`ModelRegistry.deprecate`.
    """
    entry = _registry.deprecate(model_id, **kwargs)
    return entry
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def retire_model(model_id: str) -> ModelRegistryEntry:
    """Retire *model_id* in the shared module-level :class:`ModelRegistry`."""
    entry = _registry.retire(model_id)
    return entry
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def list_models() -> list[ModelRegistryEntry]:
    """Return every entry held by the shared module-level :class:`ModelRegistry`."""
    entries = _registry.list_all()
    return entries
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def get_model(model_id: str) -> ModelRegistryEntry | None:
    """Look up *model_id* in the shared module-level :class:`ModelRegistry`.

    Returns whatever :meth:`ModelRegistry.get` returns for that ID.
    """
    entry = _registry.get(model_id)
    return entry
|