spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/migrate.py ADDED
@@ -0,0 +1,402 @@
1
+ """Schema migration utilities for spanforge events.
2
+
3
+ Provides forward-only migration functions to convert events from older schema
4
+ versions to the current version. Migrations are idempotent — migrating an
5
+ event that is already at the target version returns it unchanged.
6
+
7
+ Usage
8
+ -----
9
+ ::
10
+
11
+ from spanforge.migrate import v1_to_v2, migrate_file
12
+
13
+ # Single event
14
+ v2_event = v1_to_v2(v1_event)
15
+
16
+ # Bulk file migration
17
+ stats = migrate_file("audit.jsonl", output="audit_v2.jsonl")
18
+ print(f"Migrated {stats.migrated} events")
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import hashlib
24
+ import json
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+ from typing import Any
28
+
29
+ __all__ = [
30
+ "MigrationStats",
31
+ "migrate_file",
32
+ "migrate_from_langsmith",
33
+ "v1_to_v2",
34
+ ]
35
+
36
+
37
@dataclass(frozen=True)
class MigrationStats:
    """Immutable summary of one bulk migration run.

    Attributes:
        total: Number of events that were processed.
        migrated: Number of events upgraded to a newer schema version.
        skipped: Number of events that were already at the target version
            and were therefore left untouched.
        errors: Number of events that could not be parsed or migrated.
        warnings: Human-readable, non-fatal warnings collected on the way.
        output_path: Location the migrated events were written to.
        transformed_fields: Per-field counters — field name mapped to the
            number of events in which that field was transformed.
    """

    # Required counters (positional order is part of the public constructor).
    total: int
    migrated: int
    skipped: int
    errors: int
    # Optional details; each instance gets its own fresh container.
    warnings: list[str] = field(default_factory=list)
    output_path: str = ""
    transformed_fields: dict[str, int] = field(default_factory=dict)
59
+
60
+
61
+ def _rehash_md5_to_sha256(checksum: str | None, payload: dict[str, Any]) -> str | None:
62
+ """If *checksum* starts with ``md5:``, recompute as ``sha256:``."""
63
+ if checksum and checksum.startswith("md5:"):
64
+ canonical = json.dumps(
65
+ payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
66
+ ).encode("utf-8")
67
+ return f"sha256:{hashlib.sha256(canonical).hexdigest()}"
68
+ return checksum
69
+
70
+
71
def _coerce_tag_values(tags: Any) -> dict[str, str]:
    """Turn *tags* into a plain dictionary.

    * A ``Tags`` instance is converted through its own ``to_dict()``.
    * A ``dict`` is rebuilt with every key and value passed through ``str``.
    * Anything else yields an empty dict.
    """
    from spanforge.event import Tags as _Tags

    if isinstance(tags, _Tags):
        return tags.to_dict()
    if isinstance(tags, dict):
        return {str(key): str(value) for key, value in tags.items()}
    return {}
80
+
81
+
82
def v1_to_v2(event: Any) -> Any:
    """Migrate a single event from schema version 1.0 to 2.0.

    Changes applied:

    * ``schema_version`` is set to ``"2.0"``.
    * For dict events, missing ``org_id`` / ``team_id`` keys are added with
      the value ``None``.
    * Payload key ``model`` is renamed to ``model_id`` unless ``model_id`` is
      already present.
    * ``tags`` becomes a dict whose keys and values are all strings (an empty
      dict when tags are missing or not a dict).
    * A checksum starting with ``md5:`` is replaced by a ``sha256:`` digest of
      the (already renamed) payload.

    An event whose ``schema_version`` is exactly ``"2.0"`` is returned as-is
    (the very same object), which makes the function idempotent.

    The input is never modified: dict events are copied, including the nested
    payload when it has to be changed.

    Args:
        event: Either an :class:`~spanforge.event.Event` instance or a plain
            ``dict`` (as loaded from JSONL).

    Returns:
        The migrated event (same type as input).

    Raises:
        TypeError: If *event* is neither a ``dict`` nor an ``Event``.
    """
    # Dict-based migration (e.g. raw JSONL parsing).  Handled first so this
    # path does not depend on the Event classes at all.
    # NOTE(review): assumes Event is not a dict subclass — confirm.
    if isinstance(event, dict):
        if event.get("schema_version") == "2.0":
            return event
        d = dict(event)
        d["schema_version"] = "2.0"
        d.setdefault("org_id", None)
        d.setdefault("team_id", None)

        # Coerce tag keys/values to strings; anything that is not a dict is dropped.
        dict_tags: Any = d.get("tags")
        d["tags"] = (
            {str(k): str(v) for k, v in dict_tags.items()} if isinstance(dict_tags, dict) else {}
        )

        payload = d.get("payload", {})
        if isinstance(payload, dict):
            if "model" in payload and "model_id" not in payload:
                # ``d`` is only a shallow copy, so rename on a private copy of
                # the payload; otherwise the caller's event would be mutated.
                payload = dict(payload)
                payload["model_id"] = payload.pop("model")
                d["payload"] = payload
            # ``checksum`` may be absent or JSON ``null``; only strings can
            # carry the legacy ``md5:`` prefix.
            checksum = d.get("checksum")
            if isinstance(checksum, str) and checksum.startswith("md5:"):
                canonical = json.dumps(
                    payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
                ).encode("utf-8")
                d["checksum"] = f"sha256:{hashlib.sha256(canonical).hexdigest()}"
        return d

    from spanforge.event import Event, Tags

    if isinstance(event, Event):
        if event.schema_version == "2.0":
            return event
        payload = dict(event.payload)
        # Normalise model → model_id
        if "model" in payload and "model_id" not in payload:
            payload["model_id"] = payload.pop("model")
        # Re-hash md5 checksum over the normalised payload
        checksum = _rehash_md5_to_sha256(event.checksum, payload)
        # Coerce tag values to strings
        raw_tags = _coerce_tag_values(event.tags) if event.tags else {}
        tags = Tags(**raw_tags)
        return Event(
            schema_version="2.0",
            event_id=event.event_id,
            event_type=event.event_type,
            timestamp=event.timestamp,
            source=event.source,
            payload=payload,
            trace_id=event.trace_id,
            span_id=event.span_id,
            parent_span_id=event.parent_span_id,
            org_id=event.org_id,
            team_id=event.team_id,
            actor_id=event.actor_id,
            session_id=event.session_id,
            tags=tags,
            checksum=checksum,
            signature=event.signature,
            prev_id=event.prev_id,
        )

    raise TypeError(f"Cannot migrate object of type {type(event).__name__}")
164
+
165
+
166
def migrate_file(
    input_path: str | Path,
    *,
    output: str | Path | None = None,
    org_secret: str | None = None,
    target_version: str = "2.0",
    dry_run: bool = False,
) -> MigrationStats:
    """Migrate all events in a JSONL file from v1 to v2.

    Reads line-by-line, applies :func:`v1_to_v2` to each JSON object, and
    writes the result to *output*.  Blank lines are ignored.  Lines that
    cannot be parsed or migrated are copied to the output unchanged, counted
    in ``errors`` and described in ``warnings``.

    Args:
        input_path: Path to the source JSONL file.
        output: Output file path.  Defaults to ``<stem>_v2<suffix>`` next to
            the input file.
        org_secret: When provided (and not a dry run), every output line is
            re-signed as a chain via ``spanforge.signing.sign``.  Lines that
            cannot be re-signed are kept as-is and reported in ``warnings``.
        target_version: Events whose ``schema_version`` equals this value are
            skipped.  The upgrade itself is always done by :func:`v1_to_v2`.
        dry_run: When ``True``, report stats without writing output.

    Returns:
        A :class:`MigrationStats` summarising the operation.
    """
    src = Path(input_path)
    dst = src.with_name(f"{src.stem}_v2{src.suffix}") if output is None else Path(output)

    total = 0
    migrated = 0
    skipped = 0
    errors = 0
    warnings: list[str] = []
    transformed_fields: dict[str, int] = {}

    migrated_dicts: list[str] = []

    with src.open("r", encoding="utf-8") as fin:
        for line_no, line in enumerate(fin, 1):
            line = line.strip()
            if not line:
                continue
            total += 1
            try:
                data = json.loads(line)
            except json.JSONDecodeError as exc:
                errors += 1
                warnings.append(f"line {line_no}: invalid JSON ({exc.msg})")
                migrated_dicts.append(line + "\n")
                continue

            # Source format validation
            if not isinstance(data, dict):
                errors += 1
                warnings.append(f"line {line_no}: not a JSON object")
                migrated_dicts.append(line + "\n")
                continue

            if data.get("schema_version") == target_version:
                skipped += 1
                migrated_dicts.append(line + "\n")
                continue

            # Work out which transformations apply *before* migrating, but
            # only record them once the migration has actually succeeded.
            payload = data.get("payload", {})
            checksum = data.get("checksum")
            raw_tags = data.get("tags", {})
            applied: list[str] = []
            if isinstance(payload, dict):
                if "model" in payload and "model_id" not in payload:
                    applied.append("payload.model→model_id")
                # The checksum may be JSON ``null``; only strings can carry
                # the ``md5:`` prefix, and v1_to_v2 re-hashes dict payloads only.
                if isinstance(checksum, str) and checksum.startswith("md5:"):
                    applied.append("checksum.md5→sha256")
            if isinstance(raw_tags, dict) and any(
                not isinstance(v, str) for v in raw_tags.values()
            ):
                applied.append("tags.value_coercion")

            try:
                migrated_line = (
                    json.dumps(v1_to_v2(data), separators=(",", ":"), ensure_ascii=False) + "\n"
                )
            except Exception as exc:  # NOSONAR - best effort: keep the original line
                errors += 1
                warnings.append(
                    f"line {line_no}: migration failed ({type(exc).__name__}: {exc})"
                )
                migrated_dicts.append(line + "\n")
                continue

            migrated_dicts.append(migrated_line)
            migrated += 1
            for field_name in applied:
                transformed_fields[field_name] = transformed_fields.get(field_name, 0) + 1

    # Re-sign if org_secret provided
    if org_secret and not dry_run:
        from spanforge.event import Event
        from spanforge.signing import sign as _sign

        signed_lines: list[str] = []
        prev_event = None
        for out_no, raw_line in enumerate(migrated_dicts, 1):
            raw_line = raw_line.strip()
            if not raw_line:
                continue
            try:
                evt = Event.from_dict(json.loads(raw_line))
                signed_evt = _sign(evt, org_secret, prev_event=prev_event)
                signed_json = signed_evt.to_json()
            except Exception as exc:  # NOSONAR - best effort: keep the unsigned line
                warnings.append(
                    f"output line {out_no}: could not be re-signed ({type(exc).__name__})"
                )
                signed_lines.append(raw_line + "\n")
                continue
            # Advance the chain only after the signed event was serialised,
            # so the next event never links to one that was not written.
            prev_event = signed_evt
            signed_lines.append(signed_json + "\n")
        migrated_dicts = signed_lines

    if not dry_run:
        with dst.open("w", encoding="utf-8") as fout:
            fout.writelines(migrated_dicts)

    return MigrationStats(
        total=total,
        migrated=migrated,
        skipped=skipped,
        errors=errors,
        warnings=warnings,
        output_path=str(dst),
        transformed_fields=transformed_fields,
    )
289
+
290
+
291
+ # ---------------------------------------------------------------------------
292
+ # LangSmith migration (F-27)
293
+ # ---------------------------------------------------------------------------
294
+
295
+ _LANGSMITH_RUN_TYPE_MAP: dict[str, str] = {
296
+ "llm": "llm.trace.span.completed",
297
+ "tool": "llm.tool.call.completed",
298
+ "retriever": "llm.tool.call.completed",
299
+ "chain": "llm.chain.completed",
300
+ }
301
+
302
+
303
def migrate_from_langsmith(
    runs: list[dict[str, Any]],
    *,
    source: str = "langsmith-import",
) -> list[dict[str, Any]]:
    """Convert a list of LangSmith run dicts to SpanForge v2 event dicts.

    Supports both the *JSON array* and *JSONL* line shapes that LangSmith
    produces when you export a project. The function performs the run-type →
    ``event_type`` mapping documented in ADR-006 and returns a ready-to-use
    list of SpanForge v2 event dicts suitable for writing as JSONL or passing
    directly to :class:`~spanforge.sdk.audit.SFAuditClient`.

    Only the *key names* of a run's inputs/outputs are copied into the event
    payload, and error text is truncated to 500 characters.

    Args:
        runs: List of LangSmith run dicts (as loaded from a ``.json`` or
            ``.jsonl`` export).
        source: ``source`` label stamped on every output event. Defaults to
            ``"langsmith-import"``.

    Returns:
        A list of SpanForge v2 event dicts (one per input run).

    Example::

        import json
        from spanforge.migrate import migrate_from_langsmith

        with open("project_export.json") as fh:
            runs = json.load(fh)

        events = migrate_from_langsmith(runs, source="my-project")
    """
    import time as _time

    from spanforge.ulid import generate as _ulid_generate

    events: list[dict[str, Any]] = []
    for run in runs:
        run_type = run.get("run_type", "chain")
        run_name = run.get("name", "unknown")
        # Generate a replacement id only when the run has no usable one; this
        # also keeps an explicit ``"id": null`` from becoming the tag "None".
        run_id = run.get("id") or _ulid_generate()

        # Unknown run types fall back to the generic span-completed event.
        event_type = _LANGSMITH_RUN_TYPE_MAP.get(run_type, "llm.trace.span.completed")

        payload: dict[str, Any] = {
            "span_name": run_name,
            "run_type": run_type,
            "status": run.get("status", "ok"),
        }

        # Token usage (missing or null counters are treated as 0)
        total_tok = run.get("total_tokens") or 0
        prompt_tok = run.get("prompt_tokens") or 0
        completion_tok = run.get("completion_tokens") or 0
        if total_tok or prompt_tok or completion_tok:
            payload["token_usage"] = {
                "input_tokens": prompt_tok,
                "output_tokens": completion_tok,
                # Derive the total when the export only carries the parts.
                "total_tokens": total_tok or (prompt_tok + completion_tok),
            }

        # Timing
        if run.get("start_time"):
            payload["start_time"] = run["start_time"]
        if run.get("end_time"):
            payload["end_time"] = run["end_time"]

        # Inputs / outputs (key names only — no raw content stored)
        if run.get("inputs"):
            payload["input_keys"] = (
                list(run["inputs"].keys()) if isinstance(run["inputs"], dict) else ["input"]
            )
        if run.get("outputs"):
            payload["output_keys"] = (
                list(run["outputs"].keys()) if isinstance(run["outputs"], dict) else ["output"]
            )

        # Error info (truncated to 500 chars for safety)
        if run.get("error"):
            payload["error"] = str(run["error"])[:500]

        trace_id = run.get("trace_id") or run.get("session_id") or ""
        parent_id = run.get("parent_run_id") or ""

        event: dict[str, Any] = {
            "event_id": _ulid_generate(),
            "event_type": event_type,
            "source": source,
            "schema_version": "2.0",
            # Falls back to the current epoch time when the run has no start time.
            "timestamp": run.get("start_time") or _time.time(),
            "payload": payload,
            "tags": {
                "langsmith_run_id": str(run_id),
                "langsmith_trace_id": str(trace_id) if trace_id else "",
                "langsmith_parent_id": str(parent_id) if parent_id else "",
            },
        }
        events.append(event)

    return events
@@ -0,0 +1,278 @@
1
+ """spanforge.model_registry — Model lifecycle tracking for AI compliance.
2
+
3
+ Provides a thread-safe in-memory registry of ML/AI models with lifecycle
4
+ transitions (active → deprecated → retired). Each mutation emits an
5
+ auditable event into the HMAC chain.
6
+
7
+ Emits ``model_registry.registered``, ``model_registry.deprecated``,
8
+ ``model_registry.retired`` events via :func:`emit_rfc_event`.
9
+
10
+ Usage::
11
+
12
+ from spanforge.model_registry import ModelRegistry, ModelRegistryEntry
13
+
14
+ registry = ModelRegistry()
15
+ entry = registry.register(
16
+ model_id="gpt-4o-2024-05",
17
+ name="GPT-4o",
18
+ version="2024-05",
19
+ risk_tier="high",
20
+ owner="platform-team",
21
+ purpose="customer support agent",
22
+ )
23
+ registry.deprecate("gpt-4o-2024-05", reason="Replaced by gpt-4o-2024-08")
24
+ registry.retire("gpt-4o-2024-05")
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import contextlib
30
+ import json
31
+ import threading
32
+ from dataclasses import asdict, dataclass, field
33
+ from pathlib import Path
34
+ from typing import Any, Literal
35
+
36
+ __all__ = [
37
+ "ModelRegistry",
38
+ "ModelRegistryEntry",
39
+ "deprecate_model",
40
+ "get_model",
41
+ "list_models",
42
+ "register_model",
43
+ "retire_model",
44
+ ]
45
+
46
_VALID_RISK_TIERS = frozenset(("low", "medium", "high", "critical"))
_VALID_STATUSES = frozenset(("active", "deprecated", "retired"))


@dataclass
class ModelRegistryEntry:
    """One model record kept for compliance tracking.

    Construction validates the record: the identifying text fields must be
    non-empty and ``risk_tier`` / ``status`` must be one of the known values,
    otherwise :class:`ValueError` is raised.
    """

    model_id: str
    name: str
    version: str
    risk_tier: Literal["low", "medium", "high", "critical"]
    owner: str
    purpose: str
    status: Literal["active", "deprecated", "retired"] = "active"
    deployment_date: str | None = None
    decommission_date: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        def _require(attr: str) -> None:
            # Rejects any falsy value for a mandatory field.
            if not getattr(self, attr):
                raise ValueError(f"ModelRegistryEntry.{attr} must be non-empty")

        # The check order below decides which error is reported first.
        for attr in ("model_id", "name", "version"):
            _require(attr)
        if self.risk_tier not in _VALID_RISK_TIERS:
            allowed_tiers = sorted(_VALID_RISK_TIERS)
            raise ValueError(f"ModelRegistryEntry.risk_tier must be one of {allowed_tiers}")
        for attr in ("owner", "purpose"):
            _require(attr)
        if self.status not in _VALID_STATUSES:
            allowed_statuses = sorted(_VALID_STATUSES)
            raise ValueError(f"ModelRegistryEntry.status must be one of {allowed_statuses}")

    def to_dict(self) -> dict[str, Any]:
        """Return the entry as a plain dict (via :func:`dataclasses.asdict`)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ModelRegistryEntry:
        """Build an entry from *data*, ignoring keys that are not fields."""
        known = cls.__dataclass_fields__
        kwargs = {key: value for key, value in data.items() if key in known}
        return cls(**kwargs)
91
+
92
+
93
class ModelRegistry:
    """In-memory model registry with lifecycle transitions.

    Entries move ``active`` → ``deprecated`` → ``retired``.  Access to the
    internal mapping is guarded by a lock.  When ``auto_emit`` is enabled,
    each state change attempts to emit an audit event (best effort).
    Optionally, the registry can persist to/from a JSON file.
    """

    def __init__(self, *, auto_emit: bool = True) -> None:
        """Create an empty registry.

        Args:
            auto_emit: Whether register/deprecate/retire emit audit events.
        """
        self._lock = threading.Lock()
        self._models: dict[str, ModelRegistryEntry] = {}
        self._auto_emit = auto_emit

    def register(
        self,
        model_id: str,
        name: str,
        version: str,
        risk_tier: Literal["low", "medium", "high", "critical"],
        owner: str,
        purpose: str,
        *,
        deployment_date: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> ModelRegistryEntry:
        """Register a new model and emit ``model_registry.registered``.

        ``deployment_date`` defaults to the current UTC timestamp.

        Raises:
            ValueError: If the entry is invalid or *model_id* is already
                present in the registry.
        """
        # Build (and thereby validate) the entry before taking the lock.
        entry = ModelRegistryEntry(
            model_id=model_id,
            name=name,
            version=version,
            risk_tier=risk_tier,
            owner=owner,
            purpose=purpose,
            status="active",
            deployment_date=deployment_date or self._now(),
            metadata=metadata or {},
        )
        with self._lock:
            if model_id in self._models:
                raise ValueError(
                    f"Model {model_id!r} already registered. "
                    "Use a unique model_id or retire the existing entry first."
                )
            self._models[model_id] = entry

        if self._auto_emit:
            self._emit("registered", entry)
        return entry

    def deprecate(self, model_id: str, *, reason: str = "") -> ModelRegistryEntry:
        """Mark a model as deprecated and emit ``model_registry.deprecated``.

        A non-empty *reason* is stored under ``metadata["deprecation_reason"]``.

        Raises:
            KeyError: If *model_id* is not registered.
            ValueError: If the model is already retired.
        """
        with self._lock:
            entry = self._models.get(model_id)
            if entry is None:
                raise KeyError(f"Model {model_id!r} not found in registry")
            if entry.status == "retired":
                raise ValueError(f"Model {model_id!r} is already retired")
            entry.status = "deprecated"
            if reason:
                entry.metadata["deprecation_reason"] = reason

        if self._auto_emit:
            self._emit("deprecated", entry)
        return entry

    def retire(self, model_id: str) -> ModelRegistryEntry:
        """Move a model to retired status and emit ``model_registry.retired``.

        The call is idempotent: retiring an already retired model keeps its
        recorded ``decommission_date`` and does not emit a second event.

        Raises:
            KeyError: If *model_id* is not registered.
        """
        with self._lock:
            entry = self._models.get(model_id)
            if entry is None:
                raise KeyError(f"Model {model_id!r} not found in registry")
            first_retirement = entry.status != "retired"
            entry.status = "retired"
            # Stamp the date once; a repeated call must not rewrite the
            # original decommission time.  A retired entry that has no date
            # yet (e.g. loaded from a file) still gets one.
            if first_retirement or entry.decommission_date is None:
                entry.decommission_date = self._now()

        if self._auto_emit and first_retirement:
            self._emit("retired", entry)
        return entry

    def get(self, model_id: str) -> ModelRegistryEntry | None:
        """Look up a model entry by ID; returns ``None`` when unknown."""
        with self._lock:
            return self._models.get(model_id)

    def list_all(self) -> list[ModelRegistryEntry]:
        """Return all registered models as a new list."""
        with self._lock:
            return list(self._models.values())

    def list_active(self) -> list[ModelRegistryEntry]:
        """Return only models with ``status == 'active'``."""
        with self._lock:
            return [m for m in self._models.values() if m.status == "active"]

    def clear(self) -> None:
        """Remove all entries (for testing)."""
        with self._lock:
            self._models.clear()

    # -----------------------------------------------------------------------
    # Persistence
    # -----------------------------------------------------------------------

    def save(self, path: str | Path) -> None:
        """Persist registry to a JSON file, creating parent directories."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Snapshot under the lock, write outside it.
        with self._lock:
            data = [e.to_dict() for e in self._models.values()]
        path.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")

    def load(self, path: str | Path) -> None:
        """Load registry from a JSON file, replacing current entries."""
        path = Path(path)
        raw = json.loads(path.read_text(encoding="utf-8"))
        # Parse and validate everything first so a bad file leaves the
        # current registry contents untouched.
        entries = [ModelRegistryEntry.from_dict(d) for d in raw]
        with self._lock:
            self._models = {e.model_id: e for e in entries}

    # -----------------------------------------------------------------------
    # Internal helpers
    # -----------------------------------------------------------------------

    @staticmethod
    def _now() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``."""
        import datetime

        return datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    @staticmethod
    def _emit(action: str, entry: ModelRegistryEntry) -> None:
        """Emit a model registry event for *action* (best effort).

        Unknown actions, a missing emitter module and errors raised by the
        emitter are all ignored so registry operations never fail because of
        event emission.
        """
        try:
            from spanforge._stream import emit_rfc_event
            from spanforge.types import EventType

            _action_to_event = {
                "registered": EventType.MODEL_REGISTERED,
                "deprecated": EventType.MODEL_DEPRECATED,
                "retired": EventType.MODEL_RETIRED,
            }
            et = _action_to_event.get(action)
            if et is None:
                return
            with contextlib.suppress(Exception):
                emit_rfc_event(et, entry.to_dict())
        except ImportError:
            pass
239
+
240
+
241
+ # ---------------------------------------------------------------------------
242
+ # Module-level singleton & convenience functions
243
+ # ---------------------------------------------------------------------------
244
+
245
# Shared registry instance used by the module-level convenience functions.
_registry = ModelRegistry()


def register_model(
    model_id: str,
    name: str,
    version: str,
    risk_tier: Literal["low", "medium", "high", "critical"],
    owner: str,
    purpose: str,
    **kwargs: Any,
) -> ModelRegistryEntry:
    """Register a model in the shared module-level :class:`ModelRegistry`.

    Extra keyword arguments are forwarded unchanged to
    :meth:`ModelRegistry.register`.
    """
    return _registry.register(
        model_id=model_id,
        name=name,
        version=version,
        risk_tier=risk_tier,
        owner=owner,
        purpose=purpose,
        **kwargs,
    )
259
+
260
+
261
def deprecate_model(model_id: str, **kwargs: Any) -> ModelRegistryEntry:
    """Deprecate *model_id* in the shared module-level :class:`ModelRegistry`.

    Extra keyword arguments are forwarded unchanged to
    :meth:`ModelRegistry.deprecate`.
    """
    deprecated_entry = _registry.deprecate(model_id, **kwargs)
    return deprecated_entry
264
+
265
+
266
def retire_model(model_id: str) -> ModelRegistryEntry:
    """Retire *model_id* in the shared module-level :class:`ModelRegistry`."""
    retired_entry = _registry.retire(model_id)
    return retired_entry
269
+
270
+
271
def list_models() -> list[ModelRegistryEntry]:
    """Return every entry held by the shared module-level :class:`ModelRegistry`."""
    all_entries = _registry.list_all()
    return all_entries
274
+
275
+
276
def get_model(model_id: str) -> ModelRegistryEntry | None:
    """Look up *model_id* in the shared module-level :class:`ModelRegistry`.

    Returns ``None`` when no such model is registered.
    """
    found = _registry.get(model_id)
    return found