spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,342 @@
1
+ """spanforge.metrics_export — Prometheus-compatible metrics export.
2
+
3
+ This module provides a zero-dependency Prometheus text-format metrics
4
+ endpoint for SpanForge. It exposes key observability indicators as gauges
5
+ and counters compatible with any Prometheus scraper.
6
+
7
+ Exported metrics
8
+ ----------------
9
+
10
+ ========================================= =====================================
11
+ Metric name Description
12
+ ========================================= =====================================
13
+ ``spanforge_spans_total`` Total spans emitted (counter).
14
+ ``spanforge_spans_error_total`` Total error spans (counter).
15
+ ``spanforge_export_errors_total`` Total export backend errors (counter).
16
+ ``spanforge_events_dropped_total`` Total events dropped (counter).
17
+ ``spanforge_token_usage_total`` Total tokens used (counter by type).
18
+ ``spanforge_span_duration_ms`` Span duration histogram buckets (gauge).
19
+ ``spanforge_drift_alerts_total`` Total drift alerts emitted (counter).
20
+ ========================================= =====================================
21
+
22
+ Usage
23
+ -----
24
+ Standalone HTTP server::
25
+
26
+ from spanforge.metrics_export import serve_metrics
27
+ serve_metrics(port=9090) # starts a background thread
28
+
29
+ Single scrape (e.g. push-gateway integration)::
30
+
31
+ from spanforge.metrics_export import PrometheusMetricsExporter, MetricsSummary
32
+
33
+ exporter = PrometheusMetricsExporter()
34
+ text = exporter.export(MetricsSummary(spans_total=1000, error_spans=12, ...))
35
+ print(text)
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import http.server
41
+ import logging
42
+ import re
43
+ import threading
44
+ import time
45
+ from dataclasses import dataclass, field
46
+ from typing import Any
47
+
48
+ __all__ = [
49
+ "MetricsSummary",
50
+ "PrometheusMetricsExporter",
51
+ "serve_metrics",
52
+ ]
53
+
54
+ _log = logging.getLogger("spanforge.metrics_export")
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # MetricsSummary
59
+ # ---------------------------------------------------------------------------
60
+
61
+
62
@dataclass
class MetricsSummary:
    """Snapshot of SpanForge observability counters.

    Instances of this class are passed to
    :meth:`PrometheusMetricsExporter.export` to generate the Prometheus text
    payload. All fields default to zero/empty so callers can omit unknown
    values.

    Args:
        spans_total: Cumulative number of spans started.
        error_spans: Cumulative number of spans with status ``"error"``.
        export_errors: Cumulative export backend errors.
        events_dropped: Events silently dropped (queue full / circuit open).
        prompt_tokens: Cumulative prompt token count.
        completion_tokens: Cumulative completion token count.
        total_tokens: Cumulative total token count.
        total_cost_usd: Cumulative estimated cost in USD.
        drift_alerts: Cumulative drift alert events emitted.
        active_spans: Gauge — currently open spans.
        duration_buckets: Histogram bucket counts ``{le_ms: count}``. Keys are
            bucket upper bounds in milliseconds; values are per-bucket
            (non-cumulative) counts — the exporter accumulates them into the
            cumulative form Prometheus expects.
        labels: Optional extra label key/value pairs applied to
            every metric (e.g. ``{"service": "my-service"}``).
        timestamp_ms: Unix timestamp (milliseconds) of the snapshot;
            defaults to "now" at construction time.
    """

    # Monotonic counters.
    spans_total: int = 0
    error_spans: int = 0
    export_errors: int = 0
    events_dropped: int = 0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    total_cost_usd: float = 0.0
    drift_alerts: int = 0
    # Gauges / distributions.
    active_spans: int = 0
    duration_buckets: dict[float, int] = field(default_factory=dict)
    # Metadata applied at export time.
    labels: dict[str, str] = field(default_factory=dict)
    timestamp_ms: int = field(default_factory=lambda: int(time.time() * 1000))
100
+
101
+
102
+ # ---------------------------------------------------------------------------
103
+ # PrometheusMetricsExporter
104
+ # ---------------------------------------------------------------------------
105
+
106
# Default histogram bucket upper bounds (milliseconds), emitted as zero-valued
# buckets when a summary carries no live duration data so scrapers never see a
# missing series.
_DEFAULT_DURATION_BUCKETS = (5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, 5000.0)
107
+
108
+
109
+ class PrometheusMetricsExporter:
110
+ """Render a :class:`MetricsSummary` as Prometheus text format (0.0.4).
111
+
112
+ The output is compatible with Prometheus scraping and the OpenMetrics
113
+ exposition format.
114
+
115
+ Args:
116
+ namespace: Optional metric name prefix. Defaults to ``"spanforge"``.
117
+
118
+ Example::
119
+
120
+ exporter = PrometheusMetricsExporter()
121
+ summary = MetricsSummary(spans_total=500, error_spans=3)
122
+ print(exporter.export(summary))
123
+ """
124
+
125
+ def __init__(self, namespace: str = "spanforge") -> None:
126
+ self._ns = namespace.rstrip("_")
127
+
128
+ def export(self, summary: MetricsSummary) -> str:
129
+ """Return Prometheus text exposition for *summary*.
130
+
131
+ Args:
132
+ summary: Populated :class:`MetricsSummary` snapshot.
133
+
134
+ Returns:
135
+ Multi-line string in Prometheus text format 0.0.4.
136
+ """
137
+ ns = self._ns
138
+ lines: list[str] = []
139
+ ts = summary.timestamp_ms
140
+ base_labels = self._format_labels(summary.labels)
141
+
142
+ def counter(name: str, help_text: str, value: int | float) -> None:
143
+ full = f"{ns}_{name}"
144
+ lines.append(f"# HELP {full} {help_text}")
145
+ lines.append(f"# TYPE {full} counter")
146
+ lines.append(f"{full}{base_labels} {value} {ts}")
147
+
148
+ def gauge(name: str, help_text: str, value: int | float) -> None:
149
+ full = f"{ns}_{name}"
150
+ lines.append(f"# HELP {full} {help_text}")
151
+ lines.append(f"# TYPE {full} gauge")
152
+ lines.append(f"{full}{base_labels} {value} {ts}")
153
+
154
+ # Span counters
155
+ counter("spans_total", "Total number of spans emitted.", summary.spans_total)
156
+ counter("spans_error_total", "Total number of error spans.", summary.error_spans)
157
+ counter("export_errors_total", "Total export backend errors.", summary.export_errors)
158
+ counter("events_dropped_total", "Total events dropped.", summary.events_dropped)
159
+ counter("drift_alerts_total", "Total drift alerts emitted.", summary.drift_alerts)
160
+
161
+ # Token usage (with token_type label)
162
+ tok_name = f"{ns}_token_usage_total"
163
+ lines.append(f"# HELP {tok_name} Total token usage by token type.")
164
+ lines.append(f"# TYPE {tok_name} counter")
165
+ for ttype, count in [
166
+ ("prompt", summary.prompt_tokens),
167
+ ("completion", summary.completion_tokens),
168
+ ("total", summary.total_tokens),
169
+ ]:
170
+ label_str = self._format_labels({**summary.labels, "token_type": ttype})
171
+ lines.append(f"{tok_name}{label_str} {count} {ts}")
172
+
173
+ # Cost
174
+ counter("cost_usd_total", "Total estimated cost in USD.", summary.total_cost_usd)
175
+
176
+ # Active spans (gauge)
177
+ gauge("active_spans", "Currently open (in-flight) spans.", summary.active_spans)
178
+
179
+ # Duration histogram
180
+ if summary.duration_buckets:
181
+ hist_name = f"{ns}_span_duration_ms"
182
+ lines.append(f"# HELP {hist_name} Span duration distribution in milliseconds.")
183
+ lines.append(f"# TYPE {hist_name} histogram")
184
+ cumulative = 0
185
+ sorted_buckets = sorted(summary.duration_buckets.items())
186
+ for le, count in sorted_buckets:
187
+ cumulative += count
188
+ le_label = self._format_labels({**summary.labels, "le": str(le)})
189
+ lines.append(f"{hist_name}_bucket{le_label} {cumulative} {ts}")
190
+ # +Inf bucket
191
+ inf_label = self._format_labels({**summary.labels, "le": "+Inf"})
192
+ lines.append(f"{hist_name}_bucket{inf_label} {cumulative} {ts}")
193
+ else:
194
+ # Emit default zero buckets so scrapers don't see missing series.
195
+ hist_name = f"{ns}_span_duration_ms"
196
+ lines.append(f"# HELP {hist_name} Span duration distribution in milliseconds.")
197
+ lines.append(f"# TYPE {hist_name} histogram")
198
+ for le in _DEFAULT_DURATION_BUCKETS:
199
+ le_label = self._format_labels({**summary.labels, "le": str(le)})
200
+ lines.append(f"{hist_name}_bucket{le_label} 0 {ts}")
201
+ inf_label = self._format_labels({**summary.labels, "le": "+Inf"})
202
+ lines.append(f"{hist_name}_bucket{inf_label} 0 {ts}")
203
+
204
+ lines.append("") # trailing newline
205
+ return "\n".join(lines)
206
+
207
+ # ------------------------------------------------------------------
208
+
209
+ @staticmethod
210
+ def _format_labels(labels: dict[str, str]) -> str:
211
+ if not labels:
212
+ return ""
213
+ # Drop any label keys that don't conform to the Prometheus data model.
214
+ valid_labels = {
215
+ k: v for k, v in labels.items() if _PROM_LABEL_NAME_RE.match(k)
216
+ }
217
+ if not valid_labels:
218
+ return ""
219
+ pairs = ",".join(
220
+ f'{k}="{_escape_label_value(v)}"' for k, v in sorted(valid_labels.items())
221
+ )
222
+ return "{" + pairs + "}"
223
+
224
+
225
+ # M6: Prometheus label names must match [a-zA-Z_:][a-zA-Z0-9_:]* (Prometheus data model).
226
+ _PROM_LABEL_NAME_RE: re.Pattern[str] = re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$")
227
+
228
+
229
+ def _escape_label_value(value: str) -> str:
230
+ return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
231
+
232
+
233
+ # ---------------------------------------------------------------------------
234
+ # Live metrics collector — reads from _stream internals
235
+ # ---------------------------------------------------------------------------
236
+
237
+
238
def _collect_live_summary() -> MetricsSummary:
    """Build a :class:`MetricsSummary` from live SpanForge stream counters."""
    live = MetricsSummary()

    # Best-effort: each probe fails independently without disturbing the rest.
    try:
        from spanforge._stream import _export_error_count  # noqa: PLC0415
    except Exception:  # NOSONAR
        pass
    else:
        live.export_errors = _export_error_count

    try:
        from spanforge._span import _SPAN_STACK  # noqa: PLC0415
    except Exception:  # NOSONAR
        pass
    else:
        # _SPAN_STACK is a ContextVar[list]; counting open spans is tricky
        # without a global registry, so active_spans keeps its 0 default.
        _ = _SPAN_STACK

    return live
254
+
255
+
256
+ # ---------------------------------------------------------------------------
257
+ # HTTP handler + server
258
+ # ---------------------------------------------------------------------------
259
+
260
+
261
class _MetricsHTTPHandler(http.server.BaseHTTPRequestHandler):
    """Minimal HTTP handler serving /metrics in Prometheus text format."""

    _exporter: PrometheusMetricsExporter
    _collector: Any  # callable: () -> MetricsSummary

    def _respond_plain(self, status: int, body: bytes) -> None:
        # Bare-bones response: status line, empty header block, body.
        self.send_response(status)
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self) -> None:  # noqa: N802
        if self.path != "/metrics":
            self._respond_plain(404, b"Not Found\n")
            return

        try:
            body = self._exporter.export(self._collector()).encode("utf-8")
        except Exception as exc:  # NOSONAR
            _log.error("metrics handler error: %s", exc)
            self._respond_plain(500, b"Internal Server Error\n")
            return

        self.send_response(200)
        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, fmt: str, *args: Any) -> None:  # pragma: no cover
        # Suppress default access log to stderr.
        pass
293
+
294
+
295
def serve_metrics(
    port: int = 9090,
    *,
    host: str = "127.0.0.1",
    collector: Any | None = None,
    namespace: str = "spanforge",
) -> http.server.HTTPServer:
    """Expose ``/metrics`` in Prometheus text format from a background thread.

    The serving thread is a daemon, so it shuts down automatically when the
    main process exits.

    Args:
        port: TCP port to bind. Defaults to ``9090``.
        host: Interface to bind. Defaults to ``"127.0.0.1"`` (localhost
            only). Set to ``"0.0.0.0"`` to expose on all interfaces
            (ensure firewall rules are in place).
        collector: Optional callable ``() -> MetricsSummary``. Defaults to
            :func:`_collect_live_summary` which reads from SpanForge
            internals.
        namespace: Metric name prefix (default ``"spanforge"``).

    Returns:
        The running :class:`http.server.HTTPServer` instance.

    Example::

        serve_metrics(port=9090)
        # Scrape at http://localhost:9090/metrics
    """
    summary_fn = _collect_live_summary if collector is None else collector

    # Build a per-call handler subclass so each server carries its own
    # exporter/collector instead of mutating the shared base class.
    handler_cls = type(
        "_Handler",
        (_MetricsHTTPHandler,),
        {
            "_exporter": PrometheusMetricsExporter(namespace=namespace),
            "_collector": staticmethod(summary_fn),
        },
    )

    server = http.server.HTTPServer((host, port), handler_cls)
    threading.Thread(
        target=server.serve_forever,
        name=f"spanforge-metrics-{port}",
        daemon=True,
    ).start()
    _log.info("spanforge metrics server listening on http://%s:%d/metrics", host, port)
    return server
spanforge/migrate.py ADDED
@@ -0,0 +1,278 @@
1
+ """Schema migration utilities for spanforge events.
2
+
3
+ Provides forward-only migration functions to convert events from older schema
4
+ versions to the current version. Migrations are idempotent — migrating an
5
+ event that is already at the target version returns it unchanged.
6
+
7
+ Usage
8
+ -----
9
+ ::
10
+
11
+ from spanforge.migrate import v1_to_v2, migrate_file
12
+
13
+ # Single event
14
+ v2_event = v1_to_v2(v1_event)
15
+
16
+ # Bulk file migration
17
+ stats = migrate_file("audit.jsonl", output="audit_v2.jsonl")
18
+ print(f"Migrated {stats.migrated} events")
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import hashlib
24
+ import json
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+ from typing import Any
28
+
29
+ __all__ = [
30
+ "MigrationStats",
31
+ "migrate_file",
32
+ "v1_to_v2",
33
+ ]
34
+
35
+
36
@dataclass(frozen=True)
class MigrationStats:
    """Immutable result of a bulk migration operation.

    Produced by :func:`migrate_file`. For every run,
    ``total == migrated + skipped + errors`` (blank input lines are not
    counted at all).

    Attributes:
        total: Total events processed.
        migrated: Events that were upgraded to a new schema version.
        skipped: Events already at the target version (not modified).
        errors: Events that could not be parsed or migrated.
        warnings: Non-fatal warnings encountered during migration.
        output_path: Path where the migrated events were written (reported
            even on dry runs, when nothing is actually written).
        transformed_fields: Mapping of field names to the count of events
            where that field was transformed.
    """

    total: int
    migrated: int
    skipped: int
    errors: int
    warnings: list[str] = field(default_factory=list)
    output_path: str = ""
    transformed_fields: dict[str, int] = field(default_factory=dict)
58
+
59
+
60
+ def _rehash_md5_to_sha256(checksum: str | None, payload: dict[str, Any]) -> str | None:
61
+ """If *checksum* starts with ``md5:``, recompute as ``sha256:``."""
62
+ if checksum and checksum.startswith("md5:"):
63
+ canonical = json.dumps(
64
+ payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
65
+ ).encode("utf-8")
66
+ return f"sha256:{hashlib.sha256(canonical).hexdigest()}"
67
+ return checksum
68
+
69
+
70
+ def _coerce_tag_values(tags: Any) -> dict[str, str]:
71
+ """Ensure all tag values are strings."""
72
+ if not isinstance(tags, dict):
73
+ return {}
74
+ return {str(k): str(v) for k, v in tags.items()}
75
+
76
+
77
+ def v1_to_v2(event: Any) -> Any: # noqa: ANN401
78
+ """Migrate a single event from schema version 1.0 to 2.0.
79
+
80
+ Changes applied:
81
+ * ``schema_version`` is set to ``"2.0"``.
82
+ * Missing ``org_id`` is set to ``None`` (was not required in v1).
83
+ * Missing ``team_id`` is set to ``None``.
84
+ * Payload key ``model`` is normalised to ``model_id`` if present.
85
+ * ``tags`` is initialised to an empty dict if missing; all values
86
+ are coerced to strings.
87
+ * ``checksum`` is re-hashed from md5 to sha256 if applicable.
88
+
89
+ If the event is already at version ``"2.0"`` or later, it is returned
90
+ unchanged (idempotent).
91
+
92
+ Args:
93
+ event: Either an :class:`~spanforge.event.Event` instance or a plain
94
+ ``dict`` (as loaded from JSONL).
95
+
96
+ Returns:
97
+ The migrated event (same type as input).
98
+ """
99
+ from spanforge.event import Event # noqa: PLC0415
100
+
101
+ if isinstance(event, Event):
102
+ if event.schema_version == "2.0":
103
+ return event
104
+ payload = dict(event.payload)
105
+ # Normalise model → model_id
106
+ if "model" in payload and "model_id" not in payload:
107
+ payload["model_id"] = payload.pop("model")
108
+ # Re-hash md5 checksum
109
+ checksum = _rehash_md5_to_sha256(event.checksum, payload)
110
+ # Coerce tag values to strings
111
+ tags = _coerce_tag_values(event.tags) if event.tags else {}
112
+ return Event(
113
+ schema_version="2.0",
114
+ event_id=event.event_id,
115
+ event_type=event.event_type,
116
+ timestamp=event.timestamp,
117
+ source=event.source,
118
+ payload=payload,
119
+ trace_id=event.trace_id,
120
+ span_id=event.span_id,
121
+ parent_span_id=event.parent_span_id,
122
+ org_id=event.org_id,
123
+ team_id=event.team_id,
124
+ actor_id=event.actor_id,
125
+ session_id=event.session_id,
126
+ tags=tags,
127
+ checksum=checksum,
128
+ signature=event.signature,
129
+ prev_id=event.prev_id,
130
+ )
131
+
132
+ # Dict-based migration (e.g. raw JSONL parsing)
133
+ if isinstance(event, dict):
134
+ if event.get("schema_version") == "2.0":
135
+ return event
136
+ d = dict(event)
137
+ d["schema_version"] = "2.0"
138
+ d.setdefault("org_id", None)
139
+ d.setdefault("team_id", None)
140
+ # Coerce tag values
141
+ raw_tags = d.get("tags")
142
+ if isinstance(raw_tags, dict):
143
+ d["tags"] = {str(k): str(v) for k, v in raw_tags.items()}
144
+ else:
145
+ d["tags"] = {}
146
+ payload = d.get("payload", {})
147
+ if isinstance(payload, dict):
148
+ if "model" in payload and "model_id" not in payload:
149
+ payload["model_id"] = payload.pop("model")
150
+ # Re-hash md5 checksum
151
+ if d.get("checksum", "").startswith("md5:") and isinstance(payload, dict):
152
+ canonical = json.dumps(
153
+ payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
154
+ ).encode("utf-8")
155
+ d["checksum"] = f"sha256:{hashlib.sha256(canonical).hexdigest()}"
156
+ return d
157
+
158
+ raise TypeError(f"Cannot migrate object of type {type(event).__name__}")
159
+
160
+
161
def _resign_lines(lines: list[str], org_secret: str) -> list[str]:
    """Re-sign a migrated JSONL chain with HMAC; lines that cannot be parsed
    as events are passed through unchanged."""
    from spanforge.event import Event  # noqa: PLC0415
    from spanforge.signing import sign as _sign  # noqa: PLC0415

    signed: list[str] = []
    prev_event = None
    for raw_line in lines:
        text = raw_line.strip()
        if not text:
            continue
        try:
            evt = Event.from_dict(json.loads(text))
            signed_evt = _sign(evt, org_secret, prev_event=prev_event)
            # Chain: each event is signed against the previous signed event.
            prev_event = signed_evt
            signed.append(signed_evt.to_json() + "\n")
        except Exception:  # noqa: BLE001
            signed.append(text + "\n")
    return signed


def migrate_file(
    input_path: str | Path,
    *,
    output: str | Path | None = None,
    org_secret: str | None = None,
    target_version: str = "2.0",
    dry_run: bool = False,
) -> MigrationStats:
    """Migrate all events in a JSONL file from v1 to v2.

    Reads line-by-line, applies :func:`v1_to_v2` to each JSON object, and
    writes the result to *output* (defaults to ``<input>_v2.jsonl``).
    Unparseable or unmigratable lines are preserved verbatim in the output
    and counted as errors.

    Args:
        input_path: Path to the source JSONL file.
        output: Output file path (default: ``<stem>_v2.jsonl``).
        org_secret: When provided, re-signs the migrated chain using HMAC.
        target_version: Target schema version (default ``"2.0"``).
        dry_run: When ``True``, report stats without writing output
            (re-signing is also skipped).

    Returns:
        A :class:`MigrationStats` summarising the operation.
    """
    src = Path(input_path)
    dst = Path(output) if output is not None else src.with_name(f"{src.stem}_v2{src.suffix}")

    total = 0
    migrated = 0
    skipped = 0
    errors = 0
    warnings: list[str] = []
    transformed_fields: dict[str, int] = {}
    out_lines: list[str] = []

    def bump(field_name: str) -> None:
        # Count one event in which *field_name* was transformed.
        transformed_fields[field_name] = transformed_fields.get(field_name, 0) + 1

    with src.open("r", encoding="utf-8") as fin:
        for line_no, raw in enumerate(fin, 1):
            line = raw.strip()
            if not line:
                continue
            total += 1
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                errors += 1
                out_lines.append(line + "\n")
                continue

            # Source format validation.
            if not isinstance(data, dict):
                errors += 1
                warnings.append(f"line {line_no}: not a JSON object")
                out_lines.append(line + "\n")
                continue

            if data.get("schema_version") == target_version:
                skipped += 1
                out_lines.append(line + "\n")
                continue

            try:
                # Track which fields get transformed. checksum may be None
                # (optional field), so guard with isinstance before startswith.
                payload = data.get("payload", {})
                if isinstance(payload, dict) and "model" in payload and "model_id" not in payload:
                    bump("payload.model→model_id")
                checksum = data.get("checksum")
                if isinstance(checksum, str) and checksum.startswith("md5:"):
                    bump("checksum.md5→sha256")
                raw_tags = data.get("tags", {})
                if isinstance(raw_tags, dict) and any(
                    not isinstance(v, str) for v in raw_tags.values()
                ):
                    bump("tags.value_coercion")

                migrated_data = v1_to_v2(data)
                out_lines.append(
                    json.dumps(migrated_data, separators=(",", ":"), ensure_ascii=False) + "\n"
                )
                migrated += 1
            except Exception:  # NOSONAR — keep the original line on failure
                errors += 1
                out_lines.append(line + "\n")

    # Re-sign if org_secret provided (skipped on dry runs, matching the write).
    if org_secret and not dry_run:
        out_lines = _resign_lines(out_lines, org_secret)

    if not dry_run:
        with dst.open("w", encoding="utf-8") as fout:
            fout.writelines(out_lines)

    return MigrationStats(
        total=total,
        migrated=migrated,
        skipped=skipped,
        errors=errors,
        warnings=warnings,
        output_path=str(dst),
        transformed_fields=transformed_fields,
    )