spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,342 @@
1
+ """Append-only JSONL exporter with fsync, rotation, and WORM backend support.
2
+
3
+ Provides a tamper-evident append-only audit log suitable for compliance workloads
4
+ (SOC 2, HIPAA, GDPR) where events must never be overwritten or truncated.
5
+
6
+ Features
7
+ --------
8
+ * Opens files in append-only mode (``O_APPEND`` on POSIX, ``"a"`` elsewhere).
9
+ * ``fsync`` after every write to guarantee durability.
10
+ * Automatic file rotation once the file reaches ``max_bytes`` — an
11
+ ``AUDIT_CHAIN_ROTATED`` audit event is inserted at the boundary.
12
+ * Optional :class:`WORMBackend` for pushing sealed files to immutable object
13
+ stores (S3 Object Lock, GCS Retention Policy, Azure Immutable Storage).
14
+
15
+ Thread-safety: a :class:`threading.Lock` serialises all writes.
16
+
17
+ Example::
18
+
19
+ exporter = AppendOnlyJSONLExporter(
20
+ path="audit.jsonl",
21
+ org_secret="corp-key-001",
22
+ source="audit@1.0.0",
23
+ max_bytes=50_000_000,
24
+ )
25
+ exporter.append(event)
26
+ exporter.close()
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import json
32
+ import os
33
+ import threading
34
+ from dataclasses import dataclass, field
35
+ from datetime import datetime, timezone
36
+ from pathlib import Path
37
+ from typing import IO, TYPE_CHECKING, Any, Protocol, runtime_checkable
38
+
39
+ if TYPE_CHECKING:
40
+ from spanforge.event import Event
41
+
42
+ __all__ = [
43
+ "AppendOnlyJSONLExporter",
44
+ "WORMBackend",
45
+ "WORMUploadResult",
46
+ ]
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # WORM Backend protocol
51
+ # ---------------------------------------------------------------------------
52
+
53
+
54
@dataclass(frozen=True)
class WORMUploadResult:
    """Immutable outcome record for a single WORM backend upload.

    Only ``success`` is required; every other field has a neutral default so
    a backend can report a bare pass/fail.

    Attributes:
        success: ``True`` when the backend reports that the upload completed.
        location: Remote URI or object key of the uploaded file (empty string
            by default).
        error: Failure description, or ``None`` when no error was reported.
        metadata: Backend-specific details; defaults to a fresh empty dict
            per instance.
    """

    success: bool
    location: str = ""
    error: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
69
+
70
+
71
@runtime_checkable
class WORMBackend(Protocol):
    """Structural interface for Write-Once-Read-Many storage backends.

    Any object exposing the four methods below satisfies the protocol; because
    the class is ``runtime_checkable``, ``isinstance(obj, WORMBackend)`` checks
    for the presence of those methods (not their signatures).

    Implementations receive a local file path and are expected to push that
    file to an immutable object store. The exporter seals (closes) the file
    before handing it over.
    """

    def upload(self, local_path: str, remote_key: str) -> WORMUploadResult:
        """Push one sealed local file to WORM storage.

        Args:
            local_path: Absolute path to the local file.
            remote_key: Remote object key / blob name.

        Returns:
            A :class:`WORMUploadResult` describing success or failure.
        """
        ...  # pragma: no cover

    def write(self, event: "Event") -> None:
        """Atomically persist a single event to WORM storage."""
        ...  # pragma: no cover

    def list_files(self) -> list[str]:
        """Return the names of every file/object held by the backend."""
        ...  # pragma: no cover

    # NOTE(review): ``ChainVerificationResult`` is not imported anywhere in the
    # visible part of this module; the annotation only works because it is a
    # string. Confirm where the type lives and add a TYPE_CHECKING import.
    def verify_chain(self) -> "ChainVerificationResult":
        """Check the HMAC chain across all stored files."""
        ...  # pragma: no cover
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # AppendOnlyJSONLExporter
106
+ # ---------------------------------------------------------------------------
107
+
108
+
109
class AppendOnlyJSONLExporter:
    """Append-only JSONL exporter with fsync, rotation, and WORM support.

    Every event is written as one JSON line to a file opened in binary append
    mode, then flushed and ``fsync``-ed. When ``max_bytes`` is non-zero and
    the file reaches that size, the file is sealed with an
    ``AUDIT_CHAIN_ROTATED`` event, optionally uploaded to a WORM backend, and
    writing continues in ``<stem>.<index><suffix>`` next to the base path.

    All public write operations are serialised by one
    :class:`threading.Lock`.

    Args:
        path: Base file path for the audit log.
        org_secret: HMAC signing key for chain-rotation events.
            NOTE(review): the value is stored but no code in this class reads
            it; confirm whether rotation events are meant to be signed here.
        source: ``source`` field for auto-generated audit events.
        max_bytes: Maximum file size before rotation (0 = no rotation).
        worm_backend: Optional :class:`WORMBackend` for sealing rotated files.

    Raises:
        ValueError: If *max_bytes* is negative.

    Example::

        exporter = AppendOnlyJSONLExporter(
            path="audit.jsonl",
            org_secret="corp-key-001",
            source="audit@1.0.0",
            max_bytes=50_000_000,
        )
        for event in events:
            exporter.append(event)
        exporter.close()
    """

    __slots__ = (
        "_base_path",
        "_current_path",
        "_fh",
        "_lock",
        "_max_bytes",
        "_org_secret",
        "_rotation_index",
        "_source",
        "_written_bytes",
        "_worm_backend",
    )

    def __init__(
        self,
        path: str | Path,
        org_secret: str,
        source: str,
        max_bytes: int = 0,
        worm_backend: WORMBackend | None = None,
    ) -> None:
        if max_bytes < 0:
            raise ValueError("max_bytes must be >= 0")
        self._base_path = Path(path)
        self._org_secret = org_secret
        self._source = source
        self._max_bytes = max_bytes
        self._worm_backend = worm_backend

        self._lock = threading.Lock()
        self._rotation_index = 0
        self._current_path = self._base_path
        # The file is opened lazily on the first write (see _ensure_open).
        self._fh: IO[bytes] | None = None
        self._written_bytes = 0

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _ensure_open(self) -> IO[bytes]:
        """Return the open file handle, opening it in binary append mode if needed.

        Parent directories are created on demand. On open, ``_written_bytes``
        is seeded with the file's current size so a resumed log rotates at the
        right point.
        """
        if self._fh is None:
            self._current_path.parent.mkdir(parents=True, exist_ok=True)
            # SF-13-A: "ab" never truncates — an existing file is only ever
            # appended to, so no separate overwrite guard is required.
            self._fh = open(  # noqa: SIM115
                self._current_path, mode="ab"
            )
            # Size is read from the open descriptor, so it describes exactly
            # the file being written even if the path is replaced concurrently.
            self._written_bytes = os.fstat(self._fh.fileno()).st_size
        return self._fh

    def _current_size(self) -> int:
        """Return the size in bytes of the current log file.

        While a handle is open the running ``_written_bytes`` counter is used;
        otherwise the size is read from disk (0 when the file does not exist
        or cannot be inspected).
        """
        if self._fh is not None:
            return self._written_bytes
        try:
            return self._current_path.stat().st_size
        except OSError:
            return 0

    def write_exclusive(self, path: str | Path) -> None:
        """Raise :class:`~spanforge.exceptions.AuditStorageError` if *path* already exists.

        This is a check only: nothing is created or written, and the
        exporter's own state is untouched. Because the existence check is not
        atomic with any later file creation, it cannot by itself rule out a
        concurrent writer creating *path* afterwards.

        Use this to enforce that a new audit log file is created, not
        overwritten. For append-to-existing, use :meth:`append` directly.

        Args:
            path: Path that must not exist yet.

        Raises:
            AuditStorageError: If *path* exists.
        """
        from spanforge.exceptions import AuditStorageError  # noqa: PLC0415

        p = Path(path)
        if p.exists():
            raise AuditStorageError(
                f"Audit log file already exists and cannot be overwritten: {p}. "
                "Use append mode or choose a new filename."
            )

    def _write_line(self, line_bytes: bytes) -> None:
        """Append *line_bytes* plus a newline, then flush and fsync.

        The record and its terminator are handed to the file object in a
        single ``write`` call so they are never submitted as two separate
        writes.
        """
        fh = self._ensure_open()
        fh.write(line_bytes + b"\n")
        fh.flush()
        os.fsync(fh.fileno())
        self._written_bytes += len(line_bytes) + 1

    def _needs_rotation(self) -> bool:
        """Return True when rotation is enabled and the file has reached max_bytes."""
        return self._max_bytes > 0 and self._written_bytes >= self._max_bytes

    def _rotate(self) -> None:
        """Seal the current file and switch to a new one.

        Steps, in order:

        1. Append an ``AUDIT_CHAIN_ROTATED`` event to the current file.
        2. Flush, fsync and close the current file.
        3. Advance ``rotation_index`` / ``current_path`` to the next file.
        4. If a WORM backend is configured, upload the sealed file.

        The switch (step 3) happens before the upload so that an exception
        raised by the backend still propagates to the caller but leaves the
        exporter pointing at the new file instead of re-opening the one that
        was already sealed. An upload that returns an unsuccessful result is
        logged as a warning; it is not raised.

        NOTE(review): no signing call is visible here — the rotation event is
        serialised via ``Event.to_json()`` as constructed.
        """
        from spanforge.event import Event  # noqa: PLC0415
        from spanforge.types import EventType  # noqa: PLC0415
        from spanforge.ulid import generate as gen_ulid  # noqa: PLC0415

        old_path = self._current_path
        sealed_index = self._rotation_index

        # Insert chain rotation event into old file
        rotation_event = Event(
            event_type=EventType.AUDIT_CHAIN_ROTATED.value,
            source=self._source,
            payload={
                "reason": "file_rotation",
                "old_file": str(old_path),
                "rotation_index": sealed_index,
                "rotated_at": datetime.now(timezone.utc).isoformat(),
            },
            event_id=gen_ulid(),
        )
        self._write_line(rotation_event.to_json().encode("utf-8"))

        # Close old file; the handle is cleared even if flush/fsync fails.
        if self._fh is not None:
            try:
                self._fh.flush()
                os.fsync(self._fh.fileno())
            finally:
                self._fh.close()
                self._fh = None

        # Point at the next file before touching the backend (see docstring).
        self._rotation_index = sealed_index + 1
        stem = self._base_path.stem
        suffix = self._base_path.suffix
        self._current_path = (
            self._base_path.parent / f"{stem}.{self._rotation_index}{suffix}"
        )
        self._written_bytes = 0

        # Push the sealed file to the WORM backend if configured
        if self._worm_backend is not None:
            remote_key = f"{old_path.stem}_{sealed_index}{old_path.suffix}"
            result = self._worm_backend.upload(str(old_path), remote_key)
            # Duck-typed backends may return nothing; only an explicit
            # ``success=False`` is treated as a failure.
            if getattr(result, "success", True) is False:
                import logging  # noqa: PLC0415

                logging.getLogger("spanforge.export.append_only").warning(
                    "WORM upload of %s as %r failed: %s",
                    old_path,
                    remote_key,
                    getattr(result, "error", None),
                )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def append(self, event: Event) -> None:
        """Append a signed event to the audit log.

        Thread-safe. Triggers rotation once the file reaches ``max_bytes``.

        Args:
            event: The event to append (should already be signed).
        """
        with self._lock:
            self._write_line(event.to_json().encode("utf-8"))

            if self._needs_rotation():
                self._rotate()

    def append_batch(self, events: list[Event]) -> int:
        """Append multiple events under one lock acquisition.

        Rotation is evaluated after every event, so a batch may span files.

        Args:
            events: Events to append, in order.

        Returns:
            The number of events written.
        """
        with self._lock:
            count = 0
            for event in events:
                self._write_line(event.to_json().encode("utf-8"))
                count += 1

                if self._needs_rotation():
                    self._rotate()
            return count

    def close(self) -> None:
        """Flush and close the current file handle. Idempotent."""
        with self._lock:
            if self._fh is not None:
                try:
                    self._fh.flush()
                    os.fsync(self._fh.fileno())
                finally:
                    self._fh.close()
                    self._fh = None

    def rotate(self, max_size_mb: int = 100) -> None:
        """Force rotation if current file exceeds *max_size_mb* megabytes.

        An ``AUDIT_CHAIN_ROTATED`` event is inserted at the boundary.

        The size used for the comparison is the live byte counter while the
        file is open, and the on-disk size otherwise, so an existing log that
        has not been written to yet in this process is still measured
        correctly.

        Args:
            max_size_mb: Trigger rotation when file exceeds this size.
                Pass 0 to force immediate rotation.
        """
        with self._lock:
            threshold = max_size_mb * 1_048_576
            if threshold == 0 or self._current_size() >= threshold:
                self._rotate()

    @property
    def current_path(self) -> Path:
        """The path of the file currently being written to."""
        return self._current_path

    @property
    def rotation_count(self) -> int:
        """Number of file rotations that have occurred."""
        return self._rotation_index

    def __enter__(self) -> AppendOnlyJSONLExporter:
        return self

    def __exit__(self, *_: object) -> None:
        self.close()

    def __repr__(self) -> str:
        return (
            f"AppendOnlyJSONLExporter(path={str(self._base_path)!r}, "
            f"rotations={self._rotation_index})"
        )
@@ -0,0 +1,349 @@
1
+ """spanforge.export.cloud — Cloud telemetry exporter.
2
+
3
+ Batches spanforge events and ships them to the spanforge Cloud API (or any
4
+ compatible self-hosted endpoint) over HTTPS using only stdlib ``urllib``.
5
+
6
+ Configuration
7
+ -------------
8
+ All settings are read from environment variables so no secrets end up in
9
+ source code:
10
+
11
+ ``SPANFORGE_CLOUD_API_KEY``
12
+ Required. Your spanforge Cloud API key.
13
+
14
+ ``SPANFORGE_CLOUD_ENDPOINT``
15
+ Optional. Override the ingestion URL. Defaults to
16
+ ``https://ingest.getspanforge.com/v1/events``.
17
+
18
+ ``SPANFORGE_CLOUD_BATCH_SIZE``
19
+ Optional integer. Events per HTTP request. Default ``100``.
20
+
21
+ ``SPANFORGE_CLOUD_TIMEOUT``
22
+ Optional seconds (float). HTTP request timeout. Default ``10``.
23
+
24
+ Example::
25
+
26
+ import os
27
+ os.environ["SPANFORGE_CLOUD_API_KEY"] = "sf_live_..."
28
+
29
+ from spanforge.export.cloud import CloudExporter
30
+ from spanforge import configure
31
+ configure(exporter="cloud")
32
+
33
+ # Events are now shipped automatically via the default TraceStore flush.
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import asyncio
39
+ import ipaddress
40
+ import json
41
+ import logging
42
+ import os
43
+ import socket
44
+ import ssl
45
+ import threading
46
+ import time
47
+ import urllib.error
48
+ import urllib.parse
49
+ import urllib.request
50
+ from collections import deque
51
+ from typing import TYPE_CHECKING, Any
52
+
53
+ if TYPE_CHECKING:
54
+ from spanforge.event import Event
55
+
56
+ __all__ = ["CloudExporter", "CloudExporterError"]
57
+
58
+ _log = logging.getLogger("spanforge.export.cloud")
59
+
60
+
61
+ def _is_private_ip_literal(host: str) -> bool:
62
+ """Return True if *host* is a private/loopback/link-local IP literal."""
63
+ try:
64
+ addr = ipaddress.ip_address(host)
65
+ except ValueError:
66
+ return False
67
+ return addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_multicast
68
+
69
+
70
+ def _validate_http_url(
71
+ url: str,
72
+ param_name: str = "url",
73
+ *,
74
+ allow_private_addresses: bool = False,
75
+ ) -> None:
76
+ """Raise ValueError if *url* is not a valid http(s):// URL.
77
+
78
+ When *allow_private_addresses* is False (default), also rejects literal
79
+ private/loopback IP addresses and hostnames that resolve to them.
80
+ """
81
+ parsed = urllib.parse.urlparse(url)
82
+ if parsed.scheme not in {"http", "https"} or not parsed.netloc:
83
+ raise ValueError(
84
+ f"{param_name} must be a valid http:// or https:// URL; got {url!r}"
85
+ )
86
+ if not allow_private_addresses:
87
+ host = parsed.hostname or ""
88
+ if _is_private_ip_literal(host):
89
+ raise ValueError(
90
+ f"{param_name} resolves to a private/loopback/link-local IP address "
91
+ f"({host!r}). Set allow_private_addresses=True to permit this in "
92
+ "non-production environments."
93
+ )
94
+ if host and not _is_private_ip_literal(host):
95
+ try:
96
+ resolved = socket.gethostbyname(host)
97
+ addr = ipaddress.ip_address(resolved)
98
+ if addr.is_private or addr.is_loopback or addr.is_link_local:
99
+ raise ValueError(
100
+ f"{param_name} hostname {host!r} resolves to a private/loopback/"
101
+ f"link-local address ({resolved}). "
102
+ "Set allow_private_addresses=True to permit this."
103
+ )
104
+ except OSError:
105
+ pass # DNS failure — allow through
106
+
107
+ _DEFAULT_ENDPOINT = "https://ingest.getspanforge.com/v1/events"
108
+ _DEFAULT_BATCH_SIZE = 100
109
+ _DEFAULT_TIMEOUT = 10.0
110
+ _DEFAULT_FLUSH_INTERVAL = 5.0 # seconds
111
+ _MAX_QUEUE_SIZE = 10_000 # prevent unbounded growth when endpoint is unreachable
112
+
113
+
114
class CloudExporterError(RuntimeError):
    """Signals a permanent failure of a cloud export request.

    Within this module it is raised for a missing API key, a closed or full
    exporter, and 4xx responses from the ingestion endpoint.
    """
116
+
117
+
118
class CloudExporter:
    """Async-compatible batch exporter that ships events to spanforge Cloud.

    The exporter maintains an internal queue and flushes in batches either
    on a timed interval (background daemon thread, one batch per tick) or
    when the queue reaches *batch_size*. HTTP bodies are newline-delimited
    JSON (one event per line) to minimise memory overhead.

    Args:
        api_key:
            spanforge Cloud API key. Falls back to the
            ``SPANFORGE_CLOUD_API_KEY`` environment variable.
        endpoint:
            HTTP(S) ingestion URL. Falls back to
            ``SPANFORGE_CLOUD_ENDPOINT`` env var, then the default.
        batch_size:
            Maximum events per HTTP POST. Falls back to
            ``SPANFORGE_CLOUD_BATCH_SIZE``, then the default. Must be >= 1.
        flush_interval:
            Seconds between automatic flushes.
        timeout:
            HTTP request timeout in seconds. Falls back to
            ``SPANFORGE_CLOUD_TIMEOUT``, then the default.
        max_retries:
            Maximum number of send attempts per batch on transient errors
            (5xx, timeout). At least one attempt is always made.
        allow_private_addresses:
            Permit endpoints on private/loopback/link-local addresses.

    Raises:
        CloudExporterError:
            If ``api_key`` is not set via argument or environment variable.
        ValueError:
            If the endpoint URL is rejected or ``batch_size`` is below 1.

    Example::

        async with CloudExporter(api_key="sf_live_...") as exporter:
            await exporter.export(event)
    """

    def __init__(
        self,
        api_key: str | None = None,
        endpoint: str | None = None,
        batch_size: int | None = None,
        flush_interval: float | None = None,
        timeout: float | None = None,
        max_retries: int = 3,
        allow_private_addresses: bool = False,
    ) -> None:
        resolved_key = api_key or os.environ.get("SPANFORGE_CLOUD_API_KEY", "")
        if not resolved_key:
            raise CloudExporterError(
                "No API key provided. Set SPANFORGE_CLOUD_API_KEY or pass api_key=."
            )
        self._api_key = resolved_key
        self._endpoint = (
            endpoint
            or os.environ.get("SPANFORGE_CLOUD_ENDPOINT", "")
            or _DEFAULT_ENDPOINT
        )
        _validate_http_url(
            self._endpoint, "endpoint", allow_private_addresses=allow_private_addresses
        )
        self._batch_size = int(
            batch_size
            if batch_size is not None
            else os.environ.get("SPANFORGE_CLOUD_BATCH_SIZE", _DEFAULT_BATCH_SIZE)
        )
        if self._batch_size < 1:
            # A non-positive batch size would make every popped batch empty,
            # so nothing would ever be sent.
            raise ValueError("batch_size must be >= 1")
        self._flush_interval = (
            flush_interval
            if flush_interval is not None
            else _DEFAULT_FLUSH_INTERVAL
        )
        self._timeout = float(
            timeout
            if timeout is not None
            else os.environ.get("SPANFORGE_CLOUD_TIMEOUT", _DEFAULT_TIMEOUT)
        )
        self._max_retries = max_retries

        self._queue: deque[dict[str, Any]] = deque()
        # threading.Lock guards the queue so both the async flush() coroutine
        # and the background _flush_loop() thread can access it safely.
        self._queue_lock = threading.Lock()
        self._closed = False

        # Background flush thread (started lazily on first export)
        self._flush_thread: threading.Thread | None = None
        self._stop_event = threading.Event()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def export(self, event: "Event") -> None:
        """Queue a single event for batched delivery.

        Starts the background flush thread if it is not running and triggers
        an immediate :meth:`flush` once the queue holds *batch_size* events.

        Raises:
            CloudExporterError: If the exporter is closed, the queue already
                holds ``_MAX_QUEUE_SIZE`` events, or an immediate flush is
                rejected by the endpoint with a 4xx status.
        """
        if self._closed:
            raise CloudExporterError("CloudExporter is closed.")
        with self._queue_lock:
            if len(self._queue) >= _MAX_QUEUE_SIZE:
                raise CloudExporterError(
                    f"Export queue is full ({_MAX_QUEUE_SIZE} events). "
                    "Cloud endpoint may be unreachable."
                )
            self._queue.append(self._serialise(event))
            should_flush = len(self._queue) >= self._batch_size
        self._ensure_flush_thread()
        if should_flush:
            await self.flush()

    async def export_batch(self, events: list["Event"]) -> None:
        """Queue multiple events for batched delivery, one at a time."""
        for event in events:
            await self.export(event)

    async def flush(self) -> int:
        """Drain the queue and send all pending events. Returns sent count.

        Events are sent in batches of at most *batch_size*, each on the
        default executor so the event loop is not blocked by HTTP I/O. The
        amount of work is bounded by the number of events queued when the
        call starts, and draining stops early if a batch could not be
        delivered (that batch has been put back on the queue by
        :meth:`_send_batch`).

        Raises:
            CloudExporterError: If the endpoint rejects a batch with 4xx.
        """
        if self._closed:
            return 0
        with self._queue_lock:
            pending = len(self._queue)
        if not pending:
            return 0

        loop = asyncio.get_running_loop()
        total = 0
        while total < pending:
            batch = self._pop_batch()
            if not batch:
                break
            sent = await loop.run_in_executor(None, self._send_batch, batch)
            if sent == 0:
                # Delivery failed and the batch was re-enqueued; retrying
                # right away would just spin.
                break
            total += sent
        return total

    async def close(self) -> None:
        """Flush remaining events and shut down the background thread.

        The wait for the background thread is performed on the default
        executor so the event loop stays responsive. Calling ``close`` on an
        already closed exporter is a no-op.
        """
        if self._closed:
            return
        self._stop_event.set()
        thread = self._flush_thread
        if thread is not None and thread.is_alive():
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, thread.join, self._timeout + 2)
        # Final flush
        await self.flush()
        self._closed = True

    # Async context manager support
    async def __aenter__(self) -> "CloudExporter":
        return self

    async def __aexit__(self, *_: object) -> None:
        await self.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _pop_batch(self) -> list[dict[str, Any]]:
        """Remove and return up to *batch_size* events from the queue front."""
        with self._queue_lock:
            batch: list[dict[str, Any]] = []
            while self._queue and len(batch) < self._batch_size:
                batch.append(self._queue.popleft())
        return batch

    def _ensure_flush_thread(self) -> None:
        """Start the daemon flush thread if none is currently alive."""
        if self._flush_thread is None or not self._flush_thread.is_alive():
            t = threading.Thread(
                target=self._flush_loop,
                name="spanforge-cloud-flush",
                daemon=True,
            )
            t.start()
            self._flush_thread = t

    def _flush_loop(self) -> None:
        """Background thread: send one batch per interval until stopped."""
        while not self._stop_event.wait(timeout=self._flush_interval):
            batch = self._pop_batch()
            if batch:
                try:
                    self._send_batch(batch)
                except Exception as exc:  # noqa: BLE001
                    # Best effort: a failure here must not kill the thread.
                    _log.warning("Background flush failed: %s", exc)

    def _send_batch(self, batch: list[dict[str, Any]]) -> int:
        """HTTP POST a batch of serialised events. Returns number sent.

        4xx responses raise :class:`CloudExporterError` immediately (the batch
        is dropped). 5xx responses and network errors are retried with
        exponential back-off; when every attempt fails the batch is pushed
        back to the front of the queue and ``0`` is returned.

        Raises:
            CloudExporterError: On a 4xx response.
        """
        body = "\n".join(json.dumps(ev, default=str) for ev in batch).encode("utf-8")
        headers = {
            "Content-Type": "application/x-ndjson; charset=utf-8",
            "Authorization": f"Bearer {self._api_key}",
            "X-Spanforge-SDK": "python",
            "User-Agent": "spanforge-python/2.0",
        }
        req = urllib.request.Request(
            self._endpoint,
            data=body,
            headers=headers,
            method="POST",
        )
        # One TLS context serves every attempt; building it is not free.
        ctx = ssl.create_default_context()
        # Always try at least once, even when max_retries is 0 or negative —
        # otherwise the batch would be re-enqueued forever without a request.
        attempts = max(1, self._max_retries)

        last_exc: Exception | None = None
        for attempt in range(1, attempts + 1):
            try:
                with urllib.request.urlopen(req, timeout=self._timeout, context=ctx) as resp:  # noqa: S310
                    status = resp.status
                    if 200 <= status < 300:
                        _log.debug("Shipped %d events → %s (%s)", len(batch), self._endpoint, status)
                        return len(batch)
                    # Non-retryable client error
                    if 400 <= status < 500:
                        raise CloudExporterError(
                            f"Cloud API rejected batch: HTTP {status} (check API key and payload)"
                        )
                    # 5xx — retryable
                    _log.warning("Server error %s on attempt %d/%d", status, attempt, attempts)

            except urllib.error.HTTPError as exc:
                if 400 <= exc.code < 500:
                    raise CloudExporterError(
                        f"Cloud API rejected batch: HTTP {exc.code}"
                    ) from exc
                last_exc = exc
                _log.warning("HTTP %s on attempt %d/%d", exc.code, attempt, attempts)
            except (urllib.error.URLError, OSError, TimeoutError) as exc:
                last_exc = exc
                _log.warning("Network error on attempt %d/%d: %s", attempt, attempts, exc)

            if attempt < attempts:
                time.sleep(min(2 ** attempt, 30))  # exponential back-off

        # Re-enqueue failed batch at the front for next flush cycle
        with self._queue_lock:
            self._queue.extendleft(reversed(batch))
        _log.error("Failed to ship batch after %d attempts: %s", attempts, last_exc)
        return 0

    @staticmethod
    def _serialise(event: "Event") -> dict[str, Any]:
        """Convert an Event to a plain dict for JSON serialisation.

        Uses ``event.to_dict()`` when available; otherwise copies a fixed set
        of attributes, substituting ``None`` for any that are missing.
        """
        if hasattr(event, "to_dict"):
            return event.to_dict()
        # Fallback for duck-typed event objects
        return {
            k: getattr(event, k, None)
            for k in (
                "event_id", "event_type", "payload", "schema_version",
                "source", "span_id", "trace_id", "timestamp", "tags",
                "signature", "checksum", "prev_id",
            )
        }