spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
@@ -0,0 +1,337 @@
1
+ """Append-only JSONL exporter with fsync, rotation, and WORM backend support.
2
+
3
+ Provides a tamper-evident append-only audit log suitable for compliance workloads
4
+ (SOC 2, HIPAA, GDPR) where events must never be overwritten or truncated.
5
+
6
+ Features
7
+ --------
8
+ * Opens files in append-only mode (``O_APPEND`` on POSIX, ``"a"`` elsewhere).
9
+ * ``fsync`` after every write to guarantee durability.
10
+ * Automatic file rotation when ``max_bytes`` is exceeded — an ``AUDIT_CHAIN_ROTATED``
11
+ audit event is inserted at the boundary.
12
+ * Optional :class:`WORMBackend` for pushing sealed files to immutable object
13
+ stores (S3 Object Lock, GCS Retention Policy, Azure Immutable Storage).
14
+
15
+ Thread-safety: a :class:`threading.Lock` serialises all writes.
16
+
17
+ Example::
18
+
19
+ exporter = AppendOnlyJSONLExporter(
20
+ path="audit.jsonl",
21
+ org_secret="corp-key-001",
22
+ source="audit@1.0.0",
23
+ max_bytes=50_000_000,
24
+ )
25
+ exporter.append(event)
26
+ exporter.close()
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import os
32
+ import threading
33
+ from dataclasses import dataclass, field
34
+ from datetime import datetime, timezone
35
+ from pathlib import Path
36
+ from typing import IO, TYPE_CHECKING, Any, Protocol, runtime_checkable
37
+
38
+ if TYPE_CHECKING:
39
+ from spanforge.event import Event
40
+ from spanforge.signing import ChainVerificationResult
41
+
42
+ __all__ = [
43
+ "AppendOnlyJSONLExporter",
44
+ "WORMBackend",
45
+ "WORMUploadResult",
46
+ ]
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # WORM Backend protocol
51
+ # ---------------------------------------------------------------------------
52
+
53
+
54
@dataclass(frozen=True)
class WORMUploadResult:
    """Immutable outcome record returned by :meth:`WORMBackend.upload`.

    Attributes:
        success: ``True`` when the upload succeeded.
        location: Remote URI or object key of the uploaded object
            (empty string by default).
        error: Failure message, or ``None`` when no error is reported.
        metadata: Optional backend-supplied details; each instance gets its
            own fresh ``dict`` when none is passed.
    """

    # Only ``success`` is mandatory; everything else has a neutral default.
    success: bool
    location: str = ""
    error: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
69
+
70
+
71
@runtime_checkable
class WORMBackend(Protocol):
    """Structural interface for Write-Once-Read-Many storage backends.

    A backend takes a local file that has already been sealed (closed) and
    pushes it to an immutable object store.  Because the protocol is
    ``runtime_checkable``, ``isinstance(obj, WORMBackend)`` checks only that
    the four methods below are present.
    """

    def upload(self, local_path: str, remote_key: str) -> WORMUploadResult:
        """Push a sealed local file to WORM storage.

        Args:
            local_path: Absolute path of the file on local disk.
            remote_key: Object key / blob name to store it under.

        Returns:
            A :class:`WORMUploadResult` describing success or failure.
        """
        ...  # pragma: no cover

    def write(self, event: Event) -> None:
        """Atomically write one event to WORM storage."""
        ...  # pragma: no cover

    def list_files(self) -> list[str]:
        """Return the names of all files/objects held by the backend."""
        ...  # pragma: no cover

    def verify_chain(self) -> ChainVerificationResult:
        """Verify the HMAC chain across every stored file."""
        ...  # pragma: no cover
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # AppendOnlyJSONLExporter
106
+ # ---------------------------------------------------------------------------
107
+
108
+
109
class AppendOnlyJSONLExporter:
    """Append-only JSONL exporter with fsync, rotation, and WORM support.

    Each event is serialised with ``event.to_json()`` and written as one line
    to a file opened in binary append mode, then flushed and fsynced.  When
    ``max_bytes`` is non-zero and the current file reaches that size, an
    ``AUDIT_CHAIN_ROTATED`` event is appended, the file is closed, optionally
    handed to the WORM backend, and writing continues in
    ``<stem>.<rotation_index><suffix>`` next to the base path.

    All public write operations are serialised by a :class:`threading.Lock`.

    Args:
        path:          Base file path for the audit log.
        org_secret:    HMAC signing key for chain-rotation events.
        source:        ``source`` field for auto-generated audit events.
        max_bytes:     Maximum file size before rotation (0 = no rotation).
        worm_backend:  Optional :class:`WORMBackend` for sealing rotated files.

    Raises:
        ValueError: If *max_bytes* is negative.

    Example::

        exporter = AppendOnlyJSONLExporter(
            path="audit.jsonl",
            org_secret="corp-key-001",
            source="audit@1.0.0",
            max_bytes=50_000_000,
        )
        for event in events:
            exporter.append(event)
        exporter.close()
    """

    __slots__ = (
        "_base_path",
        "_current_path",
        "_fh",
        "_lock",
        "_max_bytes",
        "_org_secret",
        "_rotation_index",
        "_source",
        "_worm_backend",
        "_written_bytes",
    )

    def __init__(
        self,
        path: str | Path,
        org_secret: str,
        source: str,
        max_bytes: int = 0,
        worm_backend: WORMBackend | None = None,
    ) -> None:
        if max_bytes < 0:
            raise ValueError("max_bytes must be >= 0")
        self._base_path = Path(path)
        self._org_secret = org_secret
        self._source = source
        self._max_bytes = max_bytes
        self._worm_backend = worm_backend

        self._lock = threading.Lock()
        self._rotation_index = 0
        self._current_path = self._base_path
        # The file is opened lazily by _ensure_open() on the first write.
        self._fh: IO[bytes] | None = None
        self._written_bytes = 0

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _ensure_open(self) -> IO[bytes]:
        """Open the current file in binary append mode if not already open.

        Creates missing parent directories.  On open, ``_written_bytes`` is
        initialised from the file's on-disk size so that a resumed file counts
        towards ``max_bytes``.
        """
        if self._fh is None:
            self._current_path.parent.mkdir(parents=True, exist_ok=True)
            # SF-13-A: "ab" never truncates, so an existing file is only ever
            # extended — no separate existence check is needed.
            self._fh = self._current_path.open(mode="ab")
            self._written_bytes = self._current_path.stat().st_size
        return self._fh

    def _close_handle(self) -> None:
        """Flush, fsync and close the open handle, if any (caller holds the lock)."""
        fh = self._fh
        if fh is None:
            return
        try:
            fh.flush()
            os.fsync(fh.fileno())
        finally:
            # Drop the reference first so a failing close() cannot leave a
            # dead handle behind for the next write.
            self._fh = None
            fh.close()

    def _current_size(self) -> int:
        """Return the size of the current file in bytes.

        ``_written_bytes`` is only meaningful once the file has been opened;
        before that, fall back to the on-disk size (0 if the file is absent).
        """
        if self._fh is not None:
            return self._written_bytes
        try:
            return self._current_path.stat().st_size
        except FileNotFoundError:
            return 0

    def write_exclusive(self, path: str | Path) -> None:
        """Raise :class:`~spanforge.exceptions.AuditStorageError` if *path* already exists.

        Use this to enforce that a new audit log file is created, not
        overwritten.  For append-to-existing, use :meth:`append` directly.

        This is a check only: nothing is created or written, and the check is
        not atomic with any later open of *path*.
        """
        from spanforge.exceptions import AuditStorageError

        p = Path(path)
        if p.exists():
            raise AuditStorageError(
                f"Audit log file already exists and cannot be overwritten: {p}. "
                "Use append mode or choose a new filename."
            )

    def _write_line(self, line_bytes: bytes) -> None:
        """Write *line_bytes* plus a trailing newline and fsync to disk."""
        fh = self._ensure_open()
        # Hand the record and its terminator to the file object in one call so
        # they are never split across two writes.
        fh.write(line_bytes + b"\n")
        fh.flush()
        os.fsync(fh.fileno())
        self._written_bytes += len(line_bytes) + 1

    def _needs_rotation(self) -> bool:
        """Return True if rotation is enabled and the file has reached max_bytes."""
        return self._max_bytes > 0 and self._written_bytes >= self._max_bytes

    def _rotate(self) -> None:
        """Seal the current file and switch to a new one.

        Appends an ``AUDIT_CHAIN_ROTATED`` event to the old file, closes it,
        uploads it to the WORM backend when one is configured, then advances
        the rotation index and points ``current_path`` at the next file.  The
        new file is opened lazily by the next write.
        """
        import logging

        from spanforge.event import Event
        from spanforge.types import EventType
        from spanforge.ulid import generate as gen_ulid

        old_path = self._current_path

        # Insert the chain rotation event as the last line of the old file.
        rotation_event = Event(
            event_type=EventType.AUDIT_CHAIN_ROTATED.value,
            source=self._source,
            payload={
                "reason": "file_rotation",
                "old_file": str(old_path),
                "rotation_index": self._rotation_index,
                "rotated_at": datetime.now(timezone.utc).isoformat(),
            },
            event_id=gen_ulid(),
        )
        self._write_line(rotation_event.to_json().encode("utf-8"))

        # Seal the old file before it is handed to the backend.
        self._close_handle()

        if self._worm_backend is not None:
            remote_key = f"{old_path.stem}_{self._rotation_index}{old_path.suffix}"
            result = self._worm_backend.upload(str(old_path), remote_key)
            # A failed upload must not go unnoticed: the sealed file stays on
            # local disk only.  Rotation still proceeds (best effort).
            if not getattr(result, "success", True):
                logging.getLogger("spanforge.export.append_only").error(
                    "WORM upload of %s as %s failed: %s",
                    old_path,
                    remote_key,
                    getattr(result, "error", None),
                )

        # Point at the next file; it is created on the next write.
        self._rotation_index += 1
        stem = self._base_path.stem
        suffix = self._base_path.suffix
        self._current_path = self._base_path.parent / f"{stem}.{self._rotation_index}{suffix}"
        self._written_bytes = 0

    def _append_unlocked(self, event: Event) -> None:
        """Write one event and rotate if needed (caller holds the lock)."""
        self._write_line(event.to_json().encode("utf-8"))
        if self._needs_rotation():
            self._rotate()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def append(self, event: Event) -> None:
        """Append a signed event to the audit log.

        Thread-safe.  Triggers rotation when ``max_bytes`` is reached.

        Args:
            event: The event to append (should already be signed).
        """
        with self._lock:
            self._append_unlocked(event)

    def append_batch(self, events: list[Event]) -> int:
        """Append multiple events under one lock acquisition.

        Each event is still flushed and fsynced individually, and rotation is
        checked after every event.

        Returns:
            The number of events written.
        """
        with self._lock:
            count = 0
            for event in events:
                self._append_unlocked(event)
                count += 1
            return count

    def close(self) -> None:
        """Flush and close the current file handle.  Idempotent."""
        with self._lock:
            self._close_handle()

    def rotate(self, max_size_mb: int = 100) -> None:
        """Force rotation if the current file has reached *max_size_mb* megabytes.

        An ``AUDIT_CHAIN_ROTATED`` event is inserted at the boundary so the
        HMAC chain is preserved across files.

        Args:
            max_size_mb: Trigger rotation when the file reaches this size.
                Pass 0 to force immediate rotation.
        """
        with self._lock:
            threshold = max_size_mb * 1_048_576
            # Use the on-disk size when the file has not been opened yet;
            # otherwise an existing large file would look empty.
            if threshold == 0 or self._current_size() >= threshold:
                self._rotate()

    @property
    def current_path(self) -> Path:
        """The path of the file currently being written to."""
        return self._current_path

    @property
    def rotation_count(self) -> int:
        """Number of file rotations that have occurred."""
        return self._rotation_index

    def __enter__(self) -> AppendOnlyJSONLExporter:
        return self

    def __exit__(self, *_: object) -> None:
        self.close()

    def __repr__(self) -> str:
        return (
            f"AppendOnlyJSONLExporter(path={str(self._base_path)!r}, "
            f"rotations={self._rotation_index})"
        )
@@ -0,0 +1,357 @@
1
+ """spanforge.export.cloud — Cloud telemetry exporter.
2
+
3
+ Batches spanforge events and ships them to the spanforge Cloud API (or any
4
+ compatible self-hosted endpoint) over HTTPS using only stdlib ``urllib``.
5
+
6
+ Configuration
7
+ -------------
8
+ All settings are read from environment variables so no secrets end up in
9
+ source code:
10
+
11
+ ``SPANFORGE_CLOUD_API_KEY``
12
+ Required. Your spanforge Cloud API key.
13
+
14
+ ``SPANFORGE_CLOUD_ENDPOINT``
15
+ Optional. Override the ingestion URL. Defaults to
16
+ ``https://ingest.getspanforge.com/v1/events``.
17
+
18
+ ``SPANFORGE_CLOUD_BATCH_SIZE``
19
+ Optional integer. Events per HTTP request. Default ``100``.
20
+
21
+ ``SPANFORGE_CLOUD_TIMEOUT``
22
+ Optional seconds (float). HTTP request timeout. Default ``10``.
23
+
24
+ Example::
25
+
26
+ import os
27
+ os.environ["SPANFORGE_CLOUD_API_KEY"] = "sf_live_..."
28
+
29
+ from spanforge.export.cloud import CloudExporter
30
+ from spanforge import configure
31
+ configure(exporter="cloud")
32
+
33
+ # Events are now shipped automatically via the default TraceStore flush.
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import asyncio
39
+ import ipaddress
40
+ import json
41
+ import logging
42
+ import os
43
+ import socket
44
+ import ssl
45
+ import threading
46
+ import time
47
+ import urllib.error
48
+ import urllib.parse
49
+ import urllib.request
50
+ from collections import deque
51
+ from typing import TYPE_CHECKING, Any
52
+
53
+ if TYPE_CHECKING:
54
+ from spanforge.event import Event
55
+
56
+ __all__ = ["CloudExporter", "CloudExporterError"]
57
+
58
+ _log = logging.getLogger("spanforge.export.cloud")
59
+
60
+
61
+ def _is_private_ip_literal(host: str) -> bool:
62
+ """Return True if *host* is a private/loopback/link-local IP literal."""
63
+ try:
64
+ addr = ipaddress.ip_address(host)
65
+ except ValueError:
66
+ return False
67
+ return addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_multicast
68
+
69
+
70
def _validate_http_url(
    url: str,
    param_name: str = "url",
    *,
    allow_private_addresses: bool = False,
) -> None:
    """Raise ValueError if *url* is not a valid http(s):// URL.

    When *allow_private_addresses* is False (default), also rejects literal
    private/loopback/link-local/multicast IP addresses and hostnames for which
    any resolved address (IPv4 or IPv6) falls in one of those ranges.

    Args:
        url: The URL to validate.
        param_name: Name used for the offending parameter in error messages.
        allow_private_addresses: Skip the private-address checks entirely.

    Raises:
        ValueError: If the scheme is not http/https, the netloc is empty, or
            the host is (or resolves to) a non-public address.
    """
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme not in {"http", "https"} or not parsed.netloc:
        raise ValueError(f"{param_name} must be a valid http:// or https:// URL; got {url!r}")
    if allow_private_addresses:
        return

    host = parsed.hostname or ""
    if not host:
        return
    if _is_private_ip_literal(host):
        raise ValueError(
            f"{param_name} resolves to a private/loopback/link-local IP address "
            f"({host!r}). Set allow_private_addresses=True to permit this in "
            "non-production environments."
        )

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass  # a hostname — resolve it below
    else:
        return  # a public IP literal; there is nothing to resolve

    try:
        # getaddrinfo returns every A *and* AAAA record, so an IPv6-only name
        # pointing at ::1 cannot slip past an IPv4-only lookup.
        infos = socket.getaddrinfo(host, None)
    except OSError:
        return  # DNS failure — allow through (deliberate best effort)

    for info in infos:
        # sockaddr[0] is the address; strip any IPv6 zone id ("fe80::1%eth0").
        resolved = str(info[4][0]).split("%", 1)[0]
        if _is_private_ip_literal(resolved):
            raise ValueError(
                f"{param_name} hostname {host!r} resolves to a private/loopback/"
                f"link-local address ({resolved}). "
                "Set allow_private_addresses=True to permit this."
            )
104
+
105
+
106
+ _DEFAULT_ENDPOINT = "https://ingest.getspanforge.com/v1/events"
107
+ _DEFAULT_BATCH_SIZE = 100
108
+ _DEFAULT_TIMEOUT = 10.0
109
+ _DEFAULT_FLUSH_INTERVAL = 5.0 # seconds
110
+ _MAX_QUEUE_SIZE = 10_000 # prevent unbounded growth when endpoint is unreachable
111
+
112
+
113
class CloudExporterError(RuntimeError):
    """Error raised by the cloud exporter for failures it will not retry.

    Used for a missing API key, use after close, a full export queue, and
    batches rejected by the Cloud API.
    """
115
+
116
+
117
class CloudExporter:
    """Async-compatible batch exporter that ships events to spanforge Cloud.

    The exporter maintains an internal queue and flushes in batches either
    on a timed interval (background daemon thread, started lazily on the
    first export) or when the queue reaches *batch_size*.  HTTP bodies are
    newline-delimited JSON (one event per line) to minimise memory overhead.

    Args:
        api_key:
            spanforge Cloud API key.  Falls back to the
            ``SPANFORGE_CLOUD_API_KEY`` environment variable.
        endpoint:
            HTTP(S) ingestion URL.  Falls back to
            ``SPANFORGE_CLOUD_ENDPOINT`` env var, then the default.
        batch_size:
            Maximum events per HTTP POST.  Falls back to
            ``SPANFORGE_CLOUD_BATCH_SIZE``, then the default.  Must be >= 1.
        flush_interval:
            Seconds between automatic flushes.
        timeout:
            HTTP request timeout in seconds.  Falls back to
            ``SPANFORGE_CLOUD_TIMEOUT``, then the default.
        max_retries:
            Number of attempts per batch on transient errors (5xx, 408, 429,
            timeout, network failure).  At least one attempt is always made.
        allow_private_addresses:
            Permit endpoints on private/loopback addresses.

    Raises:
        CloudExporterError:
            If ``api_key`` is not set via argument or environment variable.
        ValueError:
            If the endpoint URL is invalid or ``batch_size`` is less than 1.

    Example::

        async with CloudExporter(api_key="sf_live_...") as exporter:
            await exporter.export(event)
    """

    # 4xx statuses that signal a transient condition and are worth retrying
    # (Request Timeout, Too Many Requests); every other 4xx is permanent.
    _RETRYABLE_CLIENT_STATUSES = frozenset({408, 429})

    def __init__(
        self,
        api_key: str | None = None,
        endpoint: str | None = None,
        batch_size: int | None = None,
        flush_interval: float | None = None,
        timeout: float | None = None,
        max_retries: int = 3,
        allow_private_addresses: bool = False,
    ) -> None:
        resolved_key = api_key or os.environ.get("SPANFORGE_CLOUD_API_KEY", "")
        if not resolved_key:
            raise CloudExporterError(
                "No API key provided. Set SPANFORGE_CLOUD_API_KEY or pass api_key=."
            )
        self._api_key = resolved_key
        self._endpoint = (
            endpoint or os.environ.get("SPANFORGE_CLOUD_ENDPOINT", "") or _DEFAULT_ENDPOINT
        )
        _validate_http_url(
            self._endpoint, "endpoint", allow_private_addresses=allow_private_addresses
        )
        self._batch_size = int(
            batch_size
            if batch_size is not None
            else int(os.environ.get("SPANFORGE_CLOUD_BATCH_SIZE", _DEFAULT_BATCH_SIZE))
        )
        if self._batch_size < 1:
            # A non-positive batch size would yield empty batches forever and
            # the queue would never drain.
            raise ValueError(f"batch_size must be >= 1; got {self._batch_size}")
        self._flush_interval = (
            flush_interval if flush_interval is not None else _DEFAULT_FLUSH_INTERVAL
        )
        self._timeout = float(
            timeout
            if timeout is not None
            else float(os.environ.get("SPANFORGE_CLOUD_TIMEOUT", _DEFAULT_TIMEOUT))
        )
        self._max_retries = max_retries

        self._queue: deque[dict[str, Any]] = deque()
        # threading.Lock guards the queue so both the async flush() coroutine
        # and the background _flush_loop() thread can access it safely.
        self._queue_lock = threading.Lock()
        self._closed = False

        # Background flush thread (started lazily on first export)
        self._flush_thread: threading.Thread | None = None
        self._stop_event = threading.Event()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def export(self, event: Event) -> None:
        """Queue a single event for batched delivery.

        Triggers an immediate :meth:`flush` once the queue holds at least
        ``batch_size`` events.

        Raises:
            CloudExporterError: If the exporter is closed or the queue is full.
        """
        if self._closed:
            raise CloudExporterError("CloudExporter is closed.")
        with self._queue_lock:
            if len(self._queue) >= _MAX_QUEUE_SIZE:
                raise CloudExporterError(
                    f"Export queue is full ({_MAX_QUEUE_SIZE} events). "
                    "Cloud endpoint may be unreachable."
                )
            self._queue.append(self._serialise(event))
            should_flush = len(self._queue) >= self._batch_size
        self._ensure_flush_thread()
        if should_flush:
            await self.flush()

    async def export_batch(self, events: list[Event]) -> None:
        """Queue multiple events for batched delivery."""
        for event in events:
            await self.export(event)

    async def flush(self) -> int:
        """Drain the queue and send all pending events.  Returns sent count.

        Events go out in chunks of at most ``batch_size``; the blocking HTTP
        work runs in the event loop's default executor.  Draining stops early
        if a batch cannot be delivered (it has been re-queued for a later
        attempt).  Returns 0 without sending once the exporter is closed.
        """
        if self._closed:
            return 0
        loop = asyncio.get_running_loop()
        total_sent = 0
        while True:
            batch = self._take_batch()
            if not batch:
                break
            sent = await loop.run_in_executor(None, self._send_batch, batch)
            if sent == 0:
                # _send_batch put the batch back on the queue; looping again
                # would just retry the same failing batch.
                break
            total_sent += sent
        return total_sent

    async def close(self) -> None:
        """Flush remaining events and shut down the background thread."""
        if self._closed:
            return
        self._stop_event.set()
        worker = self._flush_thread
        if worker is not None and worker.is_alive():
            # join() blocks, so run it off the event loop.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, worker.join, self._timeout + 2)
        try:
            # Final flush
            await self.flush()
        finally:
            # Mark closed even if the final flush raised.
            self._closed = True
        with self._queue_lock:
            leftover = len(self._queue)
        if leftover:
            _log.error("CloudExporter closed with %d undelivered events still queued", leftover)

    # Async context manager support
    async def __aenter__(self) -> CloudExporter:
        return self

    async def __aexit__(self, *_: object) -> None:
        await self.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _take_batch(self) -> list[dict[str, Any]]:
        """Pop up to ``batch_size`` events from the front of the queue."""
        with self._queue_lock:
            batch: list[dict[str, Any]] = []
            while self._queue and len(batch) < self._batch_size:
                batch.append(self._queue.popleft())
        return batch

    def _ensure_flush_thread(self) -> None:
        """Start the background flush thread if it is not currently running."""
        if self._flush_thread is None or not self._flush_thread.is_alive():
            t = threading.Thread(
                target=self._flush_loop,
                name="spanforge-cloud-flush",
                daemon=True,
            )
            t.start()
            self._flush_thread = t

    def _flush_loop(self) -> None:
        """Background thread: send one batch per interval until stopped."""
        while not self._stop_event.wait(timeout=self._flush_interval):
            batch = self._take_batch()
            if batch:
                try:
                    self._send_batch(batch)
                except Exception as exc:
                    # Thread boundary: log instead of killing the flusher.
                    _log.warning("Background flush failed: %s", exc)

    def _send_batch(self, batch: list[dict[str, Any]]) -> int:
        """HTTP POST a batch of serialised events.  Returns number sent.

        Transient failures (5xx, 408, 429, network errors, timeouts) are
        retried with exponential back-off.  When every attempt fails the batch
        is put back at the front of the queue and 0 is returned.

        Raises:
            CloudExporterError: On a non-retryable 4xx response; the batch is
                dropped in that case.
        """
        body = "\n".join(json.dumps(ev, default=str) for ev in batch).encode("utf-8")
        headers = {
            "Content-Type": "application/x-ndjson; charset=utf-8",
            "Authorization": f"Bearer {self._api_key}",
            "X-Spanforge-SDK": "python",
            "User-Agent": "spanforge-python/2.0",
        }
        req = urllib.request.Request(
            self._endpoint,
            data=body,
            headers=headers,
            method="POST",
        )

        retryable_4xx = self._RETRYABLE_CLIENT_STATUSES
        # Always make at least one attempt, even when max_retries <= 0.
        attempts = max(1, self._max_retries)
        ctx: ssl.SSLContext | None = None
        last_exc: Exception | None = None
        for attempt in range(1, attempts + 1):
            try:
                if ctx is None:
                    # Built once and reused across attempts.
                    ctx = ssl.create_default_context()
                with urllib.request.urlopen(req, timeout=self._timeout, context=ctx) as resp:  # nosec B310
                    status = resp.status
                    if 200 <= status < 300:
                        _log.debug(
                            "Shipped %d events → %s (%s)", len(batch), self._endpoint, status
                        )
                        return len(batch)
                    # Non-retryable client error
                    if 400 <= status < 500 and status not in retryable_4xx:
                        raise CloudExporterError(
                            f"Cloud API rejected batch: HTTP {status} (check API key and payload)"
                        )
                    # 5xx / 408 / 429 — retryable
                    _log.warning("Server error %s on attempt %d/%d", status, attempt, attempts)

            except urllib.error.HTTPError as exc:
                if 400 <= exc.code < 500 and exc.code not in retryable_4xx:
                    raise CloudExporterError(f"Cloud API rejected batch: HTTP {exc.code}") from exc
                last_exc = exc
                _log.warning("HTTP %s on attempt %d/%d", exc.code, attempt, attempts)
            except (urllib.error.URLError, OSError, TimeoutError) as exc:
                last_exc = exc
                _log.warning("Network error on attempt %d/%d: %s", attempt, attempts, exc)

            if attempt < attempts:
                time.sleep(min(2**attempt, 30))  # exponential back-off

        # Re-enqueue failed batch at the front for next flush cycle
        with self._queue_lock:
            self._queue.extendleft(reversed(batch))
        _log.error("Failed to ship batch after %d attempts: %s", attempts, last_exc)
        return 0

    @staticmethod
    def _serialise(event: Event) -> dict[str, Any]:
        """Convert an Event to a plain dict for JSON serialisation."""
        if hasattr(event, "to_dict"):
            return event.to_dict()
        # Fallback for duck-typed event objects: missing attributes become None.
        return {
            k: getattr(event, k, None)
            for k in (
                "event_id",
                "event_type",
                "payload",
                "schema_version",
                "source",
                "span_id",
                "trace_id",
                "timestamp",
                "tags",
                "signature",
                "checksum",
                "prev_id",
            )
        }