spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/config.py ADDED
@@ -0,0 +1,460 @@
1
+ """spanforge.config — Global configuration singleton and ``configure()`` entry point.
2
+
3
+ The configuration layer is intentionally simple: a single mutable dataclass
4
+ backed by a module-level ``threading.Lock`` for safe concurrent mutation.
5
+ Environment variables are read once at import time; subsequent calls to
6
+ :func:`configure` override individual fields.
7
+
8
+ Environment variable mapping
9
+ -----------------------------
10
+ +-----------------------------+-----------------------+
11
+ | Env var | Config field |
12
+ +=============================+=======================+
13
+ | ``SPANFORGE_EXPORTER`` | ``exporter`` |
14
+ | ``SPANFORGE_ENDPOINT`` | ``endpoint`` |
15
+ | ``SPANFORGE_ORG_ID`` | ``org_id`` |
16
+ | ``SPANFORGE_SERVICE_NAME`` | ``service_name`` |
17
+ | ``SPANFORGE_ENV`` | ``env`` |
18
+ | ``SPANFORGE_SERVICE_VERSION``| ``service_version`` |
19
+ | ``SPANFORGE_SIGNING_KEY`` | ``signing_key`` |
20
+ | ``SPANFORGE_SAMPLE_RATE`` | ``sample_rate`` |
21
+ +-----------------------------+-----------------------+
22
+
23
+ Usage::
24
+
25
+ from spanforge import configure
26
+ configure(exporter="jsonl", service_name="my-agent", endpoint="./events.jsonl")
27
+
28
+ from spanforge.config import get_config
29
+ cfg = get_config()
30
+ print(cfg.service_name) # "my-agent"
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ import os
36
+ import threading
37
+ from dataclasses import dataclass, field
38
+ from typing import Any, Callable, TYPE_CHECKING
39
+
40
+ if TYPE_CHECKING:
41
+ from spanforge.event import Event
42
+
43
+ __all__ = ["SpanForgeConfig", "configure", "get_config"]
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Configuration dataclass
47
+ # ---------------------------------------------------------------------------
48
+
49
# Known exporter backend names (immutable so it can be shared safely).
_VALID_EXPORTERS = frozenset(
    (
        "console",
        "jsonl",
        "otlp",
        "webhook",
        "datadog",
        "grafana_loki",
        "otel_bridge",
        "otel_passthrough",
    )
)
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Config presets
53
+ # ---------------------------------------------------------------------------
54
+
55
# Named bundles of SpanForgeConfig field values.  ``configure(preset=...)``
# applies one of these before any explicitly passed keyword arguments.
_PRESETS: dict[str, dict[str, Any]] = {
    "development": dict(
        exporter="console",
        sample_rate=1.0,
        enable_trace_store=True,
        trace_store_size=500,
        on_export_error="warn",
        allow_private_endpoints=True,
        env="development",
        flush_interval_seconds=1.0,
    ),
    "testing": dict(
        exporter="console",
        sample_rate=1.0,
        enable_trace_store=True,
        trace_store_size=1000,
        on_export_error="raise",
        allow_private_endpoints=True,
        env="testing",
        flush_interval_seconds=0.1,
    ),
    "staging": dict(
        exporter="console",
        sample_rate=0.5,
        enable_trace_store=True,
        trace_store_size=200,
        on_export_error="warn",
        always_sample_errors=True,
        env="staging",
    ),
    "production": dict(
        exporter="otlp",
        sample_rate=0.1,
        enable_trace_store=False,
        on_export_error="drop",
        always_sample_errors=True,
        batch_size=512,
        flush_interval_seconds=5.0,
        max_queue_size=10_000,
        env="production",
    ),
    "otel_passthrough": dict(
        exporter="otel_bridge",
        sample_rate=1.0,
        enable_trace_store=True,
        on_export_error="warn",
        compliance_sampling=True,
        env="production",
    ),
}
105
+
106
+
107
@dataclass
class SpanForgeConfig:
    """Mutable, process-wide settings for the SpanForge SDK.

    Every field has a default, so the SDK can be used without any explicit
    configuration (the default ``exporter="console"`` prints to stdout).

    Attributes:
        exporter:             Export backend name, e.g. ``"console"``,
                              ``"jsonl"``, ``"otlp"``, ``"webhook"``,
                              ``"datadog"`` or ``"grafana_loki"``.
        endpoint:             Destination for the exporter (file path for
                              JSONL, URL for OTLP/webhook/Datadog/Loki).
        org_id:               Organisation identifier included on all
                              emitted events.
        service_name:         Human-readable service name (used in the
                              ``source`` field).  Must start with a letter
                              and contain only ``[a-zA-Z0-9._-]``.
        env:                  Deployment environment tag
                              (e.g. ``"production"``).
        service_version:      SemVer string of the emitting service.
        signing_key:          Base64-encoded HMAC-SHA256 key for audit-chain
                              signing; ``None`` disables signing.  Excluded
                              from ``repr``.
        redaction_policy:     :class:`~spanforge.redact.RedactionPolicy`
                              instance, or ``None`` to disable PII redaction.
        on_export_error:      What to do on exporter/emission errors:
                              ``"warn"`` (write to ``stderr``), ``"raise"``
                              (re-raise into caller code) or ``"drop"``
                              (discard silently).
        include_raw_tool_io:  Opt-in: include ``arguments_raw`` /
                              ``result_raw`` in serialised
                              :class:`~spanforge.namespaces.trace.ToolCall`
                              payloads.  Off by default to avoid accidental
                              PII leakage; there is no environment variable
                              for it.
        sample_rate:          Fraction of traces to emit, 0.0–1.0 inclusive.
                              Sampling is deterministic per ``trace_id`` so
                              all spans of a trace are sampled together.
                              Settable via ``SPANFORGE_SAMPLE_RATE``.
        always_sample_errors: When ``True``, spans/traces with
                              ``status="error"`` or ``status="timeout"`` are
                              emitted regardless of *sample_rate*.
        trace_filters:        Callables ``(Event) -> bool``; an event is
                              emitted only when **all** return ``True``.
                              Applied after probabilistic sampling.
        enable_trace_store:   When ``True``, dispatched events are also
                              written to the in-process
                              :class:`~spanforge._store.TraceStore` ring
                              buffer.  Settable via
                              ``SPANFORGE_ENABLE_TRACE_STORE=1``.
        trace_store_size:     Number of distinct traces the ring buffer
                              keeps; the oldest is evicted when full.
        export_max_retries:   Retry attempts on transient export failures
                              (exponential back-off: 0.5 s, 1 s, 2 s …)
                              before ``on_export_error`` applies.
        allow_private_endpoints: Permit private/loopback endpoints (SSRF
                              protection override, local development only).
        auto_emit_cost:       Emit ``llm.cost.token.recorded`` automatically
                              when a span closes with a non-``None`` ``cost``.
        budget_usd_per_run:   Per-run USD budget on the global
                              :class:`~spanforge.cost.CostTracker`;
                              ``None`` disables the check.
        budget_usd_per_day:   Rolling 24-hour USD budget on the global
                              tracker; ``None`` disables the check.
        batch_size / flush_interval_seconds / max_queue_size:
                              Batch export pipeline tuning: events per batch,
                              maximum seconds between flushes, and bounded
                              in-memory queue depth.
        export_error_callback: Invoked on every export error, regardless of
                              the ``on_export_error`` policy.
        span_processors:      Span processor pipeline.
        sampler:              Custom sampler; overrides *sample_rate* when set.
        default_session_id / default_user_id:
                              Session / user tracking defaults.
        max_span_events:      Maximum span events held per span; ``0`` means
                              unlimited.
        alert_config / alert_manager:
                              Alerting configuration.  A pre-built
                              ``alert_manager`` takes precedence over
                              ``alert_config``.
        no_egress:            Block all network exporters.
        egress_allowlist:     URL prefixes allowed for egress.
        compliance_sampling:  Always record compliance events when
                              ``sample_rate < 1.0``.
        signing_key_expires_at: ISO-8601 expiry date of the signing key.
        signing_key_context:  Key-derivation context for multi-environment
                              isolation (e.g. ``"production"``).
        require_org_id:       Raise ``SigningError`` if ``event.org_id`` is
                              ``None``.
        exporters:            Names of exporters to run simultaneously
                              (dual-stream export).
        consent_enforcement:  Enable runtime consent checks.
        hitl_enabled / hitl_confidence_threshold / hitl_sla_seconds:
                              Human-in-the-loop queue: on/off switch,
                              confidence below which items are auto-queued,
                              and SLA timeout for pending reviews.
        model_registry_path:  Optional JSON persistence path for the model
                              registry.
    """

    # -- Core identity and destination ------------------------------------
    exporter: str = "console"
    endpoint: str | None = None
    org_id: str | None = None
    service_name: str = "unknown-service"
    env: str = "production"
    service_version: str = "0.0.0"
    # Kept out of repr so the key never ends up in logs.
    signing_key: str | None = field(default=None, repr=False)
    # RedactionPolicy | None — typed as Any to avoid a circular import.
    redaction_policy: Any = None
    # One of "warn" | "raise" | "drop".
    on_export_error: str = "warn"
    # Opt-in to store raw tool I/O (ToolCall.arguments_raw / result_raw).
    include_raw_tool_io: bool = False

    # -- Sampling and filtering -------------------------------------------
    # 0.0–1.0; fraction of traces to emit.
    sample_rate: float = 1.0
    # Emit error/timeout spans regardless of sample_rate.
    always_sample_errors: bool = True
    trace_filters: list[Callable[["Event"], bool]] = field(default_factory=list)

    # -- In-process trace store and retries -------------------------------
    # Opt-in in-process trace store.
    enable_trace_store: bool = False
    # Ring buffer capacity (number of traces).
    trace_store_size: int = 100
    # Retry count for transient export failures.
    export_max_retries: int = 3
    # SSRF protection: True allows private/loopback endpoints (local dev
    # only).  Env var: SPANFORGE_ALLOW_PRIVATE_ENDPOINTS=true
    allow_private_endpoints: bool = False

    # -- Cost calculation engine ------------------------------------------
    # Auto-emit llm.cost.token.recorded on span close.
    auto_emit_cost: bool = False
    # Per-run budget cap (USD).
    budget_usd_per_run: float | None = None
    # Rolling 24-hour budget cap (USD).
    budget_usd_per_day: float | None = None

    # -- Async batch export pipeline --------------------------------------
    # Max events per batch.
    batch_size: int = 512
    # Max seconds between flushes.
    flush_interval_seconds: float = 5.0
    # Bounded in-memory queue depth.
    max_queue_size: int = 10_000
    # Invoked on every export error, regardless of on_export_error policy.
    export_error_callback: "Callable[[Exception], None] | None" = field(
        default=None,
        repr=False,
    )
    # Span processor pipeline (list[SpanProcessor]).
    span_processors: "list[Any]" = field(default_factory=list)
    # Custom sampler (Sampler | None); overrides sample_rate when set.
    sampler: "Any" = field(default=None, repr=False)
    # Session / user tracking defaults.
    default_session_id: str | None = None
    default_user_id: str | None = None
    # Maximum span events held per Span (deque maxlen); 0 means unlimited.
    max_span_events: int = 1000

    # -- Alerting ----------------------------------------------------------
    # alert_config: AlertConfig | None (loaded from SPANFORGE_ALERT_* env
    # vars).  When set, build_manager() is called lazily the first time an
    # alert fires.  Ignored when alert_manager is provided directly.
    alert_config: "Any" = field(default=None, repr=False)
    # alert_manager: pre-built AlertManager | None.  Takes precedence over
    # alert_config; inject directly for full control.
    alert_manager: "Any" = field(default=None, repr=False)

    # -- v1.0 compliance layer --------------------------------------------
    # SF-14: block all network exporters.
    no_egress: bool = False
    # SF-14: URL prefixes that remain allowed.
    egress_allowlist: "frozenset[str]" = field(default_factory=frozenset)
    # SF-16: always record compliance events when sample_rate < 1.0.
    compliance_sampling: bool = True
    # GA-01: signing key expiry, ISO-8601 date.
    signing_key_expires_at: str | None = None
    # GA-01-D: context-based key derivation, e.g. "production", "staging".
    signing_key_context: str | None = None
    # GA-04: raise SigningError if event.org_id is None.
    require_org_id: bool = False
    # SF-11-C: multiple simultaneous exporters, e.g.
    # ['otel_passthrough', 'jsonl'].
    exporters: "list[str]" = field(default_factory=list)

    # -- v2.0 T.R.U.S.T. framework ----------------------------------------
    # Enable runtime consent checks.
    consent_enforcement: bool = False
    # Activate the human-in-the-loop (HITL) review queue.
    hitl_enabled: bool = False
    # Auto-queue below this confidence.
    hitl_confidence_threshold: float = 0.7
    # SLA timeout for pending reviews.
    hitl_sla_seconds: int = 3600
    # Model registry JSON persistence path (optional).
    model_registry_path: str | None = None
256
+
257
+
258
+ # ---------------------------------------------------------------------------
259
+ # Module-level singleton
260
+ # ---------------------------------------------------------------------------
261
+
262
# The single live configuration object: returned as-is by get_config() and
# mutated in place by configure() and _load_from_env().
_config: SpanForgeConfig = SpanForgeConfig()
# Held by configure() for the whole duration of a mutation.  Code that edits
# the object obtained from get_config() directly does not go through it.
_config_lock: threading.Lock = threading.Lock()
264
+
265
+
266
def _env_flag(raw: str) -> bool:
    """Return ``True`` when *raw* spells an enabled flag (``1``/``true``/``yes``).

    Matching ignores case and surrounding whitespace; anything else is ``False``.
    """
    return raw.strip().lower() in ("1", "true", "yes")


def _env_number(raw: str, cast: Callable[[str], Any]) -> Any:
    """Convert *raw* with *cast* (``int`` or ``float``); ``None`` if it is malformed."""
    try:
        return cast(raw)
    except ValueError:
        return None


def _load_from_env() -> None:
    """Read ``SPANFORGE_*`` environment variables and overlay them onto *_config*.

    Only variables that are present in the environment touch the
    configuration.  Malformed numeric values are ignored, leaving the current
    setting untouched (previously an unparseable ``SPANFORGE_SAMPLE_RATE``
    silently reset the rate to ``1.0``, unlike every other numeric variable).
    Numeric values that parse are clamped to their valid range.
    """
    env = os.environ

    # Plain string settings are copied verbatim.
    string_settings = {
        "SPANFORGE_EXPORTER": "exporter",
        "SPANFORGE_ENDPOINT": "endpoint",
        "SPANFORGE_ORG_ID": "org_id",
        "SPANFORGE_SERVICE_NAME": "service_name",
        "SPANFORGE_ENV": "env",
        "SPANFORGE_SERVICE_VERSION": "service_version",
        "SPANFORGE_SIGNING_KEY": "signing_key",
        "SPANFORGE_ON_EXPORT_ERROR": "on_export_error",
    }
    for env_var, field_name in string_settings.items():
        value = env.get(env_var)
        if value is not None:
            setattr(_config, field_name, value)

    # Opt-in switches: enabled only by 1 / true / yes.
    opt_in_switches = {
        "SPANFORGE_ENABLE_TRACE_STORE": "enable_trace_store",
        # SSRF override — allows private IPs (dev only).
        "SPANFORGE_ALLOW_PRIVATE_ENDPOINTS": "allow_private_endpoints",
        "SPANFORGE_NO_EGRESS": "no_egress",
        "SPANFORGE_REQUIRE_ORG_ID": "require_org_id",
        "SPANFORGE_CONSENT_ENFORCEMENT": "consent_enforcement",
        "SPANFORGE_HITL_ENABLED": "hitl_enabled",
    }
    for env_var, field_name in opt_in_switches.items():
        value = env.get(env_var)
        if value is not None:
            setattr(_config, field_name, _env_flag(value))

    # Compliance sampling is opt-out: only 0 / false / no turn it off.
    raw_comp_samp = env.get("SPANFORGE_COMPLIANCE_SAMPLING")
    if raw_comp_samp is not None:
        _config.compliance_sampling = raw_comp_samp.strip().lower() not in ("0", "false", "no")

    # Numeric settings: convert, then clamp into range.
    raw_rate = env.get("SPANFORGE_SAMPLE_RATE")
    if raw_rate is not None:
        rate = _env_number(raw_rate, float)
        if rate is not None:
            _config.sample_rate = max(0.0, min(1.0, rate))

    raw_hitl_thresh = env.get("SPANFORGE_HITL_CONFIDENCE_THRESHOLD")
    if raw_hitl_thresh is not None:
        threshold = _env_number(raw_hitl_thresh, float)
        if threshold is not None:
            _config.hitl_confidence_threshold = max(0.0, min(1.0, threshold))

    raw_hitl_sla = env.get("SPANFORGE_HITL_SLA_SECONDS")
    if raw_hitl_sla is not None:
        sla_seconds = _env_number(raw_hitl_sla, int)
        if sla_seconds is not None:
            _config.hitl_sla_seconds = max(1, sla_seconds)

    # Egress allowlist: comma-separated URLs, blank entries dropped.
    raw_allowlist = env.get("SPANFORGE_EGRESS_ALLOWLIST")
    if raw_allowlist is not None:
        _config.egress_allowlist = frozenset(
            entry.strip() for entry in raw_allowlist.split(",") if entry.strip()
        )

    # Signing key expiry is stored stripped (a blank value stays "").
    raw_key_expiry = env.get("SPANFORGE_SIGNING_KEY_EXPIRES_AT")
    if raw_key_expiry is not None:
        _config.signing_key_expires_at = raw_key_expiry.strip()

    # Blank values for these two mean "not set".
    raw_key_ctx = env.get("SPANFORGE_SIGNING_KEY_CONTEXT")
    if raw_key_ctx is not None:
        _config.signing_key_context = raw_key_ctx.strip() or None

    raw_registry_path = env.get("SPANFORGE_MODEL_REGISTRY_PATH")
    if raw_registry_path is not None:
        _config.model_registry_path = raw_registry_path.strip() or None
346
+
347
+
348
# Overlay SPANFORGE_* environment variables onto the singleton once, as a
# side effect of importing this module; later changes go through configure().
_load_from_env()
350
+
351
+
352
+ # ---------------------------------------------------------------------------
353
+ # Public API
354
+ # ---------------------------------------------------------------------------
355
+
356
+
357
def get_config() -> SpanForgeConfig:
    """Return the process-wide :class:`SpanForgeConfig` instance.

    No copy is made: the caller receives the *live* singleton, so assigning
    to its attributes changes the configuration seen by every later tracer
    operation.  Use :func:`configure` when the change is intentional.
    """
    return _config
365
+
366
+
367
# Book-keeping for the sampler that configure() installs on its own, so a
# later call can tell it apart from a user-supplied sampler and refresh it.
_AUTO_SAMPLER: dict[str, Any] = {"sampler": None, "rate": None}


def _check_numeric_option(key: str, value: Any) -> None:
    """Raise ``ValueError`` when a range-checked option carries an invalid value."""
    if key in ("batch_size", "max_queue_size"):
        if not isinstance(value, int) or value < 1:
            raise ValueError(f"{key} must be a positive integer >= 1")
    elif key == "flush_interval_seconds":
        if not isinstance(value, (int, float)) or value <= 0:
            raise ValueError("flush_interval_seconds must be a positive number > 0")
    elif key == "sample_rate":
        if not isinstance(value, (int, float)) or not (0.0 <= value <= 1.0):
            raise ValueError("sample_rate must be a float in [0.0, 1.0]")


def configure(**kwargs: Any) -> None:  # noqa: ANN401
    """Mutate the global :class:`SpanForgeConfig` singleton.

    Accepts :class:`SpanForgeConfig` field names as keyword arguments, plus
    two special keywords handled here:

    * ``preset="<name>"`` applies a bundle of defaults **before** the other
      kwargs, so explicit kwargs win.  Available presets:
      ``"development"``, ``"testing"``, ``"staging"``, ``"production"``,
      ``"otel_passthrough"``.
    * ``mode="otel_passthrough"`` is a shortcut for
      ``preset="otel_passthrough"`` (an explicit ``preset`` takes precedence).

    Calling ``configure()`` with no arguments is a no-op (safe for idempotent
    setup scripts).  All arguments are validated before anything is written,
    so a rejected call leaves the configuration exactly as it was.

    Args:
        **kwargs: One or more :class:`SpanForgeConfig` field names and their
            new values, optionally with ``preset`` and/or ``mode``.

    Raises:
        ValueError: If an unknown configuration key, preset name or mode is
            passed, or a range-checked numeric option is out of range.

    Examples::

        configure(preset="production", exporter="otlp", endpoint="http://collector:4318")
        configure(preset="development")
        configure(exporter="jsonl", endpoint="./events.jsonl")
    """
    if not kwargs:
        return

    from dataclasses import fields as _dc_fields  # noqa: PLC0415

    with _config_lock:
        # Handle mode shortcut (SF-11-B): configure(mode='otel_passthrough')
        mode = kwargs.pop("mode", None)
        if mode is not None:
            if mode != "otel_passthrough":
                raise ValueError(
                    f"Unknown spanforge mode {mode!r}. "
                    "Valid modes: 'otel_passthrough'"
                )
            kwargs.setdefault("preset", "otel_passthrough")

        preset_name = kwargs.pop("preset", None)
        preset_values: dict[str, Any] = {}
        if preset_name is not None:
            if preset_name not in _PRESETS:
                valid_presets = sorted(_PRESETS.keys())
                raise ValueError(
                    f"Unknown spanforge preset {preset_name!r}. "
                    f"Valid presets: {valid_presets}"
                )
            preset_values = _PRESETS[preset_name]

        # Validate every explicit kwarg up front.  Only declared dataclass
        # fields are accepted: hasattr() would also let through attributes
        # such as ``__dict__`` or ``__doc__`` and corrupt the singleton.
        field_names = {f.name for f in _dc_fields(_config)}
        for key, value in kwargs.items():
            if key not in field_names:
                valid = sorted(field_names)
                raise ValueError(
                    f"Unknown spanforge configuration key {key!r}. "
                    f"Valid keys: {valid}"
                )
            _check_numeric_option(key, value)

        # Nothing can fail validation from here on: apply preset defaults
        # first, then the explicit overrides.
        for key, value in {**preset_values, **kwargs}.items():
            setattr(_config, key, value)

        # Drop a sampler that an earlier configure() call auto-wired when it
        # no longer matches the configuration (rate changed, rate back to
        # 1.0, or compliance sampling switched off).  A user-supplied
        # sampler is never touched.
        managed = _AUTO_SAMPLER["sampler"]
        if managed is not None and _config.sampler is managed:
            still_applies = (
                _config.compliance_sampling
                and _config.sample_rate < 1.0
                and _AUTO_SAMPLER["rate"] == _config.sample_rate
            )
            if not still_applies:
                _config.sampler = None

        # Auto-wire ComplianceSampler when compliance_sampling is enabled
        # and a sub-1.0 sample_rate is set but no explicit sampler provided.
        if _config.compliance_sampling and _config.sample_rate < 1.0 and _config.sampler is None:
            from spanforge.sampling import ComplianceSampler  # noqa: PLC0415

            _config.sampler = ComplianceSampler(base_rate=_config.sample_rate)
            _AUTO_SAMPLER["sampler"] = _config.sampler
            _AUTO_SAMPLER["rate"] = _config.sample_rate

        # GA-01-A: Validate signing key strength when a key is configured.
        if _config.signing_key:
            import logging as _logging  # noqa: PLC0415
            from spanforge.signing import validate_key_strength  # noqa: PLC0415

            _key_warnings = validate_key_strength(_config.signing_key)
            if _key_warnings:
                _log = _logging.getLogger("spanforge.config")
                for _w in _key_warnings:
                    _log.warning("signing key: %s", _w)

        # Invalidate the cached exporter in the stream so the next emit
        # picks up the new configuration.  Import here to avoid circular
        # import at module load time.
        try:
            from spanforge import _stream  # noqa: PLC0415

            _stream._reset_exporter()
        except (ImportError, AttributeError):
            # _stream not yet loaded (e.g. during package init) — safe to skip.
            pass
spanforge/consent.py ADDED
@@ -0,0 +1,227 @@
1
+ """Consent boundary enforcement for SpanForge compliance pipeline.
2
+
3
+ Provides runtime monitoring that flags agent decisions made on
4
+ out-of-consent data, distinct from PII redaction. Consent enforcement
5
+ checks whether data *should be used at all*, while redaction masks
6
+ sensitive values.
7
+
8
+ Configuration
9
+ -------------
10
+ * ``consent_enforcement=True`` on :class:`~spanforge.config.SpanForgeConfig`
11
+ activates consent boundary checks.
12
+ * Call :func:`grant_consent` / :func:`revoke_consent` to manage the
13
+ consent store, then :func:`check_consent` before data processing.
14
+
15
+ Emits ``consent.granted``, ``consent.revoked``, ``consent.violation``
16
+ events into the HMAC audit chain via :func:`emit_rfc_event`.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import threading
22
+ from dataclasses import dataclass, field
23
+ from typing import Any
24
+
25
+ from spanforge.namespaces.consent import ConsentPayload
26
+
27
+ __all__ = [
28
+ "ConsentBoundary",
29
+ "ConsentRecord",
30
+ "check_consent",
31
+ "grant_consent",
32
+ "revoke_consent",
33
+ ]
34
+
35
+
36
def _parse_consent_expiry(expiry: Any) -> Any:
    """Parse an ISO 8601 expiry string into a timezone-aware ``datetime``.

    A trailing ``Z`` is accepted as UTC and values without an offset are
    interpreted as UTC.  Returns ``None`` when *expiry* is not a string or
    cannot be parsed.
    """
    from datetime import datetime, timezone  # noqa: PLC0415

    if not isinstance(expiry, str):
        return None
    text = expiry.strip()
    if text[-1:] in ("Z", "z"):
        # datetime.fromisoformat() only understands "Z" from Python 3.11 on.
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


@dataclass
class ConsentRecord:
    """A single consent grant for a data subject.

    Attributes:
        subject_id:      Identifier of the data subject.
        scope:           Scope the consent covers.
        purpose:         Purpose the consent was given for.
        legal_basis:     Legal basis label; defaults to ``"consent"``.
        expiry:          Optional ISO 8601 date/time after which the grant
                         is no longer active; ``None`` means no expiry.
        data_categories: Data categories covered by the grant.
    """

    subject_id: str
    scope: str
    purpose: str
    legal_basis: str = "consent"
    expiry: str | None = None  # ISO 8601
    data_categories: list[str] = field(default_factory=list)

    def is_expired(self) -> bool:
        """Return ``True`` when :attr:`expiry` is set and has been reached.

        An expiry that cannot be parsed counts as expired (fail closed), so a
        corrupted record can never authorise data use.
        """
        from datetime import datetime, timezone  # noqa: PLC0415

        if not self.expiry:
            return False
        deadline = _parse_consent_expiry(self.expiry)
        if deadline is None:
            return True
        return datetime.now(timezone.utc) >= deadline


class ConsentBoundary:
    """Thread-safe runtime consent store and boundary enforcer.

    Keeps one :class:`ConsentRecord` per ``(subject_id, scope)`` pair and
    answers whether data use is covered by an active (present and not
    expired) grant.  When ``auto_emit`` is true, grants, revocations and
    violations are emitted as events via ``emit_rfc_event``.
    """

    def __init__(self, *, auto_emit: bool = True) -> None:
        # Guards every access to ``_records``.
        self._lock = threading.Lock()
        self._records: dict[tuple[str, str], ConsentRecord] = {}
        self._auto_emit = auto_emit

    def grant(
        self,
        subject_id: str,
        scope: str,
        purpose: str,
        *,
        legal_basis: str = "consent",
        expiry: str | None = None,
        agent_id: str | None = None,
        data_categories: list[str] | None = None,
    ) -> ConsentRecord:
        """Record a consent grant and emit a ``consent.granted`` event.

        A new grant for the same ``(subject_id, scope)`` replaces the
        previous one.

        Raises:
            ValueError: If *subject_id*, *scope* or *purpose* is empty, or
                *expiry* is given but is not a parseable ISO 8601 value.
        """
        if not subject_id:
            raise ValueError("subject_id must be non-empty")
        if not scope:
            raise ValueError("scope must be non-empty")
        if not purpose:
            raise ValueError("purpose must be non-empty")
        if expiry and _parse_consent_expiry(expiry) is None:
            raise ValueError(
                f"expiry must be an ISO 8601 date/time string, got {expiry!r}"
            )

        record = ConsentRecord(
            subject_id=subject_id,
            scope=scope,
            purpose=purpose,
            legal_basis=legal_basis,
            expiry=expiry,
            # Copy so later mutation of the caller's list cannot alter the record.
            data_categories=list(data_categories or ()),
        )
        with self._lock:
            self._records[(subject_id, scope)] = record

        if self._auto_emit:
            payload = ConsentPayload(
                subject_id=subject_id,
                scope=scope,
                purpose=purpose,
                status="granted",
                legal_basis=legal_basis,
                expiry=expiry,
                agent_id=agent_id,
                data_categories=list(data_categories or ()),
            )
            self._emit(payload, "granted")
        return record

    def revoke(
        self,
        subject_id: str,
        scope: str,
        *,
        reason: str = "user request",
        agent_id: str | None = None,
    ) -> bool:
        """Revoke a consent record and emit a ``consent.revoked`` event.

        Returns ``True`` if a matching record was found and removed.
        """
        with self._lock:
            removed = self._records.pop((subject_id, scope), None)

        if removed is not None and self._auto_emit:
            payload = ConsentPayload(
                subject_id=subject_id,
                scope=scope,
                purpose=removed.purpose,
                status="revoked",
                legal_basis=removed.legal_basis,
                agent_id=agent_id,
                violation_detail=reason,
            )
            self._emit(payload, "revoked")
        return removed is not None

    def check(
        self,
        subject_id: str,
        scope: str,
        *,
        agent_id: str | None = None,
        purpose: str = "",
    ) -> bool:
        """Check whether consent is active for the given subject + scope.

        Consent is active when a record exists and has not expired.  If no
        active consent exists, emits a ``consent.violation`` event and
        returns ``False``.
        """
        with self._lock:
            record = self._records.get((subject_id, scope))

        if record is not None and not record.is_expired():
            return True

        # Violation: consent is missing or has lapsed for this subject + scope.
        if self._auto_emit:
            if record is None:
                detail = f"No active consent for subject={subject_id!r} scope={scope!r}"
            else:
                detail = (
                    f"Consent expired at {record.expiry} "
                    f"for subject={subject_id!r} scope={scope!r}"
                )
            payload = ConsentPayload(
                subject_id=subject_id,
                scope=scope,
                purpose=purpose or "unspecified",
                status="violation",
                agent_id=agent_id,
                violation_detail=detail,
            )
            self._emit(payload, "violation")
        return False

    def has_consent(self, subject_id: str, scope: str) -> bool:
        """Return ``True`` if an active consent record exists (no event emitted)."""
        with self._lock:
            record = self._records.get((subject_id, scope))
        return record is not None and not record.is_expired()

    def list_consents(self, subject_id: str | None = None) -> list[ConsentRecord]:
        """Return all active (non-expired) consent records, optionally filtered by subject."""
        with self._lock:
            records = list(self._records.values())
        return [
            record
            for record in records
            if (subject_id is None or record.subject_id == subject_id)
            and not record.is_expired()
        ]

    def clear(self) -> None:
        """Remove all consent records (for testing)."""
        with self._lock:
            self._records.clear()

    @staticmethod
    def _emit(payload: ConsentPayload, status: str) -> None:
        """Emit a consent event via ``emit_rfc_event``; never raises on emit failure.

        Does nothing when the stream/types modules cannot be imported or
        *status* is not one of ``granted`` / ``revoked`` / ``violation``.
        """
        try:
            from spanforge._stream import emit_rfc_event  # noqa: PLC0415
            from spanforge.types import EventType  # noqa: PLC0415
        except ImportError:
            return

        _status_to_event = {
            "granted": EventType.CONSENT_GRANTED,
            "revoked": EventType.CONSENT_REVOKED,
            "violation": EventType.CONSENT_VIOLATION,
        }
        et = _status_to_event.get(status)
        if et is None:
            return
        try:
            emit_rfc_event(et, payload.to_dict())
        except Exception:  # noqa: BLE001
            # Never let auto-emit failures disrupt the caller, but leave a
            # trace: a silently lost consent event is an audit gap.
            import logging as _logging  # noqa: PLC0415

            _logging.getLogger("spanforge.consent").warning(
                "failed to emit consent.%s event", status, exc_info=True
            )
201
+
202
+
203
+ # ---------------------------------------------------------------------------
204
+ # Module-level singleton & convenience functions
205
+ # ---------------------------------------------------------------------------
206
+
207
# Shared boundary used by grant_consent() / revoke_consent() / check_consent();
# created with the default auto_emit=True.
_boundary = ConsentBoundary()
208
+
209
+
210
def grant_consent(
    subject_id: str,
    scope: str,
    purpose: str,
    **kwargs: Any,
) -> ConsentRecord:
    """Record a consent grant on the module-level :class:`ConsentBoundary`.

    Extra keyword arguments are forwarded unchanged to
    :meth:`ConsentBoundary.grant`, whose result is returned.
    """
    record = _boundary.grant(subject_id, scope, purpose, **kwargs)
    return record
218
+
219
+
220
def revoke_consent(subject_id: str, scope: str, **kwargs: Any) -> bool:
    """Revoke a consent grant on the module-level :class:`ConsentBoundary`.

    Extra keyword arguments are forwarded unchanged to
    :meth:`ConsentBoundary.revoke`, whose result is returned.
    """
    was_removed = _boundary.revoke(subject_id, scope, **kwargs)
    return was_removed
223
+
224
+
225
def check_consent(subject_id: str, scope: str, **kwargs: Any) -> bool:
    """Check consent on the module-level :class:`ConsentBoundary`.

    Extra keyword arguments are forwarded unchanged to
    :meth:`ConsentBoundary.check`, whose result is returned.
    """
    is_allowed = _boundary.check(subject_id, scope, **kwargs)
    return is_allowed