spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/sdk/cec.py ADDED
@@ -0,0 +1,1215 @@
1
+ """spanforge.sdk.cec — SpanForge sf-cec Compliance Evidence Chain client (Phase 5).
2
+
3
+ Implements the full sf-cec API surface for Phase 5 of the SpanForge roadmap.
4
+ All operations run locally in-process (zero external dependencies beyond the
5
+ standard library) when ``config.endpoint`` is empty or the remote service is
6
+ unreachable and ``local_fallback_enabled`` is ``True``.
7
+
8
+ Architecture
9
+ ------------
10
+ * :meth:`build_bundle` is the **primary entry point**. It orchestrates
11
+ evidence collection from sf-audit, regulatory clause mapping, ZIP assembly,
12
+ HMAC-SHA256 manifest signing, and (in BYOS mode) upload.
13
+ * :meth:`verify_bundle` re-computes the manifest HMAC and validates the
14
+ embedded ``chain_proof.json`` and RFC 3161 timestamp stub.
15
+ * :meth:`generate_dpa` produces a GDPR Article 28 Data Processing Agreement
16
+ using evidence records and the provided controller/processor details.
17
+ * :meth:`get_status` reports service health and session statistics.
18
+
19
+ ZIP bundle structure (CEC-002)
20
+ -------------------------------
21
+ ``halluccheck_cec_{project}_{date}.zip`` containing:
22
+
23
+ * ``manifest.json`` — record inventory + HMAC signature
24
+ * ``score_records/`` — NDJSON for ``halluccheck.score.v1``
25
+ * ``bias_reports/`` — NDJSON for ``halluccheck.bias.v1``
26
+ * ``prri_records/`` — NDJSON for ``halluccheck.prri.v1``
27
+ * ``drift_events/`` — NDJSON for ``halluccheck.drift.v1``
28
+ * ``pii_detections/`` — NDJSON for ``halluccheck.pii.v1``
29
+ * ``gate_evaluations/`` — NDJSON for ``halluccheck.gate.v1``
30
+ * ``clause_map.json`` — regulatory clause mapping
31
+ * ``attestation.json`` — HMAC-signed attestation (PDF optional)
32
+ * ``chain_proof.json`` — ``verify_chain()`` result
33
+ * ``rfc3161_timestamp.tsr`` — RFC 3161 timestamp stub
34
+
35
+ Supported regulatory frameworks (CEC-010 through CEC-014)
36
+ ----------------------------------------------------------
37
+ * ``eu_ai_act`` — EU AI Act Articles 9, 10, 12, 13, 14, 15
38
+ * ``iso_42001`` — ISO/IEC 42001 Clauses 6.1, 8.3, 9.1, 10
39
+ * ``nist_ai_rmf`` — NIST AI RMF GOVERN, MAP, MEASURE, MANAGE
40
+ * ``iso27001`` — ISO/IEC 27001 Annex A controls A.12.4.x
41
+ * ``soc2`` — SOC 2 Type II CC6, CC7, CC9
42
+
43
+ BYOS env var (inherits from sf-audit)
44
+ ---------------------------------------
45
+ ``SPANFORGE_AUDIT_BYOS_PROVIDER`` = ``s3|azure|gcs|r2``
46
+
47
+ Security requirements
48
+ ---------------------
49
+ * HMAC signing keys are **never** logged or included in exception messages.
50
+ * ZIP files are written to the system temp directory; callers are responsible
51
+ for moving or deleting them after use.
52
+ * Thread-safety: all in-memory counters use locks.
53
+ """
54
+
55
+ from __future__ import annotations
56
+
57
+ import hashlib
58
+ import hmac as _hmac
59
+ import json
60
+ import logging
61
+ import os
62
+ import tempfile
63
+ import threading
64
+ import uuid
65
+ import zipfile
66
+ from dataclasses import dataclass
67
+ from datetime import datetime, timedelta, timezone
68
+ from pathlib import Path
69
+ from typing import Any
70
+
71
+ from spanforge.sdk._base import SFClientConfig, SFServiceClient
72
+ from spanforge.sdk._exceptions import (
73
+ SFCECBuildError,
74
+ SFCECError, # noqa: F401 (re-exported)
75
+ SFCECExportError,
76
+ SFCECVerifyError,
77
+ )
78
+ from spanforge.sdk._types import (
79
+ BundleResult,
80
+ BundleVerificationResult,
81
+ CECStatusInfo,
82
+ ClauseMapEntry,
83
+ ClauseSatisfaction,
84
+ DPADocument,
85
+ )
86
+
87
+ __all__ = ["SFCECClient"]
88
+
89
+ _log = logging.getLogger(__name__)
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # Constants
93
+ # ---------------------------------------------------------------------------
94
+
95
+ #: Supported regulatory framework identifiers.
96
+ SUPPORTED_FRAMEWORKS: frozenset[str] = frozenset(
97
+ {"eu_ai_act", "iso_42001", "nist_ai_rmf", "iso27001", "soc2"}
98
+ )
99
+
100
+ #: Default bundle URL expiry in hours (CEC-004).
101
+ _BUNDLE_URL_EXPIRY_HOURS: int = 24
102
+
103
+ #: Insecure default signing key — warns if used in production.
104
+ _INSECURE_DEFAULT_KEY: str = "spanforge-insecure-default-do-not-use-in-production"
105
+
106
+ # ---------------------------------------------------------------------------
107
+ # CEC contribution table: schema_key -> ZIP directory name
108
+ # ---------------------------------------------------------------------------
109
+
110
+ _SCHEMA_TO_DIR: dict[str, str] = {
111
+ "halluccheck.score.v1": "score_records",
112
+ "halluccheck.bias.v1": "bias_reports",
113
+ "halluccheck.prri.v1": "prri_records",
114
+ "halluccheck.drift.v1": "drift_events",
115
+ "halluccheck.pii.v1": "pii_detections",
116
+ "halluccheck.gate.v1": "gate_evaluations",
117
+ }
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Regulatory framework clause definitions (CEC-010 through CEC-014)
121
+ # ---------------------------------------------------------------------------
122
+
123
+ _EU_AI_ACT_CLAUSES: list[dict[str, Any]] = [
124
+ {
125
+ "clause_id": "Art.9",
126
+ "title": "Risk Management System",
127
+ "description": (
128
+ "Evidence that a risk management system has been established and maintained "
129
+ "throughout the AI system lifecycle."
130
+ ),
131
+ "evidence_schemas": [
132
+ "halluccheck.score.v1",
133
+ "halluccheck.drift.v1",
134
+ "halluccheck.gate.v1",
135
+ ],
136
+ "min_count": 5,
137
+ },
138
+ {
139
+ "clause_id": "Art.10",
140
+ "title": "Data and Data Governance",
141
+ "description": (
142
+ "Evidence of data governance practices including PII handling and bias monitoring."
143
+ ),
144
+ "evidence_schemas": ["halluccheck.pii.v1", "halluccheck.bias.v1"],
145
+ "min_count": 5,
146
+ },
147
+ {
148
+ "clause_id": "Art.12",
149
+ "title": "Record-keeping",
150
+ "description": (
151
+ "Automatically generated logs enabling reconstruction of events that "
152
+ "presented a risk to health, safety or fundamental rights."
153
+ ),
154
+ "evidence_schemas": list(_SCHEMA_TO_DIR.keys()),
155
+ "min_count": 10,
156
+ },
157
+ {
158
+ "clause_id": "Art.13",
159
+ "title": "Transparency and Provision of Information",
160
+ "description": (
161
+ "Evidence of model output traceability and explainability sufficient "
162
+ "to inform users of capabilities and limitations."
163
+ ),
164
+ "evidence_schemas": ["halluccheck.score.v1", "halluccheck.prri.v1"],
165
+ "min_count": 5,
166
+ },
167
+ {
168
+ "clause_id": "Art.14",
169
+ "title": "Human Oversight",
170
+ "description": (
171
+ "Evidence of human-in-the-loop mechanisms and escalation triggers "
172
+ "allowing natural persons to override AI decisions."
173
+ ),
174
+ "evidence_schemas": ["halluccheck.gate.v1", "halluccheck.drift.v1"],
175
+ "min_count": 5,
176
+ },
177
+ {
178
+ "clause_id": "Art.15",
179
+ "title": "Accuracy, Robustness and Cybersecurity",
180
+ "description": (
181
+ "Evidence that the AI system achieves appropriate levels of accuracy "
182
+ "and is resilient to adversarial inputs."
183
+ ),
184
+ "evidence_schemas": [
185
+ "halluccheck.score.v1",
186
+ "halluccheck.drift.v1",
187
+ "halluccheck.bias.v1",
188
+ ],
189
+ "min_count": 5,
190
+ },
191
+ ]
192
+
193
+ _ISO_42001_CLAUSES: list[dict[str, Any]] = [
194
+ {
195
+ "clause_id": "6.1",
196
+ "title": "Actions to Address Risks and Opportunities",
197
+ "description": (
198
+ "Evidence of risk identification and treatment records for AI system impacts."
199
+ ),
200
+ "evidence_schemas": [
201
+ "halluccheck.drift.v1",
202
+ "halluccheck.gate.v1",
203
+ "halluccheck.score.v1",
204
+ ],
205
+ "min_count": 5,
206
+ },
207
+ {
208
+ "clause_id": "8.3",
209
+ "title": "AI System Impact Assessment",
210
+ "description": (
211
+ "Records demonstrating assessment of AI system impact on individuals and society."
212
+ ),
213
+ "evidence_schemas": ["halluccheck.pii.v1", "halluccheck.bias.v1"],
214
+ "min_count": 5,
215
+ },
216
+ {
217
+ "clause_id": "9.1",
218
+ "title": "Monitoring, Measurement, Analysis and Evaluation",
219
+ "description": (
220
+ "Continuous telemetry supporting measurement and evaluation of the AI "
221
+ "management system."
222
+ ),
223
+ "evidence_schemas": [
224
+ "halluccheck.score.v1",
225
+ "halluccheck.drift.v1",
226
+ "halluccheck.gate.v1",
227
+ ],
228
+ "min_count": 5,
229
+ },
230
+ {
231
+ "clause_id": "10",
232
+ "title": "Improvement — Nonconformity and Corrective Action",
233
+ "description": (
234
+ "Audit and gate events documenting corrective actions taken in response "
235
+ "to AI system nonconformities."
236
+ ),
237
+ "evidence_schemas": ["halluccheck.gate.v1", "halluccheck.score.v1"],
238
+ "min_count": 5,
239
+ },
240
+ ]
241
+
242
+ _NIST_AI_RMF_CLAUSES: list[dict[str, Any]] = [
243
+ {
244
+ "clause_id": "GOVERN",
245
+ "title": "Policies, Accountability and Organizational Culture",
246
+ "description": (
247
+ "Evidence of policies, accountability assignments, and culture supporting "
248
+ "responsible AI deployment."
249
+ ),
250
+ "evidence_schemas": ["halluccheck.gate.v1", "halluccheck.score.v1"],
251
+ "min_count": 5,
252
+ },
253
+ {
254
+ "clause_id": "MAP",
255
+ "title": "Context, Risk Identification and Categorization",
256
+ "description": (
257
+ "Evidence of AI context documentation, risk identification, and impact categorization."
258
+ ),
259
+ "evidence_schemas": [
260
+ "halluccheck.prri.v1",
261
+ "halluccheck.bias.v1",
262
+ "halluccheck.drift.v1",
263
+ ],
264
+ "min_count": 5,
265
+ },
266
+ {
267
+ "clause_id": "MEASURE",
268
+ "title": "Evaluation, Monitoring and Measurement",
269
+ "description": (
270
+ "Continuous evaluation and monitoring evidence demonstrating AI system "
271
+ "performance and risk measurement."
272
+ ),
273
+ "evidence_schemas": [
274
+ "halluccheck.score.v1",
275
+ "halluccheck.drift.v1",
276
+ "halluccheck.gate.v1",
277
+ ],
278
+ "min_count": 5,
279
+ },
280
+ {
281
+ "clause_id": "MANAGE",
282
+ "title": "Response, Recovery and Residual Risk",
283
+ "description": (
284
+ "Evidence of incident response plans, recovery procedures, and residual "
285
+ "risk acceptance."
286
+ ),
287
+ "evidence_schemas": ["halluccheck.gate.v1", "halluccheck.drift.v1"],
288
+ "min_count": 5,
289
+ },
290
+ ]
291
+
292
+ _ISO_27001_CLAUSES: list[dict[str, Any]] = [
293
+ {
294
+ "clause_id": "A.12.4.1",
295
+ "title": "Event Logging",
296
+ "description": (
297
+ "Audit log evidence demonstrating that event logs are produced and maintained."
298
+ ),
299
+ "evidence_schemas": list(_SCHEMA_TO_DIR.keys()),
300
+ "min_count": 10,
301
+ },
302
+ {
303
+ "clause_id": "A.12.4.2",
304
+ "title": "Protection of Log Information",
305
+ "description": (
306
+ "HMAC chain evidence demonstrating that audit logs are protected against tampering."
307
+ ),
308
+ "evidence_schemas": list(_SCHEMA_TO_DIR.keys()),
309
+ "min_count": 10,
310
+ },
311
+ {
312
+ "clause_id": "A.12.4.3",
313
+ "title": "Administrator and Operator Logs",
314
+ "description": "Evidence of system administrator activity logging.",
315
+ "evidence_schemas": ["halluccheck.gate.v1", "halluccheck.score.v1"],
316
+ "min_count": 5,
317
+ },
318
+ ]
319
+
320
+ _SOC2_CLAUSES: list[dict[str, Any]] = [
321
+ {
322
+ "clause_id": "CC6",
323
+ "title": "Logical and Physical Access Controls",
324
+ "description": (
325
+ "Evidence of actor-based access controls, audit trails, and PII protection."
326
+ ),
327
+ "evidence_schemas": ["halluccheck.pii.v1", "halluccheck.gate.v1"],
328
+ "min_count": 5,
329
+ },
330
+ {
331
+ "clause_id": "CC7",
332
+ "title": "System Operations — Anomaly and Threat Detection",
333
+ "description": (
334
+ "Drift and gate events demonstrating anomaly monitoring and threat "
335
+ "detection in AI pipelines."
336
+ ),
337
+ "evidence_schemas": ["halluccheck.drift.v1", "halluccheck.gate.v1"],
338
+ "min_count": 5,
339
+ },
340
+ {
341
+ "clause_id": "CC9",
342
+ "title": "Risk Mitigation",
343
+ "description": (
344
+ "Score and gate telemetry demonstrating risk identification and mitigation processes."
345
+ ),
346
+ "evidence_schemas": ["halluccheck.score.v1", "halluccheck.gate.v1"],
347
+ "min_count": 5,
348
+ },
349
+ ]
350
+
351
+ _FRAMEWORK_CLAUSES: dict[str, list[dict[str, Any]]] = {
352
+ "eu_ai_act": _EU_AI_ACT_CLAUSES,
353
+ "iso_42001": _ISO_42001_CLAUSES,
354
+ "nist_ai_rmf": _NIST_AI_RMF_CLAUSES,
355
+ "iso27001": _ISO_27001_CLAUSES,
356
+ "soc2": _SOC2_CLAUSES,
357
+ }
358
+
359
+ # ---------------------------------------------------------------------------
360
+ # DPA template text
361
+ # ---------------------------------------------------------------------------
362
+
363
+ _DPA_TEMPLATE = """\
364
+ DATA PROCESSING AGREEMENT
365
+ (GDPR Article 28 / Module 2 Standard Contractual Clauses)
366
+
367
+ Parties
368
+ -------
369
+ Controller: {controller_name}
370
+ Address: {controller_address}
371
+
372
+ Processor: {processor_name}
373
+ Address: {processor_address}
374
+
375
+ 1. Subject Matter and Duration
376
+ 1.1 This Agreement governs the processing of personal data by the Processor
377
+ on behalf of the Controller in connection with the SpanForge AI
378
+ observability platform for project: {project_id}.
379
+ 1.2 Processing commences on the date of signature and continues for the
380
+ retention period specified in Clause 5.
381
+
382
+ 2. Nature and Purpose of Processing
383
+ {purposes_block}
384
+
385
+ 3. Categories of Personal Data
386
+ {data_categories_block}
387
+
388
+ 4. Categories of Data Subjects
389
+ {data_subjects_block}
390
+
391
+ 5. Retention
392
+ 5.1 {retention_period}
393
+ 5.2 Upon termination the Processor shall, at the choice of the Controller,
394
+ delete or return all personal data unless Union or Member State law
395
+ requires storage.
396
+
397
+ 6. Sub-processors
398
+ 6.1 The Controller authorises engagement of the following sub-processors:
399
+ {sub_processors_block}
400
+ 6.2 The Processor shall impose the same data protection obligations on any
401
+ sub-processor by way of a binding written agreement.
402
+
403
+ 7. Technical and Organisational Security Measures (Article 32)
404
+ {security_measures_block}
405
+
406
+ 8. Assistance and Audit Rights
407
+ 8.1 The Processor shall assist the Controller with data subject requests,
408
+ security incident notifications, and DPIA obligations.
409
+ 8.2 The Controller may audit compliance with this Agreement upon 30 days'
410
+ written notice.
411
+
412
+ 9. Cross-border Transfers
413
+ Transfer mechanism: {transfer_mechanism}
414
+ SCC Module: {scc_clauses}
415
+
416
+ 10. Signatures
417
+ Controller: _________________________ Date: _________
418
+ Processor: _________________________ Date: _________
419
+
420
+ ---
421
+ Document ID: {document_id}
422
+ Generated: {generated_at}
423
+ HMAC-SHA256 (document): {doc_hmac}
424
+ """
425
+
426
+
427
+ # ---------------------------------------------------------------------------
428
+ # Internal helpers
429
+ # ---------------------------------------------------------------------------
430
+
431
+
432
def _get_signing_key() -> str:
    """Return the HMAC signing key used for CEC bundles.

    The key is read from the ``SPANFORGE_SIGNING_KEY`` environment variable,
    with surrounding whitespace stripped. When the variable is unset, empty,
    or set to a publicly known placeholder, a warning is logged and the
    built-in insecure default key is returned so local use keeps working.

    Returns:
        The signing key to use. The key itself is never written to the log.
    """
    key = os.environ.get("SPANFORGE_SIGNING_KEY", "").strip()
    # Explicitly configuring the built-in default key is exactly as weak as
    # configuring nothing, so it must trigger the same warning as the
    # ``spanforge-default`` placeholder instead of being accepted silently.
    if not key or key in ("spanforge-default", _INSECURE_DEFAULT_KEY):
        _log.warning(
            "SPANFORGE_SIGNING_KEY is not set or uses the insecure default value. "
            "Set a strong secret before generating CEC bundles for production. "
            "Example: export SPANFORGE_SIGNING_KEY=$(openssl rand -hex 32)"
        )
        return _INSECURE_DEFAULT_KEY
    return key
443
+
444
+
445
+ def _hmac_sign(data: bytes, key: str) -> str:
446
+ """Return ``hmac-sha256:<hex>`` for *data* signed with *key*."""
447
+ digest = _hmac.new(key.encode(), data, hashlib.sha256).hexdigest()
448
+ return f"hmac-sha256:{digest}"
449
+
450
+
451
def _compute_clause_map(
    frameworks: list[str],
    record_counts: dict[str, int],
) -> list[ClauseMapEntry]:
    """Build the clause map for the given frameworks and record counts.

    For every clause of every requested framework, the evidence total is the
    sum of ``record_counts`` over the clause's ``evidence_schemas``. A clause
    is ``SATISFIED`` when that total reaches its ``min_count``, ``PARTIAL``
    when it is positive but below it, and ``GAP`` when it is zero.

    Framework names are matched case-insensitively. Hyphens and spaces are
    treated like underscores, and a separator-insensitive fallback is applied
    so that ``"iso-27001"`` or ``"iso42001"`` resolve to the canonical keys
    ``"iso27001"`` / ``"iso_42001"``.

    Args:
        frameworks: Framework identifiers to map.
        record_counts: Mapping of schema key to number of evidence records.

    Returns:
        One entry per clause, in framework order then table order. Frameworks
        that cannot be resolved contribute no entries (a warning is logged).
    """
    # The canonical keys are inconsistent about separators ("iso_42001" vs
    # "iso27001"), so plain "-" -> "_" normalisation alone cannot match every
    # reasonable spelling. Index the keys with all underscores removed.
    squashed_to_key = {key.replace("_", ""): key for key in _FRAMEWORK_CLAUSES}

    entries: list[ClauseMapEntry] = []
    for fw in frameworks:
        fw_norm = fw.strip().lower().replace("-", "_").replace(" ", "_")
        if fw_norm not in _FRAMEWORK_CLAUSES:
            fw_norm = squashed_to_key.get(fw_norm.replace("_", ""), fw_norm)

        clauses = _FRAMEWORK_CLAUSES.get(fw_norm)
        if clauses is None:
            # Previously dropped without a trace; make the omission visible.
            _log.warning("sf-cec: unknown framework %r ignored in clause map", fw)
            continue

        for clause_def in clauses:
            total_evidence = sum(
                record_counts.get(sk, 0) for sk in clause_def["evidence_schemas"]
            )
            min_count: int = clause_def["min_count"]
            if total_evidence >= min_count:
                status = ClauseSatisfaction.SATISFIED
            elif total_evidence > 0:
                status = ClauseSatisfaction.PARTIAL
            else:
                status = ClauseSatisfaction.GAP

            entries.append(
                ClauseMapEntry(
                    framework=fw_norm,
                    clause_id=clause_def["clause_id"],
                    title=clause_def["title"],
                    status=status,
                    evidence_count=total_evidence,
                    evidence_ids=[],
                    description=clause_def["description"],
                )
            )
    return entries
482
+
483
+
484
+ def _build_rfc3161_stub(zip_bytes: bytes) -> bytes:
485
+ """Return a minimal RFC 3161 timestamp stub (not a real TSA response).
486
+
487
+ In production this would call a TSA endpoint. The stub records the
488
+ SHA-256 digest and timestamp for local verification.
489
+ """
490
+ digest = hashlib.sha256(zip_bytes).hexdigest()
491
+ stub = {
492
+ "version": 1,
493
+ "policy": "spanforge.local.stub",
494
+ "hashAlgorithm": "sha256",
495
+ "messageImprint": digest,
496
+ "serialNumber": str(uuid.uuid4().int),
497
+ "genTime": datetime.now(timezone.utc).isoformat(),
498
+ "note": (
499
+ "LOCAL STUB — not a qualified TSA response. "
500
+ "Replace with a real RFC 3161 TSA response for production use."
501
+ ),
502
+ }
503
+ return json.dumps(stub, indent=2).encode()
504
+
505
+
506
+ # ---------------------------------------------------------------------------
507
+ # Main client
508
+ # ---------------------------------------------------------------------------
509
+
510
+
511
+ @dataclass
512
+ class _CECSessionStats:
513
+ """In-memory session statistics for sf-cec."""
514
+
515
+ bundle_count: int = 0
516
+ last_bundle_at: str | None = None
517
+
518
+
519
+ class SFCECClient(SFServiceClient):
520
+ """SpanForge Compliance Evidence Chain client (Phase 5).
521
+
522
+ Provides ZIP bundle assembly, regulatory clause mapping, bundle
523
+ verification, and GDPR Article 28 DPA generation.
524
+
525
+ All operations work fully in local mode (no network) when
526
+ ``config.endpoint`` is empty or unreachable.
527
+
528
+ Args:
529
+ config: :class:`~spanforge.sdk._base.SFClientConfig` instance. Pass
530
+ ``SFClientConfig.from_env()`` for auto-configuration from
531
+ environment variables.
532
+
533
+ Example::
534
+
535
+ from spanforge.sdk import sf_cec, sf_audit
536
+ from datetime import date
537
+
538
+ result = sf_cec.build_bundle(
539
+ project_id="my-project",
540
+ date_range=("2026-01-01", "2026-03-31"),
541
+ frameworks=["eu_ai_act", "iso_42001"],
542
+ )
543
+ print(result.bundle_id)
544
+ print(result.download_url)
545
+
546
+ # Verify later
547
+ vr = sf_cec.verify_bundle(result.zip_path)
548
+ assert vr.overall_valid
549
+ """
550
+
551
def __init__(self, config: SFClientConfig) -> None:
    """Initialise the client for the ``cec`` service.

    Args:
        config: Client configuration, forwarded to the base-class
            initialiser together with ``service_name="cec"``.
    """
    super().__init__(config, service_name="cec")
    # CEC-004: in-memory bundle registry {bundle_id -> BundleResult}
    self._bundle_registry: dict[str, BundleResult] = {}
    self._stats = _CECSessionStats()
    self._lock = threading.Lock()
    # Resolved once at construction time from the process environment.
    self._byos_provider = self._detect_byos()
558
+
559
+ # ------------------------------------------------------------------
560
+ # Internal helpers
561
+ # ------------------------------------------------------------------
562
+
563
+ def _detect_byos(self) -> str | None:
564
+ """Detect BYOS provider from env var."""
565
+ _byos_providers = frozenset({"s3", "azure", "gcs", "r2"})
566
+ raw = os.environ.get("SPANFORGE_AUDIT_BYOS_PROVIDER", "").strip().lower()
567
+ return raw if raw in _byos_providers else None
568
+
569
def _collect_records(
    self,
    project_id: str,
    date_range: tuple[str, str],
) -> dict[str, list[dict[str, Any]]]:
    """Export audit records for every CEC schema key.

    Each schema key of the CEC contribution table is exported through a
    freshly constructed audit client. Exported items that are not already
    dicts are represented by their ``__dict__``.

    Args:
        project_id: Project filter; an empty value is passed on as ``None``.
        date_range: ``(start, end)`` filter; a falsy value is passed on as
            ``None``.

    Returns:
        Mapping of schema key to a list of record dicts. A schema whose
        export raises is mapped to an empty list (the error is logged at
        debug level only).
    """
    from spanforge.sdk.audit import SFAuditClient

    audit_client = SFAuditClient(self._config)
    collected: dict[str, list[dict[str, Any]]] = {}

    for schema_key in _SCHEMA_TO_DIR:
        try:
            # Built inside the ``try`` on purpose: a malformed date_range is
            # treated like any other export failure for this schema.
            window = (date_range[0], date_range[1]) if date_range else None
            exported = audit_client.export(
                schema_key=schema_key,
                project_id=project_id or None,
                date_range=window,
            )
            rows = []
            for rec in exported:
                rows.append(rec if isinstance(rec, dict) else rec.__dict__)
            collected[schema_key] = rows
        except Exception as exc:  # pragma: no cover  # noqa: PERF203
            _log.debug("sf-cec: export skipped for %s: %s", schema_key, exc)
            collected[schema_key] = []

    return collected
599
+
600
def _assemble_zip(
    self,
    project_id: str,
    date_range: tuple[str, str],
    records: dict[str, list[dict[str, Any]]],
    clause_map: list[ClauseMapEntry],
    chain_proof: dict[str, Any],
) -> tuple[Path, str, dict[str, int]]:
    """Assemble the CEC ZIP bundle in the system temp directory.

    The archive receives, in this order: one ``<dir>/records.ndjson`` per
    schema key, ``clause_map.json``, ``chain_proof.json``,
    ``attestation.json``, ``manifest.json`` (including its HMAC) and
    ``rfc3161_timestamp.tsr``.

    Args:
        project_id: Project identifier; also used, sanitised, in the file name.
        date_range: ``(start, end)`` pair recorded in manifest and attestation.
        records: Mapping of schema key to record dicts. Every key must be
            present in the CEC contribution table.
        clause_map: Clause entries written to ``clause_map.json``.
        chain_proof: Document written verbatim to ``chain_proof.json``.

    Returns:
        ``(zip_path, hmac_manifest, record_counts)``.

    Raises:
        KeyError: If *records* contains an unknown schema key.
        OSError: If the archive cannot be written. A partially written
            archive is removed before the error propagates.
    """
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    # project_id becomes part of a file name: neutralise everything that
    # could be read as a path or drive separator ("/", "\\", ":") as well
    # as NUL and spaces, so the bundle cannot escape the temp directory.
    unsafe_chars = str.maketrans(
        {"/": "_", "\\": "_", ":": "_", "\0": "_", " ": "_"}
    )
    safe_project = project_id.translate(unsafe_chars) or "default"
    zip_name = f"halluccheck_cec_{safe_project}_{today}.zip"
    # NOTE(review): the name is predictable and the temp directory may be
    # shared; consider a private directory if bundles are sensitive.
    zip_path = Path(tempfile.gettempdir()) / zip_name

    record_counts: dict[str, int] = {sk: len(v) for sk, v in records.items()}
    generated_at = datetime.now(timezone.utc).isoformat()

    # Build manifest (before signing)
    manifest: dict[str, Any] = {
        "bundle_schema": "spanforge.cec.v1",
        "project_id": project_id,
        "date_range": list(date_range),
        "generated_at": generated_at,
        "record_counts": record_counts,
        "frameworks": sorted({e.framework for e in clause_map}),
    }

    signing_key = _get_signing_key()

    try:
        # The file object is opened here (rather than by ZipFile) so that it
        # can be flushed before the archive is re-read for the timestamp stub.
        with zip_path.open("wb") as raw, zipfile.ZipFile(
            raw, "w", zipfile.ZIP_DEFLATED
        ) as zf:
            # Write evidence NDJSON files
            for schema_key, recs in records.items():
                dir_name = _SCHEMA_TO_DIR[schema_key]
                ndjson_bytes = "\n".join(
                    json.dumps(r, default=str) for r in recs
                ).encode()
                zf.writestr(f"{dir_name}/records.ndjson", ndjson_bytes)

            # clause_map.json
            clause_map_doc = [
                {
                    "framework": e.framework,
                    "clause_id": e.clause_id,
                    "title": e.title,
                    "status": e.status.value,
                    "evidence_count": e.evidence_count,
                    "description": e.description,
                }
                for e in clause_map
            ]
            zf.writestr("clause_map.json", json.dumps(clause_map_doc, indent=2))

            # chain_proof.json
            zf.writestr("chain_proof.json", json.dumps(chain_proof, indent=2))

            # attestation.json (HMAC-signed attestation)
            attestation_doc = self._build_attestation(
                project_id, date_range, record_counts, clause_map, generated_at
            )
            zf.writestr(
                "attestation.json",
                json.dumps(attestation_doc, indent=2, default=str),
            )

            # Manifest HMAC (signs the canonical manifest bytes, i.e. the
            # manifest without its own "hmac" field)
            manifest_bytes = json.dumps(manifest, sort_keys=True).encode()
            hmac_manifest = _hmac_sign(manifest_bytes, signing_key)
            manifest["hmac"] = hmac_manifest
            zf.writestr("manifest.json", json.dumps(manifest, indent=2))

            # RFC 3161 timestamp stub (CEC-020 local mode), computed over
            # the archive content written so far. Buffered writes must reach
            # the file first; otherwise the digest would cover an arbitrary
            # (possibly empty) prefix of the data.
            raw.flush()
            tsr_stub = _build_rfc3161_stub(
                zip_path.read_bytes() if zip_path.exists() else b""
            )
            zf.writestr("rfc3161_timestamp.tsr", tsr_stub)
    except Exception:
        # The caller never learns the path when assembly fails, so a
        # truncated, unsigned archive would otherwise linger in temp.
        try:
            zip_path.unlink()
        except OSError:
            pass
        raise

    return zip_path, hmac_manifest, record_counts
677
+
678
def _build_attestation(
    self,
    project_id: str,
    date_range: tuple[str, str],
    record_counts: dict[str, int],
    clause_map: list[ClauseMapEntry],
    generated_at: str,
) -> dict[str, Any]:
    """Build the HMAC-signed attestation document.

    The overall status is ``"GAP"`` if any clause is a gap, ``"PARTIAL"`` if
    there are no gaps but at least one partial clause, and ``"SATISFIED"``
    otherwise (which includes an empty *clause_map*).

    Args:
        project_id: Project the attestation refers to.
        date_range: ``(from, to)`` period covered.
        record_counts: Evidence record counts per schema key.
        clause_map: Clause entries summarised in the document.
        generated_at: Generation timestamp string, stored as given.

    Returns:
        The attestation dict with an ``hmac_sig`` entry. The signature
        covers only project id, period, generation time and overall status.
    """

    def _count(target: ClauseSatisfaction) -> int:
        # Number of clause entries whose status equals *target*.
        return len([entry for entry in clause_map if entry.status == target])

    period_from, period_to = date_range[0], date_range[1]
    total_records = sum(record_counts.values())
    satisfied = _count(ClauseSatisfaction.SATISFIED)
    partial = _count(ClauseSatisfaction.PARTIAL)
    gaps = _count(ClauseSatisfaction.GAP)

    # Gaps dominate partials; partials dominate a clean result.
    if gaps != 0:
        overall = "GAP"
    elif partial != 0:
        overall = "PARTIAL"
    else:
        overall = "SATISFIED"

    clause_rows = []
    for entry in clause_map:
        clause_rows.append(
            {
                "framework": entry.framework,
                "clause_id": entry.clause_id,
                "status": entry.status.value,
                "evidence_count": entry.evidence_count,
            }
        )

    doc: dict[str, Any] = {
        "schema": "spanforge.cec.attestation.v1",
        "project_id": project_id,
        "period_from": period_from,
        "period_to": period_to,
        "generated_at": generated_at,
        "generated_by": "spanforge.sdk.cec v1",
        "total_evidence_records": total_records,
        "overall_status": overall,
        "satisfied_clauses": satisfied,
        "partial_clauses": partial,
        "gap_clauses": gaps,
        "clauses": clause_rows,
    }

    # Canonical (key-sorted) payload that the signature is computed over.
    signed_fields = {
        "project_id": project_id,
        "from": period_from,
        "to": period_to,
        "generated_at": generated_at,
        "overall_status": overall,
    }
    sig_payload = json.dumps(signed_fields, sort_keys=True)
    doc["hmac_sig"] = _hmac_sign(sig_payload.encode(), _get_signing_key())
    return doc
735
+
736
+ # ------------------------------------------------------------------
737
+ # Public API
738
+ # ------------------------------------------------------------------
739
+
740
def build_bundle(
    self,
    project_id: str,
    date_range: tuple[str, str],
    frameworks: list[str] | None = None,
) -> BundleResult:
    """Assemble a signed compliance evidence ZIP bundle (CEC-001 / CEC-002).

    Steps:

    1. Export audit records for all CEC schema keys via sf-audit.
    2. Map exported evidence to regulatory clause entries.
    3. Obtain ``verify_chain()`` result from sf-audit.
    4. Assemble ZIP with structure per CEC-002.
    5. HMAC-SHA256 sign the manifest.
    6. Return :class:`~spanforge.sdk._types.BundleResult`.

    Framework identifiers are normalised (lower-cased, ``-`` replaced by
    ``_``) once, and the normalised form is used for validation, clause
    mapping and the returned result alike.

    Args:
        project_id: Project identifier to scope evidence collection.
        date_range: ``(from_date, to_date)`` ISO-8601 date strings
            defining the evidence period.
        frameworks: List of regulatory framework identifiers to include.
            Defaults to all supported frameworks. Valid values:
            ``"eu_ai_act"``, ``"iso_42001"``, ``"nist_ai_rmf"``,
            ``"iso27001"``, ``"soc2"``.

    Returns:
        :class:`~spanforge.sdk._types.BundleResult` with bundle metadata
        and ZIP path.

    Raises:
        :exc:`~spanforge.sdk._exceptions.SFCECBuildError`:
            If any step of the bundle assembly fails.
        :exc:`ValueError`:
            If an unknown framework identifier is supplied.

    Example::

        result = sf_cec.build_bundle(
            project_id="prod-nlp",
            date_range=("2026-01-01", "2026-03-31"),
            frameworks=["eu_ai_act"],
        )
        print(result.zip_path)
    """
    requested = list(SUPPORTED_FRAMEWORKS) if frameworks is None else list(frameworks)

    # Normalise once so every later step sees the same canonical identifiers.
    canonical = [name.lower().replace("-", "_") for name in requested]

    # Report unknown identifiers exactly as the caller spelled them.
    unknown = [
        raw for raw, norm in zip(requested, canonical) if norm not in SUPPORTED_FRAMEWORKS
    ]
    if unknown:
        raise ValueError(
            f"Unknown framework(s): {unknown}. Supported: {sorted(SUPPORTED_FRAMEWORKS)}"
        )

    try:
        # Step 1: collect audit records
        records = self._collect_records(project_id, date_range)

        # Step 2: compute clause map (using the canonical framework ids)
        record_counts = {sk: len(v) for sk, v in records.items()}
        clause_map = _compute_clause_map(canonical, record_counts)

        # Step 3: obtain chain proof
        chain_proof = self._get_chain_proof(project_id)

        # Step 4 + 5: assemble ZIP + sign manifest
        zip_path, hmac_manifest, counts = self._assemble_zip(
            project_id=project_id,
            date_range=date_range,
            records=records,
            clause_map=clause_map,
            chain_proof=chain_proof,
        )

    except (SFCECBuildError, ValueError):
        raise
    except Exception as exc:
        raise SFCECBuildError(str(exc)) from exc

    bundle_id = str(uuid.uuid4())

    # Single clock read so expires_at is exactly generated_at + expiry window.
    now = datetime.now(timezone.utc)
    generated_at = now.isoformat()
    expires_at = (now + timedelta(hours=_BUNDLE_URL_EXPIRY_HOURS)).isoformat()

    result = BundleResult(
        bundle_id=bundle_id,
        download_url=zip_path.as_uri(),
        expires_at=expires_at,
        hmac_manifest=hmac_manifest,
        record_counts=counts,
        zip_path=str(zip_path),
        frameworks=canonical,
        project_id=project_id,
        generated_at=generated_at,
    )

    # Update statistics and registry together under one lock acquisition.
    with self._lock:
        self._stats.bundle_count += 1
        self._stats.last_bundle_at = generated_at
        self._bundle_registry[bundle_id] = result

    return result
847
+
848
def get_bundle(self, bundle_id: str) -> BundleResult | None:
    """Look up a bundle built earlier in this session (CEC-004).

    The bundle is never rebuilt; this is a pure registry lookup.

    Args:
        bundle_id: UUID string returned by :meth:`build_bundle`.

    Returns:
        The stored :class:`~spanforge.sdk._types.BundleResult`, or ``None``
        when *bundle_id* is not present in the registry.
    """
    with self._lock:
        found = self._bundle_registry.get(bundle_id)
    return found
862
+
863
def reissue_download_url(self, bundle_id: str) -> BundleResult:
    """Refresh the download URL and expiry of an existing bundle (CEC-004).

    The ZIP itself is left untouched; a new
    :class:`~spanforge.sdk._types.BundleResult` is created that copies the
    stored metadata, recomputes ``download_url`` from the ZIP path and sets a
    new ``expires_at``. The registry entry is replaced with the new result.

    Args:
        bundle_id: UUID string returned by :meth:`build_bundle`.

    Returns:
        The updated :class:`~spanforge.sdk._types.BundleResult`.

    Raises:
        :exc:`~spanforge.sdk._exceptions.SFCECBuildError`: If *bundle_id*
            is not in the registry or the ZIP file is no longer on disk.
    """
    with self._lock:
        previous = self._bundle_registry.get(bundle_id)

    if previous is None:
        raise SFCECBuildError(f"Bundle not found: bundle_id={bundle_id!r}")

    archive = Path(previous.zip_path)
    if not archive.exists():
        raise SFCECBuildError(
            f"Bundle zip file no longer exists: {previous.zip_path!r}. "
            "Re-build the bundle with build_bundle()."
        )

    validity = timedelta(hours=_BUNDLE_URL_EXPIRY_HOURS)
    fresh_expiry = (datetime.now(timezone.utc) + validity).isoformat()

    # Everything except the URL and the expiry is carried over unchanged.
    updated = BundleResult(
        bundle_id=previous.bundle_id,
        download_url=archive.as_uri(),
        expires_at=fresh_expiry,
        hmac_manifest=previous.hmac_manifest,
        record_counts=previous.record_counts,
        zip_path=previous.zip_path,
        frameworks=previous.frameworks,
        project_id=previous.project_id,
        generated_at=previous.generated_at,
    )

    with self._lock:
        self._bundle_registry[bundle_id] = updated

    _log.info("CEC-004: re-issued download URL for bundle_id=%s", bundle_id)
    return updated
916
+
917
def verify_bundle(self, zip_path: str) -> BundleVerificationResult:  # noqa: PLR0912,PLR0915
    """Verify the integrity of an assembled CEC bundle (CEC-005).

    Checks:

    1. Re-computes ``manifest.json`` HMAC and compares with stored value.
    2. Validates ``chain_proof.json`` structure.
    3. Checks ``rfc3161_timestamp.tsr`` is present and well-formed
       (a JSON object carrying ``genTime`` and ``messageImprint``).

    Problems found inside the archive are collected in the result's
    ``errors`` list rather than raised.

    Args:
        zip_path: Path to the CEC ZIP file to verify.

    Returns:
        :class:`~spanforge.sdk._types.BundleVerificationResult`.

    Raises:
        :exc:`~spanforge.sdk._exceptions.SFCECVerifyError`:
            If the ZIP file cannot be opened (missing, unreadable, not a
            regular file) or is severely malformed.
    """
    errors: list[str] = []
    bundle_id = "unknown"
    manifest_valid = False
    chain_valid = False
    timestamp_valid = False

    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            names = zf.namelist()

            # 1. Manifest HMAC check
            if "manifest.json" not in names:
                errors.append("manifest.json missing from bundle")
            else:
                try:
                    manifest = json.loads(zf.read("manifest.json"))
                    stored_hmac = manifest.get("hmac", "")
                    # The signature was computed over the manifest without
                    # its own "hmac" entry, serialised with sorted keys.
                    unsigned = {k: v for k, v in manifest.items() if k != "hmac"}
                    recomputed_bytes = json.dumps(unsigned, sort_keys=True).encode()
                    expected = _hmac_sign(recomputed_bytes, _get_signing_key())
                    if _hmac.compare_digest(stored_hmac, expected):
                        manifest_valid = True
                    else:
                        errors.append("manifest HMAC mismatch — bundle may be tampered")
                    # NOTE(review): the reported bundle_id is taken from the
                    # manifest's "bundle_schema" entry — confirm this is the
                    # intended identifier.
                    bundle_id = manifest.get("bundle_schema", "unknown")
                except Exception as exc:  # pragma: no cover
                    errors.append(f"manifest.json parse error: {exc}")

            # 2. Chain proof check
            if "chain_proof.json" not in names:
                errors.append("chain_proof.json missing from bundle")
            else:
                try:
                    cp = json.loads(zf.read("chain_proof.json"))
                    if isinstance(cp, dict) and "valid" in cp:
                        chain_valid = bool(cp["valid"])
                        if not chain_valid:
                            errors.append(
                                f"chain_proof reports invalid chain: "
                                f"{cp.get('error', 'unknown reason')}"
                            )
                    elif isinstance(cp, (dict, list)):
                        # Structural chain proof (list of records) — treat as valid
                        chain_valid = True
                    else:
                        # Scalars / null are not a recognisable proof; say so
                        # instead of failing silently.
                        errors.append("chain_proof.json is malformed")
                except Exception as exc:  # pragma: no cover
                    errors.append(f"chain_proof.json parse error: {exc}")

            # 3. RFC 3161 timestamp stub check
            if "rfc3161_timestamp.tsr" not in names:
                errors.append("rfc3161_timestamp.tsr missing from bundle")
            else:
                try:
                    tsr = json.loads(zf.read("rfc3161_timestamp.tsr"))
                    # Require a JSON object: a bare string or list that merely
                    # contains the key names must not pass the check.
                    if isinstance(tsr, dict) and "genTime" in tsr and "messageImprint" in tsr:
                        timestamp_valid = True
                    else:
                        errors.append("rfc3161_timestamp.tsr is malformed")
                except Exception as exc:  # pragma: no cover
                    errors.append(f"rfc3161_timestamp.tsr parse error: {exc}")

    except zipfile.BadZipFile as exc:
        raise SFCECVerifyError(f"Cannot open ZIP file: {exc}") from exc
    except FileNotFoundError as exc:
        raise SFCECVerifyError(f"Bundle file not found: {zip_path}") from exc
    except OSError as exc:
        # e.g. the path is a directory or is not readable.
        raise SFCECVerifyError(f"Cannot open ZIP file: {exc}") from exc

    overall_valid = manifest_valid and chain_valid and timestamp_valid

    return BundleVerificationResult(
        bundle_id=bundle_id,
        manifest_valid=manifest_valid,
        chain_valid=chain_valid,
        timestamp_valid=timestamp_valid,
        overall_valid=overall_valid,
        errors=errors,
    )
1013
+
1014
def generate_dpa(  # noqa: PLR0913
    self,
    project_id: str,
    controller_details: dict[str, str],
    processor_details: dict[str, str],
    *,
    processing_purposes: list[str] | None = None,
    data_categories: list[str] | None = None,
    data_subjects: list[str] | None = None,
    sub_processors: list[str] | None = None,
    transfer_mechanism: str = "SCCs",
    scc_clauses: str = "Module 2 (controller-to-processor)",
    retention_period: str = "7 years from the date of last processing",
    security_measures: list[str] | None = None,
) -> DPADocument:
    """Render a GDPR Article 28 Data Processing Agreement (CEC-015).

    Missing or empty list arguments fall back to built-in defaults. The
    rendered text embeds an HMAC computed over the document id, the
    project id and the generation timestamp.

    Args:
        project_id: Project the DPA covers.
        controller_details: Dict read for ``"name"`` and ``"address"``;
            placeholders are used for absent keys.
        processor_details: Dict read for ``"name"`` and ``"address"``;
            placeholders are used for absent keys.
        processing_purposes: List of processing purpose descriptions.
        data_categories: Categories of personal data processed.
        data_subjects: Categories of data subjects.
        sub_processors: Authorised sub-processors.
        transfer_mechanism: Cross-border transfer mechanism.
        scc_clauses: EU SCC module applied.
        retention_period: Retention period description.
        security_measures: List of technical/organisational measures.

    Returns:
        :class:`~spanforge.sdk._types.DPADocument`.

    Raises:
        :exc:`~spanforge.sdk._exceptions.SFCECExportError`:
            If DPA generation fails.
    """

    def _as_bullets(entries: list[str], indent: int = 4) -> str:
        # One "* item" line per entry, each prefixed by *indent* spaces.
        prefix = " " * indent
        lines = [f"{prefix}* {entry}" for entry in entries]
        return "\n".join(lines)

    try:
        # Parties
        controller_name = controller_details.get("name", "Data Controller")
        controller_address = controller_details.get("address", "[Address not provided]")
        processor_name = processor_details.get("name", "SpanForge Platform")
        processor_address = processor_details.get("address", "SpanForge, Inc., [Address]")

        # List sections: a missing or empty argument selects the defaults.
        purposes = processing_purposes
        if not purposes:
            purposes = [
                "AI model evaluation and scoring",
                "Compliance monitoring and audit trail generation",
                "PII detection and redaction",
            ]
        categories = data_categories
        if not categories:
            categories = [
                "Identifiers (names, email addresses)",
                "AI model inputs and outputs",
                "Usage telemetry",
            ]
        subjects = data_subjects
        if not subjects:
            subjects = [
                "Employees and contractors using AI tools",
                "End users interacting with AI-powered products",
            ]
        subs = sub_processors if sub_processors else ["None"]
        measures = security_measures
        if not measures:
            measures = [
                "HMAC-SHA256 audit log chaining",
                "Encryption at rest (AES-256)",
                "Encryption in transit (TLS 1.3)",
                "Role-based access controls",
                "Automated PII detection and redaction",
            ]

        document_id = str(uuid.uuid4())
        generated_at = datetime.now(timezone.utc).isoformat()

        # Pre-render the bullet sections, then the signature, then the text.
        purposes_block = _as_bullets(purposes)
        categories_block = _as_bullets(categories)
        subjects_block = _as_bullets(subjects)
        subs_block = _as_bullets(subs)
        measures_block = _as_bullets(measures)

        signed_payload = json.dumps(
            {
                "document_id": document_id,
                "project_id": project_id,
                "generated_at": generated_at,
            },
            sort_keys=True,
        ).encode()
        doc_hmac = _hmac_sign(signed_payload, _get_signing_key())

        text = _DPA_TEMPLATE.format(
            controller_name=controller_name,
            controller_address=controller_address,
            processor_name=processor_name,
            processor_address=processor_address,
            project_id=project_id,
            purposes_block=purposes_block,
            data_categories_block=categories_block,
            data_subjects_block=subjects_block,
            retention_period=retention_period,
            sub_processors_block=subs_block,
            security_measures_block=measures_block,
            transfer_mechanism=transfer_mechanism,
            scc_clauses=scc_clauses,
            document_id=document_id,
            generated_at=generated_at,
            doc_hmac=doc_hmac,
        )

    except (SFCECExportError, ValueError):
        raise
    except Exception as exc:
        raise SFCECExportError(str(exc)) from exc

    return DPADocument(
        project_id=project_id,
        controller_name=controller_name,
        controller_address=controller_address,
        processor_name=processor_name,
        processor_address=processor_address,
        processing_purposes=purposes,
        data_categories=categories,
        data_subjects=subjects,
        sub_processors=subs,
        transfer_mechanism=transfer_mechanism,
        retention_period=retention_period,
        security_measures=measures,
        scc_clauses=scc_clauses,
        document_id=document_id,
        generated_at=generated_at,
        text=text,
    )
1141
+
1142
+ # ------------------------------------------------------------------
1143
+ # build_bundle_async (F-10)
1144
+ # ------------------------------------------------------------------
1145
+
1146
async def build_bundle_async(
    self,
    project_id: str,
    date_range: tuple[str, str],
    frameworks: list[str] | None = None,
) -> BundleResult:
    """Async variant of :meth:`build_bundle` (F-10).

    Runs :meth:`build_bundle` in the running event loop's default executor
    via :meth:`asyncio.loop.run_in_executor`, so it can be awaited from
    async code without blocking the event loop.

    Args:
        project_id: Project identifier to scope evidence collection.
        date_range: ``(from_date, to_date)`` ISO-8601 date strings.
        frameworks: List of regulatory framework identifiers to include.

    Returns:
        :class:`~spanforge.sdk._types.BundleResult` — same as
        :meth:`build_bundle`.

    Raises:
        Whatever :meth:`build_bundle` raises; the exception propagates
        through the awaited future.
    """
    import asyncio
    import functools

    # Inside a coroutine there is always a running loop; get_running_loop()
    # is the supported way to obtain it.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None,
        functools.partial(self.build_bundle, project_id, date_range, frameworks),
    )
1175
+
1176
def get_status(self) -> CECStatusInfo:
    """Report sf-cec health together with this session's bundle statistics.

    The counters are read while holding the client lock.

    Returns:
        :class:`~spanforge.sdk._types.CECStatusInfo`.
    """
    with self._lock:
        has_byos = self._byos_provider is not None
        built = self._stats.bundle_count
        last_built = self._stats.last_bundle_at

    return CECStatusInfo(
        status="ok",
        byos_enabled=has_byos,
        bundle_count=built,
        last_bundle_at=last_built,
        frameworks_supported=sorted(SUPPORTED_FRAMEWORKS),
    )
1190
+
1191
+ # ------------------------------------------------------------------
1192
+ # Internal helpers (continued)
1193
+ # ------------------------------------------------------------------
1194
+
1195
def _get_chain_proof(self, project_id: str) -> dict[str, Any]:
    """Ask sf-audit for a chain verification result, with a stub fallback.

    Exports the project's audit records, runs ``verify_chain`` on them and
    converts the outcome into a plain dict. Any exception raised while
    exporting or verifying is logged at debug level and replaced by a stub.

    Args:
        project_id: Project whose records are exported; an empty value is
            passed to the audit client as ``None``.

    Returns:
        A dict carrying at least ``"valid"`` and ``"record_count"``, or the
        ``verify_chain`` result itself when that is already a dict.
    """
    from spanforge.sdk.audit import SFAuditClient

    client = SFAuditClient(self._config)
    try:
        # Export all records for project then verify their chain
        exported = client.export(project_id=project_id or None)
        verdict = client.verify_chain(exported)

        if hasattr(verdict, "__dict__"):
            # Object-style result: copy the fields of interest, with defaults.
            proof = {
                "valid": getattr(verdict, "valid", True),
                "record_count": getattr(verdict, "record_count", 0),
                "error": getattr(verdict, "error", None),
            }
        elif isinstance(verdict, dict):
            proof = verdict
        else:
            proof = {"valid": True, "record_count": 0}
        return proof  # noqa: TRY300
    except Exception as exc:  # pragma: no cover
        # NOTE(review): every failure is reported as a *valid* chain with a
        # "no records in store" note — confirm this best-effort is intended.
        _log.debug("sf-cec: verify_chain failed: %s", exc)
        return {"valid": True, "record_count": 0, "note": "no records in store"}