spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1777 @@
1
+ Metadata-Version: 2.4
2
+ Name: spanforge
3
+ Version: 2.0.0
4
+ Summary: SpanForge — AI lifecycle and governance platform (RFC-0001 SPANFORGE)
5
+ Project-URL: Homepage, https://github.com/veerarag1973/spanforge
6
+ Project-URL: Documentation, https://github.com/veerarag1973/spanforge/blob/main/docs/index.md
7
+ Project-URL: Source, https://github.com/veerarag1973/spanforge
8
+ Project-URL: Bug Tracker, https://github.com/veerarag1973/spanforge/issues
9
+ Project-URL: Changelog, https://github.com/veerarag1973/spanforge/blob/main/docs/changelog.md
10
+ Author: LLM Toolkit Team
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: agentic-ai,ai-compliance,ai-lifecycle,events,governance,llm,opentelemetry,schema,spanforge
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Classifier: Topic :: System :: Logging
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.9
27
+ Provides-Extra: all
28
+ Requires-Dist: anthropic>=0.25; extra == 'all'
29
+ Requires-Dist: crewai>=0.28; extra == 'all'
30
+ Requires-Dist: httpx>=0.27; extra == 'all'
31
+ Requires-Dist: jsonschema>=4.21; extra == 'all'
32
+ Requires-Dist: kafka-python>=2.0; extra == 'all'
33
+ Requires-Dist: langchain-core>=0.2; extra == 'all'
34
+ Requires-Dist: llama-index-core>=0.10; extra == 'all'
35
+ Requires-Dist: openai>=1.0; extra == 'all'
36
+ Requires-Dist: opentelemetry-sdk>=1.24; extra == 'all'
37
+ Requires-Dist: pydantic>=2.7; extra == 'all'
38
+ Requires-Dist: redis>=4.0; extra == 'all'
39
+ Provides-Extra: anthropic
40
+ Requires-Dist: anthropic>=0.25; extra == 'anthropic'
41
+ Provides-Extra: bedrock
42
+ Requires-Dist: boto3>=1.34; extra == 'bedrock'
43
+ Provides-Extra: compliance
44
+ Requires-Dist: reportlab>=4.0; extra == 'compliance'
45
+ Provides-Extra: crewai
46
+ Requires-Dist: crewai>=0.28; extra == 'crewai'
47
+ Provides-Extra: datadog
48
+ Provides-Extra: dev
49
+ Requires-Dist: httpx>=0.27; extra == 'dev'
50
+ Requires-Dist: hypothesis>=6.100; extra == 'dev'
51
+ Requires-Dist: jsonschema>=4.21; extra == 'dev'
52
+ Requires-Dist: mypy>=1.10; extra == 'dev'
53
+ Requires-Dist: pre-commit>=3.7; extra == 'dev'
54
+ Requires-Dist: pydantic>=2.7; extra == 'dev'
55
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
56
+ Requires-Dist: pytest-benchmark>=4.0; extra == 'dev'
57
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
58
+ Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
59
+ Requires-Dist: pytest>=8.0; extra == 'dev'
60
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
61
+ Provides-Extra: docs
62
+ Requires-Dist: pydata-sphinx-theme>=0.15; extra == 'docs'
63
+ Requires-Dist: sphinx>=7.0; extra == 'docs'
64
+ Provides-Extra: gemini
65
+ Requires-Dist: google-generativeai>=0.5; extra == 'gemini'
66
+ Provides-Extra: groq
67
+ Requires-Dist: groq>=0.9; extra == 'groq'
68
+ Provides-Extra: http
69
+ Requires-Dist: httpx>=0.27; extra == 'http'
70
+ Provides-Extra: jsonschema
71
+ Requires-Dist: jsonschema>=4.21; extra == 'jsonschema'
72
+ Provides-Extra: kafka
73
+ Requires-Dist: kafka-python>=2.0; extra == 'kafka'
74
+ Provides-Extra: langchain
75
+ Requires-Dist: langchain-core>=0.2; extra == 'langchain'
76
+ Provides-Extra: llamaindex
77
+ Requires-Dist: llama-index-core>=0.10; extra == 'llamaindex'
78
+ Provides-Extra: ollama
79
+ Requires-Dist: ollama>=0.2; extra == 'ollama'
80
+ Provides-Extra: openai
81
+ Requires-Dist: openai>=1.0; extra == 'openai'
82
+ Provides-Extra: otel
83
+ Requires-Dist: opentelemetry-sdk>=1.24; extra == 'otel'
84
+ Provides-Extra: presidio
85
+ Requires-Dist: presidio-analyzer>=2.2; extra == 'presidio'
86
+ Requires-Dist: presidio-anonymizer>=2.2; extra == 'presidio'
87
+ Provides-Extra: pydantic
88
+ Requires-Dist: pydantic>=2.7; extra == 'pydantic'
89
+ Provides-Extra: redis
90
+ Requires-Dist: redis>=4.0; extra == 'redis'
91
+ Provides-Extra: together
92
+ Requires-Dist: together>=1.2; extra == 'together'
93
+ Provides-Extra: worm-gcs
94
+ Requires-Dist: google-cloud-storage>=2.14; extra == 'worm-gcs'
95
+ Provides-Extra: worm-s3
96
+ Requires-Dist: boto3>=1.34; extra == 'worm-s3'
97
+ Description-Content-Type: text/markdown
98
+
99
+ <h1 align="center">spanforge</h1>
100
+
101
+ <p align="center">
102
+ <strong>The AI Compliance Platform for Agentic Systems.</strong><br/>
103
+ Ship AI applications that are auditable, regulator-ready, and privacy-safe — from day one.
104
+ </p>
105
+
106
+ <p align="center">
107
+ <em>Built on <a href="https://www.getspanforge.com/standard">RFC-0001 — the SpanForge AI Compliance Standard</a> for agentic AI systems.</em>
108
+ </p>
109
+
110
+ <p align="center">
111
+ <img src="https://img.shields.io/badge/python-3.9%2B-4c8cbf?logo=python&logoColor=white" alt="Python 3.9+"/>
112
+ <a href="https://pypi.org/project/spanforge/"><img src="https://img.shields.io/pypi/v/spanforge?color=4c8cbf&logo=pypi&logoColor=white" alt="PyPI"/></a>
113
+ <a href="https://www.getspanforge.com/standard"><img src="https://img.shields.io/badge/standard-SpanForge_RFC--0001-4c8cbf" alt="spanforge RFC-0001"/></a>
114
+ <img src="https://img.shields.io/badge/coverage-92%25-brightgreen" alt="92% test coverage"/>
115
+ <img src="https://img.shields.io/badge/tests-3331%20passing-brightgreen" alt="3331 tests"/>
116
+ <img src="https://img.shields.io/badge/version-2.0.0-4c8cbf" alt="Version 2.0.0"/>
117
+ <img src="https://img.shields.io/badge/dependencies-zero-brightgreen" alt="Zero dependencies"/>
118
+ <a href="https://github.com/veerarag1973/spanforge/blob/main/docs/index.md"><img src="https://img.shields.io/badge/docs-local-4c8cbf" alt="Documentation"/></a>
119
+ <img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT license"/>
120
+ </p>
121
+
122
+ ---
123
+
124
+ ## The problem
125
+
126
+ You're building AI applications in a world where regulators are catching up fast. The EU AI Act is in force. GDPR applies to every LLM that touches personal data. SOC 2 auditors want evidence that your AI systems are governed. And your team is stitching together ad-hoc logs, hoping they'll hold up in an audit.
127
+
128
+ **spanforge** solves this. It is a **compliance-first platform** — not a monitoring add-on — that gives every AI action in your stack a cryptographically signed, privacy-safe, regulator-ready record.
129
+
130
+ ---
131
+
132
+ ## What spanforge does
133
+
134
+ <table>
135
+ <tr>
136
+ <td width="50%">
137
+
138
+ ### Compliance & Regulatory Mapping
139
+ - Map telemetry to **EU AI Act**, **GDPR**, **SOC 2**, **HIPAA**, **ISO 42001**, **NIST AI RMF** clauses automatically
140
+ - Generate HMAC-signed **evidence packages** with gap analysis
141
+ - Track **consent boundaries**, **HITL oversight**, **model registry** governance, and **explainability** coverage
142
+ - Produce audit-ready attestations with model owner, risk tier, and status metadata
143
+
144
+ </td>
145
+ <td width="50%">
146
+
147
+ ### Privacy & Audit Infrastructure
148
+ - **PII redaction** — detect and strip sensitive data before it leaves your app
149
+ - **HMAC audit chains** — tamper-evident, blockchain-style event signing
150
+ - **GDPR subject erasure** — right-to-erasure with tombstone events that preserve chain integrity
151
+ - **Air-gapped deployment** — runs fully offline with zero egress
152
+
153
+ </td>
154
+ </tr>
155
+ <tr>
156
+ <td>
157
+
158
+ ### Governance & Controls
159
+ - **Consent boundary monitoring** — `consent.granted`, `consent.revoked`, `consent.violation` events
160
+ - **Human-in-the-loop hooks** — `hitl.queued`, `hitl.reviewed`, `hitl.escalated`, `hitl.timeout` events
161
+ - **Model registry** — register, deprecate, retire models; attestations auto-warn on ungoverned models
162
+ - **Explainability tracking** — measure what % of AI decisions have explanations attached
163
+
164
+ </td>
165
+ <td>
166
+
167
+ ### Developer Experience
168
+ - **Zero required dependencies** — pure Python 3.9+ stdlib
169
+ - **One-line setup** — `spanforge.configure()` and you're compliant
170
+ - **Auto-instrumentation** — patch OpenAI, Anthropic, LangChain, CrewAI, and more
171
+ - **18 CLI commands** — compliance checks, PII scans, audit-chain verification, all CI-ready
172
+
173
+ </td>
174
+ </tr>
175
+ </table>
176
+
177
+ ---
178
+
179
+ ## How it compares
180
+
181
+ spanforge is the only **open-standard, zero-dependency AI compliance platform**. Other tools are monitoring platforms that bolt on compliance as an afterthought. spanforge is compliance infrastructure that happens to capture the telemetry needed to prove it.
182
+
183
+ | Capability | **spanforge** | LangSmith | Langfuse | OpenLLMetry | Arize Phoenix |
184
+ |---|:---:|:---:|:---:|:---:|:---:|
185
+ | Regulatory framework mapping (EU AI Act, GDPR, SOC 2…) | ✅ | ❌ | ❌ | ❌ | ❌ |
186
+ | HMAC-signed evidence packages & attestations | ✅ | ❌ | ❌ | ❌ | ❌ |
187
+ | Consent boundary monitoring | ✅ | ❌ | ❌ | ❌ | ❌ |
188
+ | Human-in-the-loop compliance events | ✅ | ❌ | ❌ | ❌ | ❌ |
189
+ | Model registry with risk-tier governance | ✅ | ❌ | ❌ | ❌ | ❌ |
190
+ | Explainability coverage metrics | ✅ | ❌ | ❌ | ❌ | ❌ |
191
+ | Built-in PII redaction | ✅ | ❌ | ❌ | ❌ | ❌ |
192
+ | Tamper-evident audit chain | ✅ | ❌ | ❌ | ❌ | ❌ |
193
+ | GDPR subject erasure (right-to-erasure) | ✅ | ❌ | ❌ | ❌ | ❌ |
194
+ | Works fully offline / air-gapped | ✅ | ❌ | Self-host | Partial | Self-host |
195
+ | Open schema standard (RFC-driven) | ✅ | ❌ | ❌ | Partial | ❌ |
196
+ | Zero required dependencies | ✅ | ❌ | ❌ | ❌ | ❌ |
197
+ | OTLP export (any OTel backend) | ✅ | ❌ | ❌ | ✅ | ✅ |
198
+ | MIT license, no call-home | ✅ | Partial | ✅ | ✅ | ✅ |
199
+
200
+ > **Bottom line**: Others help you *watch* your AI. spanforge helps you *govern* it.
201
+
202
+ ---
203
+
204
+ ## Install
205
+
206
+ ```bash
207
+ pip install spanforge
208
+ ```
209
+
210
+ **Requires Python 3.9+.** Zero mandatory dependencies.
211
+
212
+ ### Optional extras
213
+
214
+ ```bash
215
+ pip install "spanforge[openai]" # OpenAI auto-instrumentation
216
+ pip install "spanforge[langchain]" # LangChain callback handler
217
+ pip install "spanforge[crewai]" # CrewAI callback handler
218
+ pip install "spanforge[http]" # Webhook + OTLP export
219
+ pip install "spanforge[datadog]" # Datadog APM + metrics
220
+ pip install "spanforge[kafka]" # Kafka EventStream source
221
+ pip install "spanforge[pydantic]" # Pydantic v2 model layer
222
+ pip install "spanforge[otel]" # OpenTelemetry SDK integration
223
+ pip install "spanforge[jsonschema]" # Strict JSON Schema validation
224
+ pip install "spanforge[llamaindex]" # LlamaIndex event handler
225
+ pip install "spanforge[all]" # everything above
226
+ ```
227
+
228
+ ---
229
+
230
+ ## Quick start — compliance in 5 minutes
231
+
232
+ ### 1. Configure and instrument
233
+
234
+ ```python
235
+ import spanforge
236
+
237
+ spanforge.configure(
238
+ service_name="my-agent",
239
+ signing_key="your-org-secret", # HMAC audit chain — tamper-evident
240
+ redaction_policy="gdpr", # PII stripped before export
241
+ exporter="jsonl",
242
+ endpoint="audit.jsonl",
243
+ )
244
+ ```
245
+
246
+ Every event your app emits is now **signed**, **PII-redacted**, and **stored** — with zero per-call boilerplate.
247
+
248
+ ### 2. Trace AI decisions
249
+
250
+ ```python
251
+ with spanforge.start_trace("loan-approval-agent") as trace:
252
+ with trace.llm_call("gpt-4o", temperature=0.2) as span:
253
+ decision = call_llm(prompt)
254
+ span.set_token_usage(input=512, output=200, total=712)
255
+ span.set_status("ok")
256
+ ```
257
+
258
+ ### 3. Generate compliance evidence
259
+
260
+ ```python
261
+ from spanforge.core.compliance_mapping import ComplianceMappingEngine
262
+
263
+ engine = ComplianceMappingEngine()
264
+ package = engine.generate_evidence_package(
265
+ model_id="gpt-4o",
266
+ framework="eu_ai_act",
267
+ from_date="2026-01-01",
268
+ to_date="2026-03-31",
269
+ audit_events=events,
270
+ )
271
+
272
+ print(package.attestation.coverage_pct) # e.g. 87.5%
273
+ print(package.attestation.explanation_coverage_pct) # e.g. 75.0%
274
+ print(package.attestation.model_risk_tier) # e.g. "high"
275
+ print(package.gap_report) # what's missing
276
+ ```
277
+
278
+ Or from the CLI:
279
+
280
+ ```bash
281
+ spanforge compliance generate \
282
+ --model gpt-4o \
283
+ --framework eu_ai_act \
284
+ --from 2026-01-01 --to 2026-03-31 \
285
+ audit.jsonl
286
+ ```
287
+
288
+ ### 4. Hand to your auditor
289
+
290
+ The evidence package contains:
291
+ - **Clause mappings** — which telemetry events satisfy which regulatory clauses
292
+ - **Gap analysis** — which clauses lack evidence and need attention
293
+ - **HMAC-signed attestation** — cryptographic proof the evidence hasn't been tampered with
294
+ - **Model governance metadata** — owner, risk tier, status, warnings for deprecated/retired models
295
+ - **Explanation coverage** — percentage of AI decisions with explainability records
296
+
297
+ ---
298
+
299
+ ## Regulatory framework coverage
300
+
301
+ The `ComplianceMappingEngine` maps your telemetry events to specific regulatory clauses:
302
+
303
+ | Framework | Clause | Mapped events | What it proves |
304
+ |-----------|--------|---------------|----------------|
305
+ | **GDPR** | Art. 22 | `consent.*`, `hitl.*` | Automated decisions have consent + human oversight |
306
+ | **GDPR** | Art. 25 | `llm.redact.*`, `consent.*` | Privacy by design — PII handled before export |
307
+ | **EU AI Act** | Art. 13 | `explanation.*` | AI decisions are transparent and explainable |
308
+ | **EU AI Act** | Art. 14 | `hitl.*`, `consent.*` | Human oversight of high-risk AI |
309
+ | **EU AI Act** | Annex IV.5 | `llm.guard.*`, `llm.audit.*`, `hitl.*` | Technical documentation — safety + oversight |
310
+ | **SOC 2** | CC6.1 | `llm.audit.*`, `llm.trace.*`, `model_registry.*` | Logical access controls + model governance |
311
+ | **NIST AI RMF** | MAP 1.1 | `llm.trace.*`, `llm.eval.*`, `model_registry.*`, `explanation.*` | Risk identification and mapping |
312
+ | **HIPAA** | §164.312 | `llm.redact.*`, `llm.audit.*` | PHI access controls and audit |
313
+ | **ISO 42001** | A.5–A.10 | Full event set | AI management system controls |
314
+
315
+ ---
316
+
317
+ ## Compliance event types
318
+
319
+ spanforge defines purpose-built event types for AI governance — these aren't afterthought log messages, they are first-class compliance primitives:
320
+
321
+ | Category | Event types | Purpose |
322
+ |----------|------------|---------|
323
+ | **Consent** | `consent.granted`, `consent.revoked`, `consent.violation` | Track user consent for automated processing |
324
+ | **Human-in-the-Loop** | `hitl.queued`, `hitl.reviewed`, `hitl.escalated`, `hitl.timeout` | Prove human oversight of AI decisions |
325
+ | **Model Registry** | `model_registry.registered`, `model_registry.deprecated`, `model_registry.retired` | Govern model lifecycle and risk |
326
+ | **Explainability** | `explanation.generated` | Attach explanations to AI decisions |
327
+ | **Guardrails** | `llm.guard.*` | Safety classifier outputs and block decisions |
328
+ | **PII** | `llm.redact.*` | Audit trail of what PII was found and removed |
329
+ | **Audit** | `llm.audit.*` | Access logs and chain-of-custody records |
330
+ | **Traces** | `llm.trace.*` | Model calls, tokens, latency, cost |
331
+
332
+ ---
333
+
334
+ ## Core capabilities
335
+
336
+ ### Tamper-evident audit chains
337
+
338
+ Every event is HMAC-SHA256 signed and chained to its predecessor — the same principle as a hash chain. Alter one event and verification of the chain fails.
339
+
340
+ ```python
341
+ from spanforge.signing import AuditStream, verify_chain
342
+
343
+ stream = AuditStream(org_secret="your-secret")
344
+ for event in events:
345
+ stream.append(event)
346
+
347
+ result = verify_chain(stream.events, org_secret="your-secret")
348
+ assert result.valid # any tampering → False
349
+ ```
350
+
351
+ ### PII redaction
352
+
353
+ Strip personal data before events leave your application boundary. Deep scanning with Luhn validation for credit card numbers.
354
+
355
+ ```python
356
+ from spanforge.redact import RedactionPolicy, Sensitivity
357
+
358
+ policy = RedactionPolicy(min_sensitivity=Sensitivity.PII, redacted_by="policy:gdpr-v1")
359
+ result = policy.apply(event)
360
+ # All PII fields → "[REDACTED by policy:gdpr-v1]"
361
+ ```
362
+
363
+ ### Model registry governance
364
+
365
+ Register models with ownership and risk metadata. Attestations automatically warn when models are deprecated, retired, or unregistered.
366
+
367
+ ```python
368
+ from spanforge.model_registry import ModelRegistry
369
+
370
+ registry = ModelRegistry()
371
+ registry.register("gpt-4o", owner="ml-platform", risk_tier="high")
372
+ registry.deprecate("gpt-3.5-turbo", reason="Successor available")
373
+
374
+ # Evidence packages now include:
375
+ # model_owner: "ml-platform"
376
+ # model_risk_tier: "high"
377
+ # model_status: "active"
378
+ # model_warnings: [] (or ["model 'gpt-3.5-turbo' is deprecated"])
379
+ ```
380
+
381
+ ### Explainability tracking
382
+
383
+ Measure what percentage of your AI decisions have explanations attached:
384
+
385
+ ```python
386
+ from spanforge.explain import generate_explanation
387
+
388
+ explanation = generate_explanation(
389
+ decision_event_id="evt_01HX...",
390
+ method="feature_importance",
391
+ content="Top factors: credit_score (0.42), income (0.31)...",
392
+ )
393
+ # explanation_coverage_pct in attestations = explained / total decisions
394
+ ```
395
+
396
+ ### GDPR subject erasure
397
+
398
+ Right-to-erasure with tombstone events that preserve audit chain integrity:
399
+
400
+ ```bash
401
+ spanforge audit erase audit.jsonl --subject-id user123
402
+ ```
403
+
404
+ ---
405
+
406
+ ## Auto-instrumentation
407
+
408
+ Patch supported providers once — compliance data flows automatically:
409
+
410
+ ```python
411
+ # Instrument all installed providers in one call
412
+ import spanforge.auto
413
+ spanforge.auto.setup()
414
+
415
+ # Or patch individually
416
+ from spanforge.integrations import openai as sf_openai
417
+ sf_openai.patch() # every OpenAI call → signed, redacted, compliant
418
+ sf_openai.unpatch() # restore original behaviour
419
+ ```
420
+
421
+ **Supported providers:** OpenAI, Anthropic, Amazon Bedrock, Google Gemini, Ollama, Groq, Together AI
422
+
423
+ **Supported frameworks:** LangChain, LlamaIndex, CrewAI
424
+
425
+ ---
426
+
427
+ ## Using spanforge alongside OpenTelemetry
428
+
429
+ spanforge is not an OTel replacement. OTel handles performance monitoring. spanforge adds the compliance layer OTel cannot provide — audit chains, PII redaction, consent tracking, and regulator-ready attestations.
430
+
431
+ ```python
432
+ # Your existing OTel pipeline stays untouched
433
+ from opentelemetry.sdk.trace import TracerProvider
434
+ provider = TracerProvider()
435
+
436
+ # Add spanforge's compliance layer alongside it
437
+ import spanforge
438
+ spanforge.configure(mode="otel_passthrough")
439
+
440
+ # Dual-stream: OTel for monitoring, spanforge for compliance
441
+ spanforge.configure(exporters=["otel_passthrough", "jsonl"], endpoint="audit.jsonl")
442
+ ```
443
+
444
+ ---
445
+
446
+ ## Export
447
+
448
+ Ship compliance events to any backend:
449
+
450
+ ```python
451
+ from spanforge.stream import EventStream
452
+ from spanforge.export.jsonl import JSONLExporter
453
+ from spanforge.export.otlp import OTLPExporter
454
+ from spanforge.export.datadog import DatadogExporter
455
+ from spanforge.export.grafana import GrafanaLokiExporter
456
+ from spanforge.export.cloud import CloudExporter
457
+
458
+ stream = EventStream(events)
459
+
460
+ await stream.drain(JSONLExporter("audit.jsonl")) # local file
461
+ await stream.drain(OTLPExporter("http://collector:4318/v1/traces")) # OTel collector
462
+ await stream.drain(DatadogExporter(service="my-app")) # Datadog APM
463
+ await stream.drain(GrafanaLokiExporter(url="http://loki:3100")) # Grafana Loki
464
+ await stream.drain(CloudExporter(api_key="sf_live_xxx")) # spanforge Cloud
465
+ ```
466
+
467
+ Fan-out routing for compliance alerting:
468
+
469
+ ```python
470
+ from spanforge.export.webhook import WebhookExporter
471
+
472
+ # Route guardrail violations to Slack
473
+ await stream.route(
474
+ WebhookExporter("https://hooks.slack.com/your-webhook"),
475
+ predicate=lambda e: e.event_type == "llm.guard.output.blocked",
476
+ )
477
+ ```
478
+
479
+ ---
480
+
481
+ ## CLI
482
+
483
+ 18 commands — all CI-pipeline ready:
484
+
485
+ ```bash
486
+ # Compliance
487
+ spanforge compliance generate --model gpt-4o --framework eu_ai_act \
488
+ --from 2026-01-01 --to 2026-03-31 events.jsonl
489
+ spanforge compliance check evidence.json
490
+ spanforge compliance validate-attestation evidence.json
491
+
492
+ # Audit chain
493
+ spanforge audit-chain events.jsonl # verify chain integrity
494
+ spanforge audit erase events.jsonl --subject-id user123 # GDPR erasure
495
+ spanforge audit rotate-key events.jsonl # key rotation
496
+ spanforge audit verify --input events.jsonl # verify integrity
497
+
498
+ # Privacy
499
+ spanforge scan events.jsonl --fail-on-match # CI-gate PII scan
500
+
501
+ # Validation
502
+ spanforge check # end-to-end health check
503
+ spanforge check-compat events.json # v2.0 compatibility
504
+ spanforge validate events.jsonl # JSON Schema validation
505
+
506
+ # Analysis
507
+ spanforge stats events.jsonl # counts, tokens, cost
508
+ spanforge inspect <EVENT_ID> events.jsonl # pretty-print one event
509
+ spanforge cost events.jsonl # token spend report
510
+
511
+ # Schema management
512
+ spanforge migrate events.jsonl --sign # v1→v2 migration
513
+ spanforge list-deprecated # deprecated event types
514
+ spanforge migration-roadmap # v2 migration plan
515
+ spanforge check-consumers # consumer compatibility
516
+
517
+ # Viewer
518
+ spanforge serve # local SPA trace viewer
519
+ spanforge ui # standalone HTML viewer
520
+ ```
521
+
522
+ ---
523
+
524
+ ## Event namespaces
525
+
526
+ Every event carries a typed `payload`. The built-in namespaces:
527
+
528
+ | Prefix | Dataclass | What it records |
529
+ |---|---|---|
530
+ | `consent.*` | — | User consent grants, revocations, violations |
531
+ | `hitl.*` | — | Human-in-the-loop review, escalation, timeout |
532
+ | `model_registry.*` | — | Model registration, deprecation, retirement |
533
+ | `explanation.*` | — | Explainability records for AI decisions |
534
+ | `llm.trace.*` | `SpanPayload` | Model calls — tokens, latency, cost **(frozen v2)** |
535
+ | `llm.guard.*` | `GuardPayload` | Safety classifier outputs, block decisions |
536
+ | `llm.redact.*` | `RedactPayload` | PII audit — what was found and removed |
537
+ | `llm.audit.*` | — | Access logs and chain-of-custody |
538
+ | `llm.eval.*` | `EvalScenarioPayload` | Scores, labels, evaluator identity |
539
+ | `llm.cost.*` | `CostPayload` | Per-call cost in USD |
540
+ | `llm.cache.*` | `CachePayload` | Cache hit/miss, backend, TTL |
541
+ | `llm.prompt.*` | `PromptPayload` | Prompt template version, rendered text |
542
+ | `llm.fence.*` | `FencePayload` | Topic constraints, allow/block lists |
543
+ | `llm.diff.*` | `DiffPayload` | Prompt/response delta between events |
544
+ | `llm.template.*` | `TemplatePayload` | Template registry metadata |
545
+
546
+ ---
547
+
548
+ ## Architecture
549
+
550
+ ```
551
+ spanforge/
552
+ ├── core/
553
+ │ └── compliance_mapping.py ← ComplianceMappingEngine, evidence packages, attestations
554
+ ├── compliance/ ← Programmatic compliance test suite
555
+ ├── signing.py ← HMAC audit chains, key management, multi-tenant KeyResolver
556
+ ├── redact.py ← PII detection + redaction policies
557
+ ├── model_registry.py ← Model lifecycle governance
558
+ ├── explain.py ← Explainability records
559
+ ├── consent.py ← Consent boundary events
560
+ ├── hitl.py ← Human-in-the-loop events
561
+ ├── governance.py ← Policy-based event gating
562
+ ├── event.py ← Event envelope
563
+ ├── types.py ← EventType enum (consent.*, hitl.*, model_registry.*, explanation.*, llm.*)
564
+ ├── config.py ← configure() / get_config()
565
+ ├── _span.py ← Span, AgentRun, AgentStep context managers
566
+ ├── _trace.py ← Trace + start_trace()
567
+ ├── _tracer.py ← Top-level tracing entry point
568
+ ├── _stream.py ← Internal dispatch: sample → redact → sign → export
569
+ ├── _store.py ← TraceStore ring buffer
570
+ ├── _hooks.py ← HookRegistry (lifecycle hooks)
571
+ ├── _server.py ← HTTP server (/traces, /compliance/summary)
572
+ ├── _cli.py ← 18 CLI sub-commands
573
+ ├── cost.py ← CostTracker, BudgetMonitor, @budget_alert
574
+ ├── cache.py ← SemanticCache, @cached decorator
575
+ ├── retry.py ← @retry, FallbackChain, CircuitBreaker
576
+ ├── toolsmith.py ← @tool, ToolRegistry
577
+ ├── lint/ ← AST-based instrumentation linter (AO001–AO005)
578
+ ├── export/ ← JSONL, OTLP, Webhook, Datadog, Grafana Loki, Cloud
579
+ ├── integrations/ ← OpenAI, Anthropic, LangChain, LlamaIndex, CrewAI, Ollama, Groq, Together
580
+ ├── namespaces/ ← Typed payload dataclasses
581
+ └── migrate.py ← Schema migration (v1 → v2)
582
+ ```
583
+
584
+ ---
585
+
586
+ ## What is inside the box
587
+
588
+ <table>
589
+ <thead>
590
+ <tr><th>Module</th><th>What it does</th><th>For whom</th></tr>
591
+ </thead>
592
+ <tbody>
593
+ <tr>
594
+ <td><strong>Compliance & Governance</strong></td><td colspan="2"></td>
595
+ </tr>
596
+ <tr>
597
+ <td><code>spanforge.compliance</code></td>
598
+ <td><code>ComplianceMappingEngine</code> maps telemetry to regulatory frameworks (EU AI Act, ISO 42001, NIST AI RMF, GDPR, SOC 2, HIPAA). Generates evidence packages with HMAC-signed attestations. Consent, HITL, model registry, and explainability events integrated into clause mappings. Attestations include model owner, risk tier, status, warnings, and <code>explanation_coverage_pct</code>. Also: programmatic v2.0 compatibility checks — no pytest required.</td>
599
+ <td>Compliance / legal / platform teams</td>
600
+ </tr>
601
+ <tr>
602
+ <td><code>spanforge.signing</code></td>
603
+ <td>HMAC-SHA256 event signing, tamper-evident audit chains, key strength validation, key expiry checks, environment-isolated key derivation, multi-tenant <code>KeyResolver</code> protocol, and <code>AsyncAuditStream</code></td>
604
+ <td>Security / compliance teams</td>
605
+ </tr>
606
+ <tr>
607
+ <td><code>spanforge.redact</code></td>
608
+ <td>PII detection, sensitivity levels, redaction policies, deep <code>scan_payload()</code> with Luhn validation, and <code>contains_pii()</code> / <code>assert_redacted()</code> with raw string scanning</td>
609
+ <td>Data privacy / GDPR teams</td>
610
+ </tr>
611
+ <tr>
612
+ <td><code>spanforge.governance</code></td>
613
+ <td>Policy-based event gating — block prohibited types, warn on deprecated usage, enforce custom rules</td>
614
+ <td>Platform / compliance teams</td>
615
+ </tr>
616
+ <tr>
617
+ <td><strong>Instrumentation & Tracing</strong></td><td colspan="2"></td>
618
+ </tr>
619
+ <tr>
620
+ <td><code>spanforge.event</code></td>
621
+ <td>The core <code>Event</code> envelope — the one structure all tools share</td>
622
+ <td>Everyone</td>
623
+ </tr>
624
+ <tr>
625
+ <td><code>spanforge.types</code></td>
626
+ <td>All built-in event types — compliance events (<code>consent.*</code>, <code>hitl.*</code>, <code>model_registry.*</code>, <code>explanation.*</code>) and telemetry events (<code>llm.trace.*</code>, <code>llm.guard.*</code>, etc.)</td>
627
+ <td>Everyone</td>
628
+ </tr>
629
+ <tr>
630
+ <td><code>spanforge._span</code></td>
631
+ <td>Span, AgentRun, AgentStep context managers. <code>contextvars</code>-based async/thread-safe propagation. <code>async with</code>, <code>span.add_event()</code>, <code>span.set_timeout_deadline()</code></td>
632
+ <td>App developers</td>
633
+ </tr>
634
+ <tr>
635
+ <td><code>spanforge._trace</code></td>
636
+ <td><code>Trace</code> + <code>start_trace()</code> — high-level tracing entry point; accumulates child spans</td>
637
+ <td>App developers</td>
638
+ </tr>
639
+ <tr>
640
+ <td><code>spanforge.config</code></td>
641
+ <td><code>configure()</code> and <code>get_config()</code> — signing key, redaction policy, exporters, sample rate</td>
642
+ <td>Everyone</td>
643
+ </tr>
644
+ <tr>
645
+ <td><strong>Export & Integration</strong></td><td colspan="2"></td>
646
+ </tr>
647
+ <tr>
648
+ <td><code>spanforge.export</code></td>
649
+ <td>Ship events to JSONL, HTTP webhooks, OTLP collectors, Datadog APM, Grafana Loki, or spanforge Cloud</td>
650
+ <td>Infra / compliance teams</td>
651
+ </tr>
652
+ <tr>
653
+ <td><code>spanforge.stream</code></td>
654
+ <td>Fan-out router — one <code>drain()</code> call reaches multiple backends; Kafka source</td>
655
+ <td>Platform engineers</td>
656
+ </tr>
657
+ <tr>
658
+ <td><code>spanforge.integrations</code></td>
659
+ <td>Auto-instrumentation for OpenAI, Anthropic, LangChain, LlamaIndex, CrewAI, Groq, Ollama, Together</td>
660
+ <td>App developers</td>
661
+ </tr>
662
+ <tr>
663
+ <td><code>spanforge.auto</code></td>
664
+ <td><code>setup()</code> auto-patches all installed LLM integrations; <code>teardown()</code> cleanly unpatches</td>
665
+ <td>App developers</td>
666
+ </tr>
667
+ <tr>
668
+ <td><strong>Developer Tools</strong></td><td colspan="2"></td>
669
+ </tr>
670
+ <tr>
671
+ <td><code>spanforge.cost</code></td>
672
+ <td><code>CostTracker</code>, <code>BudgetMonitor</code>, <code>@budget_alert</code> — track and alert on token spend</td>
673
+ <td>App developers / FinOps</td>
674
+ </tr>
675
+ <tr>
676
+ <td><code>spanforge.cache</code></td>
677
+ <td><code>SemanticCache</code> + <code>@cached</code> — deduplicate LLM calls via cosine similarity; <code>InMemoryBackend</code>, <code>SQLiteBackend</code>, <code>RedisBackend</code></td>
678
+ <td>App developers / FinOps</td>
679
+ </tr>
680
+ <tr>
681
+ <td><code>spanforge.retry</code></td>
682
+ <td><code>@retry</code>, <code>FallbackChain</code>, <code>CircuitBreaker</code>, <code>CostAwareRouter</code> — resilient LLM routing with compliance events</td>
683
+ <td>App developers / SREs</td>
684
+ </tr>
685
+ <tr>
686
+ <td><code>spanforge.toolsmith</code></td>
687
+ <td><code>@tool</code> + <code>ToolRegistry</code> — register functions as typed tools; render JSON schemas for function-calling APIs</td>
688
+ <td>App developers</td>
689
+ </tr>
690
+ <tr>
691
+ <td><code>spanforge.lint</code></td>
692
+ <td>AST-based instrumentation linter; AO001–AO005 codes; flake8 plugin; CLI</td>
693
+ <td>All teams / CI</td>
694
+ </tr>
695
+ <tr>
696
+ <td><code>spanforge.debug</code></td>
697
+ <td><code>print_tree()</code>, <code>summary()</code>, <code>visualize()</code> — terminal tree, stats dict, HTML Gantt timeline</td>
698
+ <td>App developers</td>
699
+ </tr>
700
+ <tr>
701
+ <td><code>spanforge.metrics</code></td>
702
+ <td><code>aggregate()</code> — success rates, latency percentiles, token totals, cost breakdowns</td>
703
+ <td>Analytics engineers</td>
704
+ </tr>
705
+ <tr>
706
+ <td><code>spanforge.testing</code></td>
707
+ <td><code>MockExporter</code>, <code>capture_events()</code>, <code>assert_event_schema_valid()</code>, <code>trace_store()</code></td>
708
+ <td>Test authors</td>
709
+ </tr>
710
+ <tr>
711
+ <td><code>spanforge.validate</code></td>
712
+ <td>JSON Schema validation against the published v2.0 schema</td>
713
+ <td>All teams</td>
714
+ </tr>
715
+ <tr>
716
+ <td><code>spanforge.namespaces</code></td>
717
+ <td>Typed payload dataclasses for all built-in event namespaces</td>
718
+ <td>Tool authors</td>
719
+ </tr>
720
+ <tr>
721
+ <td><code>spanforge.models</code></td>
722
+ <td>Optional Pydantic v2 models for validated schemas</td>
723
+ <td>API / backend teams</td>
724
+ </tr>
725
+ <tr>
726
+ <td><code>spanforge.consumer</code></td>
727
+ <td>Declare schema-namespace dependencies; fail fast at startup if version requirements are not met</td>
728
+ <td>Platform teams</td>
729
+ </tr>
730
+ <tr>
731
+ <td><code>spanforge.deprecations</code></td>
732
+ <td>Per-event-type deprecation notices at runtime</td>
733
+ <td>Library maintainers</td>
734
+ </tr>
735
+ <tr>
736
+ <td><code>spanforge._hooks</code></td>
737
+ <td>Lifecycle hooks: <code>@hooks.on_llm_call</code>, <code>@hooks.on_tool_call</code>, <code>@hooks.on_agent_start</code> (sync + async)</td>
738
+ <td>App developers / platform</td>
739
+ </tr>
740
+ <tr>
741
+ <td><code>spanforge._store</code></td>
742
+ <td><code>TraceStore</code> ring buffer — <code>get_trace()</code>, <code>list_tool_calls()</code>, <code>list_llm_calls()</code></td>
743
+ <td>Platform / tooling engineers</td>
744
+ </tr>
745
+ <tr>
746
+ <td><code>spanforge._cli</code></td>
747
+ <td>18 CLI sub-commands: compliance, audit, scan, validate, stats, serve, ui, and more</td>
748
+ <td>DevOps / CI teams</td>
749
+ </tr>
750
+ </tbody>
751
+ </table>
752
+
753
+ ---
754
+
755
+ ## Quality
756
+
757
+ - **3 331 tests** passing (10 skipped) — unit, integration, property-based (Hypothesis), performance benchmarks
758
+ - **≥ 92 % line and branch coverage** — 90 % minimum enforced in CI
759
+ - **Zero required dependencies** — entire core runs on Python stdlib
760
+ - **Typed** — full `py.typed` marker; mypy + pyright clean
761
+ - **Frozen v2 trace schema** — `llm.trace.*` payload fields never break between minor releases
762
+ - **Async-safe** — `contextvars`-based context propagation across asyncio, threads, and executors
763
+
764
+ ---
765
+
766
+ ## Development
767
+
768
+ ```bash
769
+ git clone https://github.com/veerarag1973/spanforge.git
770
+ cd spanforge
771
+ python -m venv .venv && .venv\Scripts\activate # Windows; on macOS/Linux use: source .venv/bin/activate
772
+ pip install -e ".[dev]"
773
+ pytest # 3 331 tests
774
+ ```
775
+
776
+ <details>
777
+ <summary><strong>Code quality</strong></summary>
778
+
779
+ ```bash
780
+ ruff check . && ruff format .
781
+ mypy spanforge
782
+ pytest --cov # >=90% required
783
+ ```
784
+
785
+ </details>
786
+
787
+ <details>
788
+ <summary><strong>Build docs</strong></summary>
789
+
790
+ ```bash
791
+ pip install -e ".[docs]"
792
+ cd docs && sphinx-build -b html . _build/html
793
+ ```
794
+
795
+ </details>
796
+
797
+ ---
798
+
799
+ ## Versioning
800
+
801
+ spanforge implements **RFC-0001** (AI Compliance Standard for Agentic AI Systems). Current schema version: **2.0**.
802
+
803
+ This project follows [Semantic Versioning](https://semver.org/). The `llm.trace.*` namespace is additionally **frozen at v2** — even major releases won't remove fields from `SpanPayload`, `AgentRunPayload`, or `AgentStepPayload`.
804
+
805
+ See [docs/changelog.md](docs/changelog.md) for the full version history.
806
+
807
+ ---
808
+
809
+ ## Contributing
810
+
811
+ Contributions welcome — see the [Contributing Guide](docs/contributing.md). All new code must maintain ≥ 90 % coverage. Run `ruff` and `mypy` before submitting.
812
+
813
+ ---
814
+
815
+ ## Community
816
+
817
+ - **[Discussions](https://github.com/veerarag1973/spanforge/discussions)** — questions, ideas, show-and-tell
818
+ - **[Issues](https://github.com/veerarag1973/spanforge/issues)** — bug reports and feature requests
819
+ - **[SECURITY.md](SECURITY.md)** — responsible disclosure process
820
+ - **[Code of Conduct](CODE_OF_CONDUCT.md)** — Contributor Covenant v2.1
821
+
822
+ > Topics: `ai-compliance` `ai-governance` `eu-ai-act` `gdpr` `soc2` `audit-trail` `pii-redaction` `hmac-signing` `llm-governance` `python`
823
+
824
+ ---
825
+
826
+ ## License
827
+
828
+ [MIT](LICENSE) — free for personal and commercial use.
829
+
830
+ ---
831
+
832
+ <p align="center">
833
+ Built for teams that take AI governance seriously.<br/>
834
+ <a href="docs/index.md">Docs</a> ·
835
+ <a href="docs/quickstart.md">Quickstart</a> ·
836
+ <a href="docs/api/index.md">API Reference</a> ·
837
+ <a href="https://github.com/veerarag1973/spanforge/discussions">Discussions</a> ·
838
+ <a href="https://github.com/veerarag1973/spanforge/issues">Report a bug</a>
839
+ </p>
840
+ <h1 align="center">spanforge</h1>
841
+
842
+ <p align="center">
843
+ <strong>The reference implementation of the spanforge Standard.</strong><br/>
844
+ A lightweight Python SDK that gives your AI applications a common, structured way to record, sign, redact, and export events — with zero mandatory dependencies.
845
+ </p>
846
+
847
+ <p align="center">
848
+ <em>spanforge (RFC-0001) is the open event-schema standard for compliance and governance of agentic AI systems.</em>
849
+ </p>
850
+
851
+ <p align="center">
852
+ <img src="https://img.shields.io/badge/python-3.9%2B-4c8cbf?logo=python&logoColor=white" alt="Python 3.9+"/>
853
+ <a href="https://pypi.org/project/spanforge/"><img src="https://img.shields.io/pypi/v/spanforge?color=4c8cbf&logo=pypi&logoColor=white" alt="PyPI"/></a>
854
+ <a href="https://www.getspanforge.com/standard"><img src="https://img.shields.io/badge/standard-SpanForge_RFC--0001-4c8cbf" alt="spanforge RFC-0001"/></a>
855
+ <img src="https://img.shields.io/badge/coverage-92%25-brightgreen" alt="92% test coverage"/>
856
+ <img src="https://img.shields.io/badge/tests-3331%20passing-brightgreen" alt="3331 tests"/>
857
+ <img src="https://img.shields.io/badge/version-2.0.0-4c8cbf" alt="Version 2.0.0"/>
858
+ <img src="https://img.shields.io/badge/dependencies-zero-brightgreen" alt="Zero dependencies"/>
859
+ <a href="docs/index.md"><img src="https://img.shields.io/badge/docs-local-4c8cbf" alt="Documentation"/></a>
860
+ <img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT license"/>
861
+ </p>
862
+
863
+ ---
864
+
865
+ ## What is this?
866
+
867
+ **spanforge** is the **reference implementation of [RFC-0001 spanforge](https://www.getspanforge.com/standard)** — the open event-schema standard for compliance and governance of agentic AI systems.
868
+
869
+ spanforge defines a structured, typed event envelope that every LLM-adjacent instrumentation tool can emit and every compliance backend can consume. It covers the full lifecycle: event envelopes, agent span hierarchies, token and cost models, HMAC audit chains, PII redaction, OTLP-compatible export, and schema governance.
870
+
871
+ > Think of **spanforge** as a **universal receipt format** for your AI application.
872
+ > Every time your app calls a language model, makes a decision, redacts private data, or checks a guardrail — this library gives that action a consistent, structured record that any tool in your stack can read.
873
+
874
+ ---
875
+
876
+ ## Why use it?
877
+
878
+ Without a shared schema, every team invents their own log format. With ``spanforge`` (and the spanforge standard it implements), your logs, dashboards, compliance reports, and monitoring tools all speak the same language — automatically.
879
+
880
+ | Without spanforge | With spanforge |
881
+ |---|---|
882
+ | Each service logs events differently | Every event follows the same structure |
883
+ | Hard to audit who saw what data | Built-in HMAC signing creates a tamper-proof audit trail |
884
+ | PII scattered across logs | First-class PII redaction before data leaves your app |
885
+ | Vendor-specific telemetry | OpenTelemetry-compatible — works with any monitoring stack |
886
+ | No way to check compatibility | CLI + programmatic compliance checks in CI |
887
+ | Complex integration glue | Zero required dependencies — just ``pip install`` |
888
+
889
+ ---
890
+
891
+ ## How spanforge compares
892
+
893
+ spanforge is the only **open-schema, zero-dependency** AI compliance platform. Everything else either requires a hosted backend, imposes a proprietary event format, or has mandatory heavy dependencies.
894
+
895
+ | Feature | **spanforge** | LangSmith | Langfuse | OpenLLMetry | Arize Phoenix |
896
+ |---|:---:|:---:|:---:|:---:|:---:|
897
+ | Open schema standard (RFC-driven) | ✅ | ❌ | ❌ | Partial | ❌ |
898
+ | Zero required dependencies | ✅ | ❌ | ❌ | ❌ | ❌ |
899
+ | Works fully offline / air-gapped | ✅ | ❌ | Self-host only | Partial | Self-host only |
900
+ | HMAC tamper-proof audit chain | ✅ | ❌ | ❌ | ❌ | ❌ |
901
+ | First-class PII redaction (built-in) | ✅ | ❌ | ❌ | ❌ | ❌ |
902
+ | OTLP export (any OTel backend) | ✅ | ❌ | ❌ | ✅ | ✅ |
903
+ | MIT license (self-hosted, no call-home) | ✅ | Partial | ✅ | ✅ | ✅ |
904
+ | Python 3.9+ (no Pydantic required) | ✅ | ❌ | ❌ | ❌ | ❌ |
905
+ | CLI-first compliance checks | ✅ | ❌ | ❌ | ❌ | ❌ |
906
+ | Schema versioning & migration tools | ✅ | ❌ | ❌ | ❌ | ❌ |
907
+
908
+ > **Bottom line**: Use spanforge when you need a *standard* rather than a *service* — especially in regulated, offline, or multi-vendor environments.
909
+
910
+ ---
911
+
912
+ ## Install
913
+
914
+ ```bash
915
+ pip install spanforge
916
+ ```
917
+
918
+ ```python
919
+ import spanforge # distribution name and import name are both spanforge
920
+ ```
921
+
922
+ **Requires Python 3.9 or later.** No other packages are required for core usage.
923
+
924
+ > **Note:** The PyPI distribution name and the Python import name are both `spanforge`.
925
+
926
+ ### Optional extras
927
+
928
+ ```bash
929
+ pip install "spanforge[jsonschema]" # strict JSON Schema validation
930
+ pip install "spanforge[openai]" # OpenAI auto-instrumentation (patch/unpatch)
931
+ pip install "spanforge[http]" # Webhook + OTLP export
932
+ pip install "spanforge[pydantic]" # Pydantic v2 model layer
933
+ pip install "spanforge[otel]" # OpenTelemetry SDK integration
934
+ pip install "spanforge[kafka]" # EventStream.from_kafka() via kafka-python
935
+ pip install "spanforge[langchain]" # LangChain callback handler
936
+ pip install "spanforge[llamaindex]" # LlamaIndex event handler
937
+ pip install "spanforge[crewai]" # CrewAI callback handler
938
+ pip install "spanforge[datadog]" # Datadog APM + metrics exporter
939
+ pip install "spanforge[all]" # everything above
940
+ ```
941
+
942
+ ---
943
+
944
+ ## Using SpanForge alongside OpenTelemetry
945
+
946
+ SpanForge does not replace your OTel setup. It adds the compliance layer OTel cannot provide — tamper-proof audit chains, PII redaction, and regulator-ready attestation reports.
947
+
948
+ ```python
949
+ from opentelemetry.sdk.trace import TracerProvider
950
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
951
+
952
+ # 1. Set up your existing OTel pipeline as normal
953
+ provider = TracerProvider()
954
+ provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
955
+
956
+ # 2. Add SpanForge's compliance layer in one line
957
+ import spanforge
958
+ spanforge.configure(mode="otel_passthrough")
959
+
960
+ # 3. Use SpanForge spans — OTel + HMAC audit chain + PII redaction all active
961
+ with spanforge.Tracer().span("retrieve_docs") as s:
962
+ s.set_attribute("user_query", "What is our refund policy?")
963
+ ```
964
+
965
+ For dual-stream export (OTel bridge + local audit log):
966
+
967
+ ```python
968
+ spanforge.configure(exporters=["otel_passthrough", "jsonl"], endpoint="audit.jsonl")
969
+ ```
970
+
971
+ ---
972
+
973
+ ## Five-minute tour
974
+
975
+ ### 1 — Trace an LLM call with the span API
976
+
977
+ ```python
978
+ import spanforge
979
+
980
+ spanforge.configure(exporter="console", service_name="my-agent")
981
+
982
+ with spanforge.span("call-llm") as span:
983
+ span.set_model(model="gpt-4o", system="openai")
984
+ result = call_llm(prompt) # your LLM call here
985
+ span.set_token_usage(input=512, output=128, total=640)
986
+ span.set_status("ok")
987
+ ```
988
+
989
+ The context manager automatically records start/end times, parent-child span relationships, and emits a structured event when it exits.
990
+
991
+ ---
992
+
993
+ ### 1c — Use the high-level `Trace` API (new in 2.0)
994
+
995
+ ```python
996
+ import spanforge
997
+
998
+ spanforge.configure(exporter="console", service_name="my-agent")
999
+
1000
+ with spanforge.start_trace("research-agent") as trace:
1001
+ with trace.llm_call("gpt-4o", temperature=0.7) as span:
1002
+ result = call_llm(prompt)
1003
+ span.set_token_usage(input=512, output=200, total=712)
1004
+ span.set_status("ok")
1005
+ span.add_event("tool_selected", {"name": "web_search"})
1006
+
1007
+ with trace.tool_call("web_search") as span:
1008
+ output = run_search(query)
1009
+ span.set_status("ok")
1010
+
1011
+ # Inspect the trace in the terminal
1012
+ trace.print_tree()
1013
+ # ─ Agent Run: research-agent [1.2s]
1014
+ # ├─ LLM Call: gpt-4o [0.8s] in=512 out=200 tokens $0.0034
1015
+ # └─ Tool Call: web_search [0.4s] ok
1016
+
1017
+ print(trace.summary())
1018
+ # {'trace_id': '...', 'agent_name': 'research-agent', 'span_count': 3, ...}
1019
+ ```
1020
+
1021
+ The `Trace` object works with `async with` too:
1022
+
1023
+ ```python
1024
+ async with spanforge.start_trace("async-agent") as trace:
1025
+ async with trace.llm_call("gpt-4o") as span:
1026
+ response = await async_call_llm(prompt)
1027
+ span.set_status("ok")
1028
+ ```
1029
+
1030
+ ---
1031
+
1032
+ ### 1b — Auto-instrument the OpenAI client (zero boilerplate)
1033
+
1034
+ ```python
1035
+ from spanforge.integrations import openai as openai_integration
1036
+ import openai, spanforge
1037
+
1038
+ # One-time setup: patch the OpenAI SDK
1039
+ openai_integration.patch()
1040
+
1041
+ spanforge.configure(exporter="console", service_name="my-agent")
1042
+
1043
+ client = openai.OpenAI()
1044
+
1045
+ with spanforge.tracer.span("chat-gpt4o") as span:
1046
+ resp = client.chat.completions.create(
1047
+ model="gpt-4o",
1048
+ messages=[{"role": "user", "content": "Hello"}],
1049
+ )
1050
+ # span.token_usage, span.cost, and span.model are now populated automatically
1051
+ ```
1052
+
1053
+ `patch()` wraps every `client.chat.completions.create()` call (sync and async)
1054
+ so that `token_usage`, `cost`, and `model` are auto-populated on the active span
1055
+ from the API response — no per-call boilerplate required.
1056
+
1057
+ ```python
1058
+ # Restore original behaviour when you're done
1059
+ openai_integration.unpatch()
1060
+ ```
1061
+
1062
+ ---
1063
+
1064
+ ### 2 — Record a raw event
1065
+
1066
+ ```python
1067
+ from spanforge import Event, EventType, Tags
1068
+
1069
+ event = Event(
1070
+ event_type=EventType.TRACE_SPAN_COMPLETED,
1071
+ source="my-app@1.0.0", # who emitted this
1072
+ org_id="org_acme", # your organisation
1073
+ payload={
1074
+ "model": "gpt-4o",
1075
+ "prompt_tokens": 512,
1076
+ "completion_tokens": 128,
1077
+ "latency_ms": 340.5,
1078
+ },
1079
+ tags=Tags(env="production"),
1080
+ )
1081
+
1082
+ event.validate() # raises if structure is invalid
1083
+ print(event.to_json()) # compact JSON string, ready to store or ship
1084
+ ```
1085
+
1086
+ Every event gets a **ULID** (a time-sortable unique ID) automatically — no need to generate one yourself.
1087
+
1088
+ ---
1089
+
1090
+ ### 3 — Redact private information before logging
1091
+
1092
+ ```python
1093
+ from spanforge import Event, EventType
1094
+ from spanforge.redact import Redactable, RedactionPolicy, Sensitivity
1095
+
1096
+ policy = RedactionPolicy(min_sensitivity=Sensitivity.PII, redacted_by="policy:gdpr-v1")
1097
+
1098
+ # Wrap any string that might contain PII
1099
+ event = Event(
1100
+ event_type=EventType.TRACE_SPAN_COMPLETED,
1101
+ source="my-app@1.0.0",
1102
+ payload={"prompt": Redactable("Call me at 555-867-5309", Sensitivity.PII)},
1103
+ )
1104
+ result = policy.apply(event)
1105
+ # result.event.payload["prompt"] -> "[REDACTED by policy:gdpr-v1]"
1106
+ ```
1107
+
1108
+ ``Redactable`` is a string wrapper. You mark fields as sensitive at the point where they are created; the policy decides what to remove before the event is written to any log.
1109
+
1110
+ > **Tip — auto-redact every span:** pass `redaction_policy=policy` to
1111
+ > `spanforge.configure()` and the policy runs automatically inside `_dispatch()`
1112
+ > before any exporter sees the event.
1113
+
1114
+ ---
1115
+
1116
+ ### 4 — Sign events for tamper-proof audit trails
1117
+
1118
+ ```python
1119
+ from spanforge.signing import sign, verify_chain, AuditStream
1120
+
1121
+ # Sign a single event
1122
+ signed = sign(event, org_secret="my-org-secret")
1123
+
1124
+ # Or build a chain — every event references the one before it,
1125
+ # so any gap or modification is immediately detectable.
1126
+ stream = AuditStream(org_secret="my-org-secret")
1127
+ for e in events:
1128
+ stream.append(e)
1129
+
1130
+ result = verify_chain(stream.events, org_secret="my-org-secret")
1131
+ ```
1132
+
1133
+ This is the same principle used in certificate chains and blockchain — each event's signature covers the previous event's signature, so you cannot alter history without breaking the chain.
1134
+ > **Tip — auto-sign every span:** pass `signing_key="your-secret"` to
1135
+ > `spanforge.configure()` and every emitted span is signed and chained
1136
+ > automatically, with no per-event boilerplate.
1137
+ ---
1138
+
1139
+ ### 5 — Export to anywhere
1140
+
1141
+ ```python
1142
+ from spanforge.stream import EventStream
1143
+ from spanforge.export.jsonl import JSONLExporter
1144
+ from spanforge.export.webhook import WebhookExporter
1145
+ from spanforge.export.otlp import OTLPExporter
1146
+ from spanforge.export.datadog import DatadogExporter
1147
+ from spanforge.export.grafana import GrafanaLokiExporter
1148
+ from spanforge.export.cloud import CloudExporter
1149
+
1150
+ stream = EventStream(events)
1151
+
1152
+ # Write everything to a local file
1153
+ await stream.drain(JSONLExporter("events.jsonl"))
1154
+
1155
+ # Ship to your OpenTelemetry collector
1156
+ await stream.drain(OTLPExporter("http://otel-collector:4318/v1/traces"))
1157
+
1158
+ # Send to Datadog APM (traces + metrics)
1159
+ await stream.drain(DatadogExporter(
1160
+ service="my-app",
1161
+ env="production",
1162
+ agent_url="http://dd-agent:8126",
1163
+ api_key="your-dd-api-key",
1164
+ ))
1165
+
1166
+ # Push to Grafana Loki
1167
+ await stream.drain(GrafanaLokiExporter(
1168
+ url="http://loki:3100",
1169
+ labels={"app": "my-app", "env": "production"},
1170
+ ))
1171
+
1172
+ # Send to spanforge Cloud
1173
+ await stream.drain(CloudExporter(
1174
+ api_key="sf_live_xxx",
1175
+ endpoint="https://ingest.getspanforge.com/v1/events",
1176
+ ))
1177
+
1178
+ # Fan-out: guard-blocked events -> Slack webhook
1179
+ await stream.route(
1180
+ WebhookExporter("https://hooks.slack.com/your-webhook"),
1181
+ predicate=lambda e: e.event_type == "llm.guard.output.blocked",
1182
+ )
1183
+ ```
1184
+
1185
+ #### Kafka source
1186
+
1187
+ ```python
1188
+ from spanforge.stream import EventStream
1189
+
1190
+ # Drain a Kafka topic directly into an EventStream
1191
+ stream = EventStream.from_kafka(
1192
+ topic="llm-events",
1193
+ bootstrap_servers="kafka:9092",
1194
+ group_id="analytics",
1195
+ max_messages=5000,
1196
+ )
1197
+ await stream.drain(exporter)
1198
+ ```
1199
+
1200
+ ---
1201
+
1202
+ ### 6 — Sync exporters for non-async workflows
1203
+
1204
+ ```python
1205
+ from spanforge.exporters.jsonl import SyncJSONLExporter
1206
+ from spanforge.exporters.console import SyncConsoleExporter
1207
+
1208
+ # Log all events to a JSONL file synchronously
1209
+ exporter = SyncJSONLExporter("events.jsonl")
1210
+ exporter.export(event)
1211
+ exporter.close()
1212
+
1213
+ # Pretty-print events to the terminal during development
1214
+ console = SyncConsoleExporter()
1215
+ console.export(event)
1216
+ ```
1217
+
1218
+ ---
1219
+
1220
+ ### 7b — Register lifecycle hooks (new in 2.0)
1221
+
1222
+ ```python
1223
+ import spanforge
1224
+
1225
+ @spanforge.hooks.on_llm_call
1226
+ def log_llm(span):
1227
+ print(f"LLM called: {span.model} temp={span.temperature}")
1228
+
1229
+ @spanforge.hooks.on_tool_call
1230
+ def log_tool(span):
1231
+ print(f"Tool called: {span.name}")
1232
+
1233
+ # Hooks fire automatically for every span of the matching type
1234
+ ```
1235
+
1236
+ ---
1237
+
1238
+ ### 7c — Aggregate metrics from a trace file (new in 2.0)
1239
+
1240
+ ```python
1241
+ import spanforge
1242
+ from spanforge.stream import EventStream
1243
+
1244
+ events = list(EventStream.from_file("events.jsonl"))
1245
+ summary = spanforge.metrics.aggregate(events)
1246
+
1247
+ print(f"Traces: {summary.trace_count}")
1248
+ print(f"Success: {summary.agent_success_rate:.0%}")
1249
+ print(f"p95 LLM: {summary.llm_latency_ms.p95:.0f} ms")
1250
+ print(f"Cost: ${summary.total_cost_usd:.4f}")
1251
+ ```
1252
+
1253
+ ---
1254
+
1255
+ ### 7d — Visualize a Gantt timeline (new in 2.0)
1256
+
1257
+ ```python
1258
+ from spanforge.debug import visualize
1259
+
1260
+ html = visualize(trace.spans, path="trace.html")
1261
+ # Opens trace.html in a browser — self-contained, no external deps
1262
+ ```
1263
+
1264
+ ---
1265
+
1266
+ ### 8a — Semantic cache — skip redundant LLM calls
1267
+
1268
+ ```python
1269
+ from spanforge.cache import SemanticCache, InMemoryBackend
1270
+
1271
+ cache = SemanticCache(
1272
+ backend=InMemoryBackend(max_size=1024),
1273
+ similarity_threshold=0.92, # cosine similarity cutoff
1274
+ ttl_seconds=3600,
1275
+ namespace="responses",
1276
+ emit_events=True, # emits llm.cache.hit/miss/written events
1277
+ )
1278
+
1279
+ # Or use the @cached decorator on any async function
1280
+ from spanforge.cache import cached
1281
+
1282
+ @cached(threshold=0.92, ttl=3600, emit_events=True)
1283
+ async def call_llm(prompt: str) -> str:
1284
+ # ... real LLM call only on cache miss
1285
+ return response
1286
+
1287
+ reply = await call_llm("Summarise the spanforge RFC in one sentence.")
1288
+ # Second call with a semantically identical prompt → instant cache hit, zero tokens spent
1289
+ reply2 = await call_llm("Give me a one-sentence summary of the spanforge RFC.")
1290
+ ```
1291
+
1292
+ ---
1293
+
1294
+ ### 8b — Lint your instrumentation in CI
1295
+
1296
+ ```python
1297
+ from spanforge.lint import run_checks
1298
+
1299
+ source = open("myapp/pipeline.py").read()
1300
+ errors = run_checks(source, filename="myapp/pipeline.py")
1301
+
1302
+ for err in errors:
1303
+ print(f"{err.filename}:{err.line}:{err.col}: {err.code} {err.message}")
1304
+ # myapp/pipeline.py:42:12: AO002 actor_id receives a bare str; wrap with Redactable()
1305
+ ```
1306
+
1307
+ Or run the CLI against a whole directory:
1308
+
1309
+ ```bash
1310
+ python -m spanforge.lint myapp/
1311
+ # AO001 Event() missing required field 'payload' myapp/pipeline.py:17
1312
+ # AO004 LLM call outside tracer span context myapp/pipeline.py:53
1313
+ # 2 errors in 1 file.
1314
+
1315
+ # Plug into flake8 automatically (entry point registered in pyproject.toml):
1316
+ flake8 myapp/
1317
+ ```
1318
+
1319
+ ---
1320
+
1321
+ ### 9 — Check compliance and inspect events from the command line
1322
+
1323
+ ```bash
1324
+ spanforge check # end-to-end health check (config → export → trace store)
1325
+ spanforge check-compat events.json # v2.0 compatibility checklist
1326
+ spanforge validate events.jsonl # JSON Schema validation per event
1327
+ spanforge audit-chain events.jsonl # verify HMAC signing chain integrity
1328
+ spanforge audit check-health events.jsonl # PII scan + chain + egress health check
1329
+ spanforge audit rotate-key events.jsonl # rotate signing key & re-sign chain
1330
+ spanforge audit erase events.jsonl --subject-id user123 # GDPR subject erasure
1331
+ spanforge audit verify --input events.jsonl # verify chain integrity
1332
+ spanforge scan events.jsonl --fail-on-match # CI-gate PII scan
1333
+ spanforge migrate events.jsonl --sign # v1→v2 schema migration
1334
+ spanforge inspect <EVENT_ID> events.jsonl # pretty-print a single event
1335
+ spanforge stats events.jsonl # summary: counts, tokens, cost, timestamps
1336
+ spanforge list-deprecated # list all deprecated event types
1337
+ spanforge migration-roadmap [--json] # v2 migration roadmap
1338
+ spanforge check-consumers # consumer registry compatibility check
1339
+ ```
1340
+
1341
+ ```
1342
+ CHK-1 All required fields present (500 / 500 events)
1343
+ CHK-2 Event types valid (500 / 500 events)
1344
+ CHK-3 Source identifiers well-formed (500 / 500 events)
1345
+ CHK-5 Event IDs are valid ULIDs (500 / 500 events)
1346
+ All checks passed.
1347
+ ```
1348
+
1349
+ Drop any of these into your CI pipeline to catch schema drift, signing failures, or schema-breaking migrations before they reach production.
1350
+
1351
+ ---
1352
+
1353
+ ### 10 — SPA Trace Viewer
1354
+
1355
+ Browse traces in a local single-page application — no external dependencies:
1356
+
1357
+ ```bash
1358
+ # Start the HTTP trace viewer server (default port 8888)
1359
+ spanforge serve
1360
+
1361
+ # Or open the standalone HTML viewer in your default browser
1362
+ spanforge ui
1363
+ ```
1364
+
1365
+ `spanforge serve` starts a lightweight HTTP server that exposes a `/traces`
1366
+ JSON API backed by the in-memory `TraceStore`. The SPA renders agent runs,
1367
+ LLM calls, tool calls, and timing data in a searchable table.
1368
+
1369
+ `spanforge ui` generates a self-contained HTML file from a JSONL export
1370
+ and opens it directly — useful for sharing trace snapshots offline.
1371
+
1372
+ ---
1373
+
1374
+ ## What is inside the box
1375
+
1376
+ <table>
1377
+ <thead>
1378
+ <tr><th>Module</th><th>What it does</th><th>For whom</th></tr>
1379
+ </thead>
1380
+ <tbody>
1381
+ <tr>
1382
+ <td><code>spanforge.event</code></td>
1383
+ <td>The core <code>Event</code> envelope — the one structure all tools share</td>
1384
+ <td>Everyone</td>
1385
+ </tr>
1386
+ <tr>
1387
+ <td><code>spanforge.types</code></td>
1388
+ <td>All built-in event type strings (trace, cost, cache, eval, guard…)</td>
1389
+ <td>Everyone</td>
1390
+ </tr>
1391
+ <tr>
1392
+ <td><code>spanforge.config</code></td>
1393
+ <td><code>configure()</code> and <code>get_config()</code> — global SDK configuration</td>
1394
+ <td>Everyone</td>
1395
+ </tr>
1396
+ <tr>
1397
+ <td><code>spanforge._span</code></td>
1398
+ <td>Span, AgentRun, AgentStep context managers — the runtime tracing API. Uses <code>contextvars</code> for safe async/thread context propagation. Supports <code>async with</code>, <code>span.add_event()</code>, <code>span.set_timeout_deadline()</code></td>
1399
+ <td>App developers</td>
1400
+ </tr>
1401
+ <tr>
1402
+ <td><code>spanforge._trace</code></td>
1403
+ <td><code>Trace</code> object and <code>start_trace()</code> — high-level, imperative tracing entry point; accumulates all child spans</td>
1404
+ <td>App developers</td>
1405
+ </tr>
1406
+ <tr>
1407
+ <td><code>spanforge.debug</code></td>
1408
+ <td><code>print_tree()</code>, <code>summary()</code>, <code>visualize()</code> — terminal tree, stats dict, and self-contained HTML Gantt timeline</td>
1409
+ <td>App developers</td>
1410
+ </tr>
1411
+ <tr>
1412
+ <td><code>spanforge.metrics</code></td>
1413
+ <td><code>aggregate()</code> and <code>MetricsSummary</code> — compute success rates, latency percentiles, token totals, and cost breakdowns from any <code>Iterable[Event]</code></td>
1414
+ <td>Data / analytics engineers</td>
1415
+ </tr>
1416
+ <tr>
1417
+ <td><code>spanforge._store</code></td>
1418
+ <td><code>TraceStore</code> — in-memory ring buffer; <code>get_trace()</code>, <code>list_tool_calls()</code>, <code>list_llm_calls()</code></td>
1419
+ <td>Platform / tooling engineers</td>
1420
+ </tr>
1421
+ <tr>
1422
+ <td><code>spanforge._hooks</code></td>
1423
+ <td><code>HookRegistry</code> / <code>hooks</code> — global span lifecycle hooks: <code>@hooks.on_llm_call</code>, <code>@hooks.on_tool_call</code>, <code>@hooks.on_agent_start</code>, <code>@hooks.on_agent_end</code>. Async variants: <code>@hooks.on_llm_call_async</code>, <code>@hooks.on_tool_call_async</code>, <code>@hooks.on_agent_start_async</code>, <code>@hooks.on_agent_end_async</code> — fired via <code>asyncio.ensure_future()</code>.</td>
1424
+ <td>App developers / platform</td>
1425
+ </tr>
1426
+ <tr>
1427
+ <td><code>spanforge._cli</code></td>
1428
+ <td>CLI sub-commands: <code>check</code>, <code>check-compat</code>, <code>validate</code>, <code>audit-chain</code>, <code>audit</code> (<code>erase</code>, <code>rotate-key</code>, <code>check-health</code>, <code>verify</code>), <code>scan</code>, <code>migrate</code>, <code>inspect</code>, <code>stats</code>, <code>list-deprecated</code>, <code>migration-roadmap</code>, <code>check-consumers</code>, <code>compliance</code>, <code>cost</code>, <code>dev</code>, <code>module</code>, <code>serve</code>, <code>init</code>, <code>quickstart</code>, <code>report</code>, <code>ui</code></td>
1429
+ <td>DevOps / CI teams</td>
1430
+ </tr>
1431
+ <tr>
1432
+ <td><code>spanforge.redact</code></td>
1433
+ <td>PII detection, sensitivity levels, redaction policies, deep <code>scan_payload()</code> with Luhn validation, and <code>contains_pii()</code> / <code>assert_redacted()</code> with raw string scanning</td>
1434
+ <td>Data privacy / GDPR teams</td>
1435
+ </tr>
1436
+ <tr>
1437
+ <td><code>spanforge.signing</code></td>
1438
+ <td>HMAC-SHA256 event signing, tamper-evident audit chains, key strength validation, key expiry checks, environment-isolated key derivation, multi-tenant <code>KeyResolver</code> protocol, and <code>AsyncAuditStream</code></td>
1439
+ <td>Security / compliance teams</td>
1440
+ </tr>
1441
+ <tr>
1442
+ <td><code>spanforge.compliance</code></td>
1443
+ <td>Programmatic v2.0 compatibility checks — no pytest required. <code>ComplianceMappingEngine</code> maps telemetry to regulatory frameworks (EU AI Act, ISO 42001, NIST AI RMF, GDPR, SOC 2) and generates evidence packages with HMAC-signed attestations. Consent (<code>consent.*</code>), HITL (<code>hitl.*</code>), model registry (<code>model_registry.*</code>), and explainability (<code>explanation.*</code>) events are integrated into clause mappings. Attestations include model owner/risk-tier/status metadata and <code>explanation_coverage_pct</code></td>
1444
+ <td>Platform / DevOps / Compliance teams</td>
1445
+ </tr>
1446
+ <tr>
1447
+ <td><code>spanforge.export</code></td>
1448
+ <td>Ship events to files (JSONL), HTTP webhooks, OTLP collectors, Datadog APM, Grafana Loki, or spanforge Cloud</td>
1449
+ <td>Infra / compliance teams</td>
1450
+ </tr>
1451
+ <tr>
1452
+ <td><code>spanforge.exporters</code></td>
1453
+ <td>Sync exporters — <code>SyncJSONLExporter</code> and <code>SyncConsoleExporter</code> for non-async code</td>
1454
+ <td>App developers</td>
1455
+ </tr>
1456
+ <tr>
1457
+ <td><code>spanforge.stream</code></td>
1458
+ <td>Fan-out router — one <code>drain()</code> call reaches multiple backends; Kafka source via <code>from_kafka()</code></td>
1459
+ <td>Platform engineers</td>
1460
+ </tr>
1461
+ <tr>
1462
+ <td><code>spanforge.validate</code></td>
1463
+ <td>JSON Schema validation against the published v2.0 schema</td>
1464
+ <td>All teams</td>
1465
+ </tr>
1466
+ <tr>
1467
+ <td><code>spanforge.consumer</code></td>
1468
+ <td>Declare schema-namespace dependencies; fail fast at startup if version requirements are not met</td>
1469
+ <td>Platform / integration teams</td>
1470
+ </tr>
1471
+ <tr>
1472
+ <td><code>spanforge.governance</code></td>
1473
+ <td>Policy-based event gating — block prohibited types, warn on deprecated usage, enforce custom rules</td>
1474
+ <td>Platform / compliance teams</td>
1475
+ </tr>
1476
+ <tr>
1477
+ <td><code>spanforge.deprecations</code></td>
1478
+ <td>Register and surface per-event-type deprecation notices at runtime</td>
1479
+ <td>Library maintainers</td>
1480
+ </tr>
1481
+ <tr>
1482
+ <td><code>spanforge.testing</code></td>
1483
+ <td>Test utilities: <code>MockExporter</code>, <code>capture_events()</code> context manager, <code>assert_event_schema_valid()</code>, and <code>trace_store()</code> isolated store context manager. Write unit tests for your AI pipeline without real exporters.</td>
1484
+ <td>App developers / test authors</td>
1485
+ </tr>
1486
+ <tr>
1487
+ <td><code>spanforge.auto</code></td>
1488
+ <td>Integration auto-discovery: <code>spanforge.auto.setup()</code> auto-patches every installed LLM integration (OpenAI, Anthropic, Ollama, Groq, Together AI). <code>setup()</code> must be called explicitly; <code>spanforge.auto.teardown()</code> cleanly unpatches all.</td>
1489
+ <td>App developers</td>
1490
+ </tr>
1491
+ <tr>
1492
+ <td><code>spanforge.integrations</code></td>
1493
+ <td>Plug-in adapters for OpenAI (auto-instrumentation via <code>patch()</code>), LangChain, LlamaIndex, Anthropic, Groq, Ollama, Together, and <strong>CrewAI</strong> (<code>SpanForgeCrewAIHandler</code> + <code>patch()</code>). <code>spanforge.integrations._pricing</code> ships a static USD/1M-token pricing table for all current OpenAI models.</td>
1494
+ <td>App developers</td>
1495
+ </tr>
1496
+ <tr>
1497
+ <td><code>spanforge.namespaces</code></td>
1498
+ <td>Typed payload dataclasses for all 10 built-in event namespaces</td>
1499
+ <td>Tool authors</td>
1500
+ </tr>
1501
+ <tr>
1502
+ <td><code>spanforge.models</code></td>
1503
+ <td>Optional Pydantic v2 models for teams that prefer validated schemas</td>
1504
+ <td>API / backend teams</td>
1505
+ </tr>
1506
+ <tr>
1507
+ <td><code>spanforge.trace</code></td>
1508
+ <td><code>@trace()</code> decorator — wraps sync/async functions, auto-emits span start/end events with timing and error capture. <code>spanforge.export.otlp_bridge</code> converts spans to OTLP proto dicts.</td>
1509
+ <td>App developers</td>
1510
+ </tr>
1511
+ <tr>
1512
+ <td><code>spanforge.cost</code></td>
1513
+ <td><code>CostTracker</code>, <code>BudgetMonitor</code>, <code>@budget_alert</code>, <code>emit_cost_event()</code>, <code>cost_summary()</code> — track and alert on token spend across a session</td>
1514
+ <td>App developers / FinOps</td>
1515
+ </tr>
1516
+ <tr>
1517
+ <td><code>spanforge.inspect</code></td>
1518
+ <td><code>InspectorSession</code> context manager + <code>inspect_trace()</code> — intercept and record tool call arguments, results, latency, and errors within a trace</td>
1519
+ <td>Platform / debugging</td>
1520
+ </tr>
1521
+ <tr>
1522
+ <td><code>spanforge.toolsmith</code></td>
1523
+ <td><code>@tool</code> decorator + <code>ToolRegistry</code> — register functions as typed tools; <code>build_openai_schema()</code> / <code>build_anthropic_schema()</code> render JSON schemas for function-calling APIs</td>
1524
+ <td>App developers</td>
1525
+ </tr>
1526
+ <tr>
1527
+ <td><code>spanforge.retry</code></td>
1528
+ <td><code>@retry</code> with exponential back-off, <code>FallbackChain</code>, <code>CircuitBreaker</code>, <code>CostAwareRouter</code> — resilient LLM provider routing with compliance events at each step</td>
1529
+ <td>App developers / SREs</td>
1530
+ </tr>
1531
+ <tr>
1532
+ <td><code>spanforge.cache</code></td>
1533
+ <td><code>SemanticCache</code> + <code>@cached</code> decorator — deduplicate LLM calls via cosine-similarity matching; pluggable backends: <code>InMemoryBackend</code>, <code>SQLiteBackend</code>, <code>RedisBackend</code>; emits <code>llm.cache.*</code> events</td>
1534
+ <td>App developers / FinOps</td>
1535
+ </tr>
1536
+ <tr>
1537
+ <td><code>spanforge.lint</code></td>
1538
+ <td><code>run_checks(source, filename)</code> — AST-based instrumentation linter; five AO-codes (AO001–AO005); flake8 plugin; <code>python -m spanforge.lint</code> CLI</td>
1539
+ <td>All teams / CI pipelines</td>
1540
+ </tr>
1541
+ </tbody>
1542
+ </table>
1543
+
1544
+ ---
1545
+
1546
+ ## Event namespaces
1547
+
1548
+ Every event carries a ``payload`` — a dictionary whose shape is defined by the event's **namespace**. The ten built-in namespaces cover everything from raw model traces to safety guardrails:
1549
+
1550
+ | Namespace prefix | Dataclass | What it records |
1551
+ |---|---|---|
1552
+ | ``llm.trace.*`` | ``SpanPayload``, ``AgentRunPayload``, ``AgentStepPayload`` | Model call — tokens, latency, finish reason **(frozen v2)** |
1553
+ | ``llm.cost.*`` | ``CostPayload`` | Per-call cost in USD |
1554
+ | ``llm.cache.*`` | ``CachePayload`` | Cache hit/miss, backend, TTL |
1555
+ | ``llm.eval.*`` | ``EvalScenarioPayload`` | Scores, labels, evaluator identity |
1556
+ | ``llm.guard.*`` | ``GuardPayload`` | Safety classifier output, block decisions |
1557
+ | ``llm.fence.*`` | ``FencePayload`` | Topic constraints, allow/block lists |
1558
+ | ``llm.prompt.*`` | ``PromptPayload`` | Prompt template version, rendered text |
1559
+ | ``llm.redact.*`` | ``RedactPayload`` | PII audit record — what was found and removed |
1560
+ | ``llm.diff.*`` | ``DiffPayload`` | Prompt/response delta between two events |
1561
+ | ``llm.template.*`` | ``TemplatePayload`` | Template registry metadata |
1562
+
1563
+ ```python
1564
+ from spanforge.namespaces.trace import SpanPayload
1565
+ from spanforge import Event
1566
+
1567
+ payload = SpanPayload(
1568
+ span_name="call-llm",
1569
+ span_id="abc123",
1570
+ trace_id="def456",
1571
+ start_time_ns=1_000_000_000,
1572
+ end_time_ns=1_340_000_000,
1573
+ status="ok",
1574
+ )
1575
+
1576
+ event = Event(
1577
+ event_type="llm.trace.span.completed",
1578
+ source="my-app@1.0.0",
1579
+ payload=payload.to_dict(),
1580
+ )
1581
+ ```
1582
+
1583
+ ---
1584
+
1585
+ ## Quality standards
1586
+
1587
+ - **3 331 tests passing** (10 skipped) — unit, integration, property-based (Hypothesis), and performance benchmarks
1588
+ - **92.84 % line and branch coverage** — measured with ``pytest-cov``; 90 % minimum enforced in CI
1589
+ - **Zero required dependencies** — the entire core runs on Python's standard library alone
1590
+ - **Typed** — full ``py.typed`` marker; works with mypy and pyright out of the box
1591
+ - **Frozen v2 trace schema** — ``llm.trace.*`` payload fields will never break between minor releases
1592
+ - **async-safe context propagation** — `contextvars`-based span stacks work correctly across `asyncio` tasks, thread pools, and executors
1593
+ - **Version 1.0.7** adds: `@trace()` decorator, OTLP bridge, `CostTracker` / `BudgetMonitor`, `InspectorSession`, `ToolRegistry` / `@tool`, `@retry` / `FallbackChain` / `CircuitBreaker`, `SemanticCache` / `@cached`, and `spanforge.lint` (AO001–AO005, flake8 plugin, CLI)
1594
+ - **Version 2.0.0** adds: `Trace` / `start_trace()`, `async with`, `span.add_event()`, `print_tree()` / `summary()` / `visualize()`, sampling controls, `metrics.aggregate()`, `TraceStore`, `HookRegistry`, CrewAI integration
1595
+ - **Version 1.0.6** adds: `spanforge.testing`, `spanforge.auto`, async lifecycle hooks, `spanforge check` CLI, export retry with back-off, `unpatch()` / `is_patched()` for all integrations, frozen payload dataclasses, `assert_no_sunset_reached()`
1596
+
1597
+ ---
1598
+
1599
+ ## Project structure
1600
+
1601
+ ```
1602
+ spanforge/
1603
+ ├── __init__.py <- Public API surface (start here)
1604
+ ├── event.py <- The Event envelope
1605
+ ├── types.py <- EventType enum (+ SpanErrorCategory)
1606
+ ├── config.py <- configure() / get_config() / SpanForgeConfig
1607
+ │ (sample_rate, always_sample_errors, include_raw_tool_io,
1608
+ │ enable_trace_store, trace_store_size)
1609
+ ├── _span.py <- Span, AgentRun, AgentStep context managers
1610
+ │ (contextvars stacks, async with, add_event,
1611
+ │ record_error, set_timeout_deadline)
1612
+ ├── _trace.py <- Trace class + start_trace() [NEW in 2.0]
1613
+ ├── _tracer.py <- Tracer — top-level tracing entry point
1614
+ ├── _stream.py <- Internal dispatch: sample → redact → sign → export
1615
+ ├── _store.py <- TraceStore ring buffer [NEW in 2.0]
1616
+ ├── _hooks.py <- HookRegistry singleton (hooks) [NEW in 2.0]
1617
+ ├── _cli.py <- CLI entry-point (21 sub-commands: check, check-compat, validate,
1617
+ │ audit-chain, audit, scan, migrate, inspect, stats, list-deprecated, migration-roadmap,
1619
+ │ check-consumers, compliance, cost, dev, module, serve, init,
1620
+ │ quickstart, report, ui)
1621
+ ├── _server.py <- TraceViewerServer — lightweight HTTP server for /traces endpoint
1622
+ ├── trace.py <- @trace() decorator + SpanOTLPBridge [NEW in 1.0.7]
1623
+ ├── cost.py <- CostTracker, BudgetMonitor, @budget_alert [NEW in 1.0.7]
1624
+ ├── inspect.py <- InspectorSession, inspect_trace() [NEW in 1.0.7]
1625
+ ├── toolsmith.py <- @tool, ToolRegistry, build_openai_schema() [NEW in 1.0.7]
1626
+ ├── retry.py <- @retry, FallbackChain, CircuitBreaker [NEW in 1.0.7]
1627
+ ├── cache.py <- SemanticCache, @cached, *Backend [NEW in 1.0.7]
1628
+ ├── lint/ <- run_checks(), AO001-AO005, flake8 plugin, CLI [NEW in 1.0.7]
1629
+ │ ├── __init__.py
1630
+ │ ├── _visitor.py
1631
+ │ ├── _checks.py
1632
+ │ ├── _flake8.py
1633
+ │ └── __main__.py
1634
+ ├── testing.py <- MockExporter, capture_events(), assert_event_schema_valid(),
1635
+ │ trace_store() — test utilities without real exporters [1.0.6]
1636
+ ├── auto.py <- Integration auto-discovery; setup() / teardown() [1.0.6]
1637
+ ├── debug.py <- print_tree, summary, visualize [NEW in 2.0]
1638
+ ├── metrics.py <- aggregate(), MetricsSummary, etc. [NEW in 2.0]
1639
+ ├── signing.py <- HMAC signing & audit chains
1640
+ ├── redact.py <- PII redaction
1641
+ ├── validate.py <- JSON Schema validation
1642
+ ├── consumer.py <- Consumer registry & schema-version compatibility
1643
+ ├── governance.py <- Event governance policies
1644
+ ├── deprecations.py <- Per-event-type deprecation tracking
1645
+ ├── compliance/ <- Compatibility checklist suite
1646
+ ├── core/
1647
+ │ └── compliance_mapping.py <- ComplianceMappingEngine + evidence packages [Commercial]
1648
+ ├── export/
1649
+ │ ├── jsonl.py <- Local file export (async)
1650
+ │ ├── webhook.py <- HTTP POST export
1651
+ │ ├── otlp.py <- OpenTelemetry export
1652
+ │ ├── datadog.py <- Datadog APM traces + metrics
1653
+ │ ├── grafana.py <- Grafana Loki export
1654
+ │ └── cloud.py <- spanforge Cloud export (thread-safe, batched) [Commercial]
1655
+ ├── exporters/
1656
+ │ ├── jsonl.py <- SyncJSONLExporter
1657
+ │ └── console.py <- SyncConsoleExporter
1658
+ ├── stream.py <- EventStream fan-out router (+ Kafka source)
1659
+ ├── integrations/
1660
+ │ ├── langchain.py <- LangChain callback handler
1661
+ │ ├── llamaindex.py <- LlamaIndex event handler
1662
+ │ ├── openai.py <- OpenAI tracing wrapper
1663
+ │ ├── crewai.py <- CrewAI handler + patch() [NEW in 2.0]
1664
+ │ └── ... (anthropic, groq, ollama, together)
1665
+ ├── namespaces/ <- Typed payload dataclasses
1666
+ │ ├── trace.py (SpanPayload + temperature/top_p/max_tokens/error_category,
1667
+ │ │ SpanEvent, ToolCall + arguments_raw/result_raw/retry_count)
1668
+ │ ├── cost.py
1669
+ │ ├── cache.py
1670
+ │ └── ...
1671
+ ├── models.py <- Optional Pydantic v2 models
1672
+ └── migrate.py <- Schema migration: v1_to_v2(), migrate_file(), MigrationStats
1673
+ examples/ <- Runnable sample scripts
1674
+ ├── openai_chat.py <- OpenAI + JSONL export
1675
+ ├── agent_workflow.py <- Multi-step agent + console exporter
1676
+ ├── langchain_chain.py <- LangChain callback handler
1677
+ └── secure_pipeline.py <- HMAC signing + PII redaction together
1678
+ ```
1679
+
1680
+ ---
1681
+
1682
+ ## Development setup
1683
+
1684
+ ```bash
1685
+ git clone https://github.com/veerarag1973/spanforge.git
1686
+ cd spanforge
1687
+
1688
+ python -m venv .venv
1689
+ .venv\Scripts\activate # Windows
1690
+ # source .venv/bin/activate # macOS / Linux
1691
+
1692
+ pip install -e ".[dev]"
1693
+ pytest # run all 3 331 tests
1694
+ ```
1695
+
1696
+ <details>
1697
+ <summary><strong>Code quality commands</strong></summary>
1698
+
1699
+ ```bash
1700
+ ruff check . # linting
1701
+ ruff format . # auto-format
1702
+ mypy spanforge # type checking
1703
+ pytest --cov # tests + coverage report (>=90% required)
1704
+ ```
1705
+
1706
+ </details>
1707
+
1708
+ <details>
1709
+ <summary><strong>Build the docs locally</strong></summary>
1710
+
1711
+ ```bash
1712
+ pip install -e ".[docs]"
1713
+ cd docs
1714
+ sphinx-build -b html . _build/html # open _build/html/index.html
1715
+ ```
1716
+
1717
+ </details>
1718
+
1719
+ ---
1720
+
1721
+ ## Compatibility and versioning
1722
+
1723
+ ``spanforge`` implements **RFC-0001 spanforge** (AI Compliance Standard for Agentic AI Systems). The current schema version is **2.0**.
1724
+
1725
+ This project follows [Semantic Versioning](https://semver.org/):
1726
+
1727
+ - **Patch** releases (``1.0.x``) — bug fixes only, fully backwards-compatible
1728
+ - **Minor** releases (``1.x.0``) — new features, backwards-compatible
1729
+ - **Major** releases (``x.0.0``) — breaking changes, announced in advance
1730
+
1731
+ The ``llm.trace.*`` namespace payload schema is **additionally frozen at v2**: even a major release will not remove or rename fields from ``SpanPayload``, ``AgentRunPayload``, or ``AgentStepPayload``.
1732
+
1733
+ ---
1734
+
1735
+ ## Changelog
1736
+
1737
+ See [docs/changelog.md](docs/changelog.md) for the full version history.
1738
+
1739
+ ---
1740
+
1741
+ ## Contributing
1742
+
1743
+ Contributions are welcome! Please read the [Contributing Guide](docs/contributing.md) first, then open an issue or pull request.
1744
+
1745
+ Key rules:
1746
+ - All new code must maintain **>= 90 % test coverage**
1747
+ - Follow the existing **Google-style docstrings**
1748
+ - Run ``ruff`` and ``mypy`` before submitting
1749
+
1750
+ ---
1751
+
1752
+ ## Community
1753
+
1754
+ - **GitHub Discussions** — questions, ideas, and show-and-tell: [github.com/veerarag1973/spanforge/discussions](https://github.com/veerarag1973/spanforge/discussions)
1755
+ - **Bug reports & feature requests** — use GitHub Issues with the provided templates
1756
+ - **Security disclosures** — see [SECURITY.md](SECURITY.md) for our responsible-disclosure process
1757
+ - **Code of Conduct** — this project follows the [Contributor Covenant v2.1](CODE_OF_CONDUCT.md)
1758
+ - **Changelog** — see [docs/changelog.md](docs/changelog.md) for release history
1759
+
1760
+ > GitHub topics for discoverability: `ai-compliance` `ai-governance` `llm-tracing` `opentelemetry` `pii-redaction` `audit-trail` `langchain` `openai` `python`
1761
+
1762
+ ---
1763
+
1764
+ ## License
1765
+
1766
+ [MIT](LICENSE) — free for personal and commercial use.
1767
+
1768
+ ---
1769
+
1770
+ <p align="center">
1771
+ Made with care for the AI compliance community.<br/>
1772
+ <a href="docs/index.md">Docs</a> ·
1773
+ <a href="docs/quickstart.md">Quickstart</a> ·
1774
+ <a href="docs/api/index.md">API Reference</a> ·
1775
+ <a href="https://github.com/veerarag1973/spanforge/discussions">Discussions</a> ·
1776
+ <a href="https://github.com/veerarag1973/spanforge/issues">Report a bug</a>
1777
+ </p>