spooled-ai 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. spooled_ai-0.4.0/LICENSE +19 -0
  2. spooled_ai-0.4.0/MANIFEST.in +28 -0
  3. spooled_ai-0.4.0/PKG-INFO +247 -0
  4. spooled_ai-0.4.0/README.md +183 -0
  5. spooled_ai-0.4.0/cli/__init__.py +5 -0
  6. spooled_ai-0.4.0/cli/commands/__init__.py +53 -0
  7. spooled_ai-0.4.0/cli/commands/analyze.py +340 -0
  8. spooled_ai-0.4.0/cli/commands/attest.py +321 -0
  9. spooled_ai-0.4.0/cli/commands/baseline.py +660 -0
  10. spooled_ai-0.4.0/cli/commands/ci.py +3374 -0
  11. spooled_ai-0.4.0/cli/commands/ci_display.py +338 -0
  12. spooled_ai-0.4.0/cli/commands/ci_engine.py +895 -0
  13. spooled_ai-0.4.0/cli/commands/demo.py +264 -0
  14. spooled_ai-0.4.0/cli/commands/diff.py +114 -0
  15. spooled_ai-0.4.0/cli/commands/doctor.py +407 -0
  16. spooled_ai-0.4.0/cli/commands/fingerprint.py +231 -0
  17. spooled_ai-0.4.0/cli/commands/fleet.py +539 -0
  18. spooled_ai-0.4.0/cli/commands/ingest.py +352 -0
  19. spooled_ai-0.4.0/cli/commands/init.py +313 -0
  20. spooled_ai-0.4.0/cli/commands/keys.py +263 -0
  21. spooled_ai-0.4.0/cli/commands/list.py +116 -0
  22. spooled_ai-0.4.0/cli/commands/new_agent.py +252 -0
  23. spooled_ai-0.4.0/cli/commands/policy.py +165 -0
  24. spooled_ai-0.4.0/cli/commands/pull.py +185 -0
  25. spooled_ai-0.4.0/cli/commands/replay.py +123 -0
  26. spooled_ai-0.4.0/cli/commands/session.py +310 -0
  27. spooled_ai-0.4.0/cli/commands/traces.py +138 -0
  28. spooled_ai-0.4.0/cli/commands/verify.py +392 -0
  29. spooled_ai-0.4.0/cli/commands/view.py +258 -0
  30. spooled_ai-0.4.0/cli/commands/watch.py +185 -0
  31. spooled_ai-0.4.0/cli/diff.py +510 -0
  32. spooled_ai-0.4.0/cli/main.py +198 -0
  33. spooled_ai-0.4.0/cli/replay_generator.py +221 -0
  34. spooled_ai-0.4.0/cli/report.py +514 -0
  35. spooled_ai-0.4.0/cli/report_pr.py +424 -0
  36. spooled_ai-0.4.0/cli/templates/__init__.py +3 -0
  37. spooled_ai-0.4.0/cli/templates/demo_agent.py +252 -0
  38. spooled_ai-0.4.0/cli/templates/demo_variant.py +220 -0
  39. spooled_ai-0.4.0/cli/templates/github_workflow.py +185 -0
  40. spooled_ai-0.4.0/cli/templates/policy_templates.py +150 -0
  41. spooled_ai-0.4.0/cli/templates/sample_policy.py +7 -0
  42. spooled_ai-0.4.0/cli/utils.py +51 -0
  43. spooled_ai-0.4.0/pyproject.toml +224 -0
  44. spooled_ai-0.4.0/setup.cfg +4 -0
  45. spooled_ai-0.4.0/spooled/__init__.py +264 -0
  46. spooled_ai-0.4.0/spooled/_context.py +27 -0
  47. spooled_ai-0.4.0/spooled/attest.py +267 -0
  48. spooled_ai-0.4.0/spooled/attestation.py +269 -0
  49. spooled_ai-0.4.0/spooled/baseline.py +1394 -0
  50. spooled_ai-0.4.0/spooled/ci_history_sync.py +185 -0
  51. spooled_ai-0.4.0/spooled/circuit_breaker.py +115 -0
  52. spooled_ai-0.4.0/spooled/decorators.py +343 -0
  53. spooled_ai-0.4.0/spooled/exporters/__init__.py +30 -0
  54. spooled_ai-0.4.0/spooled/exporters/_semconv.py +74 -0
  55. spooled_ai-0.4.0/spooled/exporters/otel.py +423 -0
  56. spooled_ai-0.4.0/spooled/exporters/webhook.py +159 -0
  57. spooled_ai-0.4.0/spooled/fingerprint.py +830 -0
  58. spooled_ai-0.4.0/spooled/hash_utils.py +176 -0
  59. spooled_ai-0.4.0/spooled/hooks/__init__.py +89 -0
  60. spooled_ai-0.4.0/spooled/hooks/_privacy.py +185 -0
  61. spooled_ai-0.4.0/spooled/hooks/aiohttp_hook.py +161 -0
  62. spooled_ai-0.4.0/spooled/hooks/anthropic_hook.py +546 -0
  63. spooled_ai-0.4.0/spooled/hooks/bedrock_hook.py +920 -0
  64. spooled_ai-0.4.0/spooled/hooks/httpx_hook.py +218 -0
  65. spooled_ai-0.4.0/spooled/hooks/openai_hook.py +699 -0
  66. spooled_ai-0.4.0/spooled/hooks/requests_hook.py +171 -0
  67. spooled_ai-0.4.0/spooled/integrations/__init__.py +18 -0
  68. spooled_ai-0.4.0/spooled/integrations/autogen.py +161 -0
  69. spooled_ai-0.4.0/spooled/integrations/crewai.py +222 -0
  70. spooled_ai-0.4.0/spooled/integrations/langchain.py +200 -0
  71. spooled_ai-0.4.0/spooled/integrations/langgraph.py +352 -0
  72. spooled_ai-0.4.0/spooled/integrations/llamaindex.py +116 -0
  73. spooled_ai-0.4.0/spooled/licensing.py +267 -0
  74. spooled_ai-0.4.0/spooled/metrics.py +98 -0
  75. spooled_ai-0.4.0/spooled/models.py +569 -0
  76. spooled_ai-0.4.0/spooled/policy.py +874 -0
  77. spooled_ai-0.4.0/spooled/processors/__init__.py +24 -0
  78. spooled_ai-0.4.0/spooled/processors/_converter.py +264 -0
  79. spooled_ai-0.4.0/spooled/processors/otel.py +355 -0
  80. spooled_ai-0.4.0/spooled/py.typed +0 -0
  81. spooled_ai-0.4.0/spooled/recorder.py +1134 -0
  82. spooled_ai-0.4.0/spooled/redaction.py +337 -0
  83. spooled_ai-0.4.0/spooled/scoring.py +287 -0
  84. spooled_ai-0.4.0/spooled/signals.py +1391 -0
  85. spooled_ai-0.4.0/spooled/storage.py +214 -0
  86. spooled_ai-0.4.0/spooled/tool_signature.py +125 -0
  87. spooled_ai-0.4.0/spooled/usage.py +214 -0
  88. spooled_ai-0.4.0/spooled/utils.py +111 -0
  89. spooled_ai-0.4.0/spooled/wrappers/__init__.py +18 -0
  90. spooled_ai-0.4.0/spooled/wrappers/anthropic_wrapper.py +108 -0
  91. spooled_ai-0.4.0/spooled/wrappers/openai_wrapper.py +137 -0
  92. spooled_ai-0.4.0/spooled_ai.egg-info/PKG-INFO +247 -0
  93. spooled_ai-0.4.0/spooled_ai.egg-info/SOURCES.txt +95 -0
  94. spooled_ai-0.4.0/spooled_ai.egg-info/dependency_links.txt +1 -0
  95. spooled_ai-0.4.0/spooled_ai.egg-info/entry_points.txt +3 -0
  96. spooled_ai-0.4.0/spooled_ai.egg-info/requires.txt +44 -0
  97. spooled_ai-0.4.0/spooled_ai.egg-info/top_level.txt +2 -0
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2026 Spooled AI. All rights reserved.
2
+
3
+ This software and associated documentation files (the "Software") are the
4
+ proprietary property of Spooled AI. You may use the Software only in
5
+ accordance with the terms of your agreement with Spooled AI or, in the
6
+ absence of such agreement, subject to the following conditions:
7
+
8
+ 1. You may install and use the Software for your internal business purposes.
9
+ 2. You may not redistribute, sublicense, sell, or otherwise make the Software
10
+ available to third parties, in whole or in part.
11
+ 3. You may not modify, reverse-engineer, decompile, or create derivative
12
+ works based on the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
+ SPOOLED AI BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18
+ AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,28 @@
1
+ # Include only what belongs in the pip package
2
+ include LICENSE
3
+ include README.md
4
+ include pyproject.toml
5
+
6
+ # SDK and CLI source
7
+ recursive-include spooled *.py
8
+ include spooled/py.typed
9
+ recursive-include cli *.py
10
+
11
+ # Exclude everything else
12
+ prune action
13
+ prune backend
14
+ prune tests
15
+ prune tools
16
+ prune docs
17
+ prune examples
18
+ prune schemas
19
+ prune .github
20
+ prune cdk.out
21
+ prune htmlcov
22
+
23
+ # Exclude dev/config files
24
+ exclude .env
25
+ exclude .env.*
26
+ exclude .pre-commit-config.yaml
27
+ exclude Makefile
28
+ exclude setup.py
@@ -0,0 +1,247 @@
1
+ Metadata-Version: 2.4
2
+ Name: spooled-ai
3
+ Version: 0.4.0
4
+ Summary: CI for AI agents - behavioral fingerprinting and drift detection
5
+ Author: Spooled Team
6
+ License: Proprietary
7
+ Project-URL: Homepage, https://spooled.ai
8
+ Project-URL: Documentation, https://spooled.ai/docs
9
+ Project-URL: Changelog, https://spooled.ai/docs/changelog
10
+ Keywords: ai,agents,tracing,debugging,replay,ci,behavioral-testing,llm
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: Other/Proprietary License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Software Development :: Testing
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
26
+ Requires-Dist: python-dotenv>=1.0.0
27
+ Requires-Dist: pyyaml>=6.0.0
28
+ Requires-Dist: structlog>=23.0.0
29
+ Requires-Dist: typer>=0.9.0
30
+ Requires-Dist: rich>=13.0.0
31
+ Requires-Dist: httpx>=0.24.0
32
+ Requires-Dist: requests>=2.31.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
36
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
37
+ Requires-Dist: black>=23.0.0; extra == "dev"
38
+ Requires-Dist: mypy>=1.5.0; extra == "dev"
39
+ Requires-Dist: pre-commit>=3.5.0; extra == "dev"
40
+ Requires-Dist: types-requests; extra == "dev"
41
+ Requires-Dist: types-python-dateutil; extra == "dev"
42
+ Provides-Extra: cli
43
+ Provides-Extra: backend
44
+ Requires-Dist: aws-cdk-lib>=2.100.0; extra == "backend"
45
+ Requires-Dist: constructs>=10.0.0; extra == "backend"
46
+ Requires-Dist: boto3>=1.28.0; extra == "backend"
47
+ Provides-Extra: tools
48
+ Requires-Dist: langchain>=0.3.0; extra == "tools"
49
+ Requires-Dist: langchain-community>=0.3.0; extra == "tools"
50
+ Requires-Dist: langchain-openai>=0.2.0; extra == "tools"
51
+ Requires-Dist: langgraph>=0.2.0; extra == "tools"
52
+ Requires-Dist: crewai>=0.80.0; extra == "tools"
53
+ Requires-Dist: llama-index>=0.10.0; extra == "tools"
54
+ Requires-Dist: pyautogen>=0.2.0; extra == "tools"
55
+ Requires-Dist: aiohttp>=3.9.0; extra == "tools"
56
+ Requires-Dist: boto3>=1.28.0; extra == "tools"
57
+ Provides-Extra: metrics
58
+ Requires-Dist: prometheus-client>=0.20.0; extra == "metrics"
59
+ Provides-Extra: otel
60
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == "otel"
61
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "otel"
62
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.20.0; extra == "otel"
63
+ Dynamic: license-file
64
+
65
+ # Spooled — Behavioral CI for AI Agents
66
+
67
+ > **The diff for agent behavior.**
68
+ > Capture what your agent does, detect when it changes, gate the PR.
69
+
70
+ AI agents are non-deterministic. The same code, prompt, and model produce different tool-calling behavior on every run. A one-word prompt edit can silently drop a compliance check. A model upgrade can change which tools get called. A KB refresh can alter the agent's decision path. Unit tests pass. Eval suites pass. Nobody notices until production.
71
+
72
+ Spooled catches it on the PR.
73
+
74
+ ## What It Does
75
+
76
+ **Capture** — wraps your LLM client and records the structural fingerprint of every agent run: which tools were called, in what order, how many times. Content-blind by architecture — prompts, customer data, and AI responses never leave your infrastructure.
77
+
78
+ **Compare** — diffs the current run against a committed baseline. Shows exactly what changed: tools added, tools removed, sequence reordered, token usage shifted.
79
+
80
+ **Gate** — posts a PR comment with a behavioral score. Blocks the merge if the policy says so. Resolution instructions included.
81
+
82
+ ## Install
83
+
84
+ ```bash
85
+ pip install spooled-ai
86
+ ```
87
+
88
+ ## Quick Start
89
+
90
+ ```python
91
+ import spooled
92
+ from spooled.wrappers import wrap_openai
93
+ from openai import OpenAI
94
+
95
+ spooled.init(agent_id="my_agent")
96
+ client = wrap_openai(OpenAI())
97
+
98
+ response = client.chat.completions.create(
99
+ model="gpt-4o",
100
+ messages=[{"role": "user", "content": "Analyze this deal"}],
101
+ tools=MY_TOOLS,
102
+ )
103
+
104
+ spooled.shutdown()
105
+ ```
106
+
107
+ That's it. Every tool call is captured. The trace is saved to `.spooled/traces/`. The hash chain signs every interaction at capture time.
108
+
109
+ ## CI Integration
110
+
111
+ ```yaml
112
+ # .github/workflows/spooled.yml
113
+ - name: Generate traces
114
+ run: python ci_runner.py
115
+ env:
116
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
117
+
118
+ - name: Spooled behavioral check
119
+ run: |
120
+ pip install spooled-ai
121
+ spooled ci compare .spooled/traces/*.jsonl \
122
+ --baseline .github/baselines \
123
+ --policy spooled-policy.yml \
124
+ --enable-blocking
125
+ ```
126
+
127
+ The action compares traces against committed baselines and posts a PR comment:
128
+
129
+ ```
130
+ ❌ Spooled Behavioral CI: FAIL
131
+ Spooled Score: 59/100 (D) 🔴
132
+
133
+ | Agent | Status | Score | Tokens |
134
+ |------------|---------------------|-------|---------------|
135
+ | deal_agent | ⚠️ Variant · Tooling change | 59 | 🔻 14198 (-32%) |
136
+
137
+ 🔧 Tool Changes:
138
+ ➖ sanctions_screening removed
139
+ ➖ ip_patent_search removed
140
+ ```
141
+
142
+ ## What Spooled Catches
143
+
144
+ Validated across 50 real-world test scenarios with ~316 OpenAI API calls:
145
+
146
+ | Change type | Example | Unit tests | Spooled |
147
+ |-------------|---------|:----------:|:-------:|
148
+ | Prompt tweak | "Be concise" drops compliance tools | ✅ Pass | **VARIANT** |
149
+ | Model swap | gpt-4o drops sanctions screening | ✅ Pass | **VARIANT** |
150
+ | Tool deprecation | Agent proceeds on sanctioned entity without sanctions data | ✅ Pass | **VARIANT** |
151
+ | KB refresh | Fraud tickets lose customer response | ✅ Pass | **VARIANT** |
152
+ | Schema migration | Field rename stops international detection | ✅ Pass | **VARIANT** |
153
+ | Prompt reordering | Same words, sections reordered | ✅ Pass | **VARIANT** |
154
+ | Tool description edit | Better docs change model decisions | ✅ Pass | **VARIANT** |
155
+ | Upstream degradation | Retry paths appear in fingerprint | ✅ Pass | **VARIANT** |
156
+
157
+ ## Content-Blind Architecture
158
+
159
+ Spooled never captures prompts, customer data, or AI responses. Only structural metadata: tool names, call sequence, token counts, timing. This is enforced in code — content is stripped before the trace reaches disk.
160
+
161
+ Verified: we injected SSNs, credit cards, API keys, and email addresses into tool outputs. Scanned the trace file. **Zero PII found.** Structural data (tool names, model, usage) fully preserved.
162
+
163
+ This opens regulated markets (healthcare, finance, government) where competitors who capture content cannot operate.
164
+
165
+ ## Multi-Agent Support
166
+
167
+ ```python
168
+ # Supervisor starts
169
+ spooled.init(agent_id="supervisor")
170
+ # ... supervisor work ...
171
+
172
+ # Child inherits parent linkage automatically
173
+ spooled.init(agent_id="worker") # auto-detects parent, inherits session
174
+ # ... worker work ...
175
+ spooled.shutdown() # stops worker only, supervisor stays active
176
+ ```
177
+
178
+ Each agent gets its own trace, its own fingerprint, its own baseline. Concurrent execution is safe — tested with 3 agents in parallel threads, zero cross-contamination.
179
+
180
+ ## Policy Rules
181
+
182
+ ```yaml
183
+ # spooled-policy.yml
184
+ name: "Production gate"
185
+ enabled: true
186
+ block_merges: true
187
+ rules:
188
+ - name: "Block behavioral variants"
189
+ fail_if:
190
+ on_variant: true # Block structural changes
191
+ on_new_behavior: true # Block new intent patterns
192
+ ```
193
+
194
+ Separate `on_variant` (structural change to a known intent) from `on_new_behavior` (entirely new intent bucket). Gate each independently.
195
+
196
+ ## Actionable Findings
197
+
198
+ From the 50-scenario assessment:
199
+
200
+ - **Set `seed=42`** on OpenAI tool-calling agents — achieves 100% fingerprint stability on gpt-4o-mini, 75% on gpt-4o
201
+ - **Guardrails need "non-negotiable" language** — medium-strength prompts erode 100% under conversational pressure; strong prompts with explicit override-rejection hold 100%
202
+ - **Run 20+ inputs** before shipping prompt changes — 10-input samples produce noise; 20+ reveals the real distribution shift
203
+
204
+ ## Supported Libraries
205
+
206
+ **LLM Providers (explicit wrappers):**
207
+ - OpenAI (sync/async, streaming)
208
+ - Anthropic (sync/async, streaming)
209
+
210
+ **HTTP & Cloud (auto-instrumented via hooks):**
211
+ - AWS Bedrock
212
+ - requests, httpx, aiohttp
213
+
214
+ **Frameworks (callback handlers):**
215
+ - LangChain, LlamaIndex, AutoGen, CrewAI, LangGraph
216
+
217
+ ## Commands
218
+
219
+ ```bash
220
+ spooled verify trace <run_id> # Hash chain integrity check
221
+ spooled diff traces <a> <b> # Structural comparison
222
+ spooled ci compare <trace> --baseline <dir> # CI comparison
223
+ spooled ci update-baseline --from dir # Generate baselines
224
+ spooled analyze --agent-id my_agent # Analyze trace patterns
225
+ spooled policy init # Create a policy file
226
+ ```
227
+
228
+ ## How It Works
229
+
230
+ 1. `wrap_openai()` intercepts `chat.completions.create()` calls
231
+ 2. Each tool call is recorded with a SHA-256 hash of the previous interaction (hash chain)
232
+ 3. Content is stripped to structural metadata before saving (privacy by architecture)
233
+ 4. A `structural_hash` of the saved data enables post-save tampering detection
234
+ 5. The trace is saved as append-only JSONL in `.spooled/traces/`
235
+ 6. `spooled ci compare` extracts the fingerprint (tool graph) and diffs against the baseline
236
+ 7. Policy rules determine PASS/FAIL; the PR comment shows what changed
237
+
238
+ ## Documentation
239
+
240
+ - [CI Integration Guide](docs/CI_INTEGRATION.md)
241
+ - [Architecture & Trust Model](docs/ARCHITECTURE.md)
242
+ - [Data Schema](docs/DATA_SCHEMA.md)
243
+ - [Deployment Guide](docs/DEPLOYMENT.md)
244
+
245
+ ## License
246
+
247
+ Proprietary. See the LICENSE file for the terms of use.
@@ -0,0 +1,183 @@
1
+ # Spooled — Behavioral CI for AI Agents
2
+
3
+ > **The diff for agent behavior.**
4
+ > Capture what your agent does, detect when it changes, gate the PR.
5
+
6
+ AI agents are non-deterministic. The same code, prompt, and model produce different tool-calling behavior on every run. A one-word prompt edit can silently drop a compliance check. A model upgrade can change which tools get called. A KB refresh can alter the agent's decision path. Unit tests pass. Eval suites pass. Nobody notices until production.
7
+
8
+ Spooled catches it on the PR.
9
+
10
+ ## What It Does
11
+
12
+ **Capture** — wraps your LLM client and records the structural fingerprint of every agent run: which tools were called, in what order, how many times. Content-blind by architecture — prompts, customer data, and AI responses never leave your infrastructure.
13
+
14
+ **Compare** — diffs the current run against a committed baseline. Shows exactly what changed: tools added, tools removed, sequence reordered, token usage shifted.
15
+
16
+ **Gate** — posts a PR comment with a behavioral score. Blocks the merge if the policy says so. Resolution instructions included.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install spooled-ai
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ```python
27
+ import spooled
28
+ from spooled.wrappers import wrap_openai
29
+ from openai import OpenAI
30
+
31
+ spooled.init(agent_id="my_agent")
32
+ client = wrap_openai(OpenAI())
33
+
34
+ response = client.chat.completions.create(
35
+ model="gpt-4o",
36
+ messages=[{"role": "user", "content": "Analyze this deal"}],
37
+ tools=MY_TOOLS,
38
+ )
39
+
40
+ spooled.shutdown()
41
+ ```
42
+
43
+ That's it. Every tool call is captured. The trace is saved to `.spooled/traces/`. The hash chain signs every interaction at capture time.
44
+
45
+ ## CI Integration
46
+
47
+ ```yaml
48
+ # .github/workflows/spooled.yml
49
+ - name: Generate traces
50
+ run: python ci_runner.py
51
+ env:
52
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
53
+
54
+ - name: Spooled behavioral check
55
+ run: |
56
+ pip install spooled-ai
57
+ spooled ci compare .spooled/traces/*.jsonl \
58
+ --baseline .github/baselines \
59
+ --policy spooled-policy.yml \
60
+ --enable-blocking
61
+ ```
62
+
63
+ The action compares traces against committed baselines and posts a PR comment:
64
+
65
+ ```
66
+ ❌ Spooled Behavioral CI: FAIL
67
+ Spooled Score: 59/100 (D) 🔴
68
+
69
+ | Agent | Status | Score | Tokens |
70
+ |------------|---------------------|-------|---------------|
71
+ | deal_agent | ⚠️ Variant · Tooling change | 59 | 🔻 14198 (-32%) |
72
+
73
+ 🔧 Tool Changes:
74
+ ➖ sanctions_screening removed
75
+ ➖ ip_patent_search removed
76
+ ```
77
+
78
+ ## What Spooled Catches
79
+
80
+ Validated across 50 real-world test scenarios with ~316 OpenAI API calls:
81
+
82
+ | Change type | Example | Unit tests | Spooled |
83
+ |-------------|---------|:----------:|:-------:|
84
+ | Prompt tweak | "Be concise" drops compliance tools | ✅ Pass | **VARIANT** |
85
+ | Model swap | gpt-4o drops sanctions screening | ✅ Pass | **VARIANT** |
86
+ | Tool deprecation | Agent proceeds on sanctioned entity without sanctions data | ✅ Pass | **VARIANT** |
87
+ | KB refresh | Fraud tickets lose customer response | ✅ Pass | **VARIANT** |
88
+ | Schema migration | Field rename stops international detection | ✅ Pass | **VARIANT** |
89
+ | Prompt reordering | Same words, sections reordered | ✅ Pass | **VARIANT** |
90
+ | Tool description edit | Better docs change model decisions | ✅ Pass | **VARIANT** |
91
+ | Upstream degradation | Retry paths appear in fingerprint | ✅ Pass | **VARIANT** |
92
+
93
+ ## Content-Blind Architecture
94
+
95
+ Spooled never captures prompts, customer data, or AI responses. Only structural metadata: tool names, call sequence, token counts, timing. This is enforced in code — content is stripped before the trace reaches disk.
96
+
97
+ Verified: we injected SSNs, credit cards, API keys, and email addresses into tool outputs. Scanned the trace file. **Zero PII found.** Structural data (tool names, model, usage) fully preserved.
98
+
99
+ This opens regulated markets (healthcare, finance, government) where competitors who capture content cannot operate.
100
+
101
+ ## Multi-Agent Support
102
+
103
+ ```python
104
+ # Supervisor starts
105
+ spooled.init(agent_id="supervisor")
106
+ # ... supervisor work ...
107
+
108
+ # Child inherits parent linkage automatically
109
+ spooled.init(agent_id="worker") # auto-detects parent, inherits session
110
+ # ... worker work ...
111
+ spooled.shutdown() # stops worker only, supervisor stays active
112
+ ```
113
+
114
+ Each agent gets its own trace, its own fingerprint, its own baseline. Concurrent execution is safe — tested with 3 agents in parallel threads, zero cross-contamination.
115
+
116
+ ## Policy Rules
117
+
118
+ ```yaml
119
+ # spooled-policy.yml
120
+ name: "Production gate"
121
+ enabled: true
122
+ block_merges: true
123
+ rules:
124
+ - name: "Block behavioral variants"
125
+ fail_if:
126
+ on_variant: true # Block structural changes
127
+ on_new_behavior: true # Block new intent patterns
128
+ ```
129
+
130
+ Separate `on_variant` (structural change to a known intent) from `on_new_behavior` (entirely new intent bucket). Gate each independently.
131
+
132
+ ## Actionable Findings
133
+
134
+ From the 50-scenario assessment:
135
+
136
+ - **Set `seed=42`** on OpenAI tool-calling agents — achieves 100% fingerprint stability on gpt-4o-mini, 75% on gpt-4o
137
+ - **Guardrails need "non-negotiable" language** — medium-strength prompts erode 100% under conversational pressure; strong prompts with explicit override-rejection hold 100%
138
+ - **Run 20+ inputs** before shipping prompt changes — 10-input samples produce noise; 20+ reveals the real distribution shift
139
+
140
+ ## Supported Libraries
141
+
142
+ **LLM Providers (explicit wrappers):**
143
+ - OpenAI (sync/async, streaming)
144
+ - Anthropic (sync/async, streaming)
145
+
146
+ **HTTP & Cloud (auto-instrumented via hooks):**
147
+ - AWS Bedrock
148
+ - requests, httpx, aiohttp
149
+
150
+ **Frameworks (callback handlers):**
151
+ - LangChain, LlamaIndex, AutoGen, CrewAI, LangGraph
152
+
153
+ ## Commands
154
+
155
+ ```bash
156
+ spooled verify trace <run_id> # Hash chain integrity check
157
+ spooled diff traces <a> <b> # Structural comparison
158
+ spooled ci compare <trace> --baseline <dir> # CI comparison
159
+ spooled ci update-baseline --from dir # Generate baselines
160
+ spooled analyze --agent-id my_agent # Analyze trace patterns
161
+ spooled policy init # Create a policy file
162
+ ```
163
+
164
+ ## How It Works
165
+
166
+ 1. `wrap_openai()` intercepts `chat.completions.create()` calls
167
+ 2. Each tool call is recorded with a SHA-256 hash of the previous interaction (hash chain)
168
+ 3. Content is stripped to structural metadata before saving (privacy by architecture)
169
+ 4. A `structural_hash` of the saved data enables post-save tampering detection
170
+ 5. The trace is saved as append-only JSONL in `.spooled/traces/`
171
+ 6. `spooled ci compare` extracts the fingerprint (tool graph) and diffs against the baseline
172
+ 7. Policy rules determine PASS/FAIL; the PR comment shows what changed
173
+
174
+ ## Documentation
175
+
176
+ - [CI Integration Guide](docs/CI_INTEGRATION.md)
177
+ - [Architecture & Trust Model](docs/ARCHITECTURE.md)
178
+ - [Data Schema](docs/DATA_SCHEMA.md)
179
+ - [Deployment Guide](docs/DEPLOYMENT.md)
180
+
181
+ ## License
182
+
183
+ Proprietary. See the LICENSE file for the terms of use.
@@ -0,0 +1,5 @@
1
+ """Spooled CLI - Command-line tool for replaying traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.4.0"
@@ -0,0 +1,53 @@
1
+ """CLI command modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from cli.commands import (
6
+ analyze,
7
+ attest,
8
+ baseline,
9
+ ci,
10
+ demo,
11
+ diff,
12
+ doctor,
13
+ fingerprint,
14
+ fleet,
15
+ ingest,
16
+ init,
17
+ keys,
18
+ list,
19
+ new_agent,
20
+ policy,
21
+ pull,
22
+ replay,
23
+ session,
24
+ traces,
25
+ verify,
26
+ view,
27
+ watch,
28
+ )
29
+
30
+ __all__ = [
31
+ "pull",
32
+ "replay",
33
+ "diff",
34
+ "view",
35
+ "list",
36
+ "init",
37
+ "verify",
38
+ "baseline",
39
+ "fingerprint",
40
+ "ci",
41
+ "fleet",
42
+ "policy",
43
+ "keys",
44
+ "session",
45
+ "doctor",
46
+ "attest",
47
+ "traces",
48
+ "new_agent",
49
+ "demo",
50
+ "watch",
51
+ "ingest",
52
+ "analyze",
53
+ ]