toolrecall 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. toolrecall-0.3.2/LICENSE +21 -0
  2. toolrecall-0.3.2/PKG-INFO +245 -0
  3. toolrecall-0.3.2/README.md +215 -0
  4. toolrecall-0.3.2/pyproject.toml +42 -0
  5. toolrecall-0.3.2/setup.cfg +4 -0
  6. toolrecall-0.3.2/tests/test_cache_safety.py +114 -0
  7. toolrecall-0.3.2/tests/test_file_cache.py +167 -0
  8. toolrecall-0.3.2/tests/test_integration.py +294 -0
  9. toolrecall-0.3.2/tests/test_mcp_config.py +97 -0
  10. toolrecall-0.3.2/tests/test_mcp_github.py +165 -0
  11. toolrecall-0.3.2/tests/test_mcp_seqthink.py +219 -0
  12. toolrecall-0.3.2/tests/test_mcp_time.py +179 -0
  13. toolrecall-0.3.2/tests/test_memory_index.py +248 -0
  14. toolrecall-0.3.2/tests/test_security_injection.py +363 -0
  15. toolrecall-0.3.2/tests/test_security_waf.py +88 -0
  16. toolrecall-0.3.2/toolrecall/__init__.py +41 -0
  17. toolrecall-0.3.2/toolrecall/cache.py +854 -0
  18. toolrecall-0.3.2/toolrecall/cli.py +484 -0
  19. toolrecall-0.3.2/toolrecall/client.py +256 -0
  20. toolrecall-0.3.2/toolrecall/config.py +400 -0
  21. toolrecall-0.3.2/toolrecall/daemon.py +901 -0
  22. toolrecall-0.3.2/toolrecall/dataset.py +66 -0
  23. toolrecall-0.3.2/toolrecall/docs.py +417 -0
  24. toolrecall-0.3.2/toolrecall/hermes_init.py +106 -0
  25. toolrecall-0.3.2/toolrecall/mcp_bridge.py +344 -0
  26. toolrecall-0.3.2/toolrecall/mcp_github.py +173 -0
  27. toolrecall-0.3.2/toolrecall/mcp_seqthink.py +135 -0
  28. toolrecall-0.3.2/toolrecall/mcp_server.py +556 -0
  29. toolrecall-0.3.2/toolrecall/mcp_time.py +81 -0
  30. toolrecall-0.3.2/toolrecall/proxy.py +143 -0
  31. toolrecall-0.3.2/toolrecall.egg-info/PKG-INFO +245 -0
  32. toolrecall-0.3.2/toolrecall.egg-info/SOURCES.txt +34 -0
  33. toolrecall-0.3.2/toolrecall.egg-info/dependency_links.txt +1 -0
  34. toolrecall-0.3.2/toolrecall.egg-info/entry_points.txt +2 -0
  35. toolrecall-0.3.2/toolrecall.egg-info/requires.txt +10 -0
  36. toolrecall-0.3.2/toolrecall.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Robin Schultka (robka.de)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,245 @@
1
+ Metadata-Version: 2.4
2
+ Name: toolrecall
3
+ Version: 0.3.2
4
+ Summary: The Deterministic Tool Cache for LLM Agents — no LLM decides what to cache. SQLite FTS5, zero deps, MCP multiplexer, zero-trust WAF.
5
+ Author-email: Robin Schultka <robin@robka.de>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/whiskybeer/toolrecall
8
+ Project-URL: Repository, https://github.com/whiskybeer/toolrecall
9
+ Project-URL: Documentation, https://github.com/whiskybeer/toolrecall#readme
10
+ Keywords: tool-cache,llm,agent,token-savings,sqlite,fts5
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Provides-Extra: yaml
23
+ Requires-Dist: pyyaml>=6.0; extra == "yaml"
24
+ Provides-Extra: toml-write
25
+ Requires-Dist: tomli-w>=1.0; extra == "toml-write"
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0; extra == "dev"
28
+ Requires-Dist: flake8>=6.0; extra == "dev"
29
+ Dynamic: license-file
30
+
31
+ # ToolRecall — The Deterministic Tool Cache for LLM Agents
32
+
33
+ **No LLM decides what to cache. No second agent. No misclassification. Only you do.**
34
+
35
+ ToolRecall is a **deterministic** middleware layer for autonomous AI agents. It sits between the agent and the OS, catching tool executions and managing MCP servers via Unix Domain Sockets.
36
+
37
+ Unlike caching frameworks that use a second LLM ("Cache Planner") to classify tools as cacheable or not — introducing hallucination risk, extra API cost, and cold-start latency — ToolRecall is purely deterministic: files invalidate on mtime, commands expire by explicit TTL, and `ttl=0` guarantees a tool **always** executes live. No guesses. No grey zones. No data loss from a bad LLM classification.
38
+
39
+ | What ToolRecall IS | What ToolRecall IS NOT |
40
+ |---|---|
41
+ | ✅ **Deterministic** — byte-exact tool output cache from SQLite, no LLM in the caching loop | ❌ Not an LLM-driven Cache Planner — no second agent deciding what to cache |
42
+ | ✅ **MCP Multiplexer** — single daemon manages all external MCP servers | ❌ Not a chronological call-graph — mtime handles staleness without state tracking |
43
+ | ✅ **Zero-Trust WAF** — path sandboxing, secret air-gapping, read-only mode | ❌ Not a vector database — no embeddings, no GPU, no semantic search |
44
+ | ✅ **FTS5 Knowledge Base** — zero-dep full-text search over docs and notes | ❌ Not a distributed cache — single-node SQLite, no Redis/Cluster |
45
+ | ✅ **Deterministic replay** — freeze OS state for 100% reproducible agent runs | ❌ Not a replacement for real-time data — use `ttl=0` for dynamic endpoints |
46
+
47
+ ---
48
+
49
+ ## Why Not an LLM-Powered Cache?
50
+
51
+ Some caching frameworks use a second LLM — a "Cache Planner" — to classify tools by cacheability: STATIC (cache forever), TRANSIENT (expire by TTL), or NONE (never cache). That sounds intelligent, but introduces failure modes ToolRecall eliminates by design — because ToolRecall is **deterministic**, not heuristic:
52
+
53
+ | Failure mode | LLM-Driven Cache | ToolRecall (Deterministic) |
54
+ |---|---|---|
55
+ | **Misclassification** | LLM guesses `send_message()` is STATIC → messages silently dropped | `ttl=0` means NEVER cache. Binary, deterministic, no AI middleman. |
56
+ | **Extra API cost** | Every new tool needs an LLM call to classify | $0 — SQLite FTS5, no embeddings, no API calls |
57
+ | **Cold-start latency** | Must analyze tool metadata before first cache decision | First call executes live, cached on return — zero overhead |
58
+ | **Side-effect blindness** | Relies on tool name/description text, not actual behavior | mtime-based auto-invalidation — file edited? next read is fresh. |
59
+ | **Reproducibility** | Non-deterministic — LLM may classify same tool differently on different runs | Always byte-identical for same args + same mtime. 100% reproducible. |
60
+
61
+ **The principle:** *Intelligent caching doesn't need an intelligence. It needs a filesystem, a clock, and the honesty to say "I don't know — execute it live."*
62
+
63
+ If you want an LLM to decide what to cache, you're adding a second agent that can hallucinate, costs money per decision, and can silently break your workflow. ToolRecall caches yes/no based on explicit TTLs and file modification times. **Deterministic by default.**
64
+
65
+ ---
66
+
67
+ ## The Core Problem: The Context Snowball
68
+
69
+ LLM context windows are stateless. Every time an agent reads a 10,000-token file, those 10,000 tokens enter the history. Over 100 turns, that's 1,000,000 billed input tokens for the same file — the O(N²) context snowball.
70
+
71
+ **ToolRecall's solution (Micro-RAG):**
72
+ 1. The agent reads the file once.
73
+ 2. The agent drops the dump from its active context window.
74
+ 3. If the file is needed again hours later, ToolRecall serves the exact bytes from SQLite — 1.5ms, no API call.
75
+ 4. File edited? `mtime` invalidates the entry. Next read is fresh.
76
+
77
+ **The paradigm shift:** Cost and latency are eliminated from sessions. The *only* reason to end a session now is attention degradation (topic drift), not token bills or wait time.
78
+
79
+ ---
80
+
81
+ ## Universal Agent Compatibility (Drop-In MCP)
82
+
83
+ ToolRecall exposes a standard `stdio` MCP interface (`toolrecall mcp`). It works out-of-the-box with **any** agent — Claude Code, Cursor, Cline, Hermes:
84
+
85
+ ```bash
86
+ claude mcp add toolrecall toolrecall mcp
87
+ ```
88
+
89
+ No custom plugins. No SDK changes. 100% Day-1 ecosystem penetration.
90
+
91
+ ---
92
+
93
+ ## Security Architecture (The WAF)
94
+
95
+ ToolRecall doesn't cure an LLM of being prompt-injected — it cages the agent to neutralize the consequences:
96
+
97
+ - **Daemon-based IPC:** Unix Domain Sockets only. No open TCP ports (immune to SSRF).
98
+ - **Canonical path resolution:** `os.path.realpath` canonicalizes every path (resolving symlinks and `..` segments), so traversal attempts such as `../../../etc/shadow` are blocked before the OS is touched.
99
+ - **Execution blackholes:** `allow_terminal = false` drops RCE attempts into a void.
100
+ - **Air-gapped secrets:** API keys in `~/.toolrecall/.env` — the LLM never sees them.
101
+ - **Read-only sandbox:** `read_only_sandbox = true` drops any tool containing `write`, `delete`, or `push`.
102
+
103
+ ---
104
+
105
+ ## The Five Axes (Breaking the Iron Triangle)
106
+
107
+ 1. **Faster:** Tool execution drops from ~1.5s to <0.1ms on cache hits — ~85 minutes saved in a 13-hour session.
108
+ 2. **Cheaper:** Deterministic byte-exact responses qualify for a 90% server-side prompt-caching discount. 81% fewer input tokens.
109
+ 3. **Deterministic:** Freeze OS state. 100% reproducible agent runs. No OS flakiness, no network jitter.
110
+ 4. **Safer:** Zero-Trust WAF, path sandboxing, secret air-gapping.
111
+ 5. **Universal:** Standard `stdio` MCP — any agent, any framework.
112
+
113
+ ---
114
+
115
+ ## The Hourglass Architecture
116
+
117
+ ```
118
+ [ Claude Code ] [ Cursor IDE ] [ Hermes Agent ]
119
+ \ | /
120
+ \ | /
121
+ +───────────────────────────────────+
122
+ │ Standard stdio Protocol (Bridge) │ <- Client Layer
123
+ +─────────────────┬─────────────────+
124
+ │ Unix Domain Socket
125
+ +─────────────────▼─────────────────+
126
+ │ ToolRecall Daemon │ <- Gateway Layer
127
+ │ ┌─────────────────────────────┐ │
128
+ │ │ In-Memory LRU (L1 Cache) │ │
129
+ │ └──────────────┬──────────────┘ │
130
+ │ ┌──────────────▼──────────────┐ │
131
+ │ │ SQLite WAL (Persistent) │ │
132
+ │ └─────────────────────────────┘ │
133
+ │ ┌─────────────────────────────┐ │
134
+ │ │ MCP Server Multiplexer │ │
135
+ │ └──────────────┬──────────────┘ │
136
+ +─────────────────┼─────────────────+
137
+ │ Lazy-Loaded stdio Subprocesses
138
+ +─────────────────▼─────────────────+
139
+ │ [ Downstream MCP: GitHub / Time ] │ <- Execution Layer
140
+ +───────────────────────────────────+
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Features
146
+
147
+ ### Byte-Exact Tool Caching
148
+ - **File Cache:** Invalidates on file modification (`mtime`) — no stale reads.
149
+ - **Terminal Cache:** Caches read-only commands by TTL (`git status` for 30s, `hostname` for 1h).
150
+ - **Script & Code Cache:** `cached_run`, `cached_exec` with explicit `ttl=0` bypass for state-changing operations.
151
+ - **MCP Cache:** TTL-based caching for external MCP tool responses (13.5× speedup measured).
152
+
153
+ ### MCP Multiplexer (AI Gateway)
154
+ - One daemon manages all your MCP servers (GitHub, Brave Search, time, fetch, ...).
155
+ - **Lazy loading:** Servers boot in 0.01s only when first called.
156
+ - **Idle timeout:** Killed after 15min inactivity — daemon drops from 130MB to 11MB RAM.
157
+ - Agents connect to **one** server: `toolrecall mcp`. Session startup: ~0.01s instead of ~1.7s.
158
+
159
+ ### FTS5 Knowledge Base
160
+ Zero-dependency full-text search over docs, notes, Hermes memory, Obsidian vaults. BM25 ranking, Porter stemming, source-filtered queries. No embeddings, no GPU, no API calls.
161
+
162
+ ### Data Engine (RLHF / SFT Trajectories)
163
+ ```bash
164
+ toolrecall export-dataset ~/trajectories.jsonl
165
+ ```
166
+ Exact (Action → State) pairs mined from agent sessions. Zero-cost SFT/DPO dataset generation.
167
+
168
+ ---
169
+
170
+ ## Quickstart
171
+
172
+ **Requirements:** Python 3.11+, standard SQLite.
173
+
174
+ ```bash
175
+ # 1. Install
176
+ pip install toolrecall
177
+
178
+ # 2. Init config + .env
179
+ toolrecall init
180
+
181
+ # 3. Start daemon
182
+ toolrecall daemon &
183
+ ```
184
+
185
+ ### Claude Code
186
+ ```bash
187
+ claude mcp add toolrecall toolrecall mcp
188
+ ```
189
+
190
+ ### Direct Python
191
+ ```python
192
+ from toolrecall import cached_read
193
+
194
+ result = cached_read("README.md")
195
+ print(f"Cached: {result['cached']}")
196
+ ```
197
+
198
+ ---
199
+
200
+ ## Configuration
201
+
202
+ TOML (default, zero deps via stdlib `tomllib`) or YAML (optional, requires `pyyaml`).
203
+
204
+ ```toml
205
+ [mcp]
206
+ allowed_paths = ["~/projects", "~/.hermes/skills"]
207
+ allow_terminal = false
208
+ default_ttl = 60
209
+
210
+ [mcp_multiplex]
211
+ enabled = true
212
+ idle_minutes = 15
213
+
214
+ [mcp_multiplex.servers_config]
215
+ github = { command = "npx", args = ["-y", "@modelcontextprotocol/server-github"], ttl = 60 }
216
+ ```
217
+
218
+ `TOOLRECALL_*` environment variables override TOML (for CI/CD, multi-agent setups).
219
+
220
+ ---
221
+
222
+ ## Status
223
+
224
+ **Experimental.** Used in heavy autonomous agent workflows. Before production CI/CD: ensure your allowlist is strictly scoped.
225
+
226
+ ---
227
+
228
+ ## Roadmap
229
+
230
+ - Live cache dashboard (`toolrecall dashboard`)
231
+ - Tool-calling profiler (latency breakdown per MCP call)
232
+ - Active cache invalidation on mutation tools (write_file, POST, git push)
233
+ - Container sandbox for `cached_run` (Docker backend)
234
+ - Webhook-triggered invalidation (CI/events POST to purge keys)
235
+
236
+ ---
237
+
238
+ ## Documentation
239
+
240
+ - [The Bottleneck Solved](docs/BOTTLENECK_SOLVED.md) — O(N²) context theory
241
+ - [Knowledge DB](docs/KNOWLEDGE_DB.md) — FTS5 indexing guide
242
+ - [Docker Deployment](docs/DOCKER.md) — containerized stack
243
+ - [Security Architecture](SECURITY.md) — WAF details
244
+ - [Enterprise Scale](docs/ENTERPRISE_SCALE.md) — L1 cache metaphor
245
+ - [Troubleshooting](docs/TROUBLESHOOTING.md) — common fixes
@@ -0,0 +1,215 @@
1
+ # ToolRecall — The Deterministic Tool Cache for LLM Agents
2
+
3
+ **No LLM decides what to cache. No second agent. No misclassification. Only you do.**
4
+
5
+ ToolRecall is a **deterministic** middleware layer for autonomous AI agents. It sits between the agent and the OS, catching tool executions and managing MCP servers via Unix Domain Sockets.
6
+
7
+ Unlike caching frameworks that use a second LLM ("Cache Planner") to classify tools as cacheable or not — introducing hallucination risk, extra API cost, and cold-start latency — ToolRecall is purely deterministic: files invalidate on mtime, commands expire by explicit TTL, and `ttl=0` guarantees a tool **always** executes live. No guesses. No grey zones. No data loss from a bad LLM classification.
8
+
9
+ | What ToolRecall IS | What ToolRecall IS NOT |
10
+ |---|---|
11
+ | ✅ **Deterministic** — byte-exact tool output cache from SQLite, no LLM in the caching loop | ❌ Not an LLM-driven Cache Planner — no second agent deciding what to cache |
12
+ | ✅ **MCP Multiplexer** — single daemon manages all external MCP servers | ❌ Not a chronological call-graph — mtime handles staleness without state tracking |
13
+ | ✅ **Zero-Trust WAF** — path sandboxing, secret air-gapping, read-only mode | ❌ Not a vector database — no embeddings, no GPU, no semantic search |
14
+ | ✅ **FTS5 Knowledge Base** — zero-dep full-text search over docs and notes | ❌ Not a distributed cache — single-node SQLite, no Redis/Cluster |
15
+ | ✅ **Deterministic replay** — freeze OS state for 100% reproducible agent runs | ❌ Not a replacement for real-time data — use `ttl=0` for dynamic endpoints |
16
+
17
+ ---
18
+
19
+ ## Why Not an LLM-Powered Cache?
20
+
21
+ Some caching frameworks use a second LLM — a "Cache Planner" — to classify tools by cacheability: STATIC (cache forever), TRANSIENT (expire by TTL), or NONE (never cache). That sounds intelligent, but introduces failure modes ToolRecall eliminates by design — because ToolRecall is **deterministic**, not heuristic:
22
+
23
+ | Failure mode | LLM-Driven Cache | ToolRecall (Deterministic) |
24
+ |---|---|---|
25
+ | **Misclassification** | LLM guesses `send_message()` is STATIC → messages silently dropped | `ttl=0` means NEVER cache. Binary, deterministic, no AI middleman. |
26
+ | **Extra API cost** | Every new tool needs an LLM call to classify | $0 — SQLite FTS5, no embeddings, no API calls |
27
+ | **Cold-start latency** | Must analyze tool metadata before first cache decision | First call executes live, cached on return — zero overhead |
28
+ | **Side-effect blindness** | Relies on tool name/description text, not actual behavior | mtime-based auto-invalidation — file edited? next read is fresh. |
29
+ | **Reproducibility** | Non-deterministic — LLM may classify same tool differently on different runs | Always byte-identical for same args + same mtime. 100% reproducible. |
30
+
31
+ **The principle:** *Intelligent caching doesn't need an intelligence. It needs a filesystem, a clock, and the honesty to say "I don't know — execute it live."*
32
+
33
+ If you want an LLM to decide what to cache, you're adding a second agent that can hallucinate, costs money per decision, and can silently break your workflow. ToolRecall caches yes/no based on explicit TTLs and file modification times. **Deterministic by default.**
34
+
35
+ ---
36
+
37
+ ## The Core Problem: The Context Snowball
38
+
39
+ LLM context windows are stateless. Every time an agent reads a 10,000-token file, those 10,000 tokens enter the history. Over 100 turns, that's 1,000,000 billed input tokens for the same file — the O(N²) context snowball.
40
+
41
+ **ToolRecall's solution (Micro-RAG):**
42
+ 1. The agent reads the file once.
43
+ 2. The agent drops the dump from its active context window.
44
+ 3. If the file is needed again hours later, ToolRecall serves the exact bytes from SQLite — 1.5ms, no API call.
45
+ 4. File edited? `mtime` invalidates the entry. Next read is fresh.
46
+
47
+ **The paradigm shift:** Cost and latency are eliminated from sessions. The *only* reason to end a session now is attention degradation (topic drift), not token bills or wait time.
48
+
49
+ ---
50
+
51
+ ## Universal Agent Compatibility (Drop-In MCP)
52
+
53
+ ToolRecall exposes a standard `stdio` MCP interface (`toolrecall mcp`). It works out-of-the-box with **any** agent — Claude Code, Cursor, Cline, Hermes:
54
+
55
+ ```bash
56
+ claude mcp add toolrecall toolrecall mcp
57
+ ```
58
+
59
+ No custom plugins. No SDK changes. 100% Day-1 ecosystem penetration.
60
+
61
+ ---
62
+
63
+ ## Security Architecture (The WAF)
64
+
65
+ ToolRecall doesn't cure an LLM of being prompt-injected — it cages the agent to neutralize the consequences:
66
+
67
+ - **Daemon-based IPC:** Unix Domain Sockets only. No open TCP ports (immune to SSRF).
68
+ - **Canonical path resolution:** `os.path.realpath` canonicalizes every path (resolving symlinks and `..` segments), so traversal attempts such as `../../../etc/shadow` are blocked before the OS is touched.
69
+ - **Execution blackholes:** `allow_terminal = false` drops RCE attempts into a void.
70
+ - **Air-gapped secrets:** API keys in `~/.toolrecall/.env` — the LLM never sees them.
71
+ - **Read-only sandbox:** `read_only_sandbox = true` drops any tool containing `write`, `delete`, or `push`.
72
+
73
+ ---
74
+
75
+ ## The Five Axes (Breaking the Iron Triangle)
76
+
77
+ 1. **Faster:** Tool execution drops from ~1.5s to <0.1ms on cache hits — ~85 minutes saved in a 13-hour session.
78
+ 2. **Cheaper:** Deterministic byte-exact responses qualify for a 90% server-side prompt-caching discount. 81% fewer input tokens.
79
+ 3. **Deterministic:** Freeze OS state. 100% reproducible agent runs. No OS flakiness, no network jitter.
80
+ 4. **Safer:** Zero-Trust WAF, path sandboxing, secret air-gapping.
81
+ 5. **Universal:** Standard `stdio` MCP — any agent, any framework.
82
+
83
+ ---
84
+
85
+ ## The Hourglass Architecture
86
+
87
+ ```
88
+ [ Claude Code ] [ Cursor IDE ] [ Hermes Agent ]
89
+ \ | /
90
+ \ | /
91
+ +───────────────────────────────────+
92
+ │ Standard stdio Protocol (Bridge) │ <- Client Layer
93
+ +─────────────────┬─────────────────+
94
+ │ Unix Domain Socket
95
+ +─────────────────▼─────────────────+
96
+ │ ToolRecall Daemon │ <- Gateway Layer
97
+ │ ┌─────────────────────────────┐ │
98
+ │ │ In-Memory LRU (L1 Cache) │ │
99
+ │ └──────────────┬──────────────┘ │
100
+ │ ┌──────────────▼──────────────┐ │
101
+ │ │ SQLite WAL (Persistent) │ │
102
+ │ └─────────────────────────────┘ │
103
+ │ ┌─────────────────────────────┐ │
104
+ │ │ MCP Server Multiplexer │ │
105
+ │ └──────────────┬──────────────┘ │
106
+ +─────────────────┼─────────────────+
107
+ │ Lazy-Loaded stdio Subprocesses
108
+ +─────────────────▼─────────────────+
109
+ │ [ Downstream MCP: GitHub / Time ] │ <- Execution Layer
110
+ +───────────────────────────────────+
111
+ ```
112
+
113
+ ---
114
+
115
+ ## Features
116
+
117
+ ### Byte-Exact Tool Caching
118
+ - **File Cache:** Invalidates on file modification (`mtime`) — no stale reads.
119
+ - **Terminal Cache:** Caches read-only commands by TTL (`git status` for 30s, `hostname` for 1h).
120
+ - **Script & Code Cache:** `cached_run`, `cached_exec` with explicit `ttl=0` bypass for state-changing operations.
121
+ - **MCP Cache:** TTL-based caching for external MCP tool responses (13.5× speedup measured).
122
+
123
+ ### MCP Multiplexer (AI Gateway)
124
+ - One daemon manages all your MCP servers (GitHub, Brave Search, time, fetch, ...).
125
+ - **Lazy loading:** Servers boot in 0.01s only when first called.
126
+ - **Idle timeout:** Killed after 15min inactivity — daemon drops from 130MB to 11MB RAM.
127
+ - Agents connect to **one** server: `toolrecall mcp`. Session startup: ~0.01s instead of ~1.7s.
128
+
129
+ ### FTS5 Knowledge Base
130
+ Zero-dependency full-text search over docs, notes, Hermes memory, Obsidian vaults. BM25 ranking, Porter stemming, source-filtered queries. No embeddings, no GPU, no API calls.
131
+
132
+ ### Data Engine (RLHF / SFT Trajectories)
133
+ ```bash
134
+ toolrecall export-dataset ~/trajectories.jsonl
135
+ ```
136
+ Exact (Action → State) pairs mined from agent sessions. Zero-cost SFT/DPO dataset generation.
137
+
138
+ ---
139
+
140
+ ## Quickstart
141
+
142
+ **Requirements:** Python 3.11+, standard SQLite.
143
+
144
+ ```bash
145
+ # 1. Install
146
+ pip install toolrecall
147
+
148
+ # 2. Init config + .env
149
+ toolrecall init
150
+
151
+ # 3. Start daemon
152
+ toolrecall daemon &
153
+ ```
154
+
155
+ ### Claude Code
156
+ ```bash
157
+ claude mcp add toolrecall toolrecall mcp
158
+ ```
159
+
160
+ ### Direct Python
161
+ ```python
162
+ from toolrecall import cached_read
163
+
164
+ result = cached_read("README.md")
165
+ print(f"Cached: {result['cached']}")
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Configuration
171
+
172
+ TOML (default, zero deps via stdlib `tomllib`) or YAML (optional, requires `pyyaml`).
173
+
174
+ ```toml
175
+ [mcp]
176
+ allowed_paths = ["~/projects", "~/.hermes/skills"]
177
+ allow_terminal = false
178
+ default_ttl = 60
179
+
180
+ [mcp_multiplex]
181
+ enabled = true
182
+ idle_minutes = 15
183
+
184
+ [mcp_multiplex.servers_config]
185
+ github = { command = "npx", args = ["-y", "@modelcontextprotocol/server-github"], ttl = 60 }
186
+ ```
187
+
188
+ `TOOLRECALL_*` environment variables override TOML (for CI/CD, multi-agent setups).
189
+
190
+ ---
191
+
192
+ ## Status
193
+
194
+ **Experimental.** Used in heavy autonomous agent workflows. Before production CI/CD: ensure your allowlist is strictly scoped.
195
+
196
+ ---
197
+
198
+ ## Roadmap
199
+
200
+ - Live cache dashboard (`toolrecall dashboard`)
201
+ - Tool-calling profiler (latency breakdown per MCP call)
202
+ - Active cache invalidation on mutation tools (write_file, POST, git push)
203
+ - Container sandbox for `cached_run` (Docker backend)
204
+ - Webhook-triggered invalidation (CI/events POST to purge keys)
205
+
206
+ ---
207
+
208
+ ## Documentation
209
+
210
+ - [The Bottleneck Solved](docs/BOTTLENECK_SOLVED.md) — O(N²) context theory
211
+ - [Knowledge DB](docs/KNOWLEDGE_DB.md) — FTS5 indexing guide
212
+ - [Docker Deployment](docs/DOCKER.md) — containerized stack
213
+ - [Security Architecture](SECURITY.md) — WAF details
214
+ - [Enterprise Scale](docs/ENTERPRISE_SCALE.md) — L1 cache metaphor
215
+ - [Troubleshooting](docs/TROUBLESHOOTING.md) — common fixes
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "packaging", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "toolrecall"
7
+ version = "0.3.2"
8
+ description = "The Deterministic Tool Cache for LLM Agents — no LLM decides what to cache. SQLite FTS5, zero deps, MCP multiplexer, zero-trust WAF."
9
+ readme = "README.md"
10
+ authors = [
11
+ {name = "Robin Schultka", email = "robin@robka.de"},
12
+ ]
13
+ license = {text = "MIT"}
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Topic :: Software Development :: Libraries :: Python Modules",
22
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
23
+ ]
24
+ keywords = ["tool-cache", "llm", "agent", "token-savings", "sqlite", "fts5"]
25
+ requires-python = ">=3.11"
26
+ dependencies = []
27
+
28
+ [project.optional-dependencies]
29
+ yaml = ["pyyaml >= 6.0"]
30
+ toml-write = ["tomli-w >= 1.0"]
31
+ dev = ["pytest >= 7.0", "flake8 >= 6.0"]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/whiskybeer/toolrecall"
35
+ Repository = "https://github.com/whiskybeer/toolrecall"
36
+ Documentation = "https://github.com/whiskybeer/toolrecall#readme"
37
+
38
+ [project.scripts]
39
+ toolrecall = "toolrecall.cli:main"
40
+
41
+ [tool.setuptools.packages.find]
42
+ include = ["toolrecall*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,114 @@
1
+ import os
2
+ import sys
3
+ import unittest
4
+ import tempfile
5
+ import time
6
+ import shutil
7
+
8
+ # Force a clean, isolated test database path before loading toolrecall
9
+ test_db_dir = tempfile.mkdtemp()
10
+ test_db_path = os.path.join(test_db_dir, "test_cache.db")
11
+ os.environ["TOOLRECALL_CACHE_DB"] = test_db_path
12
+
13
+ # Add this test file's directory to sys.path
14
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
15
+
16
+ from toolrecall.cache import cached_exec, cached_run, cached_terminal, DEFAULT_CACHEABLE
17
+
18
+
19
class TestCacheSafety(unittest.TestCase):
    """Checks that the caching helpers only cache when explicitly asked to.

    Uses the module-level ``test_db_path`` / ``test_db_dir`` temp locations
    and the ``cached_exec``, ``cached_run``, ``cached_terminal`` and
    ``DEFAULT_CACHEABLE`` names imported from ``toolrecall.cache``.
    """

    @staticmethod
    def _drop_db_file():
        """Delete the test database file when it is present on disk."""
        if not os.path.exists(test_db_path):
            return
        os.remove(test_db_path)

    @staticmethod
    def _write_executable_script(source):
        """Write *source* to a fresh temporary ``.py`` file and return its path.

        The file is created with ``delete=False`` and chmod-ed to ``0o755``;
        the caller is responsible for removing it.
        """
        with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as handle:
            handle.write(source)
            path = handle.name
        os.chmod(path, 0o755)
        return path

    def setUp(self):
        """Start every test from a freshly initialised, empty database."""
        self._drop_db_file()
        # Imported here, as in the original, right before (re)initialising.
        from toolrecall.cache import _init
        _init()

    def tearDown(self):
        """Remove the database file left behind by the test."""
        self._drop_db_file()

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory that held the test database."""
        shutil.rmtree(test_db_dir, ignore_errors=True)

    def test_cached_exec_default_ttl_zero(self):
        """Two cached_exec calls without a ttl must both run fresh."""
        snippet = "import time; print(time.time())"

        first = cached_exec(snippet)
        second = cached_exec(snippet)

        self.assertFalse(first.get("cached"), "First run should not be cached")
        self.assertFalse(second.get("cached"), "Second run with default TTL=0 should not be cached")
        self.assertNotEqual(
            first.get("output"),
            second.get("output"),
            "Output of dynamic code should be different without cache",
        )

    def test_cached_exec_explicit_ttl(self):
        """A repeated cached_exec call with ttl=10 must be served from cache."""
        snippet = "print('hello_exec')"

        first = cached_exec(snippet, ttl=10)
        second = cached_exec(snippet, ttl=10)

        self.assertFalse(first.get("cached"), "First run should not be cached")
        self.assertTrue(second.get("cached"), "Second run with explicit TTL should be cached")
        self.assertEqual(first.get("output"), second.get("output"), "Cached output should match")

    def test_cached_run_default_ttl_zero(self):
        """Two cached_run calls without a ttl must both run fresh."""
        script_path = self._write_executable_script(
            "#!/usr/bin/env python3\nimport time; print(time.time())\n"
        )

        try:
            first = cached_run(script_path)
            # Give the clock a chance to advance between the two runs.
            time.sleep(0.01)
            second = cached_run(script_path)

            self.assertFalse(first.get("cached"), "First run should not be cached")
            self.assertFalse(second.get("cached"), "Second run with default TTL=0 should not be cached")
            self.assertNotEqual(
                first.get("output"),
                second.get("output"),
                "Output of dynamic script should be different without cache",
            )
        finally:
            os.remove(script_path)

    def test_cached_run_explicit_ttl(self):
        """A repeated cached_run call with ttl=10 must be served from cache."""
        script_path = self._write_executable_script(
            "#!/usr/bin/env python3\nprint('hello_run')\n"
        )

        try:
            first = cached_run(script_path, ttl=10)
            second = cached_run(script_path, ttl=10)

            self.assertFalse(first.get("cached"), "First run should not be cached")
            self.assertTrue(second.get("cached"), "Second run with explicit TTL should be cached")
            self.assertEqual(first.get("output"), second.get("output"), "Cached output should match")
        finally:
            os.remove(script_path)

    def test_dynamic_commands_not_in_default_cacheable(self):
        """State-sensing commands must be absent from DEFAULT_CACHEABLE."""
        for command in ("git status", "ls -la", "git branch", "git diff --stat", "ls", "git diff"):
            self.assertNotIn(
                command,
                DEFAULT_CACHEABLE,
                f"Unsafe command '{command}' must not be in DEFAULT_CACHEABLE",
            )

    def test_cached_terminal_does_not_cache_dynamic_commands(self):
        """cached_terminal must report 'git status' as uncached on both calls."""
        # The assertions only inspect the "cached" flag, so no git repo is needed.
        first = cached_terminal("git status")
        second = cached_terminal("git status")

        self.assertFalse(first.get("cached"), "Dynamic command first run should not be cached")
        self.assertFalse(second.get("cached"), "Dynamic command second run should not be cached")
111
+
112
+
113
+ if __name__ == "__main__":
114
+ unittest.main()