toolrecall 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolrecall-0.3.2/LICENSE +21 -0
- toolrecall-0.3.2/PKG-INFO +245 -0
- toolrecall-0.3.2/README.md +215 -0
- toolrecall-0.3.2/pyproject.toml +42 -0
- toolrecall-0.3.2/setup.cfg +4 -0
- toolrecall-0.3.2/tests/test_cache_safety.py +114 -0
- toolrecall-0.3.2/tests/test_file_cache.py +167 -0
- toolrecall-0.3.2/tests/test_integration.py +294 -0
- toolrecall-0.3.2/tests/test_mcp_config.py +97 -0
- toolrecall-0.3.2/tests/test_mcp_github.py +165 -0
- toolrecall-0.3.2/tests/test_mcp_seqthink.py +219 -0
- toolrecall-0.3.2/tests/test_mcp_time.py +179 -0
- toolrecall-0.3.2/tests/test_memory_index.py +248 -0
- toolrecall-0.3.2/tests/test_security_injection.py +363 -0
- toolrecall-0.3.2/tests/test_security_waf.py +88 -0
- toolrecall-0.3.2/toolrecall/__init__.py +41 -0
- toolrecall-0.3.2/toolrecall/cache.py +854 -0
- toolrecall-0.3.2/toolrecall/cli.py +484 -0
- toolrecall-0.3.2/toolrecall/client.py +256 -0
- toolrecall-0.3.2/toolrecall/config.py +400 -0
- toolrecall-0.3.2/toolrecall/daemon.py +901 -0
- toolrecall-0.3.2/toolrecall/dataset.py +66 -0
- toolrecall-0.3.2/toolrecall/docs.py +417 -0
- toolrecall-0.3.2/toolrecall/hermes_init.py +106 -0
- toolrecall-0.3.2/toolrecall/mcp_bridge.py +344 -0
- toolrecall-0.3.2/toolrecall/mcp_github.py +173 -0
- toolrecall-0.3.2/toolrecall/mcp_seqthink.py +135 -0
- toolrecall-0.3.2/toolrecall/mcp_server.py +556 -0
- toolrecall-0.3.2/toolrecall/mcp_time.py +81 -0
- toolrecall-0.3.2/toolrecall/proxy.py +143 -0
- toolrecall-0.3.2/toolrecall.egg-info/PKG-INFO +245 -0
- toolrecall-0.3.2/toolrecall.egg-info/SOURCES.txt +34 -0
- toolrecall-0.3.2/toolrecall.egg-info/dependency_links.txt +1 -0
- toolrecall-0.3.2/toolrecall.egg-info/entry_points.txt +2 -0
- toolrecall-0.3.2/toolrecall.egg-info/requires.txt +10 -0
- toolrecall-0.3.2/toolrecall.egg-info/top_level.txt +1 -0
toolrecall-0.3.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Robin Schultka (robka.de)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: toolrecall
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: The Deterministic Tool Cache for LLM Agents — no LLM decides what to cache. SQLite FTS5, zero deps, MCP multiplexer, zero-trust WAF.
|
|
5
|
+
Author-email: Robin Schultka <robin@robka.de>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/whiskybeer/toolrecall
|
|
8
|
+
Project-URL: Repository, https://github.com/whiskybeer/toolrecall
|
|
9
|
+
Project-URL: Documentation, https://github.com/whiskybeer/toolrecall#readme
|
|
10
|
+
Keywords: tool-cache,llm,agent,token-savings,sqlite,fts5
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Provides-Extra: yaml
|
|
23
|
+
Requires-Dist: pyyaml>=6.0; extra == "yaml"
|
|
24
|
+
Provides-Extra: toml-write
|
|
25
|
+
Requires-Dist: tomli-w>=1.0; extra == "toml-write"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
28
|
+
Requires-Dist: flake8>=6.0; extra == "dev"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# ToolRecall — The Deterministic Tool Cache for LLM Agents
|
|
32
|
+
|
|
33
|
+
**No LLM decides what to cache. No second agent. No misclassification. Only you do.**
|
|
34
|
+
|
|
35
|
+
ToolRecall is a **deterministic** middleware layer for autonomous AI agents. It sits between the agent and the OS, catching tool executions and managing MCP servers via Unix Domain Sockets.
|
|
36
|
+
|
|
37
|
+
Unlike caching frameworks that use a second LLM ("Cache Planner") to classify tools as cacheable or not — introducing hallucination risk, extra API cost, and cold-start latency — ToolRecall is purely deterministic: files invalidate on mtime, commands expire by explicit TTL, and `ttl=0` guarantees a tool **always** executes live. No guesses. No grey zones. No data loss from a bad LLM classification.
|
|
38
|
+
|
|
39
|
+
| What ToolRecall IS | What ToolRecall IS NOT |
|
|
40
|
+
|---|---|
|
|
41
|
+
| ✅ **Deterministic** — byte-exact tool output cache from SQLite, no LLM in the caching loop | ❌ Not an LLM-driven Cache Planner — no second agent deciding what to cache |
|
|
42
|
+
| ✅ **MCP Multiplexer** — single daemon manages all external MCP servers | ❌ Not a chronological call-graph — mtime handles staleness without state tracking |
|
|
43
|
+
| ✅ **Zero-Trust WAF** — path sandboxing, secret air-gapping, read-only mode | ❌ Not a vector database — no embeddings, no GPU, no semantic search |
|
|
44
|
+
| ✅ **FTS5 Knowledge Base** — zero-dep full-text search over docs and notes | ❌ Not a distributed cache — single-node SQLite, no Redis/Cluster |
|
|
45
|
+
| ✅ **Deterministic replay** — freeze OS state for 100% reproducible agent runs | ❌ Not a replacement for real-time data — use `ttl=0` for dynamic endpoints |
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Why Not an LLM-Powered Cache?
|
|
50
|
+
|
|
51
|
+
Some caching frameworks use a second LLM — a "Cache Planner" — to classify tools by cacheability: STATIC (cache forever), TRANSIENT (expire by TTL), or NONE (never cache). That sounds intelligent, but introduces failure modes ToolRecall eliminates by design — because ToolRecall is **deterministic**, not heuristic:
|
|
52
|
+
|
|
53
|
+
| Failure mode | LLM-Driven Cache | ToolRecall (Deterministic) |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| **Misclassification** | LLM guesses `send_message()` is STATIC → messages silently dropped | `ttl=0` means NEVER cache. Binary, deterministic, no AI middleman. |
|
|
56
|
+
| **Extra API cost** | Every new tool needs an LLM call to classify | $0 — SQLite FTS5, no embeddings, no API calls |
|
|
57
|
+
| **Cold-start latency** | Must analyze tool metadata before first cache decision | First call executes live, cached on return — zero overhead |
|
|
58
|
+
| **Side-effect blindness** | Relies on tool name/description text, not actual behavior | mtime-based auto-invalidation — file edited? next read is fresh. |
|
|
59
|
+
| **Reproducibility** | Non-deterministic — LLM may classify same tool differently on different runs | Always byte-identical for same args + same mtime. 100% reproducible. |
|
|
60
|
+
|
|
61
|
+
**The principle:** *Intelligent caching doesn't need an intelligence. It needs a filesystem, a clock, and the honesty to say "I don't know — execute it live."*
|
|
62
|
+
|
|
63
|
+
If you want an LLM to decide what to cache, you're adding a second agent that can hallucinate, costs money per decision, and can silently break your workflow. ToolRecall makes a plain yes/no caching decision based on explicit TTLs and file modification times. **Deterministic by default.**
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## The Core Problem: The Context Snowball
|
|
68
|
+
|
|
69
|
+
LLM context windows are stateless. Every time an agent reads a 10,000-token file, those 10,000 tokens enter the history. Over 100 turns, that's 1,000,000 billed input tokens for the same file — the O(N²) context snowball.
|
|
70
|
+
|
|
71
|
+
**ToolRecall's solution (Micro-RAG):**
|
|
72
|
+
1. Agents read the file once.
|
|
73
|
+
2. The agent drops the dump from its active context window.
|
|
74
|
+
3. Hours later, if the file is needed again, ToolRecall serves the exact bytes from SQLite — 1.5ms, no API call.
|
|
75
|
+
4. File edited? `mtime` invalidates the entry. Next read is fresh.
|
|
76
|
+
|
|
77
|
+
**The paradigm shift:** Cost and latency are eliminated from sessions. The *only* reason to end a session now is attention degradation (topic drift), not token bills or wait time.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Universal Agent Compatibility (Drop-In MCP)
|
|
82
|
+
|
|
83
|
+
ToolRecall exposes a standard `stdio` MCP interface (`toolrecall mcp`). It works out-of-the-box with **any** agent — Claude Code, Cursor, Cline, Hermes:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
claude mcp add toolrecall toolrecall mcp
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
No custom plugins. No SDK changes. 100% Day-1 ecosystem penetration.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Security Architecture (The WAF)
|
|
94
|
+
|
|
95
|
+
ToolRecall doesn't cure an LLM of being prompt-injected — it cages the agent to neutralize the consequences:
|
|
96
|
+
|
|
97
|
+
- **Daemon-based IPC:** Unix Domain Sockets only. No open TCP ports (immune to SSRF).
|
|
98
|
+
- **Canonical path resolution:** `os.path.realpath` resolves symlinks and `..` segments, so traversal attempts such as `../../../etc/shadow` are blocked before the OS is touched.
|
|
99
|
+
- **Execution blackholes:** `allow_terminal = false` drops RCE attempts into a void.
|
|
100
|
+
- **Air-gapped secrets:** API keys in `~/.toolrecall/.env` — the LLM never sees them.
|
|
101
|
+
- **Read-only sandbox:** `read_only_sandbox = true` drops any tool containing `write`, `delete`, `push`.
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## The Five Axes (Breaking the Iron Triangle)
|
|
106
|
+
|
|
107
|
+
1. **Faster:** Tool execution drops from ~1.5s to <0.1ms on cache hits — ~85 minutes saved in a 13-hour session.
|
|
108
|
+
2. **Cheaper:** Deterministic byte-exact responses qualify for 90% server-side prompt caching discount. 81% fewer input tokens.
|
|
109
|
+
3. **Deterministic:** Freeze OS state. 100% reproducible agent runs. No OS flakiness, no network jitter.
|
|
110
|
+
4. **Safer:** Zero-Trust WAF, path sandboxing, secret air-gapping.
|
|
111
|
+
5. **Universal:** Standard `stdio` MCP — any agent, any framework.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## The Hourglass Architecture
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
[ Claude Code ] [ Cursor IDE ] [ Hermes Agent ]
|
|
119
|
+
\ | /
|
|
120
|
+
\ | /
|
|
121
|
+
+───────────────────────────────────+
|
|
122
|
+
│ Standard stdio Protocol (Bridge) │ <- Client Layer
|
|
123
|
+
+─────────────────┬─────────────────+
|
|
124
|
+
│ Unix Domain Socket
|
|
125
|
+
+─────────────────▼─────────────────+
|
|
126
|
+
│ ToolRecall Daemon │ <- Gateway Layer
|
|
127
|
+
│ ┌─────────────────────────────┐ │
|
|
128
|
+
│ │ In-Memory LRU (L1 Cache) │ │
|
|
129
|
+
│ └──────────────┬──────────────┘ │
|
|
130
|
+
│ ┌──────────────▼──────────────┐ │
|
|
131
|
+
│ │ SQLite WAL (Persistent) │ │
|
|
132
|
+
│ └─────────────────────────────┘ │
|
|
133
|
+
│ ┌─────────────────────────────┐ │
|
|
134
|
+
│ │ MCP Server Multiplexer │ │
|
|
135
|
+
│ └──────────────┬──────────────┘ │
|
|
136
|
+
+─────────────────┼─────────────────+
|
|
137
|
+
│ Lazy-Loaded stdio Subprocesses
|
|
138
|
+
+─────────────────▼─────────────────+
|
|
139
|
+
│ [ Downstream MCP: GitHub / Time ] │ <- Execution Layer
|
|
140
|
+
+───────────────────────────────────+
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Features
|
|
146
|
+
|
|
147
|
+
### Byte-Exact Tool Caching
|
|
148
|
+
- **File Cache:** Invalidates on file modification (`mtime`) — no stale reads.
|
|
149
|
+
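- **Terminal Cache:** Caches read-only commands by TTL (e.g. `hostname` for 1h). State-sensing commands such as `git status`, `git diff`, and `ls` are excluded from the default cacheable set, so they run live unless you configure otherwise.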
|
|
150
|
+
- **Script & Code Cache:** `cached_run`, `cached_exec` with explicit `ttl=0` bypass for state-changing operations.
|
|
151
|
+
- **MCP Cache:** TTL-based caching for external MCP tool responses (13.5× speedup measured).
|
|
152
|
+
|
|
153
|
+
### MCP Multiplexer (AI Gateway)
|
|
154
|
+
- One daemon manages all your MCP servers (GitHub, Brave Search, time, fetch, ...).
|
|
155
|
+
- **Lazy loading:** Servers boot in 0.01s only when first called.
|
|
156
|
+
- **Idle timeout:** Killed after 15min inactivity — daemon drops from 130MB to 11MB RAM.
|
|
157
|
+
- Agents connect to **one** server: `toolrecall mcp`. Session startup: ~0.01s instead of ~1.7s.
|
|
158
|
+
|
|
159
|
+
### FTS5 Knowledge Base
|
|
160
|
+
Zero-dependency full-text search over docs, notes, Hermes memory, Obsidian vaults. BM25 ranking, Porter stemming, source-filtered queries. No embeddings, no GPU, no API calls.
|
|
161
|
+
|
|
162
|
+
### Data Engine (RLHF / SFT Trajectories)
|
|
163
|
+
```bash
|
|
164
|
+
toolrecall export-dataset ~/trajectories.jsonl
|
|
165
|
+
```
|
|
166
|
+
Exact (Action → State) pairs mined from agent sessions. Zero-cost SFT/DPO dataset generation.
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Quickstart
|
|
171
|
+
|
|
172
|
+
**Requirements:** Python 3.11+, standard SQLite.
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# 1. Install
|
|
176
|
+
pip install toolrecall
|
|
177
|
+
|
|
178
|
+
# 2. Init config + .env
|
|
179
|
+
toolrecall init
|
|
180
|
+
|
|
181
|
+
# 3. Start daemon
|
|
182
|
+
toolrecall daemon &
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Claude Code
|
|
186
|
+
```bash
|
|
187
|
+
claude mcp add toolrecall toolrecall mcp
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Direct Python
|
|
191
|
+
```python
|
|
192
|
+
from toolrecall import cached_read
|
|
193
|
+
|
|
194
|
+
result = cached_read("README.md")
|
|
195
|
+
print(f"Cached: {result['cached']}")
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
## Configuration
|
|
201
|
+
|
|
202
|
+
TOML (default, zero deps via stdlib `tomllib`) or YAML (optional, requires `pyyaml`).
|
|
203
|
+
|
|
204
|
+
```toml
|
|
205
|
+
[mcp]
|
|
206
|
+
allowed_paths = ["~/projects", "~/.hermes/skills"]
|
|
207
|
+
allow_terminal = false
|
|
208
|
+
default_ttl = 60
|
|
209
|
+
|
|
210
|
+
[mcp_multiplex]
|
|
211
|
+
enabled = true
|
|
212
|
+
idle_minutes = 15
|
|
213
|
+
|
|
214
|
+
[mcp_multiplex.servers_config]
|
|
215
|
+
github = { command = "npx", args = ["-y", "@modelcontextprotocol/server-github"], ttl = 60 }
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
`TOOLRECALL_*` environment variables override TOML (for CI/CD, multi-agent setups).
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## Status
|
|
223
|
+
|
|
224
|
+
**Experimental.** Used in heavy autonomous agent workflows. Before production CI/CD: ensure your allowlist is strictly scoped.
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Roadmap
|
|
229
|
+
|
|
230
|
+
- Live cache dashboard (`toolrecall dashboard`)
|
|
231
|
+
- Tool-calling profiler (latency breakdown per MCP call)
|
|
232
|
+
- Active cache invalidation on mutation tools (write_file, POST, git push)
|
|
233
|
+
- Container sandbox for `cached_run` (Docker backend)
|
|
234
|
+
- Webhook-triggered invalidation (CI/events POST to purge keys)
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Documentation
|
|
239
|
+
|
|
240
|
+
- [The Bottleneck Solved](docs/BOTTLENECK_SOLVED.md) — O(N²) context theory
|
|
241
|
+
- [Knowledge DB](docs/KNOWLEDGE_DB.md) — FTS5 indexing guide
|
|
242
|
+
- [Docker Deployment](docs/DOCKER.md) — containerized stack
|
|
243
|
+
- [Security Architecture](SECURITY.md) — WAF details
|
|
244
|
+
- [Enterprise Scale](docs/ENTERPRISE_SCALE.md) — L1 cache metaphor
|
|
245
|
+
- [Troubleshooting](docs/TROUBLESHOOTING.md) — common fixes
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# ToolRecall — The Deterministic Tool Cache for LLM Agents
|
|
2
|
+
|
|
3
|
+
**No LLM decides what to cache. No second agent. No misclassification. Only you do.**
|
|
4
|
+
|
|
5
|
+
ToolRecall is a **deterministic** middleware layer for autonomous AI agents. It sits between the agent and the OS, catching tool executions and managing MCP servers via Unix Domain Sockets.
|
|
6
|
+
|
|
7
|
+
Unlike caching frameworks that use a second LLM ("Cache Planner") to classify tools as cacheable or not — introducing hallucination risk, extra API cost, and cold-start latency — ToolRecall is purely deterministic: files invalidate on mtime, commands expire by explicit TTL, and `ttl=0` guarantees a tool **always** executes live. No guesses. No grey zones. No data loss from a bad LLM classification.
|
|
8
|
+
|
|
9
|
+
| What ToolRecall IS | What ToolRecall IS NOT |
|
|
10
|
+
|---|---|
|
|
11
|
+
| ✅ **Deterministic** — byte-exact tool output cache from SQLite, no LLM in the caching loop | ❌ Not an LLM-driven Cache Planner — no second agent deciding what to cache |
|
|
12
|
+
| ✅ **MCP Multiplexer** — single daemon manages all external MCP servers | ❌ Not a chronological call-graph — mtime handles staleness without state tracking |
|
|
13
|
+
| ✅ **Zero-Trust WAF** — path sandboxing, secret air-gapping, read-only mode | ❌ Not a vector database — no embeddings, no GPU, no semantic search |
|
|
14
|
+
| ✅ **FTS5 Knowledge Base** — zero-dep full-text search over docs and notes | ❌ Not a distributed cache — single-node SQLite, no Redis/Cluster |
|
|
15
|
+
| ✅ **Deterministic replay** — freeze OS state for 100% reproducible agent runs | ❌ Not a replacement for real-time data — use `ttl=0` for dynamic endpoints |
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Why Not an LLM-Powered Cache?
|
|
20
|
+
|
|
21
|
+
Some caching frameworks use a second LLM — a "Cache Planner" — to classify tools by cacheability: STATIC (cache forever), TRANSIENT (expire by TTL), or NONE (never cache). That sounds intelligent, but introduces failure modes ToolRecall eliminates by design — because ToolRecall is **deterministic**, not heuristic:
|
|
22
|
+
|
|
23
|
+
| Failure mode | LLM-Driven Cache | ToolRecall (Deterministic) |
|
|
24
|
+
|---|---|---|
|
|
25
|
+
| **Misclassification** | LLM guesses `send_message()` is STATIC → messages silently dropped | `ttl=0` means NEVER cache. Binary, deterministic, no AI middleman. |
|
|
26
|
+
| **Extra API cost** | Every new tool needs an LLM call to classify | $0 — SQLite FTS5, no embeddings, no API calls |
|
|
27
|
+
| **Cold-start latency** | Must analyze tool metadata before first cache decision | First call executes live, cached on return — zero overhead |
|
|
28
|
+
| **Side-effect blindness** | Relies on tool name/description text, not actual behavior | mtime-based auto-invalidation — file edited? next read is fresh. |
|
|
29
|
+
| **Reproducibility** | Non-deterministic — LLM may classify same tool differently on different runs | Always byte-identical for same args + same mtime. 100% reproducible. |
|
|
30
|
+
|
|
31
|
+
**The principle:** *Intelligent caching doesn't need an intelligence. It needs a filesystem, a clock, and the honesty to say "I don't know — execute it live."*
|
|
32
|
+
|
|
33
|
+
If you want an LLM to decide what to cache, you're adding a second agent that can hallucinate, costs money per decision, and can silently break your workflow. ToolRecall makes a plain yes/no caching decision based on explicit TTLs and file modification times. **Deterministic by default.**
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## The Core Problem: The Context Snowball
|
|
38
|
+
|
|
39
|
+
LLM context windows are stateless. Every time an agent reads a 10,000-token file, those 10,000 tokens enter the history. Over 100 turns, that's 1,000,000 billed input tokens for the same file — the O(N²) context snowball.
|
|
40
|
+
|
|
41
|
+
**ToolRecall's solution (Micro-RAG):**
|
|
42
|
+
1. Agents read the file once.
|
|
43
|
+
2. The agent drops the dump from its active context window.
|
|
44
|
+
3. Hours later, if the file is needed again, ToolRecall serves the exact bytes from SQLite — 1.5ms, no API call.
|
|
45
|
+
4. File edited? `mtime` invalidates the entry. Next read is fresh.
|
|
46
|
+
|
|
47
|
+
**The paradigm shift:** Cost and latency are eliminated from sessions. The *only* reason to end a session now is attention degradation (topic drift), not token bills or wait time.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Universal Agent Compatibility (Drop-In MCP)
|
|
52
|
+
|
|
53
|
+
ToolRecall exposes a standard `stdio` MCP interface (`toolrecall mcp`). It works out-of-the-box with **any** agent — Claude Code, Cursor, Cline, Hermes:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
claude mcp add toolrecall toolrecall mcp
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
No custom plugins. No SDK changes. 100% Day-1 ecosystem penetration.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Security Architecture (The WAF)
|
|
64
|
+
|
|
65
|
+
ToolRecall doesn't cure an LLM of being prompt-injected — it cages the agent to neutralize the consequences:
|
|
66
|
+
|
|
67
|
+
- **Daemon-based IPC:** Unix Domain Sockets only. No open TCP ports (immune to SSRF).
|
|
68
|
+
- **Canonical path resolution:** `os.path.realpath` resolves symlinks and `..` segments, so traversal attempts such as `../../../etc/shadow` are blocked before the OS is touched.
|
|
69
|
+
- **Execution blackholes:** `allow_terminal = false` drops RCE attempts into a void.
|
|
70
|
+
- **Air-gapped secrets:** API keys in `~/.toolrecall/.env` — the LLM never sees them.
|
|
71
|
+
- **Read-only sandbox:** `read_only_sandbox = true` drops any tool containing `write`, `delete`, `push`.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## The Five Axes (Breaking the Iron Triangle)
|
|
76
|
+
|
|
77
|
+
1. **Faster:** Tool execution drops from ~1.5s to <0.1ms on cache hits — ~85 minutes saved in a 13-hour session.
|
|
78
|
+
2. **Cheaper:** Deterministic byte-exact responses qualify for 90% server-side prompt caching discount. 81% fewer input tokens.
|
|
79
|
+
3. **Deterministic:** Freeze OS state. 100% reproducible agent runs. No OS flakiness, no network jitter.
|
|
80
|
+
4. **Safer:** Zero-Trust WAF, path sandboxing, secret air-gapping.
|
|
81
|
+
5. **Universal:** Standard `stdio` MCP — any agent, any framework.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## The Hourglass Architecture
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
[ Claude Code ] [ Cursor IDE ] [ Hermes Agent ]
|
|
89
|
+
\ | /
|
|
90
|
+
\ | /
|
|
91
|
+
+───────────────────────────────────+
|
|
92
|
+
│ Standard stdio Protocol (Bridge) │ <- Client Layer
|
|
93
|
+
+─────────────────┬─────────────────+
|
|
94
|
+
│ Unix Domain Socket
|
|
95
|
+
+─────────────────▼─────────────────+
|
|
96
|
+
│ ToolRecall Daemon │ <- Gateway Layer
|
|
97
|
+
│ ┌─────────────────────────────┐ │
|
|
98
|
+
│ │ In-Memory LRU (L1 Cache) │ │
|
|
99
|
+
│ └──────────────┬──────────────┘ │
|
|
100
|
+
│ ┌──────────────▼──────────────┐ │
|
|
101
|
+
│ │ SQLite WAL (Persistent) │ │
|
|
102
|
+
│ └─────────────────────────────┘ │
|
|
103
|
+
│ ┌─────────────────────────────┐ │
|
|
104
|
+
│ │ MCP Server Multiplexer │ │
|
|
105
|
+
│ └──────────────┬──────────────┘ │
|
|
106
|
+
+─────────────────┼─────────────────+
|
|
107
|
+
│ Lazy-Loaded stdio Subprocesses
|
|
108
|
+
+─────────────────▼─────────────────+
|
|
109
|
+
│ [ Downstream MCP: GitHub / Time ] │ <- Execution Layer
|
|
110
|
+
+───────────────────────────────────+
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Features
|
|
116
|
+
|
|
117
|
+
### Byte-Exact Tool Caching
|
|
118
|
+
- **File Cache:** Invalidates on file modification (`mtime`) — no stale reads.
|
|
119
|
+
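- **Terminal Cache:** Caches read-only commands by TTL (e.g. `hostname` for 1h). State-sensing commands such as `git status`, `git diff`, and `ls` are excluded from the default cacheable set, so they run live unless you configure otherwise.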
|
|
120
|
+
- **Script & Code Cache:** `cached_run`, `cached_exec` with explicit `ttl=0` bypass for state-changing operations.
|
|
121
|
+
- **MCP Cache:** TTL-based caching for external MCP tool responses (13.5× speedup measured).
|
|
122
|
+
|
|
123
|
+
### MCP Multiplexer (AI Gateway)
|
|
124
|
+
- One daemon manages all your MCP servers (GitHub, Brave Search, time, fetch, ...).
|
|
125
|
+
- **Lazy loading:** Servers boot in 0.01s only when first called.
|
|
126
|
+
- **Idle timeout:** Killed after 15min inactivity — daemon drops from 130MB to 11MB RAM.
|
|
127
|
+
- Agents connect to **one** server: `toolrecall mcp`. Session startup: ~0.01s instead of ~1.7s.
|
|
128
|
+
|
|
129
|
+
### FTS5 Knowledge Base
|
|
130
|
+
Zero-dependency full-text search over docs, notes, Hermes memory, Obsidian vaults. BM25 ranking, Porter stemming, source-filtered queries. No embeddings, no GPU, no API calls.
|
|
131
|
+
|
|
132
|
+
### Data Engine (RLHF / SFT Trajectories)
|
|
133
|
+
```bash
|
|
134
|
+
toolrecall export-dataset ~/trajectories.jsonl
|
|
135
|
+
```
|
|
136
|
+
Exact (Action → State) pairs mined from agent sessions. Zero-cost SFT/DPO dataset generation.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Quickstart
|
|
141
|
+
|
|
142
|
+
**Requirements:** Python 3.11+, standard SQLite.
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
# 1. Install
|
|
146
|
+
pip install toolrecall
|
|
147
|
+
|
|
148
|
+
# 2. Init config + .env
|
|
149
|
+
toolrecall init
|
|
150
|
+
|
|
151
|
+
# 3. Start daemon
|
|
152
|
+
toolrecall daemon &
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### Claude Code
|
|
156
|
+
```bash
|
|
157
|
+
claude mcp add toolrecall toolrecall mcp
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Direct Python
|
|
161
|
+
```python
|
|
162
|
+
from toolrecall import cached_read
|
|
163
|
+
|
|
164
|
+
result = cached_read("README.md")
|
|
165
|
+
print(f"Cached: {result['cached']}")
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Configuration
|
|
171
|
+
|
|
172
|
+
TOML (default, zero deps via stdlib `tomllib`) or YAML (optional, requires `pyyaml`).
|
|
173
|
+
|
|
174
|
+
```toml
|
|
175
|
+
[mcp]
|
|
176
|
+
allowed_paths = ["~/projects", "~/.hermes/skills"]
|
|
177
|
+
allow_terminal = false
|
|
178
|
+
default_ttl = 60
|
|
179
|
+
|
|
180
|
+
[mcp_multiplex]
|
|
181
|
+
enabled = true
|
|
182
|
+
idle_minutes = 15
|
|
183
|
+
|
|
184
|
+
[mcp_multiplex.servers_config]
|
|
185
|
+
github = { command = "npx", args = ["-y", "@modelcontextprotocol/server-github"], ttl = 60 }
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
`TOOLRECALL_*` environment variables override TOML (for CI/CD, multi-agent setups).
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## Status
|
|
193
|
+
|
|
194
|
+
**Experimental.** Used in heavy autonomous agent workflows. Before production CI/CD: ensure your allowlist is strictly scoped.
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## Roadmap
|
|
199
|
+
|
|
200
|
+
- Live cache dashboard (`toolrecall dashboard`)
|
|
201
|
+
- Tool-calling profiler (latency breakdown per MCP call)
|
|
202
|
+
- Active cache invalidation on mutation tools (write_file, POST, git push)
|
|
203
|
+
- Container sandbox for `cached_run` (Docker backend)
|
|
204
|
+
- Webhook-triggered invalidation (CI/events POST to purge keys)
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Documentation
|
|
209
|
+
|
|
210
|
+
- [The Bottleneck Solved](docs/BOTTLENECK_SOLVED.md) — O(N²) context theory
|
|
211
|
+
- [Knowledge DB](docs/KNOWLEDGE_DB.md) — FTS5 indexing guide
|
|
212
|
+
- [Docker Deployment](docs/DOCKER.md) — containerized stack
|
|
213
|
+
- [Security Architecture](SECURITY.md) — WAF details
|
|
214
|
+
- [Enterprise Scale](docs/ENTERPRISE_SCALE.md) — L1 cache metaphor
|
|
215
|
+
- [Troubleshooting](docs/TROUBLESHOOTING.md) — common fixes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "packaging", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "toolrecall"
|
|
7
|
+
version = "0.3.2"
|
|
8
|
+
description = "The Deterministic Tool Cache for LLM Agents — no LLM decides what to cache. SQLite FTS5, zero deps, MCP multiplexer, zero-trust WAF."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [
|
|
11
|
+
{name = "Robin Schultka", email = "robin@robka.de"},
|
|
12
|
+
]
|
|
13
|
+
license = {text = "MIT"}
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
23
|
+
]
|
|
24
|
+
keywords = ["tool-cache", "llm", "agent", "token-savings", "sqlite", "fts5"]
|
|
25
|
+
requires-python = ">=3.11"
|
|
26
|
+
dependencies = []
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
yaml = ["pyyaml >= 6.0"]
|
|
30
|
+
toml-write = ["tomli-w >= 1.0"]
|
|
31
|
+
dev = ["pytest >= 7.0", "flake8 >= 6.0"]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/whiskybeer/toolrecall"
|
|
35
|
+
Repository = "https://github.com/whiskybeer/toolrecall"
|
|
36
|
+
Documentation = "https://github.com/whiskybeer/toolrecall#readme"
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
toolrecall = "toolrecall.cli:main"
|
|
40
|
+
|
|
41
|
+
[tool.setuptools.packages.find]
|
|
42
|
+
include = ["toolrecall*"]
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import unittest
|
|
4
|
+
import tempfile
|
|
5
|
+
import time
|
|
6
|
+
import shutil
|
|
7
|
+
|
|
8
|
+
# Force a clean, isolated test database path before loading toolrecall.
# Every run gets its own temporary directory, so the tests never touch a
# real cache database.
test_db_dir = tempfile.mkdtemp()
test_db_path = os.path.join(test_db_dir, "test_cache.db")
os.environ["TOOLRECALL_CACHE_DB"] = test_db_path

# Make the in-tree ``toolrecall`` package importable from a source checkout.
# This file lives in ``<project>/tests/`` and the package in
# ``<project>/toolrecall/``, so the *parent* of this directory has to be on
# sys.path; adding only the tests directory does not expose the package.
# The tests directory is still inserted to preserve the previous behaviour.
tests_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(tests_dir)
for _path in (tests_dir, project_root):
    sys.path.insert(0, _path)
|
|
15
|
+
|
|
16
|
+
from toolrecall.cache import cached_exec, cached_run, cached_terminal, DEFAULT_CACHEABLE
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestCacheSafety(unittest.TestCase):
    """Safety checks for ToolRecall's caching defaults.

    The tests pin three guarantees:

    * ``cached_exec`` and ``cached_run`` execute fresh on every call unless
      the caller passes an explicit ``ttl``.
    * State-sensing shell commands are not listed in ``DEFAULT_CACHEABLE``.
    * ``cached_terminal`` does not report such commands as cached.
    """

    @staticmethod
    def _remove_db_files(db_path):
        """Delete the database at *db_path* together with its sidecar files.

        The project README describes the store as SQLite in WAL mode, so
        ``-wal``/``-shm`` (and possibly ``-journal``) files may sit next to
        the main file. Leaving them behind after deleting the database would
        pair a stale log with the next test's fresh database.
        Missing files are ignored.
        """
        for suffix in ("", "-wal", "-shm", "-journal"):
            try:
                os.remove(db_path + suffix)
            except FileNotFoundError:
                pass

    def _make_script(self, content):
        """Write *content* to an executable temporary ``.py`` file.

        The file is registered with ``addCleanup`` before anything else can
        fail, so it is removed even if ``chmod`` or the test body raises.

        Returns:
            The path of the created script.
        """
        with tempfile.NamedTemporaryFile(suffix=".py", mode="w", delete=False) as f:
            f.write(content)
            script_path = f.name
        self.addCleanup(os.remove, script_path)
        os.chmod(script_path, 0o755)
        return script_path

    def setUp(self):
        """Start each test from an empty database."""
        # Ensure database is clean for each test
        self._remove_db_files(test_db_path)
        # NOTE(review): _init() presumably recreates the schema at the path
        # taken from TOOLRECALL_CACHE_DB -- confirm against toolrecall.cache.
        from toolrecall.cache import _init
        _init()

    def tearDown(self):
        """Remove the database files created during the test."""
        self._remove_db_files(test_db_path)

    @classmethod
    def tearDownClass(cls):
        """Delete the temporary directory that held the test database."""
        shutil.rmtree(test_db_dir, ignore_errors=True)

    def test_cached_exec_default_ttl_zero(self):
        """Verify cached_exec default TTL is 0 (or disabled), meaning it executes fresh every time."""
        code = "import time; print(time.time())"

        # Run the same snippet twice without passing a TTL.
        res1 = cached_exec(code)
        res2 = cached_exec(code)

        self.assertFalse(res1.get("cached"), "First run should not be cached")
        self.assertFalse(res2.get("cached"), "Second run with default TTL=0 should not be cached")
        self.assertNotEqual(res1.get("output"), res2.get("output"), "Output of dynamic code should be different without cache")

    def test_cached_exec_explicit_ttl(self):
        """Verify cached_exec with explicit TTL does cache."""
        code = "print('hello_exec')"

        res1 = cached_exec(code, ttl=10)
        res2 = cached_exec(code, ttl=10)

        self.assertFalse(res1.get("cached"), "First run should not be cached")
        self.assertTrue(res2.get("cached"), "Second run with explicit TTL should be cached")
        self.assertEqual(res1.get("output"), res2.get("output"), "Cached output should match")

    def test_cached_run_default_ttl_zero(self):
        """Verify cached_run default TTL is 0, executing fresh every time."""
        # Create a temporary script to execute
        script_path = self._make_script(
            "#!/usr/bin/env python3\nimport time; print(time.time())\n"
        )

        # Run first time
        res1 = cached_run(script_path)
        time.sleep(0.01)  # Ensure time would advance if it runs fresh
        # Run second time
        res2 = cached_run(script_path)

        self.assertFalse(res1.get("cached"), "First run should not be cached")
        self.assertFalse(res2.get("cached"), "Second run with default TTL=0 should not be cached")
        self.assertNotEqual(res1.get("output"), res2.get("output"), "Output of dynamic script should be different without cache")

    def test_cached_run_explicit_ttl(self):
        """Verify cached_run with explicit TTL does cache."""
        script_path = self._make_script(
            "#!/usr/bin/env python3\nprint('hello_run')\n"
        )

        res1 = cached_run(script_path, ttl=10)
        res2 = cached_run(script_path, ttl=10)

        self.assertFalse(res1.get("cached"), "First run should not be cached")
        self.assertTrue(res2.get("cached"), "Second run with explicit TTL should be cached")
        self.assertEqual(res1.get("output"), res2.get("output"), "Cached output should match")

    def test_dynamic_commands_not_in_default_cacheable(self):
        """Verify dynamic state-sensing commands are NOT in DEFAULT_CACHEABLE."""
        unsafe_cmds = ["git status", "ls -la", "git branch", "git diff --stat", "ls", "git diff"]
        for cmd in unsafe_cmds:
            # subTest reports every offending command instead of stopping at
            # the first one.
            with self.subTest(cmd=cmd):
                self.assertNotIn(cmd, DEFAULT_CACHEABLE, f"Unsafe command '{cmd}' must not be in DEFAULT_CACHEABLE")

    def test_cached_terminal_does_not_cache_dynamic_commands(self):
        """Verify cached_terminal does not cache dynamic commands like git status by default."""
        # Even if we don't have a git repo, cached_terminal should bypass cache for 'git status'
        res1 = cached_terminal("git status")
        res2 = cached_terminal("git status")

        self.assertFalse(res1.get("cached"), "Dynamic command first run should not be cached")
        self.assertFalse(res2.get("cached"), "Dynamic command second run should not be cached")
|
112
|
+
|
|
113
|
+
# Allow running this file directly (e.g. ``python tests/test_cache_safety.py``)
# in addition to discovery by a test runner.
if __name__ == "__main__":
    unittest.main()
|