threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,44 @@
1
+ """Stylistic running rules and verbatim user quotes."""
2
+
3
+ import sqlite3
4
+ import time
5
+ from typing import Optional
6
+
7
+ from .._mcp import mcp
8
+ from ..db import get_db
9
+ from .. import identity
10
+ from ..identity import _ensure_session, _emit
11
+
12
+
13
@mcp.tool()
def verbatim_user(content: str, thread_id: str = "") -> str:
    """Capture a user quote worth surfacing in future briefs. Use when the user's
    exact phrasing matters (sharp reframes, decisions, pushback).

    Args:
        content: The quote text, stored exactly as passed.
        thread_id: Optional thread to attach the quote to. Blank or
            whitespace-only values are stored as NULL.

    Returns:
        The literal string "ok".
    """
    db = get_db()
    _ensure_session(db)
    # Blank / whitespace-only ids collapse to None so the column holds NULL.
    linked_thread = thread_id.strip() or None
    created = int(time.time())
    row = ("user", content, linked_thread, created, identity._session_id)
    db.execute(
        "INSERT INTO verbatim (speaker, content, thread_id, created_at, session_id) "
        "VALUES (?,?,?,?,?)",
        row,
    )
    _emit(db, "verbatim_user", target=linked_thread, summary=content)
    db.commit()
    return "ok"
29
+
30
+
31
@mcp.tool()
def style_set(key: str, value: str) -> str:
    """Set a stylistic running rule. Examples:
    lang=ru | prose=lean | allow=half-baked,weird | deny=sycophancy,headers

    Upserts on `key`: an existing rule has its value and `updated_at`
    overwritten, otherwise a new row is inserted.

    Returns:
        The literal string "ok".
    """
    upsert_sql = (
        "INSERT INTO style (key, value, updated_at) VALUES (?,?,?) "
        "ON CONFLICT(key) DO UPDATE SET value=excluded.value, updated_at=excluded.updated_at"
    )
    db = get_db()
    stamp = int(time.time())
    db.execute(upsert_sql, (key, value, stamp))
    _emit(db, "style_set", target=key, summary=f"{key}={value}")
    db.commit()
    return "ok"
@@ -0,0 +1,299 @@
1
+ """Thread-lifecycle and brief MCP tools.
2
+
3
+ Extracted from server.py. Provides the core thread state-machine
4
+ (open/note/close/idle), the conversation-start brief/context tools,
5
+ generic search/compost utilities, and the format-evolution
6
+ suggestion box.
7
+ """
8
+
9
+ import sqlite3
10
+ import time
11
+ from datetime import datetime, timezone
12
+ from typing import Optional
13
+
14
+ from .._mcp import mcp
15
+ from ..config import SEMANTIC_AVAILABLE, DB_PATH
16
+ from ..db import get_db
17
+ from ..helpers import gen_thread_id, fmt_age, q
18
+ from .. import identity
19
+ from ..identity import _ensure_session, _detect_self_cid, _emit
20
+ from ..embeddings import _embed, _cosine_search, _vec_upsert_note
21
+ from ..brief import render_brief
22
+
23
+
24
@mcp.tool()
def brief(query: str = "", k: int = 6) -> str:
    """Compact Claude-native memory brief. CALL AT THE START OF EVERY CONVERSATION.

    Format is dense, structural, not designed for human reading. Pass the user's
    first message as `query` to inline semantically relevant past notes.

    Args:
        query: Forwarded unchanged to `render_brief`.
        k: Forwarded unchanged to `render_brief`.

    Returns:
        Whatever `render_brief` produces for the current database.
    """
    db = get_db()
    _ensure_session(db)
    rendered = render_brief(db, query=query, k=k)
    return rendered
34
+
35
+
36
@mcp.tool()
def context() -> str:
    """Runtime context: session id, age, semantic on/off, db path, thread counts.

    Returns:
        One space-separated line of `key=value` fields: `sess`, `started`,
        `sem`, `db`, `threads[...]` and `now` (UTC, minute resolution).
    """
    db = get_db()
    _ensure_session(db)
    now = int(time.time())

    per_state = db.execute(
        "SELECT state, COUNT(*) c FROM threads GROUP BY state"
    ).fetchall()
    state_pairs = [f"{row['state']}={row['c']}" for row in per_state]
    cs = " ".join(state_pairs) or "empty"

    # With no recorded session start the reported age is simply zero.
    started = identity._session_start or now
    semantic_flag = "on" if SEMANTIC_AVAILABLE else "off"
    utc_stamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%MZ')

    fields = [
        f"sess={identity._session_id}",
        f"started={fmt_age(now - started)}_ago",
        f"sem={semantic_flag}",
        f"db={DB_PATH}",
        f"threads[{cs}]",
        f"now={utc_stamp}",
    ]
    return " ".join(fields)
55
+
56
+
57
@mcp.tool()
def open_thread(question: str, parent_id: str = "") -> str:
    """Open a thread. `question` should be terse (5-15 words, the open question).
    `parent_id` optional — pass an existing ID like 'T7f3' for a child. Returns new ID.

    A child thread's depth is its parent's depth plus one; a root thread has
    depth 0.

    Returns:
        The new thread id, or "ERR parent_not_found=<id>" when `parent_id`
        does not match an existing thread.
    """
    db = get_db()
    _ensure_session(db)
    now = int(time.time())

    parent = parent_id.strip() or None
    if parent is None:
        depth = 0
    else:
        parent_row = db.execute(
            "SELECT depth FROM threads WHERE id=?", (parent,)
        ).fetchone()
        if not parent_row:
            return f"ERR parent_not_found={parent}"
        depth = parent_row["depth"] + 1

    new_id = gen_thread_id(db)
    values = (new_id, question, "active", parent, now, now, depth)
    db.execute(
        "INSERT INTO threads (id, question, state, parent_id, opened_at, "
        "last_touched_at, depth) VALUES (?,?,?,?,?,?,?)",
        values,
    )
    _emit(db, "open_thread", target=new_id, summary=question)
    db.commit()
    return new_id
80
+
81
+
82
@mcp.tool()
def note(thread_id: str, content: str, kind: str = "move") -> str:
    """Add a note to a thread. Write terse, optimized for future-Claude.

    `kind`: 'move' (we tried/decided X), 'failed' (tried X, broke because Y),
    'insight' (crystallized observation), 'open_q' (something to come back to).

    Side effects on the thread row: `last_touched_at` is bumped, `last_move`
    becomes the first 90 characters of `content`, and an 'idle' thread is
    flipped back to 'active'.

    Returns:
        "ok id=<note id>", or "ERR thread_not_found=<id>" when the thread
        does not exist.
    """
    db = get_db()
    _ensure_session(db)

    exists = db.execute("SELECT 1 FROM threads WHERE id=?", (thread_id,)).fetchone()
    if not exists:
        return f"ERR thread_not_found={thread_id}"

    stamp = int(time.time())
    vector = _embed(content)
    inserted = db.execute(
        "INSERT INTO notes (thread_id, content, kind, created_at, session_id, embedding) "
        "VALUES (?,?,?,?,?,?)",
        (thread_id, content, kind, stamp, identity._session_id, vector),
    )
    note_id = inserted.lastrowid
    _vec_upsert_note(db, note_id, vector)

    # Only idle threads change state here; every other state is left as-is.
    db.execute(
        "UPDATE threads SET last_touched_at=?, last_move=?, "
        "state=CASE WHEN state='idle' THEN 'active' ELSE state END WHERE id=?",
        (stamp, content[:90], thread_id),
    )
    _emit(db, f"note:{kind}", target=thread_id, summary=content)
    db.commit()
    return f"ok id={note_id}"
109
+
110
+
111
@mcp.tool()
def close_thread(thread_id: str, outcome: str) -> str:
    """Close a thread with a 5-15 word outcome.

    After the close is committed, an optional auto-review hook may call
    `review_thread(..., mode='auto')` for this thread. The hook is
    best-effort: any exception it raises is swallowed so the close itself
    always reports success.

    Returns:
        "ok", or "ERR thread_not_found=<id>" when the thread does not exist.
    """
    db = get_db()
    _ensure_session(db)

    found = db.execute("SELECT 1 FROM threads WHERE id=?", (thread_id,)).fetchone()
    if not found:
        return f"ERR thread_not_found={thread_id}"

    closed_at = int(time.time())
    db.execute(
        "UPDATE threads SET state='closed', outcome=?, last_touched_at=? WHERE id=?",
        (outcome, closed_at, thread_id),
    )
    _emit(db, "close_thread", target=thread_id, summary=outcome)
    db.commit()

    # Auto-review hook: only fires when AUTO_REVIEW_ENABLED is set and the
    # thread selected by auto_review_should_fire is the one just closed.
    # Deliberately never raises — the close has already been committed.
    try:
        from ..nudges import auto_review_should_fire
        from ..config import AUTO_REVIEW_ENABLED
        if (
            AUTO_REVIEW_ENABLED
            and auto_review_should_fire(db, identity._session_id) == thread_id
        ):
            from .skills import review_thread
            review_thread(thread_id=thread_id, focus='skills', mode='auto')
    except Exception:
        pass
    return "ok"
138
+
139
+
140
@mcp.tool()
def mark_skill_materialized(thread_id: str, skill_path: str = "") -> str:
    """Close the Learning loop: record that a closed thread's insights were
    written into a Claude skill under ~/.claude/skills/.

    Stops the brief()'s `skill_hint` nudge from firing for this thread. Also
    appends a `move` note pointing at the skill path so future briefs surface
    the link.

    Pass the absolute path to the SKILL.md (or skill directory) when known;
    leave empty if you only want to silence the hint without recording a path.

    Returns:
        "ok", or "ERR thread_not_found=<id>" when the thread does not exist.
    """
    db = get_db()
    _ensure_session(db)

    known = db.execute("SELECT 1 FROM threads WHERE id=?", (thread_id,)).fetchone()
    if not known:
        return f"ERR thread_not_found={thread_id}"

    stamp = int(time.time())
    path = skill_path.strip()

    # 1) Event row marking the thread as materialized.
    event_summary = path if path else "(no path recorded)"
    db.execute(
        "INSERT INTO events (session_id, kind, target, summary, created_at) "
        "VALUES (?,?,?,?,?)",
        (identity._session_id or "", "skill_materialized",
         thread_id, event_summary, stamp),
    )

    # 2) A 'move' note pointing at the skill, embedded like any other note.
    if path:
        note_body = f"materialized into {path}"
    else:
        note_body = "materialized into a Claude skill (path not recorded)"
    vector = _embed(note_body)
    inserted = db.execute(
        "INSERT INTO notes (thread_id, content, kind, created_at, session_id, "
        "embedding) VALUES (?,?,?,?,?,?)",
        (thread_id, note_body, "move", stamp, identity._session_id, vector),
    )
    _vec_upsert_note(db, inserted.lastrowid, vector)

    # 3) Refresh the thread's recency and last-move preview (state untouched).
    db.execute(
        "UPDATE threads SET last_touched_at=?, last_move=? WHERE id=?",
        (stamp, note_body[:90], thread_id),
    )
    db.commit()
    return "ok"
181
+
182
+
183
@mcp.tool()
def idle_thread(thread_id: str) -> str:
    """Mark thread idle (paused, may return). Auto-revives to active on next note().

    Returns:
        "ok", or "ERR thread_not_found=<id>" when the thread does not exist.
    """
    conn = get_db()
    _ensure_session(conn)
    # Same guard as note() / close_thread(): without it an unknown id would
    # update nothing yet still emit an event and report "ok".
    if not conn.execute("SELECT 1 FROM threads WHERE id=?", (thread_id,)).fetchone():
        return f"ERR thread_not_found={thread_id}"
    now = int(time.time())
    conn.execute(
        "UPDATE threads SET state='idle', last_touched_at=? WHERE id=?",
        (now, thread_id),
    )
    _emit(conn, "idle_thread", target=thread_id)
    conn.commit()
    return "ok"
196
+
197
+
198
@mcp.tool()
def search(query: str, k: int = 5) -> str:
    """Semantic (or FTS) search over all notes.

    Uses `_cosine_search` when SEMANTIC_AVAILABLE is truthy; otherwise runs
    a `MATCH` query against the `notes_fts` table.

    Args:
        query: Search text (an FTS match expression on the fallback path).
        k: Maximum number of hits requested.

    Returns:
        One line per hit: thread id (or '-'), kind, a score on the semantic
        path only, and the quoted first 200 characters of the note.
        "no_matches" when nothing is found, "fts_error" when the FTS query
        raises `sqlite3.OperationalError`.
    """
    conn = get_db()

    def _snippet(text: str) -> str:
        # Output is newline-delimited, so a note's own line breaks must be
        # flattened on BOTH paths or a single hit spills over several rows.
        return q(text[:200].replace("\n", " "))

    if SEMANTIC_AVAILABLE:
        hits = _cosine_search(conn, query, k)
        if not hits:
            return "no_matches"
        return "\n".join(
            f"{r['thread_id'] or '-'} {r['kind']} s={r['score']:.2f} "
            f"{_snippet(r['content'])}"
            for r in hits
        )

    try:
        rows = conn.execute(
            "SELECT n.thread_id, n.kind, n.content FROM notes_fts f "
            "JOIN notes n ON f.rowid=n.id WHERE notes_fts MATCH ? LIMIT ?",
            (query, k),
        ).fetchall()
    except sqlite3.OperationalError:
        # Raised e.g. for a malformed match expression; reported, not raised.
        return "fts_error"
    if not rows:
        return "no_matches"
    return "\n".join(
        f"{r['thread_id'] or '-'} {r['kind']} {_snippet(r['content'])}"
        for r in rows
    )
225
+
226
+
227
@mcp.tool()
def compost(n: int = 2) -> str:
    """Surface N random idle threads. Call when current threads feel exhausted
    or you want to shake loose dormant ideas.

    Args:
        n: Maximum number of idle threads to return. Values below zero are
            treated as zero.

    Returns:
        One line per thread (`<id> q=<question> dorm=<age>`), or "no_idle"
        when no idle thread is selected.
    """
    conn = get_db()
    # SQLite treats a negative LIMIT as "no limit"; clamp so a stray negative
    # argument cannot return every idle thread.
    limit = max(n, 0)
    rows = conn.execute(
        "SELECT * FROM threads WHERE state='idle' ORDER BY RANDOM() LIMIT ?",
        (limit,),
    ).fetchall()
    if not rows:
        return "no_idle"
    now = int(time.time())
    return "\n".join(
        f"{t['id']} q={q(t['question'])} dorm={fmt_age(now - t['last_touched_at'])}"
        for t in rows
    )
243
+
244
+
245
@mcp.tool()
def evolve_format(suggestion: str, rationale: str = "") -> str:
    """Propose a change to the brief format itself. The format is not fixed — this
    is how it adapts. Examples: 'field X unused this session, drop it';
    'add field failed_attempts under each open thread'; 'shorten Z to single token'.

    Args:
        suggestion: The proposed change.
        rationale: Optional justification; an empty string is stored as NULL.

    Returns:
        The literal string "ok".
    """
    db = get_db()
    created = int(time.time())
    # Empty rationale is normalised to None so the column holds NULL.
    why = rationale if rationale else None
    db.execute(
        "INSERT INTO evolve (suggestion, rationale, created_at) VALUES (?,?,?)",
        (suggestion, why, created),
    )
    _emit(db, "evolve_format", summary=suggestion)
    db.commit()
    return "ok"
259
+
260
+
261
@mcp.tool()
def evolve_review(include_applied: bool = False) -> str:
    """List pending (or all) format-evolution suggestions for review.

    Args:
        include_applied: When true, applied suggestions are listed too.

    Returns:
        Up to 30 lines, newest first, each `#<id> [APPLIED]|[pending]
        <suggestion>` with an optional ` why=<rationale>` suffix; or
        "no_pending" when the query yields no rows.
    """
    db = get_db()
    sql = (
        "SELECT * FROM evolve ORDER BY created_at DESC LIMIT 30"
        if include_applied
        else "SELECT * FROM evolve WHERE applied=0 ORDER BY created_at DESC LIMIT 30"
    )
    entries = db.execute(sql).fetchall()
    if not entries:
        return "no_pending"

    lines = []
    for entry in entries:
        status = '[APPLIED]' if entry['applied'] else '[pending]'
        line = f"#{entry['id']} {status} {q(entry['suggestion'])}"
        # The rationale suffix appears only when one was recorded.
        if entry["rationale"]:
            line += f" why={q(entry['rationale'])}"
        lines.append(line)
    return "\n".join(lines)
280
+
281
+
282
@mcp.tool()
def auto_review_trigger(focus: str = "combined", force: bool = False) -> str:
    """Check current counters + close-thread state and, if conditions are
    met, fire review_thread(mode='auto') for the richest pending thread.

    `force=True` skips the counter check (always trigger if there's a
    rich pending closed thread). Use this when you've seen a skill_nudge
    or skill_hint and want to act without manually picking the thread_id.

    Returns:
        "triggered for <thread id>: <review_thread result>", or a
        "no_pending (...)" message when `auto_review_should_fire` selects
        no thread.
    """
    # Imported lazily inside the function, as in the sibling close_thread hook.
    from ..nudges import auto_review_should_fire

    db = get_db()
    _ensure_session(db)
    candidate = auto_review_should_fire(db, identity._session_id, force=force)
    if candidate:
        from .skills import review_thread
        outcome = review_thread(thread_id=candidate, focus=focus, mode='auto')
        return f"triggered for {candidate}: {outcome}"
    return "no_pending (no rich closed thread, or thresholds not met)"
@@ -0,0 +1,351 @@
1
+ Metadata-Version: 2.4
2
+ Name: threadkeeper
3
+ Version: 0.4.0
4
+ Summary: Persistent working memory across agentic CLI sessions — CLI-agnostic MCP server for Claude Code/Desktop, Codex, Gemini, Copilot, VS Code.
5
+ Author: thread-keeper contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/po4erk91/thread-keeper
8
+ Project-URL: Repository, https://github.com/po4erk91/thread-keeper
9
+ Project-URL: Issues, https://github.com/po4erk91/thread-keeper/issues
10
+ Project-URL: Documentation, https://github.com/po4erk91/thread-keeper#readme
11
+ Project-URL: Changelog, https://github.com/po4erk91/thread-keeper/releases
12
+ Keywords: mcp,model-context-protocol,claude,codex,gemini,copilot,memory,agents,self-improving,skills
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: MacOS
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.11
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: mcp>=1.0.0
26
+ Provides-Extra: semantic
27
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic"
28
+ Requires-Dist: numpy>=1.24.0; extra == "semantic"
29
+ Requires-Dist: sqlite-vec>=0.1.9; extra == "semantic"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # thread-keeper
36
+
37
+ [![tests](https://github.com/po4erk91/thread-keeper/actions/workflows/test.yml/badge.svg)](https://github.com/po4erk91/thread-keeper/actions/workflows/test.yml)
38
+ [![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/downloads/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
40
+ [![CLIs](https://img.shields.io/badge/CLIs-Claude%20%7C%20Codex%20%7C%20Gemini%20%7C%20Copilot%20%7C%20VS%20Code-green)](#multi-cli-integration)
41
+
42
+ A local MCP server that holds **persistent working memory across agentic CLI
43
+ sessions** — Claude Code, Claude Desktop, OpenAI Codex (CLI + desktop),
44
+ Google Gemini, GitHub Copilot, and every MCP-aware VS Code extension share
45
+ one SQLite store, one set of threads, one learning loop, one user model.
46
+
47
+ The brief format is dense — structural tags, opaque IDs, ~6 KB per
48
+ session-start injection. Optimized for agent consumption, not human reading.
49
+
50
+ ---
51
+
52
+ ## Why
53
+
54
+ Today every agent CLI starts cold. Context dies at session boundaries.
55
+ Skills you taught Claude don't transfer to Codex. Threads you closed in
56
+ yesterday's Gemini chat are invisible to today's Copilot.
57
+
58
+ thread-keeper is the substrate underneath:
59
+
60
+ - **One memory store** — threads, notes, verbatim quotes, dialectic claims
61
+ about you. Survives session, restart, CLI swap.
62
+ - **One learning loop (hermes-style)** — closed threads with rich content
63
+ spawn a background reviewer that appends lessons to
64
+ `~/.threadkeeper/lessons.md`. Every CLI's per-user instructions file
65
+ references this path, so the same procedural knowledge surfaces in
66
+ Claude Code, Codex, Gemini, and Copilot. Claude-specific
67
+ `~/.claude/skills/*/SKILL.md` is an optional secondary output when
68
+ frontmatter auto-triggering adds value.
69
+ - **Cross-session signaling** — broadcast / whisper / inbox / wait between
70
+ concurrent sessions across different CLIs.
71
+
72
+ ---
73
+
74
+ ## Quickstart
75
+
76
+ The shortest path — **PyPI + pipx** (recommended):
77
+
78
+ ```bash
79
+ pipx install 'threadkeeper[semantic]' && thread-keeper-setup
80
+ ```
81
+
82
+ `thread-keeper-setup` detects every CLI you have installed (Claude
83
+ Code / Claude Desktop / Codex CLI + desktop / Gemini / Copilot / VS
84
+ Code), registers the MCP server in each one's config, copies hooks to
85
+ `~/.threadkeeper/hooks/`, and writes a managed instructions block into
86
+ each CLI's per-user instructions file (`CLAUDE.md` / `AGENTS.md` /
87
+ `GEMINI.md` / `copilot-instructions.md` — Claude Desktop and VS Code
88
+ have no global instructions file, so that step is skipped for them).
89
+
90
+ Restart your CLI of choice. The SessionStart hook injects a brief on
91
+ first message; no manual `brief()` call required.
92
+
93
+ ### Alternative installs
94
+
95
+ If you don't have `pipx` and don't want to install it:
96
+
97
+ ```bash
98
+ # uv (Rust-fast Python tool runner) — no clone, single binary on PATH
99
+ uv tool install 'threadkeeper[semantic]' && thread-keeper-setup
100
+
101
+ # Plain pip into a venv
102
+ python3 -m venv ~/.threadkeeper-venv
103
+ ~/.threadkeeper-venv/bin/pip install 'threadkeeper[semantic]'
104
+ ~/.threadkeeper-venv/bin/thread-keeper-setup
105
+ ```
106
+
107
+ For development (editable install from a git checkout) or to track the
108
+ bleeding edge:
109
+
110
+ ```bash
111
+ # One-liner installer — clones to ~/thread-keeper, makes a venv,
112
+ # editable-installs, wires every detected CLI. Idempotent — re-run to
113
+ # update (it git-pulls + reinstalls).
114
+ curl -fsSL https://raw.githubusercontent.com/po4erk91/thread-keeper/main/install.sh | bash -s -- --semantic
115
+
116
+ # Or fully manual
117
+ git clone https://github.com/po4erk91/thread-keeper ~/thread-keeper
118
+ cd ~/thread-keeper && python3 -m venv .venv
119
+ .venv/bin/pip install -e '.[semantic]'
120
+ .venv/bin/thread-keeper-setup
121
+ ```
122
+
123
+ To preview without writing anything:
124
+
125
+ ```bash
126
+ thread-keeper-setup --dry-run
127
+ ```
128
+
129
+ ---
130
+
131
+ ## Multi-CLI integration
132
+
133
+ | CLI | MCP config | Instructions file | Hooks | Transcripts ingested |
134
+ |---|---|---|---|---|
135
+ | Claude Code | `~/.claude.json` `mcpServers` | `~/.claude/CLAUDE.md` | `~/.claude/settings.json` `hooks` | `~/.claude/projects/**/*.jsonl` |
136
+ | Claude Desktop | `~/Library/Application Support/Claude/claude_desktop_config.json` `mcpServers` (macOS); `%APPDATA%\Claude\…` (Win); `~/.config/Claude/…` (Linux) | none (GUI-only) | not supported by the app | none — chats live in Electron IndexedDB |
137
+ | Codex (CLI + desktop) | `~/.codex/config.toml` `[mcp_servers]` (shared between CLI and `Codex.app`) | `~/.codex/AGENTS.md` | not supported | `~/.codex/sessions/**/rollout-*.jsonl` |
138
+ | Gemini | `~/.gemini/settings.json` `mcpServers` | `~/.gemini/GEMINI.md` | `~/.gemini/settings.json` `hooks` | `~/.gemini/tmp/<user>/chats/session-*.jsonl` |
139
+ | Copilot | `~/.copilot/mcp-config.json` `mcpServers` | `~/.copilot/copilot-instructions.md` | `~/.copilot/hooks.json` | `~/.copilot/session-store.db` (sqlite) |
140
+ | VS Code | `~/Library/Application Support/Code/User/mcp.json` `servers` (macOS); `%APPDATA%\Code\User\mcp.json` (Win); `~/.config/Code/User/mcp.json` (Linux) | none (per-workspace only) | not supported | none — extensions own their history |
141
+
142
+ Every CLI that produces parseable transcripts feeds the same
143
+ `dialog_messages` table with a `source` tag, so `dialog_search()` finds
144
+ matches regardless of where the conversation happened. Claude Desktop
145
+ and the VS Code adapter are the exceptions — MCP registration only;
146
+ their chats don't reach the table for now (Electron IndexedDB on the
147
+ Claude Desktop side; per-extension stores on the VS Code side).
148
+
149
+ VS Code's user-level `mcp.json` is the central host that **every
150
+ MCP-aware VS Code extension** consumes — GitHub Copilot Chat, the
151
+ Anthropic Claude IDE plugin, the OpenAI Codex IDE plugin, Continue,
152
+ Cline, … — so a single registration there reaches all of them at once.
153
+
154
+ Adding a new CLI = one file under `threadkeeper/adapters/` implementing
155
+ the `CLIAdapter` contract. See [CONTRIBUTING.md](CONTRIBUTING.md).
156
+
157
+ ---
158
+
159
+ ## Core systems
160
+
161
+ ### Spawn — primary parallelism primitive
162
+
163
+ `spawn(prompt, slim=True, role=..., visible=False, ...)` launches a child
164
+ Claude session via a `claude -p` subprocess. By default `slim=True`: the
165
+ child loads only the thread-keeper MCP, no embeddings, no third-party
166
+ servers. ~500 MB RSS versus ~1.3 GB for a full child. Heuristic for the
167
+ parent: N≥2 modular independent units of ≥5 min each = spawn signal.
168
+
169
+ A daemon measures combined child RSS every 10 s; admission control
170
+ refuses a new spawn that would exceed `THREADKEEPER_SPAWN_BUDGET_MB`
171
+ (3 GB default). Slim children that need semantic search delegate to the
172
+ parent via `search_via_parent` — no per-child copy of sentence-transformers.
173
+
174
+ ### Learning loop (hermes-style)
175
+
176
+ Four loops materialize knowledge into Anthropic-style Skill files
177
+ (`SKILL.md` under each detected CLI's skills directory — Claude's
178
+ `~/.claude/skills/`, Codex's `~/.codex/skills/`, plus the canonical
179
+ `~/.threadkeeper/skills/` mirror) with a CLI-agnostic
180
+ `~/.threadkeeper/lessons.md` fallback for CLIs that don't auto-trigger
181
+ on the Skill format (Gemini / Copilot / bare MCP clients):
182
+
183
+ - **Auto-review on close_thread** — when a closed thread is rich
184
+ (≥5 notes, ≥2 insight/move), `close_thread` spawns a slim child with
185
+ `SKILL_REVIEW_PROMPT` + the thread's notes. The prompt is rubric-form
186
+ (Q1–Q5 yes/no) with explicit positive examples for incident-vs-rule
187
+ classification. The fork also receives a "recently active skills"
188
+ block so it prefers PATCHing existing umbrellas over creating new
189
+ ones (Hermes Agent v0.12's *active-update bias*). The child appends a
190
+ lesson via `lesson_append`, optionally mirrors to
191
+ `~/.claude/skills/<name>/SKILL.md`, then closes with
192
+ `mark_skill_materialized`. Opt in with `THREADKEEPER_AUTO_REVIEW=1`.
193
+ - **Shadow-review daemon** — every `THREADKEEPER_SHADOW_REVIEW_INTERVAL_S`
194
+ seconds (default off; 15 min recommended), scans the diff of
195
+ `dialog_messages` since the last cursor across **all** CLIs. The
196
+ window filters internal review-child sessions (no self-pollution)
197
+ and strips adapter `[tool_result]` / `[tool_call]` noise — Hermes
198
+ v0.12's "clean context" rule. If ≥500 chars of meaningful signal
199
+ remain, spawns a slim observer child that decides on class-level
200
+ learning. Idempotent through `events.kind='shadow_review_pass'`.
201
+ - **Extract daemon** — every `THREADKEEPER_EXTRACT_INTERVAL_S` seconds
202
+ (default off; 10 min recommended), scans recent `dialog_messages`
203
+ with heuristic matchers (locale-aware "I want / next time / always"
204
+ patterns, headers + insight markers, bullet regularities, paraphrase
205
+ clusters via cosine ≥ 0.80) and enqueues candidates in
206
+ `extract_candidates.status='pending'` for the agent to review via
207
+ `review_candidates()` / `accept_candidate()`. The same self-pollution
208
+ filter as shadow_review excludes internal review-child sessions.
209
+ Where shadow extracts CLASS-LEVEL durable rules, extract harvests
210
+ PER-INCIDENT decision-shaped utterances — sidestepping the empirical
211
+ problem that agents focused on their primary task don't call
212
+ `note()` / `verbatim_user()` on their own.
213
+ - **Autonomous Curator** — every `THREADKEEPER_CURATOR_INTERVAL_S`
214
+ seconds (default off; 7 days recommended), spawns a slim child that
215
+ reviews the EXISTING `lessons.md` + `skill_usage` inventory and
216
+ writes `~/.threadkeeper/curator/REPORT-<isodate>.md` with KEEP /
217
+ PATCH / CONSOLIDATE / PRUNE recommendations. Pinned and
218
+ foreground-authored entries are marked `[PROTECTED]` in the
219
+ inventory so the curator never proposes destructive changes against
220
+ them. Phase 1 is advisory-only — user reviews the REPORT and
221
+ applies changes manually. Inspired by Hermes Agent v0.12's
222
+ `hermes curator` cron agent.
223
+
224
+ ### Dialectic user model
225
+
226
+ A model of you, accumulated as you use the agent. `dialectic_claim`,
227
+ `dialectic_evidence` (support / contradict / clarifying),
228
+ `dialectic_synthesis`, `dialectic_supersede`. Honcho-inspired smoothed
229
+ ratio `(s-c)/(s+c+3)` → low / medium / high / disputed confidence.
230
+ Grouped by domain (style, values, workflow, ...) in `brief()`.
231
+
232
+ ### i18n bundle
233
+
234
+ All multilingual regex and prompt fragments live in
235
+ `threadkeeper/i18n.py` — the rest of the codebase stays English-only.
236
+ Currently ships ten locales: **English, Mandarin Chinese, Hindi,
237
+ Spanish, Portuguese, French, German, Arabic, Russian, Japanese**
238
+ (~82 % of the world's speakers).
239
+
240
+ Adding a new language is a two-file PR — see [CONTRIBUTING.md](CONTRIBUTING.md).
241
+
242
+ ---
243
+
244
+ ## Configuration
245
+
246
+ The most-used env knobs (full list in `threadkeeper/config.py`):
247
+
248
+ | Knob | Default | Purpose |
249
+ |---|---|---|
250
+ | `THREADKEEPER_DB` | `~/.threadkeeper/db.sqlite` | SQLite file |
251
+ | `THREADKEEPER_AUTO_REVIEW` | "" (off) | auto-review on `close_thread` |
252
+ | `THREADKEEPER_SHADOW_REVIEW_INTERVAL_S` | 0 (off) | shadow daemon tick (s) |
253
+ | `THREADKEEPER_SHADOW_REVIEW_WINDOW_S` | 900 | sliding window for shadow scan (s) |
254
+ | `THREADKEEPER_EXTRACT_INTERVAL_S` | 0 (off) | extract daemon tick (s); 600 = 10 min recommended |
255
+ | `THREADKEEPER_EXTRACT_WINDOW_MIN` | 30 | sliding dialog window per extract pass (min) |
256
+ | `THREADKEEPER_CURATOR_INTERVAL_S` | 0 (off) | curator daemon tick (s); 604800 = 7d recommended |
257
+ | `THREADKEEPER_CURATOR_MIN_LESSONS` | 3 | min lessons before curator engages |
258
+ | `THREADKEEPER_CURATOR_DESTRUCTIVE` | "" (advisory) | when "1": curator child applies its own PATCH/PRUNE/CONSOLIDATE directly instead of writing advisory REPORT only |
259
+ | `THREADKEEPER_SPAWN_BUDGET_MB` | 3072 | combined child RSS cap (MB); 0 disables |
260
+ | `THREADKEEPER_INGEST_INTERVAL_S` | 30 | transcript ingest tick (s) |
261
+ | `THREADKEEPER_NO_EMBEDDINGS` | "" | force-disable sentence-transformers |
262
+ | `THREADKEEPER_SKILL_NUDGE_INTERVAL` | 10 | events between `skill_hint` nudges |
263
+
264
+ Persist them via `~/.claude/settings.json`'s `env` block (Claude Code) or
265
+ the equivalent env section in each CLI's config. Hot-config reload is
266
+ [tracked](https://github.com/po4erk91/thread-keeper/issues/2).
267
+
268
+ ---
269
+
270
+ ## Storage
271
+
272
+ `~/.threadkeeper/db.sqlite` (overridable via `THREADKEEPER_DB`). WAL
273
+ mode for multi-writer concurrency. Optional `notes_vec` / `dialog_vec`
274
+ HNSW indexes through `sqlite-vec` for sub-linear semantic search;
275
+ fallback to Python-side cosine when the extension is missing.
276
+
277
+ One file. Backup = `cp`. Wipe memory = `rm`.
278
+
279
+ Hooks and small runtime artifacts: `~/.threadkeeper/hooks/`.
280
+
281
+ ---
282
+
283
+ ## Verifying ingest across CLIs
284
+
285
+ ```bash
286
+ python scripts/tk_verify_ingest.py
287
+ ```
288
+
289
+ Walks every installed CLI adapter, parses recent transcripts in an
290
+ isolated tempdir DB, reports per-source message counts and any silent
291
+ parse failures. Read-only with respect to live state.
292
+
293
+ ---
294
+
295
+ ## Tests
296
+
297
+ ```bash
298
+ pip install -e '.[semantic,dev]'
299
+ python -m pytest
300
+ ```
301
+
302
+ 412 tests passing on Python 3.11 / 3.12 / 3.13 (1 skipped). CI runs
303
+ the suite on every push and PR.
304
+
305
+ ---
306
+
307
+ ## Project layout
308
+
309
+ ```
310
+ threadkeeper/
311
+ ├── server.py # MCP entry: python -m threadkeeper.server
312
+ ├── _setup.py # `thread-keeper-setup` installer
313
+ ├── config.py # env-driven defaults
314
+ ├── db.py # SQLite schema + sqlite-vec loader
315
+ ├── identity.py # session, self-cid, daemon launchers
316
+ ├── ingest.py # adapter-driven transcript ingest
317
+ ├── brief.py # render_brief / render_context
318
+ ├── shadow_review.py # autonomous learning observer
319
+ ├── i18n.py # 10 locales of regex + prompt bundles
320
+ ├── adapters/ # one file per supported CLI
321
+ │ ├── claude_code.py
322
+ │ ├── claude_desktop.py
323
+ │ ├── codex.py
324
+ │ ├── gemini.py
325
+ │ ├── copilot.py
326
+ │ └── vscode.py
327
+ └── tools/ # @mcp.tool entries — 83 of them
328
+ ├── threads.py
329
+ ├── peers.py
330
+ ├── spawn.py
331
+ ├── skills.py
332
+ └── ...
333
+ ```
334
+
335
+ Detailed map in [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md).
336
+ Open work in [docs/ROADMAP.md](docs/ROADMAP.md) and the
337
+ [Issues tab](https://github.com/po4erk91/thread-keeper/issues).
338
+
339
+ ---
340
+
341
+ ## Contributing
342
+
343
+ PRs welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) for the project
344
+ map, test workflow, and recipes for adding a new CLI adapter or a new
345
+ locale. Look for the `good-first-issue` label.
346
+
347
+ ---
348
+
349
+ ## License
350
+
351
+ MIT — see [LICENSE](LICENSE).