threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,246 @@
1
+ """RSS budget enforcement for spawned children.
2
+
3
+ Cap on combined RSS of all running spawned children. The parent process
4
+ itself is not counted (we don't constrain the user's main agent). Default
5
+ budget is `SPAWN_BUDGET_MB` (3072 MB).
6
+
7
+ Flow:
8
+ spawn() pre-flight:
9
+ estimate child RSS (slim → SPAWN_ESTIMATE_SLIM_MB, else FULL)
10
+ check_budget(conn, new_kb) returns (ok: bool, message: str); ok=False means refused
11
+ if refused → spawn() returns ERR + reason
12
+ else → INSERT tasks row with rss_kb = estimate, then proceed
13
+
14
+ background daemon (start_budget_daemon):
15
+ every SPAWN_BUDGET_POLL_S seconds, walk running tasks, compute real
16
+ RSS of each process tree via `ps`, write back into tasks.rss_kb.
17
+ Tasks that have ended → no update (their rss_kb stays as last seen
18
+ but they're filtered out by ended_at IS NOT NULL anyway).
19
+
20
+ spawn_budget_status() (MCP):
21
+ "budget=N MB used=N MB free=N MB | per_task..."
22
+
23
+ Set SPAWN_BUDGET_MB=0 to disable enforcement entirely.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import logging
29
+ import os
30
+ import subprocess
31
+ import threading
32
+ import time
33
+ from typing import Optional, Tuple
34
+
35
+ from .config import (
36
+ SPAWN_BUDGET_MB,
37
+ SPAWN_ESTIMATE_SLIM_MB,
38
+ SPAWN_ESTIMATE_FULL_MB,
39
+ SPAWN_BUDGET_POLL_S,
40
+ )
41
+ from .db import get_db
42
+ from .helpers import alive
43
+
44
# Module-level logger; _daemon_loop() reports failed ticks through it at DEBUG.
logger = logging.getLogger(__name__)

# Guard flag for start_budget_daemon(): set to True once the polling thread
# has been started so later calls return without starting another one.
_started = False
47
+
48
+
49
+ # ─────────────────────────────────────────────────────────────────────
50
+ # Estimates
51
+ # ─────────────────────────────────────────────────────────────────────
52
+
53
def estimate_child_rss_kb(slim: bool) -> int:
    """Return the provisional RSS (in KB) charged to a child before it runs.

    Admission control uses this figure until a real measurement replaces it
    (within `SPAWN_BUDGET_POLL_S`).

    Args:
        slim: True selects SPAWN_ESTIMATE_SLIM_MB, False selects
            SPAWN_ESTIMATE_FULL_MB.

    Returns:
        The chosen estimate truncated to an int and converted from MB to KB.
    """
    if slim:
        estimate_mb = SPAWN_ESTIMATE_SLIM_MB
    else:
        estimate_mb = SPAWN_ESTIMATE_FULL_MB
    return 1024 * int(estimate_mb)
58
+
59
+
60
+ # ─────────────────────────────────────────────────────────────────────
61
+ # Tree walker — sum RSS of pid and all its descendants via `ps`
62
+ # ─────────────────────────────────────────────────────────────────────
63
+
64
+ def _ps_pairs() -> list[tuple[int, int]]:
65
+ """Snapshot of (pid, ppid) for every process visible to `ps`."""
66
+ try:
67
+ r = subprocess.run(
68
+ ["ps", "-ax", "-o", "pid=,ppid="],
69
+ capture_output=True, text=True, timeout=3,
70
+ )
71
+ except (subprocess.SubprocessError, OSError):
72
+ return []
73
+ out: list[tuple[int, int]] = []
74
+ for line in (r.stdout or "").splitlines():
75
+ parts = line.split()
76
+ if len(parts) < 2:
77
+ continue
78
+ try:
79
+ out.append((int(parts[0]), int(parts[1])))
80
+ except ValueError:
81
+ continue
82
+ return out
83
+
84
+
85
+ def _rss_for_pids(pids: list[int]) -> int:
86
+ """Sum RSS (KB) for the given pids via `ps`. Missing pids contribute 0."""
87
+ if not pids:
88
+ return 0
89
+ try:
90
+ r = subprocess.run(
91
+ ["ps", "-o", "pid=,rss="] + ["-p"] + [str(p) for p in pids],
92
+ capture_output=True, text=True, timeout=3,
93
+ )
94
+ except (subprocess.SubprocessError, OSError):
95
+ return 0
96
+ total = 0
97
+ for line in (r.stdout or "").splitlines():
98
+ parts = line.split()
99
+ if len(parts) < 2:
100
+ continue
101
+ try:
102
+ total += int(parts[1])
103
+ except ValueError:
104
+ continue
105
+ return total
106
+
107
+
108
def measure_tree_rss_kb(root_pid: int) -> Optional[int]:
    """Return the summed RSS (KB) of `root_pid` and all of its descendants.

    Args:
        root_pid: Pid of the process at the top of the tree.

    Returns:
        The value of `_rss_for_pids` over the collected tree, or None when
        `root_pid` is None or not positive, when the `ps` snapshot is empty,
        or when `root_pid` does not appear in the snapshot (so the caller can
        leave the stored value alone).
    """
    if root_pid is None or root_pid <= 0:
        return None
    pairs = _ps_pairs()
    if not pairs:
        return None

    # One pass builds the parent -> children index and checks that the root
    # is still visible.
    children_by_parent: dict[int, list[int]] = {}
    root_alive = False
    for pid, ppid in pairs:
        if pid == root_pid:
            root_alive = True
        children_by_parent.setdefault(ppid, []).append(pid)
    if not root_alive:
        # Root isn't visible, so the process has ended.
        return None

    # Breadth-first walk. `seen` guarantees termination even if the snapshot
    # contains a pid/ppid cycle (the snapshot is not atomic, so pid reuse
    # between reads can produce one).
    seen = {root_pid}
    tree = [root_pid]
    frontier = [root_pid]
    while frontier:
        nxt: list[int] = []
        for parent in frontier:
            for kid in children_by_parent.get(parent, ()):
                if kid in seen:
                    continue
                seen.add(kid)
                tree.append(kid)
                nxt.append(kid)
        frontier = nxt
    return _rss_for_pids(tree)
134
+
135
+
136
+ # ─────────────────────────────────────────────────────────────────────
137
+ # Budget check
138
+ # ─────────────────────────────────────────────────────────────────────
139
+
140
def _running_tasks_rss(conn) -> int:
    """Return the total rss_kb of every task whose ended_at is NULL.

    A row with NULL rss_kb has not been measured yet. It is charged the FULL
    estimate as a conservative placeholder, so a burst of spawns cannot slip
    under the cap before the daemon records real values.

    Args:
        conn: Database connection whose rows support access by column name.
    """
    placeholder_kb = SPAWN_ESTIMATE_FULL_MB * 1024
    open_rows = conn.execute(
        "SELECT rss_kb FROM tasks WHERE ended_at IS NULL"
    ).fetchall()
    return sum(
        placeholder_kb if row["rss_kb"] is None else row["rss_kb"]
        for row in open_rows
    )
153
+
154
+
155
def check_budget(conn, new_child_kb: int) -> Tuple[bool, str]:
    """Admission check: would a child of `new_child_kb` exceed the budget?

    Args:
        conn: Database connection passed through to `_running_tasks_rss`.
        new_child_kb: Estimated RSS of the prospective child, in KB.

    Returns:
        (ok, message). ok is always True when SPAWN_BUDGET_MB <= 0
        (enforcement disabled). Otherwise ok is False exactly when the
        current running total plus the new child exceeds the budget.
    """
    if SPAWN_BUDGET_MB <= 0:
        return True, "budget_disabled"

    limit_kb = SPAWN_BUDGET_MB * 1024
    in_use_kb = _running_tasks_rss(conn)
    total_kb = in_use_kb + new_child_kb

    if total_kb <= limit_kb:
        return True, (
            f"ok: current={in_use_kb // 1024}MB + new={new_child_kb // 1024}MB "
            f"≤ {SPAWN_BUDGET_MB}MB"
        )

    # Over budget: report whole-MB (floored) figures in the refusal.
    cur_mb = in_use_kb // 1024
    new_mb = new_child_kb // 1024
    proj_mb = total_kb // 1024
    refusal = (
        f"budget_exceeded: running_subagents={cur_mb}MB + "
        f"new_child={new_mb}MB = {proj_mb}MB > "
        f"limit={SPAWN_BUDGET_MB}MB. Wait for a child to finish, "
        f"raise THREADKEEPER_SPAWN_BUDGET_MB, or use task_kill()."
    )
    return False, refusal
177
+
178
+
179
+ # ─────────────────────────────────────────────────────────────────────
180
+ # Daemon — refresh real RSS values
181
+ # ─────────────────────────────────────────────────────────────────────
182
+
183
+ def _refresh_all_running(conn) -> int:
184
+ """Sweep running tasks, update rss_kb with real measurement. Tasks
185
+ whose pid is invalid (0 / visible spawn) are skipped — their RSS
186
+ can't be tracked from here. Returns number of rows updated."""
187
+ rows = conn.execute(
188
+ "SELECT id, pid, spawned_cid FROM tasks "
189
+ "WHERE ended_at IS NULL ORDER BY started_at DESC LIMIT 100"
190
+ ).fetchall()
191
+ now = int(time.time())
192
+ updated = 0
193
+ for r in rows:
194
+ pid = r["pid"]
195
+ if not pid or pid <= 0:
196
+ continue # visible spawns — Terminal-launched, pid not tracked
197
+ if not alive(pid):
198
+ # Process gone — mark ended, leave rss_kb as last-known.
199
+ conn.execute(
200
+ "UPDATE tasks SET ended_at=? WHERE id=? AND ended_at IS NULL",
201
+ (now, r["id"]),
202
+ )
203
+ continue
204
+ rss = measure_tree_rss_kb(pid)
205
+ if rss is None:
206
+ continue
207
+ conn.execute(
208
+ "UPDATE tasks SET rss_kb=?, rss_updated_at=? WHERE id=?",
209
+ (rss, now, r["id"]),
210
+ )
211
+ updated += 1
212
+ if updated:
213
+ try:
214
+ conn.commit()
215
+ except Exception:
216
+ pass
217
+ return updated
218
+
219
+
220
def _daemon_loop() -> None:
    """Thread body: refresh RSS figures forever, once per poll interval.

    Each tick opens a database handle, runs one sweep and closes the handle
    again. Any exception raised during a tick is logged at DEBUG and the
    loop carries on after sleeping SPAWN_BUDGET_POLL_S seconds.
    """
    while True:
        try:
            db = get_db()
            try:
                _refresh_all_running(db)
            finally:
                # Close the handle even when the sweep raised.
                db.close()
        except Exception:
            logger.debug("spawn_budget daemon tick failed", exc_info=True)
        time.sleep(SPAWN_BUDGET_POLL_S)
231
+
232
+
233
def start_budget_daemon() -> None:
    """Start the RSS polling thread unless it is already running.

    Idempotent — intended to be called lazily from _ensure_session. Nothing
    is started when polling is disabled (SPAWN_BUDGET_POLL_S <= 0) or when
    the budget itself is disabled (SPAWN_BUDGET_MB <= 0), since there is
    then nothing to track.
    """
    global _started
    polling_off = SPAWN_BUDGET_POLL_S <= 0
    budget_off = SPAWN_BUDGET_MB <= 0
    if _started or polling_off or budget_off:
        return
    worker = threading.Thread(
        target=_daemon_loop, name="spawn_budget", daemon=True,
    )
    worker.start()
    _started = True
@@ -0,0 +1,2 @@
1
+ """Tool modules. Importing each one registers its @mcp.tool() entries on
2
+ the singleton in threadkeeper._mcp.mcp."""
@@ -0,0 +1,111 @@
1
+ """Concept-registration MCP tools.
2
+
3
+ Extracted from server.py. Provides registration, listing, and
4
+ expansion for nameless concepts triangulated across paraphrase runs.
5
+ """
6
+
7
+ import sqlite3
8
+ import time
9
+ from typing import Optional
10
+
11
+ from .._mcp import mcp
12
+ from ..db import get_db
13
+ from ..helpers import fmt_age, q, gen_concept_id
14
+ from ..identity import _ensure_session, _detect_self_cid, _emit
15
+
16
+
17
@mcp.tool()
def register_concept(description: str,
                     triangulation_notes: str = "",
                     confidence: str = "medium",
                     source_thread: str = "") -> str:
    """Register a concept that lacks a precise human name.

    `description` should describe the phenomenon through EXAMPLES, not with
    a canonical label — naming it locks it back into a human discipline.
    `triangulation_notes` (optional): the paraphrase runs that surfaced
    the invariant. `confidence` ∈ {low, medium, high}."""
    # Pure argument validation first; no database access yet.
    if confidence not in ("low", "medium", "high"):
        return f"ERR bad_confidence={confidence}"
    if not description.strip():
        return "ERR empty_description"

    conn = get_db()
    _ensure_session(conn)
    registrar_cid = _detect_self_cid()

    # A blank source_thread is stored as NULL; a non-blank one must name an
    # existing thread.
    thread_ref = source_thread.strip() or None
    if thread_ref:
        known = conn.execute(
            "SELECT 1 FROM threads WHERE id=?", (thread_ref,)
        ).fetchone()
        if not known:
            return f"ERR source_thread_not_found={thread_ref}"

    concept_id = gen_concept_id(conn)
    stamp = int(time.time())
    # registered_at and last_evidence_at both start at the same timestamp.
    record = (
        concept_id, description, triangulation_notes or None, confidence,
        thread_ref, registrar_cid, stamp, stamp,
    )
    conn.execute(
        "INSERT INTO concepts (id, description, triangulation_notes, "
        "confidence, source_thread, registered_by_cid, registered_at, "
        "last_evidence_at) VALUES (?,?,?,?,?,?,?,?)",
        record,
    )
    _emit(conn, "register_concept", target=concept_id,
          summary=description[:140])
    conn.commit()
    return f"ok id={concept_id} conf={confidence}"
53
+
54
+
55
@mcp.tool()
def list_concepts(min_confidence: str = "low", k: int = 10) -> str:
    """List registered concepts, filtered by minimum confidence."""
    # Returns up to max(1, k) concepts, newest first, whose confidence is at
    # least `min_confidence`. The filter is applied in SQL so that matching
    # concepts are never hidden behind newer, lower-confidence rows; rows
    # holding an unrecognised confidence value are excluded rather than
    # raising.
    rank = {"low": 0, "medium": 1, "high": 2}
    if min_confidence not in rank:
        return f"ERR bad_confidence={min_confidence}"
    limit = max(1, int(k))
    floor = rank[min_confidence]
    allowed = [name for name, level in rank.items() if level >= floor]
    # Only "?" placeholders are interpolated into the SQL text; the values
    # themselves are bound as parameters.
    marks = ",".join("?" for _ in allowed)
    conn = get_db()
    rows = conn.execute(
        "SELECT id, description, confidence, source_thread, registered_at "
        f"FROM concepts WHERE confidence IN ({marks}) "
        "ORDER BY registered_at DESC LIMIT ?",
        (*allowed, limit),
    ).fetchall()
    if not rows:
        return f"no_concepts (min_confidence={min_confidence})"
    now = int(time.time())
    lines = [f"concepts n={len(rows)}"]
    for r in rows:
        src = r["source_thread"] or "-"
        age = fmt_age(now - r["registered_at"])
        # Descriptions are truncated and flattened to a single line.
        desc = r["description"][:240].replace("\n", " ")
        lines.append(
            f" {r['id']} conf={r['confidence']} src={src} "
            f"age={age}_ago"
        )
        lines.append(f" {desc}")
    return "\n".join(lines)
90
+
91
+
92
@mcp.tool()
def expand_concept(concept_id: str) -> str:
    """Full description + triangulation_notes for one concept."""
    # Lookup uses the stripped id; the error message echoes the id as given.
    conn = get_db()
    row = conn.execute(
        "SELECT * FROM concepts WHERE id=?", (concept_id.strip(),)
    ).fetchone()
    if not row:
        return f"ERR concept_not_found={concept_id}"

    source = row["source_thread"] or "-"
    registrar = (row["registered_by_cid"] or "?")[:8]
    age = fmt_age(int(time.time()) - row["registered_at"])
    header = (
        f"id={row['id']} conf={row['confidence']} src={source} "
        f"by={registrar} "
        f"age={age}_ago"
    )
    sections = [header, "", "description:", row["description"]]

    # The notes section is appended only when notes were stored.
    notes = row["triangulation_notes"]
    if notes:
        sections.extend(["", "triangulation_notes:", notes])
    return "\n".join(sections)
@@ -0,0 +1,222 @@
1
+ """Consolidate MCP tool: periodic memory hygiene.
2
+
3
+ Extracted from server.py. Provides a dry-run-by-default sweep that
4
+ reports (and optionally applies) four kinds of cleanup:
5
+
6
+ merge_dup_notes : intra-thread cosine ≥ note_cosine, keep oldest
7
+ idle_stale : active threads not touched in stale_days
8
+ dedupe_verbatim : exact text + (if embeddings) cosine ≥ verbatim_cosine
9
+ release_orphan : claim ≥ orphan_days old, no progress past claim mark
10
+ """
11
+
12
+ import sqlite3
13
+ import time
14
+
15
+ from .._mcp import mcp
16
+ from ..db import get_db
17
+ from ..config import SEMANTIC_AVAILABLE
18
+ from ..helpers import fmt_age, q, normalize_text
19
+ from ..identity import _ensure_session, _emit
20
+ from ..embeddings import _get_model
21
+
22
+
23
# Default thresholds for consolidate(); each one is the default value of the
# keyword argument named in its comment.
CONSOLIDATE_NOTE_COSINE = 0.95  # note_cosine: similarity at/above which two notes in a thread are duplicates
CONSOLIDATE_VERBATIM_COSINE = 0.90  # verbatim_cosine: similarity at/above which two verbatim rows are duplicates
CONSOLIDATE_STALE_THREAD_DAYS = 30  # stale_days: days untouched before an active thread is reported as stale
CONSOLIDATE_ORPHAN_CLAIM_DAYS = 7  # orphan_days: minimum claim age before a claim with no progress is released
27
+
28
+
29
@mcp.tool()
def consolidate(dry_run: bool = True,
                stale_days: int = CONSOLIDATE_STALE_THREAD_DAYS,
                orphan_days: int = CONSOLIDATE_ORPHAN_CLAIM_DAYS,
                note_cosine: float = CONSOLIDATE_NOTE_COSINE,
                verbatim_cosine: float = CONSOLIDATE_VERBATIM_COSINE) -> str:
    """Periodic memory hygiene. dry_run=True (default) reports only.

    merge_dup_notes : intra-thread cosine ≥ note_cosine, keep oldest
    idle_stale : active threads not touched in stale_days
    dedupe_verbatim : exact text + (if embeddings) cosine ≥ verbatim_cosine
    release_orphan : claim ≥ orphan_days old, no progress past claim mark"""
    # Structure: four read-only detection passes fill `findings`; the writes
    # happen afterwards and only when dry_run is False; finally a text report
    # is built that shows at most the first 10 findings per category.
    conn = get_db()
    _ensure_session(conn)
    now = int(time.time())
    findings = {
        "merge_dup_notes": [], "idle_stale": [],
        "dedupe_verbatim": [], "release_orphan": [],
    }
    # numpy is optional: both similarity passes are skipped when it is
    # unavailable or SEMANTIC_AVAILABLE is falsy.
    np = None
    if SEMANTIC_AVAILABLE:
        try:
            import numpy as np  # type: ignore
        except ImportError:
            np = None

    # Pass 1: merge_dup_notes: near-duplicate notes within a single thread.
    if np is not None:
        thread_ids = [
            r["thread_id"] for r in conn.execute(
                "SELECT thread_id FROM notes WHERE thread_id IS NOT NULL "
                "AND embedding IS NOT NULL "
                "GROUP BY thread_id HAVING COUNT(*) >= 2"
            ).fetchall()
        ]
        for tid in thread_ids:
            # Oldest first, so the kept note of a duplicate group is the
            # earliest one.
            ns = conn.execute(
                "SELECT id, content, embedding, created_at FROM notes "
                "WHERE thread_id=? AND embedding IS NOT NULL "
                "ORDER BY created_at ASC", (tid,)
            ).fetchall()
            if len(ns) < 2:
                continue
            embs = np.stack([
                np.frombuffer(n["embedding"], dtype="float32") for n in ns
            ])
            # NOTE(review): the dot product is compared against a cosine
            # threshold, which presumes the stored embeddings are
            # unit-normalized and equal-length: confirm against the writer.
            sim = embs @ embs.T
            kept = [True] * len(ns)
            for i in range(len(ns)):
                if not kept[i]:
                    continue
                for j in range(i + 1, len(ns)):
                    if kept[j] and sim[i, j] >= note_cosine:
                        kept[j] = False
                        findings["merge_dup_notes"].append({
                            "thread": tid, "keep": ns[i]["id"],
                            "drop": ns[j]["id"],
                            "cos": float(sim[i, j]),
                            "snip": ns[j]["content"][:120],
                        })

    # Pass 2: idle_stale: active threads untouched for at least
    # max(1, stale_days) days.
    cutoff_stale = now - max(1, int(stale_days)) * 86400
    for t in conn.execute(
        "SELECT id, question, last_touched_at FROM threads "
        "WHERE state='active' AND last_touched_at < ?", (cutoff_stale,)
    ).fetchall():
        findings["idle_stale"].append({
            "thread": t["id"], "question": t["question"][:120],
            "stale_for": fmt_age(now - t["last_touched_at"]),
        })

    # Pass 3: dedupe_verbatim: rows are visited newest first, so the first
    # row seen for a (speaker, normalized text) key is the one kept.
    vb = conn.execute(
        "SELECT id, speaker, content, created_at FROM verbatim "
        "ORDER BY created_at DESC"
    ).fetchall()
    seen_text: dict = {}
    semantic_pool: list = []
    for v in vb:
        key = (v["speaker"], normalize_text(v["content"]))
        if key in seen_text:
            findings["dedupe_verbatim"].append({
                "keep": seen_text[key], "drop": v["id"],
                "via": "text_exact", "snip": v["content"][:120],
            })
        else:
            seen_text[key] = v["id"]
            semantic_pool.append(v)
    # Semantic pass over the text-unique rows only; skipped entirely when
    # more than 200 rows remain.
    if np is not None and 1 < len(semantic_pool) <= 200:
        m = _get_model()
        if m is not None:
            # Similarity is only compared between rows of the same speaker.
            by_speaker: dict = {}
            for v in semantic_pool:
                by_speaker.setdefault(v["speaker"], []).append(v)
            for sp, vs in by_speaker.items():
                if len(vs) < 2:
                    continue
                # Requested with normalize_embeddings=True, so the dot
                # product below is used directly as the cosine value.
                vecs = m.encode(
                    [v["content"] for v in vs], normalize_embeddings=True
                ).astype("float32")
                sim = vecs @ vecs.T
                kept = [True] * len(vs)
                for i in range(len(vs)):
                    if not kept[i]:
                        continue
                    for j in range(i + 1, len(vs)):
                        if kept[j] and sim[i, j] >= verbatim_cosine:
                            kept[j] = False
                            findings["dedupe_verbatim"].append({
                                "keep": vs[i]["id"], "drop": vs[j]["id"],
                                "via": f"cos={float(sim[i, j]):.2f}",
                                "snip": vs[j]["content"][:120],
                            })

    # Pass 4: release_orphan: claims older than max(1, orphan_days) days
    # whose thread was last touched no later than 60 seconds after the claim.
    cutoff_orphan = now - max(1, int(orphan_days)) * 86400
    for t in conn.execute(
        "SELECT id, question, claimed_at, claimed_by_cid, last_touched_at "
        "FROM threads WHERE claimed_at IS NOT NULL AND claimed_at < ? "
        "AND last_touched_at <= claimed_at + 60", (cutoff_orphan,)
    ).fetchall():
        findings["release_orphan"].append({
            "thread": t["id"],
            "claimed_by": (t["claimed_by_cid"] or "?")[:8],
            "claimed_age": fmt_age(now - t["claimed_at"]),
            "question": t["question"][:120],
        })

    # Apply phase. Counters count statements issued (one per finding), not
    # rows actually affected.
    applied = {
        "merge_dup_notes": 0, "idle_stale": 0,
        "dedupe_verbatim": 0, "release_orphan": 0,
    }
    if not dry_run:
        for f in findings["merge_dup_notes"]:
            conn.execute("DELETE FROM notes WHERE id=?", (f["drop"],))
            applied["merge_dup_notes"] += 1
        for f in findings["idle_stale"]:
            # Moving a thread to 'idle' also refreshes last_touched_at.
            conn.execute(
                "UPDATE threads SET state='idle', last_touched_at=? WHERE id=?",
                (now, f["thread"]),
            )
            applied["idle_stale"] += 1
        for f in findings["dedupe_verbatim"]:
            conn.execute("DELETE FROM verbatim WHERE id=?", (f["drop"],))
            applied["dedupe_verbatim"] += 1
        for f in findings["release_orphan"]:
            conn.execute(
                "UPDATE threads SET claimed_at=NULL, claimed_by_cid=NULL "
                "WHERE id=?", (f["thread"],)
            )
            applied["release_orphan"] += 1
        _emit(conn, "consolidate_apply",
              summary=" ".join(f"{k}={v}" for k, v in applied.items()))
        conn.commit()

    # Report: one summary line, an "applied" line when writes were made,
    # then up to 10 example findings per non-empty category.
    out = [
        f"consolidate dry_run={dry_run} "
        f"merge={len(findings['merge_dup_notes'])} "
        f"idle={len(findings['idle_stale'])} "
        f"dedupe={len(findings['dedupe_verbatim'])} "
        f"orphan={len(findings['release_orphan'])}"
    ]
    if not dry_run:
        out.append("applied " + " ".join(f"{k}={v}" for k, v in applied.items()))
    if findings["merge_dup_notes"]:
        out.append("")
        out.append("merge_dup_notes (keep oldest)")
        for f in findings["merge_dup_notes"][:10]:
            out.append(
                f" thread={f['thread']} keep=#{f['keep']} drop=#{f['drop']} "
                f"cos={f['cos']:.3f} {q(f['snip'])}"
            )
    if findings["idle_stale"]:
        out.append("")
        # The heading prints stale_days as passed in, while the cutoff above
        # uses max(1, int(stale_days)).
        out.append(f"idle_stale (>{stale_days}d untouched)")
        for f in findings["idle_stale"][:10]:
            out.append(
                f" {f['thread']} stale={f['stale_for']} "
                f"q={q(f['question'])}"
            )
    if findings["dedupe_verbatim"]:
        out.append("")
        out.append("dedupe_verbatim (keep most-recent)")
        for f in findings["dedupe_verbatim"][:10]:
            out.append(
                f" keep=#{f['keep']} drop=#{f['drop']} "
                f"via={f['via']} {q(f['snip'])}"
            )
    if findings["release_orphan"]:
        out.append("")
        out.append(f"release_orphan (claimed >{orphan_days}d, no progress)")
        for f in findings["release_orphan"][:10]:
            out.append(
                f" {f['thread']} by={f['claimed_by']} "
                f"age={f['claimed_age']} q={q(f['question'])}"
            )
    return "\n".join(out)
@@ -0,0 +1,109 @@
1
+ """Core-memory MCP tools.
2
+
3
+ Small key/value store always surfaced in `brief()`, sorted by priority
4
+ DESC. Designed as the 'what new-claude must know' surface — not a note
5
+ store. Entries are capped at 1KB and a soft hint at 20 entries total.
6
+ """
7
+
8
+ import sqlite3
9
+ import time
10
+
11
+ from .._mcp import mcp
12
+ from ..db import get_db
13
+ from ..helpers import fmt_age, q
14
+ from ..identity import _ensure_session, _emit
15
+
16
+
17
# Limits enforced (or hinted) by core_set().
CORE_MAX_BYTES = 1024  # maximum UTF-8 encoded size of an entry's content
CORE_MAX_ENTRIES_HINT = 20  # soft limit: exceeding it only adds a warning to the reply
CORE_PRIORITY_MIN = 0  # inclusive lower bound for `priority`
CORE_PRIORITY_MAX = 100  # inclusive upper bound for `priority`
21
+
22
+
23
@mcp.tool()
def core_set(key: str, content: str, priority: int = 50) -> str:
    """Upsert a core-memory entry. ALWAYS shown in brief, sorted by priority DESC.

    Use sparingly — this is the 'what new-claude must know' surface, not a
    note store. Good: 'project_root=/Users/.../ai-memory'. Bad: 'today we
    tried X'. `priority` 0-100 (higher = shown first). `content` capped 1KB."""
    conn = get_db()
    _ensure_session(conn)

    # Validation. The key is normalised by stripping; content is size-checked
    # on its UTF-8 encoding, not its character count.
    key = key.strip()
    if not key:
        return "ERR empty_key"
    if len(key) > 64:
        return "ERR key_too_long max=64"
    if not content.strip():
        return "ERR empty_content"
    encoded_size = len(content.encode("utf-8"))
    if encoded_size > CORE_MAX_BYTES:
        return f"ERR content_too_large max={CORE_MAX_BYTES}B"
    in_range = CORE_PRIORITY_MIN <= priority <= CORE_PRIORITY_MAX
    if not in_range:
        return f"ERR priority_out_of_range min={CORE_PRIORITY_MIN} max={CORE_PRIORITY_MAX}"

    # Insert, or overwrite content/priority/updated_at when the key exists.
    stamp = int(time.time())
    conn.execute(
        "INSERT INTO core_memory (key, content, priority, updated_at) "
        "VALUES (?,?,?,?) ON CONFLICT(key) DO UPDATE SET "
        "content=excluded.content, priority=excluded.priority, "
        "updated_at=excluded.updated_at",
        (key, content, priority, stamp),
    )
    _emit(conn, "core_set", target=key, summary=f"P{priority} {content[:80]}")
    conn.commit()

    # Report the new entry count, with a warning once past the soft limit.
    total = conn.execute("SELECT COUNT(*) c FROM core_memory").fetchone()["c"]
    if total > CORE_MAX_ENTRIES_HINT:
        suffix = f" warn=over_hint({total}/{CORE_MAX_ENTRIES_HINT})"
    else:
        suffix = ""
    return f"ok n={total}{suffix}"
56
+
57
+
58
@mcp.tool()
def core_remove(key: str) -> str:
    """Delete a core-memory entry by key."""
    # The key is stripped once and that normalised form is used for the
    # DELETE, the error reply and the emitted event, matching how core_set()
    # stores and reports keys.
    conn = get_db()
    _ensure_session(conn)
    key = key.strip()
    cur = conn.execute("DELETE FROM core_memory WHERE key=?", (key,))
    if cur.rowcount == 0:
        # Nothing matched: no event is emitted and nothing is committed.
        return f"ERR not_found={key}"
    _emit(conn, "core_remove", target=key)
    conn.commit()
    return "ok"
69
+
70
+
71
@mcp.tool()
def core_list() -> str:
    """List all core-memory entries, ordered by priority DESC then key."""
    conn = get_db()
    entries = conn.execute(
        "SELECT key, content, priority, updated_at FROM core_memory "
        "ORDER BY priority DESC, key ASC"
    ).fetchall()
    if not entries:
        return "empty"

    now = int(time.time())
    rendered = []
    for entry in entries:
        text = entry["content"]
        # One-line preview of the first 120 characters; an ellipsis marks
        # content that was cut.
        preview = text[:120].replace("\n", " ")
        if len(text) > 120:
            preview = preview + "…"
        age = fmt_age(now - entry["updated_at"])
        rendered.append(
            f"[P{entry['priority']}] {entry['key']}: {q(preview)} "
            f"upd={age}_ago"
        )
    return "\n".join(rendered)
92
+
93
+
94
@mcp.tool()
def core_get(key: str) -> str:
    """Return the full content of a single core-memory entry."""
    # Lookup uses the stripped key; the error reply echoes the key as given.
    conn = get_db()
    lookup = (key.strip(),)
    entry = conn.execute(
        "SELECT key, content, priority, updated_at FROM core_memory WHERE key=?",
        lookup,
    ).fetchone()
    if not entry:
        return f"ERR not_found={key}"

    age = fmt_age(int(time.time()) - entry["updated_at"])
    # Header line with key, priority and age, followed by the untruncated
    # content.
    header = f"key={entry['key']} P{entry['priority']} upd={age}_ago\n"
    return header + f"{entry['content']}"