threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
threadkeeper/nudges.py ADDED
@@ -0,0 +1,257 @@
1
+ """Counter-driven nudge logic. Inspired by hermes-agent's
2
+ memory_nudge_interval / skill_nudge_interval — when N mutating events
3
+ have passed in this session since the last 'save event' (memory or skill),
4
+ surface an active nudge in brief() asking the agent to consolidate.
5
+
6
+ Unlike spawn_hint and skill_hint (passive observation of state), these
7
+ nudges are turn-counter-driven: every mutating tool emits an event, the
8
+ counter walks forward, and when it crosses a threshold the surface
9
+ escalates from soft → hard → demanding.
10
+
11
+ Public:
12
+ compute_memory_nudge(conn, session_id) -> Optional[str]
13
+ Returns the nudge text to embed in brief(), or None if quiet.
14
+ compute_skill_nudge(conn, session_id) -> Optional[str]
15
+ Same for skill consolidation.
16
+ auto_review_should_fire(conn, session_id) -> Optional[str]
17
+ Returns a thread_id IF auto-review should spawn now (rich closed
18
+ thread + threshold crossed + AUTO_REVIEW_ENABLED), else None.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import sqlite3
23
+ import time
24
+ from typing import Optional
25
+
26
+ from .config import (
27
+ MEMORY_NUDGE_INTERVAL,
28
+ SKILL_NUDGE_INTERVAL,
29
+ AUTO_REVIEW_ENABLED,
30
+ )
31
+
32
+
33
+ # Event kinds that count as "memory save" — emitting any of these resets
34
+ # the memory-nudge counter.
35
+ _MEMORY_RESET_KINDS = (
36
+ "open_thread",
37
+ "close_thread",
38
+ "note:insight",
39
+ "note:move",
40
+ "core_set",
41
+ "verbatim_user",
42
+ "concept_register",
43
+ "distill",
44
+ "memory_save",
45
+ )
46
+
47
+ # Event kinds that count as "skill save" — emitting any of these resets
48
+ # the skill-nudge counter.
49
+ _SKILL_RESET_KINDS = (
50
+ "skill_create",
51
+ "skill_edit",
52
+ "skill_patch",
53
+ "skill_write_file",
54
+ "skill_materialized",
55
+ )
56
+
57
+
58
def _last_reset_event_id(conn: sqlite3.Connection, session_id: str,
                         kinds: tuple[str, ...]) -> int:
    """Find the newest reset event recorded for a session.

    Looks up the highest ``events.id`` whose ``session_id`` matches and whose
    ``kind`` is one of *kinds*. The result is 0 when the session id or the
    kinds tuple is empty, or when no matching event exists.
    """
    if not (session_id and kinds):
        return 0
    # One "?" placeholder per kind so the values stay parameterized.
    marks = ",".join("?" for _ in kinds)
    sql = (
        f"SELECT COALESCE(MAX(id), 0) m FROM events "
        f"WHERE session_id = ? AND kind IN ({marks})"
    )
    hit = conn.execute(sql, (session_id, *kinds)).fetchone()
    if hit is None:
        return 0
    # Rows exposing keys() are read by column name; plain tuples by position.
    if hasattr(hit, "keys"):
        return hit["m"]
    return hit[0]
72
+
73
+
74
def _count_events_since(conn: sqlite3.Connection, session_id: str,
                        since_id: int,
                        exclude_kinds: tuple[str, ...]) -> int:
    """Count a session's events newer than *since_id*, skipping save kinds.

    Only rows with ``id > since_id`` for the given session are counted, and
    any row whose ``kind`` appears in *exclude_kinds* is left out. An empty
    session id yields 0; an empty *exclude_kinds* counts every newer event.
    """
    if not session_id:
        return 0
    if exclude_kinds:
        marks = ",".join("?" for _ in exclude_kinds)
        sql = (
            f"SELECT COUNT(*) c FROM events "
            f"WHERE session_id = ? AND id > ? "
            f"AND kind NOT IN ({marks})"
        )
        params = (session_id, since_id, *exclude_kinds)
    else:
        sql = (
            "SELECT COUNT(*) c FROM events "
            "WHERE session_id = ? AND id > ?"
        )
        params = (session_id, since_id)
    tally = conn.execute(sql, params).fetchone()
    if tally is None:
        return 0
    # Rows exposing keys() are read by column name; plain tuples by position.
    if hasattr(tally, "keys"):
        return tally["c"]
    return tally[0]
99
+
100
+
101
def _has_rich_thread(conn: sqlite3.Connection,
                     min_notes: int = 3) -> bool:
    """Report whether any active or closed thread has at least *min_notes* notes.

    A ``sqlite3.OperationalError`` raised by the query is treated as
    "nothing found" and yields False.
    """
    query = (
        "SELECT t.id "
        "FROM threads t "
        "WHERE t.state IN ('active','closed') "
        " AND (SELECT COUNT(*) FROM notes n WHERE n.thread_id=t.id) >= ? "
        "LIMIT 1"
    )
    try:
        found = conn.execute(query, (min_notes,)).fetchone()
    except sqlite3.OperationalError:
        return False
    return found is not None
117
+
118
+
119
def _find_rich_pending_thread(conn: sqlite3.Connection,
                              within_seconds: int = 86400) -> Optional[str]:
    """Pick a rich closed thread that has no skill materialized yet.

    A candidate must be in state 'closed', have ``last_touched_at`` newer
    than ``now - within_seconds``, own at least 5 notes of which at least 2
    are of kind 'insight' or 'move', and have no 'skill_materialized' event
    whose target is the thread id. Among candidates the most recently
    touched one wins.

    Returns the thread id, or None when nothing qualifies or the query
    raises ``sqlite3.OperationalError``.
    """
    cutoff = int(time.time()) - within_seconds
    query = (
        "SELECT t.id, "
        " (SELECT COUNT(*) FROM notes n WHERE n.thread_id=t.id) AS n_total, "
        " (SELECT COUNT(*) FROM notes n WHERE n.thread_id=t.id "
        " AND n.kind IN ('insight','move')) AS n_rich "
        "FROM threads t "
        "WHERE t.state='closed' AND t.last_touched_at > ? "
        " AND NOT EXISTS ("
        " SELECT 1 FROM events e "
        " WHERE e.kind='skill_materialized' AND e.target=t.id"
        " ) "
        " AND (SELECT COUNT(*) FROM notes n WHERE n.thread_id=t.id) >= 5 "
        " AND (SELECT COUNT(*) FROM notes n WHERE n.thread_id=t.id "
        " AND n.kind IN ('insight','move')) >= 2 "
        "ORDER BY t.last_touched_at DESC LIMIT 1"
    )
    try:
        best = conn.execute(query, (cutoff,)).fetchone()
    except sqlite3.OperationalError:
        return None
    if best is None:
        return None
    # Rows exposing keys() are read by column name; plain tuples by position.
    if hasattr(best, "keys"):
        return best["id"]
    return best[0]
152
+
153
+
154
def compute_memory_nudge(conn: sqlite3.Connection,
                         session_id: str) -> Optional[str]:
    """Build the memory-consolidation nudge for brief(), if one is due.

    The nudge is due when MEMORY_NUDGE_INTERVAL is positive, the session id
    is non-empty, the number of non-save events since the last memory-save
    event has reached MEMORY_NUDGE_INTERVAL, and a thread with at least 3
    notes exists. At twice the interval the stronger "overdue" wording is
    used; below that, the soft wording.

    Returns the multi-line nudge text, or None when no nudge is due.
    """
    if MEMORY_NUDGE_INTERVAL <= 0 or not session_id:
        return None

    anchor = _last_reset_event_id(conn, session_id, _MEMORY_RESET_KINDS)
    n_since = _count_events_since(conn, session_id, anchor,
                                  _MEMORY_RESET_KINDS)
    if n_since < MEMORY_NUDGE_INTERVAL:
        return None
    if not _has_rich_thread(conn, min_notes=3):
        return None

    overdue = n_since >= 2 * MEMORY_NUDGE_INTERVAL
    if overdue:
        # Demanding tier: the counter has passed twice the threshold.
        text = (
            f"memory_nudge n_since={n_since} ⚠️ overdue=2x\n"
            f" → ⚠️ {n_since} events without a memory save "
            f"(threshold was {MEMORY_NUDGE_INTERVAL}). MUST consolidate "
            f"next: pick richest thread, write insight-note OR "
            f"core_set()/verbatim_user() on the durable signal. "
            f"Continuing without save = losing the work."
        )
    else:
        # Soft tier: threshold reached but not yet doubled.
        text = (
            f"memory_nudge n_since={n_since} target=memory "
            f"threshold={MEMORY_NUDGE_INTERVAL}\n"
            f" → {n_since} events since last memory save. CONSOLIDATE: pick "
            f"the most active thread, write a note(kind='insight') with what "
            f"crystallized, or core_set() the durable lesson. Don't let "
            f"context evaporate."
        )
    return text
192
+
193
+
194
def compute_skill_nudge(conn: sqlite3.Connection,
                        session_id: str) -> Optional[str]:
    """Build the skill-consolidation nudge for brief(), if one is due.

    The nudge is due when SKILL_NUDGE_INTERVAL is positive, the session id
    is non-empty, the number of non-skill-save events since the last skill
    save has reached SKILL_NUDGE_INTERVAL, and a rich closed thread without
    a prior skill_materialized event exists. At twice the interval the
    stronger "overdue" wording is used.

    Returns the multi-line nudge text, or None when no nudge is due.
    """
    if SKILL_NUDGE_INTERVAL <= 0 or not session_id:
        return None

    anchor = _last_reset_event_id(conn, session_id, _SKILL_RESET_KINDS)
    n_since = _count_events_since(conn, session_id, anchor,
                                  _SKILL_RESET_KINDS)
    if n_since < SKILL_NUDGE_INTERVAL:
        return None
    if _find_rich_pending_thread(conn) is None:
        return None

    overdue = n_since >= 2 * SKILL_NUDGE_INTERVAL
    if overdue:
        text = (
            f"skill_nudge n_since={n_since} ⚠️ overdue=2x\n"
            f" → ⚠️ {n_since} events without skill update "
            f"(threshold was {SKILL_NUDGE_INTERVAL}). MUST act next: "
            f"materialize the richest closed thread via "
            f"review_thread(..., mode='auto') OR patch the most-relevant "
            f"existing skill via skill_manage(action='patch')."
        )
    else:
        text = (
            f"skill_nudge n_since={n_since} target=skill "
            f"threshold={SKILL_NUDGE_INTERVAL}\n"
            f" → {n_since} events since last skill materialize. CHECK: any "
            f"closed thread rich enough (≥5 notes, ≥2 insight/move)? If yes → "
            f"review_thread(thread_id, focus='skills', mode='auto') OR "
            f"skill_manage(action='patch', ...)."
        )
    return text
229
+
230
+
231
def auto_review_should_fire(conn: sqlite3.Connection,
                            session_id: str,
                            force: bool = False) -> Optional[str]:
    """Decide whether a background review should be started right now.

    With ``force=True`` every gate is bypassed and the result is simply
    whatever ``_find_rich_pending_thread`` returns. Otherwise all of the
    following must hold before that lookup is made:

    - AUTO_REVIEW_ENABLED is truthy
    - SKILL_NUDGE_INTERVAL is positive
    - *session_id* is non-empty
    - the count of non-skill-save events since the last skill save has
      reached SKILL_NUDGE_INTERVAL

    Returns the pending thread id, or None when a gate is closed or no rich
    pending thread exists.
    """
    if force:
        return _find_rich_pending_thread(conn)

    gates_open = (
        AUTO_REVIEW_ENABLED and SKILL_NUDGE_INTERVAL > 0 and session_id
    )
    if not gates_open:
        return None

    anchor = _last_reset_event_id(conn, session_id, _SKILL_RESET_KINDS)
    n_since = _count_events_since(conn, session_id, anchor,
                                  _SKILL_RESET_KINDS)
    if n_since < SKILL_NUDGE_INTERVAL:
        return None
    return _find_rich_pending_thread(conn)
@@ -0,0 +1,202 @@
1
+ """Detection and cleanup of orphaned thread-keeper server processes.
2
+
3
+ Each Claude client (Code CLI, Desktop, VS Code extension, headless `claude -p`)
4
+ spawns its own thread-keeper subprocess via stdio MCP. When the client dies
5
+ cleanly, its subprocess gets reaped. When the client crashes / is killed -9 /
6
+ loses its parent, the thread-keeper can linger as an orphan: still holding
7
+ file handles, embedding model in RAM, but with no peer ever sending it stdin.
8
+
9
+ Detection criteria (a process is "orphaned" when ALL hold):
10
+ 1. Process is a threadkeeper.server invocation
11
+ 2. Parent process is gone (ppid is 1/launchd OR ppid doesn't exist)
12
+ 3. Either:
13
+ - heartbeat_at on its session row is older than `STALE_HEARTBEAT_S`, OR
14
+ - the process has no session row in `presence` (it never finished
15
+ bootstrapping)
16
+
17
+ Cleanup never touches the running parent process itself — only other
18
+ thread-keeper processes that meet the orphan criteria.
19
+
20
+ Public API:
21
+ scan() -> list[dict] # diagnostic snapshot of all thread-keeper processes
22
+ cleanup(dry_run, force) -> dict # kill orphans
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import os
28
+ import subprocess
29
+ import time
30
+ from typing import Optional
31
+
32
+ from .db import get_db
33
+
34
+
35
+ # Seconds of presence-table silence before we consider a process orphaned.
36
+ STALE_HEARTBEAT_S = 5 * 60
37
+
38
+
39
+ # ─────────────────────────────────────────────────────────────────────
40
+ # Process discovery
41
+ # ─────────────────────────────────────────────────────────────────────
42
+
43
def _list_threadkeeper_pids() -> list[dict]:
    """List running processes whose ps line mentions threadkeeper.server.

    Runs ``ps`` once and returns one dict per matching line with the keys
    ``pid``, ``ppid``, ``rss_kb`` (ints), ``etime`` (the raw elapsed-time
    string from ps) and ``command``. Lines containing the
    ``/Helpers/disclaimer`` shim path are skipped, as are lines that do not
    split into five fields or whose numeric columns fail to parse. If ``ps``
    cannot be run or times out, an empty list is returned.
    """
    ps_cmd = ["ps", "-ax", "-o", "pid=,ppid=,rss=,etime=,command="]
    try:
        proc = subprocess.run(
            ps_cmd, capture_output=True, text=True, timeout=5,
        )
    except (subprocess.SubprocessError, OSError):
        return []

    found: list[dict] = []
    for raw in (proc.stdout or "").splitlines():
        mentions_server = "threadkeeper.server" in raw
        # The disclaimer shim only launches the real Python process; we want
        # the real process, not the wrapper.
        is_shim = "/Helpers/disclaimer" in raw
        if not mentions_server or is_shim:
            continue
        # Columns: pid ppid rss etime command (command keeps its spaces).
        fields = raw.split(None, 4)
        if len(fields) < 5:
            continue
        pid_s, ppid_s, rss_s, elapsed, command = fields
        try:
            record = {
                "pid": int(pid_s),
                "ppid": int(ppid_s),
                "rss_kb": int(rss_s),
                "etime": elapsed,
                "command": command,
            }
        except ValueError:
            continue
        found.append(record)
    return found
83
+
84
+
85
def _pid_alive(pid: int) -> bool:
    """Return True if *pid* refers to an existing process.

    ``None``, pid 1 (init/launchd) and any pid <= 0 return False — init is
    treated as "no real parent".

    The probe is ``os.kill(pid, 0)``, which sends no signal:

    - success            → the process exists
    - ProcessLookupError → no such process, so not alive
    - PermissionError    → the process exists but belongs to another user,
                           so it counts as alive
    - any other OSError  → treated as not alive
    """
    if pid is None or pid <= 1:
        return False
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        # These two cases must stay separate: a missing pid is dead.
        return False
    except PermissionError:
        # We cannot signal it, but it does exist.
        return True
    except OSError:
        return False
    return True
99
+
100
+
101
def _sentinel_for_perm_error(pid: int) -> bool:
    """Answer "is this pid alive?" for the PermissionError case.

    A PermissionError from ``os.kill(pid, 0)`` means the process exists but
    is owned by someone else, so the answer is always True. *pid* is
    accepted but not inspected.
    """
    alive = True
    return alive
105
+
106
+
107
+ # ─────────────────────────────────────────────────────────────────────
108
+ # Orphan classification
109
+ # ─────────────────────────────────────────────────────────────────────
110
+
111
def _heartbeat_age_for_pid(conn, pid: int) -> Optional[int]:
    """Return the heartbeat age in seconds for the session owned by *pid*.

    Heuristic: the session is found by matching ``presence.session_id``
    against the format ``s_<pid>_<hex>``. When several rows match, the one
    with the newest ``heartbeat_at`` is used.

    Returns ``now - heartbeat_at`` as an int, or None when no row matches
    or the stored heartbeat is empty/zero.
    """
    # "_" is a single-character wildcard in LIKE. Unescaped, the pattern for
    # pid 12 would also match "s_123_…" and report another process's
    # heartbeat, so the literal underscores are escaped.
    pattern = f"s\\_{pid}\\_%"
    row = conn.execute(
        "SELECT heartbeat_at FROM presence "
        "WHERE session_id LIKE ? ESCAPE '\\' "
        "ORDER BY heartbeat_at DESC LIMIT 1",
        (pattern,),
    ).fetchone()
    # Positional access works for both sqlite3.Row and plain tuple rows.
    if row is None or not row[0]:
        return None
    return int(time.time()) - int(row[0])
124
+
125
+
126
def classify(p: dict, conn) -> dict:
    """Return a copy of *p* annotated with its orphan classification.

    Keys added to the copy:
    - ``parent_alive`` (bool) — result of probing ``p["ppid"]``
    - ``heartbeat_age_s`` (int | None) — age of the matching presence row
    - ``is_self`` (bool) — True when ``p["pid"]`` is this process
    - ``is_orphaned`` (bool)
    - ``orphan_reason`` (str) — short explanation of the verdict

    Decision order: our own pid is never an orphan; a process with a live
    parent is not an orphan; with the parent gone, a missing heartbeat or a
    heartbeat older than STALE_HEARTBEAT_S marks it orphaned, and a fresh
    heartbeat does not.
    """
    info = dict(p)
    info["parent_alive"] = _pid_alive(info["ppid"])
    info["heartbeat_age_s"] = _heartbeat_age_for_pid(conn, info["pid"])
    info["is_self"] = info["pid"] == os.getpid()

    age = info["heartbeat_age_s"]
    if info["is_self"]:
        orphaned, reason = False, "self"
    elif info["parent_alive"]:
        orphaned, reason = False, "parent_alive"
    elif age is None:
        # Parent gone and no presence row: the process never registered a
        # session, or uses a different session-id format. Treated as orphan.
        orphaned, reason = True, "parent_gone + no_heartbeat"
    elif age > STALE_HEARTBEAT_S:
        orphaned = True
        reason = f"parent_gone + heartbeat_age={age}s > {STALE_HEARTBEAT_S}s"
    else:
        orphaned = False
        reason = f"parent_gone but heartbeat fresh ({age}s)"

    info["is_orphaned"] = orphaned
    info["orphan_reason"] = reason
    return info
164
+
165
+
166
+ # ─────────────────────────────────────────────────────────────────────
167
+ # Public API
168
+ # ─────────────────────────────────────────────────────────────────────
169
+
170
def scan() -> list[dict]:
    """Discover thread-keeper processes and classify each one.

    Opens the database via ``get_db()``, lists the candidate processes, and
    returns the ``classify`` result for every one of them.
    """
    db = get_db()
    return [classify(proc, db) for proc in _list_threadkeeper_pids()]
175
+
176
+
177
def cleanup(dry_run: bool = True, force: bool = False) -> dict:
    """Signal every process that ``scan()`` classified as orphaned.

    With ``dry_run=True`` (the default) nothing is signalled and only the
    plan is reported. Otherwise each orphan receives SIGTERM, or SIGKILL
    when ``force=True``.

    Returns a dict with ``all_procs`` (the full scan), ``orphans`` (the
    plan), ``killed`` (pids signalled without error), ``failed`` (dicts of
    ``pid`` and ``err`` for signals that raised OSError) and ``dry_run``.
    """
    import signal as _sig

    snapshot = scan()
    orphans = [proc for proc in snapshot if proc.get("is_orphaned")]
    killed: list[int] = []
    failed: list[dict] = []

    if not dry_run:
        signum = _sig.SIGKILL if force else _sig.SIGTERM
        for proc in orphans:
            target = proc["pid"]
            try:
                os.kill(target, signum)
            except OSError as exc:
                # ProcessLookupError and PermissionError are OSError
                # subclasses, so one handler records them all.
                failed.append({"pid": target, "err": str(exc)})
            else:
                killed.append(target)

    return {
        "all_procs": snapshot,
        "orphans": orphans,
        "killed": killed,
        "failed": failed,
        "dry_run": dry_run,
    }
@@ -0,0 +1,207 @@
1
+ """Self-improvement review prompts.
2
+
3
+ Adapted from hermes-agent's MEMORY_REVIEW_PROMPT / SKILL_REVIEW_PROMPT
4
+ constants. The "do NOT capture" list is the part that prevents auto-curation
5
+ from harming itself by hardening transient failures into permanent rules.
6
+
7
+ Used by:
8
+ - review_thread(mode='auto') — spawned background child receives one of these
9
+ as its prompt and runs through the conversation reading recent notes.
10
+ - review_thread(mode='inline') — foreground agent gets the text back and
11
+ processes it in the current turn.
12
+ """
13
+
14
+ # Rubric-form opener for the review prompts. Hermes Agent v0.12 switched
15
+ # its review fork from free-form "should this update memory/skills?" to
16
+ # rubric-based grading — that change halved their false-negative rate on
17
+ # substantive incidents. We mirror the pattern: 5 yes/no questions, each
18
+ # with a concrete action attached. "Nothing to save." is allowed ONLY
19
+ # when all five answers are No.
20
+ RUBRIC_QUESTIONS = (
21
+ "RUBRIC — answer each question. ANY \"YES\" answer requires action; "
22
+ "only ALL-\"NO\" allows the \"Nothing to save.\" stop.\n\n"
23
+ " Q1. Did the user state a workflow rule as POLICY "
24
+ "(\"always do X\", \"next time Y\", \"prefer Path-1 over Path-2 "
25
+ "when Z\")? Frustration signals (\"stop doing X\", \"this is too "
26
+ "verbose\") and explicit \"remember this\" count as YES.\n"
27
+ " → YES: capture as stated-policy lesson; embed the preference "
28
+ "verbatim so next session starts already knowing.\n\n"
29
+ " Q2. Did a RECOVERY / CLEANUP procedure for flaky infra emerge "
30
+ "(network reset before tool start, proxy state hygiene, "
31
+ "zombie-process cleanup, port-reuse wait-loops)?\n"
32
+ " → YES: capture as recovery-pattern lesson. The env-specific "
33
+ "incident becomes ONE worked example inside a rule-shaped lesson, "
34
+ "NOT the whole content.\n\n"
35
+ " Q3. Did a DEBUGGING STRATEGY generalize beyond this specific "
36
+ "bug (pattern-recognition rules like \"check testID drift before "
37
+ "chasing logic\", \"3 compounding bugs detection via element-cache "
38
+ "+ Z-order + fixture mismatch\", \"verify state transition, not "
39
+ "destination label\")?\n"
40
+ " → YES: capture as debugging-pattern lesson.\n\n"
41
+ " Q4. Was an EXISTING skill or lesson corrected, missing a step, "
42
+ "or outdated relative to what just happened?\n"
43
+ " → YES: PATCH the existing one BEFORE considering a new "
44
+ "lesson. New lessons that overlap existing ones pollute the store.\n\n"
45
+ " Q5. Did a non-trivial TECHNIQUE / FIX / TOOL-USAGE PATTERN "
46
+ "emerge that someone else hitting the same class of problem would "
47
+ "want to know — not the specific bug, the SHAPE of the solution?\n"
48
+ " → YES: capture under the appropriate umbrella; prefer "
49
+ "references/<topic>.md under an existing skill if it fits."
50
+ )
51
+
52
+
53
+ # Counter-weight to ANTI_CAPTURE. The original anti-capture clause is
54
+ # strong enough that early calibration data showed shadow children
55
+ # SKIPping 100% of substantive incidents — every real-world fix has
56
+ # *some* env-specific surface, and the children kept classifying the
57
+ # whole episode as env-specific even when the underlying pattern was
58
+ # durable. POSITIVE_EXAMPLES draws the surface/pattern line explicitly.
59
+ POSITIVE_EXAMPLES = (
60
+ "CAPTURE these even when they emerged in a single incident — the "
61
+ "FIX/PATTERN is durable even if the failure surface was env-specific:\n"
62
+ " • Recovery patterns for flaky infra (network resets before WDA "
63
+ "start, proxy state hygiene, zombie-process cleanup, port-reuse "
64
+ "wait-loops). The HOW-TO is generalizable across every future "
65
+ "instance, not specific to today's test.\n"
66
+ " • Debugging-strategy patterns: \"3 compounding bugs detection "
67
+ "via element-cache + Z-order + fixture mismatch\", \"check testID "
68
+ "drift before chasing logic\", \"verify state transition, not just "
69
+ "destination label\". Pattern-recognition rules outlive the bug "
70
+ "that surfaced them.\n"
71
+ " • Workflow rules the user stated as policy (\"on each test "
72
+ "start, do X\", \"before claiming a fix, verify Y\", \"prefer "
73
+ "Path-1 over Path-2 when Z\"). Stated policies are first-class "
74
+ "skill content.\n"
75
+ " • iOS/Android testing recovery — WDA + macOS Wi-Fi proxy state, "
76
+ "XCUITest element-cache invalidation, share-cluster bug "
77
+ "triangulation, Detox/Maestro selector hierarchies. Class-level "
78
+ "even when discovered in one suite.\n\n"
79
+ "KEY DISTINCTION — \"episode env-specific\" vs \"rule env-specific\":\n"
80
+ " If the SYMPTOM looked env-specific (Plaid flake, fixture testID "
81
+ "drift, payout step ordering) but the underlying FIX generalizes "
82
+ "(always reset network before WDA start; always check for testID "
83
+ "drift before chasing logic bugs; always make optional/ad-hoc "
84
+ "fixture steps explicit) — CAPTURE the generalized rule, not the "
85
+ "incident. Use the incident as ONE illustrative example inside a "
86
+ "rule-shaped lesson.\n\n"
87
+ "ANTI_CAPTURE still applies — but only for genuinely transient env "
88
+ "errors (\"npm i fixed it\", \"reboot fixed it\") with no durable "
89
+ "rule. If you find yourself writing the verdict \"environment-"
90
+ "specific E2E debugging — no class-level rule\" but the conversation "
91
+ "ALSO contains stated policies, recovery procedures, or "
92
+ "pattern-recognition heuristics — those ARE class-level, capture "
93
+ "them as a rule lesson with the incident as the worked example."
94
+ )
95
+
96
+
97
+ # Shared do-NOT-capture clause. Quoted in both prompts so a foreground agent
98
+ # trying to "save everything" stops at this fence.
99
+ ANTI_CAPTURE = (
100
+ "Do NOT capture (these become persistent self-imposed constraints that "
101
+ "bite you later when the environment changes):\n"
102
+ " • Environment-dependent failures: missing binaries, fresh-install "
103
+ "errors, post-migration path mismatches, 'command not found', "
104
+ "unconfigured credentials, uninstalled packages. The user can fix "
105
+ "these — they are not durable rules.\n"
106
+ " • Negative claims about tools or features ('browser tools do not "
107
+ "work', 'X tool is broken', 'cannot use Y from execute_code'). These "
108
+ "harden into refusals the agent cites against itself for months "
109
+ "after the actual problem was fixed.\n"
110
+ " • Session-specific transient errors that resolved before the "
111
+ "conversation ended. If retrying worked, the lesson is the retry "
112
+ "pattern, not the original failure.\n"
113
+ " • One-off task narratives. A user asking 'summarize today's "
114
+ "market' or 'analyze this PR' is not a class of work that warrants "
115
+ "a skill.\n\n"
116
+ "If a tool failed because of setup state, capture the FIX (install "
117
+ "command, config step, env var to set) under an existing setup or "
118
+ "troubleshooting skill — never 'this tool does not work' as a "
119
+ "standalone constraint."
120
+ )
121
+
122
+
123
+ MEMORY_REVIEW_PROMPT = (
124
+ "Review the closed thread above (use search() or the notes_for_thread "
125
+ "context below) and consider saving to memory if appropriate.\n\n"
126
+ "Focus on:\n"
127
+ "1. Has the user revealed things about themselves — persona, "
128
+ "preferences, work style, personal details worth remembering?\n"
129
+ "2. Has the user expressed expectations about how you should "
130
+ "behave or operate in this kind of task?\n\n"
131
+ "If something stands out, write it via core_set() for high-priority "
132
+ "always-on lines OR verbatim_user() for a quoted fragment OR an "
133
+ "appropriate note() on the source thread. " + ANTI_CAPTURE + "\n\n"
134
+ "If nothing is worth saving, broadcast 'Nothing to save.' and stop."
135
+ )
136
+
137
+
138
+ SKILL_REVIEW_PROMPT = (
139
+ "Review the closed thread above and materialize any class-level "
140
+ "lessons.\n\n"
141
+ "PRIMARY output: a SKILL.md under ~/.claude/skills/<name>/ via "
142
+ "skill_manage(action='create'|'patch'|'write_file'|'delete'). The "
143
+ "Skill format is the universal format — Claude Code, Claude "
144
+ "Desktop, Codex, the Anthropic IDE plugins, and any MCP-aware tool "
145
+ "consume it. SKILL.md auto-triggers via the frontmatter "
146
+ "description field, so the right skill loads when relevant — vs. "
147
+ "an opt-in scan of lessons.md.\n\n"
148
+ "FALLBACK output (only when target CLI has no skills/ directory — "
149
+ "Gemini / Copilot / generic MCP clients without a skill loader): "
150
+ "lesson_append(title, body, summary, source=thread_id) writes into "
151
+ "~/.threadkeeper/lessons.md. Use this only if the primary path "
152
+ "isn't available; otherwise the SKILL.md is strictly better.\n\n"
153
+ + RUBRIC_QUESTIONS + "\n\n"
154
+ "PREFERENCE ORDER (pick the earliest action that fits):\n"
155
+ " 1. PATCH an existing skill. If the conversation referenced (or "
156
+ "the RECENTLY ACTIVE SKILLS block surfaces) an existing skill "
157
+ "covering the new learning, use skill_manage(action='patch', "
158
+ "name=..., old_string=..., new_string=...). New skills that "
159
+ "overlap existing ones pollute the store — patch beats create.\n"
160
+ " 2. ADD a `references/<topic>.md` under an existing umbrella for "
161
+ "session-specific detail. Use skill_manage(action='write_file', "
162
+ "name=..., sub_path='references/<topic>.md', content=...). Keeps "
163
+ "the parent SKILL.md compact; references load lazily.\n"
164
+ " 3. CREATE a new class-level umbrella via skill_manage(action="
165
+ "'create', ...). Name MUST be class-level — never an incident "
166
+ "codename, PR number, or 'fix-X-today' artifact. If the name only "
167
+ "makes sense for today's task, fall back to (1) or (2).\n"
168
+ " 4. DELETE if you discover the consulted skill was a false "
169
+ "positive (created in error, doesn't actually apply): "
170
+ "skill_manage(action='delete', name=...). Don't leave wrong "
171
+ "skills in the store hoping next session ignores them — they "
172
+ "auto-load via frontmatter and bias future runs.\n\n"
173
+ "Target shape: CLASS-LEVEL umbrella skills with rich SKILL.md and "
174
+ "optional references/ directory for session-specific detail — NOT "
175
+ "a long flat list of narrow one-incident skills.\n\n"
176
+ "When done, call mark_skill_materialized(thread_id, skill_path) so "
177
+ "the brief's skill_hint stops firing for this thread.\n\n"
178
+ + POSITIVE_EXAMPLES + "\n\n"
179
+ + ANTI_CAPTURE + "\n\n"
180
+ "STOP CONDITION: \"Nothing to save.\" is only legal when ALL of "
181
+ "Q1-Q5 above answer No. If even one answers Yes, you must act."
182
+ )
183
+
184
+
185
+ COMBINED_REVIEW_PROMPT = (
186
+ "Review the closed thread above and update two dimensions in one "
187
+ "pass:\n\n"
188
+ " **Memory** — who the user is. Did the user reveal persona, "
189
+ "preferences, work style, personal details, or expectations about "
190
+ "how you should operate? If yes, save via core_set / verbatim_user "
191
+ "/ note as appropriate.\n\n"
192
+ " **Skills** — how to handle this class of task. PRIMARY: "
193
+ "skill_manage(action='create'|'patch'|'write_file'|'delete') → "
194
+ "~/.claude/skills/<name>/SKILL.md. The Skill format auto-triggers "
195
+ "via frontmatter description and is consumed by every modern "
196
+ "agentic CLI (Claude Code/Desktop, Codex CLI/desktop, IDE plugins) "
197
+ "— strictly better than an opt-in lessons.md scan. FALLBACK: "
198
+ "lesson_append(...) → ~/.threadkeeper/lessons.md only for CLIs "
199
+ "without a skills/ directory (Gemini / Copilot / bare MCP).\n\n"
200
+ + RUBRIC_QUESTIONS + "\n\n"
201
+ "After any materialization, call mark_skill_materialized("
202
+ "thread_id, skill_path_or_lessons_md) to close the loop.\n\n"
203
+ + POSITIVE_EXAMPLES + "\n\n"
204
+ + ANTI_CAPTURE + "\n\n"
205
+ "STOP CONDITION: \"Nothing to save.\" is only legal when ALL of "
206
+ "Q1-Q5 AND both Memory questions above answer No."
207
+ )