threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
threadkeeper/config.py ADDED
@@ -0,0 +1,216 @@
1
+ """Paths, env-driven defaults, semantic-search availability flag.
2
+ Imported wherever a constant or config is needed; cheap to import."""
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
# SQLite database location. THREADKEEPER_DB overrides; `~` is expanded.
DB_PATH: Path = Path(
    os.environ.get("THREADKEEPER_DB", "~/.threadkeeper/db.sqlite")
).expanduser()

# sentence-transformers model name used for note embeddings.
EMBED_MODEL_NAME: str = os.environ.get(
    "THREADKEEPER_EMBED_MODEL",
    "paraphrase-multilingual-MiniLM-L12-v2",  # 118 MB, RU+EN cross-lingual
)

# Import-time side effect: guarantee the DB directory exists before any
# module opens a connection.
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

# One-shot migration from the historical name `memory_partner`. If the new
# DB doesn't exist yet but the legacy one does, copy it (including the WAL
# sidecars) so users can rename mid-life without losing memory. After this
# import the legacy directory is left in place — caller can `rm -rf` once
# they've verified the new path is working.
#
# Gate: only run when DB_PATH is the default `~/.threadkeeper/db.sqlite`.
# Tests + custom paths must NOT trigger the migration — otherwise every
# test would copy the user's ~683MB DB into its tmp dir and exhaust disk.
_DEFAULT_DB = Path("~/.threadkeeper/db.sqlite").expanduser()
_LEGACY_DIR = Path("~/.memory_partner").expanduser()
_LEGACY_DB = _LEGACY_DIR / "db.sqlite"
if (
    DB_PATH == _DEFAULT_DB
    and not DB_PATH.exists()
    and _LEGACY_DB.exists()
):
    import shutil  # local import: only needed on this one-shot path
    # Copy WAL/SHM sidecars too so an un-checkpointed legacy DB stays intact.
    for fname in ("db.sqlite", "db.sqlite-wal", "db.sqlite-shm"):
        src = _LEGACY_DIR / fname
        if src.exists():
            shutil.copy2(src, DB_PATH.parent / fname)
42
+
43
# Semantic search opt-out. When this process is light (spawned slim child that
# should never load PyTorch/transformers), set THREADKEEPER_NO_EMBEDDINGS=1.
# This process will then delegate semantic queries to a peer via the signals
# channel (search_via_parent). Notes still get inserted with embedding=NULL;
# a parent process with embeddings backfills them asynchronously.
NO_EMBEDDINGS: bool = os.environ.get(
    "THREADKEEPER_NO_EMBEDDINGS", ""
).lower() in {"1", "true", "yes", "on"}

# Optional semantic search. If sentence-transformers is not installed OR the
# no-embeddings opt-out is set, fall back to FTS5 keyword matching + delegate.
# Brief still works either way.
if NO_EMBEDDINGS:
    SEMANTIC_AVAILABLE: bool = False
else:
    # Probe imports: success means the heavy semantic stack is importable.
    try:
        from sentence_transformers import SentenceTransformer  # type: ignore # noqa: F401
        import numpy as np  # type: ignore # noqa: F401
        SEMANTIC_AVAILABLE = True
    except Exception:
        # Any import failure (missing package, broken install) degrades
        # silently to keyword-only search.
        SEMANTIC_AVAILABLE = False

# Client label used for `presence`/`sessions` rows.
CLIENT_LABEL: str = os.environ.get("THREADKEEPER_CLIENT", "claude")
67
+
68
# Write-origin for this server process. 'foreground' = a regular user-facing
# conversation; 'background_review' = a headless review fork spawned to
# auto-curate memory/skills after a complex task. Curator only ever touches
# skills created under 'background_review' so user-authored skills are safe.
WRITE_ORIGIN: str = os.environ.get(
    "THREADKEEPER_WRITE_ORIGIN", "foreground"
)

# Where Claude's user-local skills live. Used by skill_manage / curator.
CLAUDE_SKILLS_DIR: Path = Path(
    os.environ.get("CLAUDE_SKILLS_DIR", "~/.claude/skills")
).expanduser()

# Where the live ingester reads claude code transcripts from.
CLAUDE_PROJECTS_DIR: Path = Path(
    os.environ.get("CLAUDE_PROJECTS_DIR", "~/.claude/projects")
).expanduser()

# Per-session ingest cap so brief() at session start doesn't block.
INGEST_CAP_PER_CALL: int = int(os.environ.get("THREADKEEPER_INGEST_CAP", "50"))

# Background live-ingester tick (seconds). 0 disables.
INGEST_INTERVAL_S: float = float(
    os.environ.get("THREADKEEPER_INGEST_INTERVAL_S", "3")
)
# Recency window in seconds — presumably how far back the ingester
# considers transcripts "recent"; TODO confirm against ingest.py.
INGEST_RECENT_WINDOW_S: int = int(
    os.environ.get("THREADKEEPER_INGEST_WINDOW_S", "600")
)

# Self-cid heuristic cache TTL (only matters when ppid walk fails).
SELF_CID_TTL_S: float = float(
    os.environ.get("THREADKEEPER_SELF_CID_TTL_S", "5")
)

# Per-task log directory for spawned children.
TASK_LOG_DIR: Path = Path(
    os.environ.get("THREADKEEPER_TASK_LOG_DIR", "/tmp/thread-keeper-tasks")
).expanduser()
# Shared dialog log colocated with the per-task logs.
DIALOG_LOG: Path = TASK_LOG_DIR / "dialog.log"

# Counter-driven nudge thresholds. Memory nudge fires when N mutating events
# have passed since the last memory_save event in this session; skill nudge
# fires after N events since the last skill_materialized event. 0 disables.
MEMORY_NUDGE_INTERVAL: int = int(
    os.environ.get("THREADKEEPER_MEMORY_NUDGE_INTERVAL", "10")
)
SKILL_NUDGE_INTERVAL: int = int(
    os.environ.get("THREADKEEPER_SKILL_NUDGE_INTERVAL", "10")
)
117
# When true, review_thread(thread_id) automatically spawns a background fork
# for rich closed threads at the moment of close_thread(). Default off so
# behavior is predictable; users opt in via env.
AUTO_REVIEW_ENABLED: bool = os.environ.get(
    "THREADKEEPER_AUTO_REVIEW", ""
).lower() in {"1", "true", "yes", "on"}

# Budget cap on combined RSS of all running spawned children (not the
# parent itself). spawn() refuses a new child whose estimated RSS would
# push total over this. Default 3 GB. Set 0 to disable budget enforcement.
SPAWN_BUDGET_MB: int = int(
    os.environ.get("THREADKEEPER_SPAWN_BUDGET_MB", "3072")
)
# Initial RSS estimate for a freshly-spawned child before its real RSS is
# measured by the budget daemon. Updated to actual value within ~10s.
SPAWN_ESTIMATE_SLIM_MB: int = int(
    os.environ.get("THREADKEEPER_SPAWN_ESTIMATE_SLIM_MB", "500")
)
# Full (embeddings-loaded) children are assumed ~3x heavier than slim ones.
SPAWN_ESTIMATE_FULL_MB: int = int(
    os.environ.get("THREADKEEPER_SPAWN_ESTIMATE_FULL_MB", "1500")
)
# Budget daemon poll interval (seconds). 0 disables the daemon (estimates
# stay frozen; not recommended outside tests).
SPAWN_BUDGET_POLL_S: float = float(
    os.environ.get("THREADKEEPER_SPAWN_BUDGET_POLL_S", "10")
)

# Shadow-review daemon. Periodically scans recently-ingested
# dialog_messages from ALL active sessions, looks for class-level
# learning signals, and spawns an LLM evaluator child to decide whether
# to materialize a skill. 0 disables (default — opt in via env).
SHADOW_REVIEW_INTERVAL_S: float = float(
    os.environ.get("THREADKEEPER_SHADOW_REVIEW_INTERVAL_S", "0")
)
# Sliding window of dialog history each shadow pass considers, in
# seconds. Combined with the dedup cursor: actual scan range is
# max(cursor_ts, now-window_s) → now.
SHADOW_REVIEW_WINDOW_S: int = int(
    os.environ.get("THREADKEEPER_SHADOW_REVIEW_WINDOW_S", "900")
)
# Minimum significant chars (user+assistant dialog combined) before a
# pass is worth spawning the evaluator. Cheap floor against periodic
# misfires on idle windows.
SHADOW_REVIEW_MIN_CHARS: int = int(
    os.environ.get("THREADKEEPER_SHADOW_REVIEW_MIN_CHARS", "500")
)

# Curator daemon. Periodic LLM-driven audit of the existing
# lessons.md + ~/.claude/skills/ library — grades, suggests
# consolidation/patches/prunes, writes a per-run REPORT.md. Where
# shadow_review LOOKS FOR NEW class-level learning every few minutes,
# the Curator REVIEWS THE STORE every few days. Inspired by Hermes
# Agent v0.12's `hermes curator` cron agent. 0 disables (default —
# opt in via env). Recommended: 604800 (7 days).
CURATOR_INTERVAL_S: float = float(
    os.environ.get("THREADKEEPER_CURATOR_INTERVAL_S", "0")
)
# Don't bother curating a tiny library; below this lessons-count there's
# nothing meaningful to consolidate.
CURATOR_MIN_LESSONS: int = int(
    os.environ.get("THREADKEEPER_CURATOR_MIN_LESSONS", "3")
)
# Where the Curator writes its REPORT-<isodate>.md per run. One file
# per pass; latest is the canonical one to read. Anchored to DB_PATH's
# parent so a custom THREADKEEPER_DB co-locates its curator reports.
CURATOR_REPORTS_DIR: Path = Path(
    os.environ.get(
        "THREADKEEPER_CURATOR_REPORTS_DIR",
        str(DB_PATH.parent / "curator"),
    )
).expanduser()
188
# When TRUE, curator-child gets write-mode tools (skill_manage delete/
# patch + lesson_append) and is instructed to apply its own PRUNE /
# PATCH / CONSOLIDATE recommendations directly, not just report them.
# Default OFF — Phase 1 is advisory-only, user reviews REPORT.md and
# applies manually. Flip to "1" once you trust the curator's verdicts.
#
# Parsed with the same truthy set as NO_EMBEDDINGS / AUTO_REVIEW_ENABLED.
# Previously this was `bool(os.environ.get(..., ""))`, which is True for
# ANY non-empty value — so "0", "false", or "no" silently ENABLED
# destructive mode, the opposite of the user's intent.
CURATOR_DESTRUCTIVE: bool = os.environ.get(
    "THREADKEEPER_CURATOR_DESTRUCTIVE", ""
).lower() in {"1", "true", "yes", "on"}
196
+
197
# Extract daemon. Periodically scans dialog_messages for heuristic
# candidates (note / concept / distill / verbatim) via extract_recent()
# and enqueues them under extract_candidates.status='pending'. Where
# shadow_review extracts CLASS-LEVEL durable rules, extract harvests
# PER-INCIDENT decision-shaped utterances ("let's use X", "next time
# we should Y", insight markers, bullet-listed regularities). Agent's
# subsequent review_candidates() / accept_candidate() materializes the
# survivors into notes/concepts/distills.
# 0 disables (default — opt in via env). Recommended: 600 (every 10
# min) — extract is cheap, just regex + cosine clustering on the
# already-ingested dialog window.
#
# NOTE(review): the interval below is in SECONDS while the window that
# follows is in MINUTES — easy to misconfigure; double-check units.
EXTRACT_INTERVAL_S: float = float(
    os.environ.get("THREADKEEPER_EXTRACT_INTERVAL_S", "0")
)
# Sliding window of dialog history each extract pass considers, in
# minutes. Defaults align with the typical agent-task duration so a
# whole task's worth of decisions gets harvested at once.
EXTRACT_WINDOW_MIN: int = int(
    os.environ.get("THREADKEEPER_EXTRACT_WINDOW_MIN", "30")
)
@@ -0,0 +1,390 @@
1
+ """Autonomous Curator — periodic library audit & consolidation.
2
+
3
+ Inspired by Hermes Agent v0.12's `hermes curator` cron agent. Where
4
+ shadow_review LOOKS FOR NEW class-level learning every few minutes,
5
+ the Curator REVIEWS THE STORE every few days:
6
+
7
+ 1. Daemon thread wakes every CURATOR_INTERVAL_S seconds (0 = off).
8
+ 2. Collects inventory: every lesson slug + every recently-touched
9
+ skill + usage telemetry.
10
+ 3. Spawns slim child with CURATOR_PROMPT + inventory dump.
11
+ 4. Child grades each entry, suggests KEEP / PATCH / CONSOLIDATE /
12
+ PRUNE, and writes REPORT-<isodate>.md under CURATOR_REPORTS_DIR.
13
+ 5. Parent records `curator_pass` event with high-water timestamp.
14
+
15
+ Design choices borrowed from Hermes:
16
+
17
+ • **Class-first / rubric-based output** — child uses an explicit
18
+ decision matrix (see CURATOR_PROMPT) rather than free-form grading.
19
+ • **Defense-in-depth** — pinned lessons/skills and foreground-origin
20
+ entries are listed in the inventory as PROTECTED so the child knows
21
+ not to touch them.
22
+ • **Scoped toolset** — child gets only lesson_*/skill_*/Read/Write.
23
+ No shell, no web, no spawn. Curator can't sprawl into anything else.
24
+ • **Per-run REPORT.md** — every pass leaves an auditable trail.
25
+ • **Read-only-by-default destructive ops** — Phase 1: child writes a
26
+ REPORT.md with recommendations only. User reviews it and decides
27
+ whether to apply patches/consolidations manually. Future versions
28
+ can flip an env knob to let the curator merge/delete in place.
29
+
30
+ Why this exists: shadow_review accumulates lessons over weeks. Without
31
+ periodic curation, the library grows unbounded with overlapping,
32
+ duplicate, or stale content — the same failure mode that pushed
33
+ Hermes to add their Curator in v0.12.
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import logging
39
+ import sqlite3
40
+ import threading
41
+ import time
42
+
43
+ from .config import (
44
+ CURATOR_INTERVAL_S,
45
+ CURATOR_MIN_LESSONS,
46
+ CURATOR_REPORTS_DIR,
47
+ CURATOR_DESTRUCTIVE,
48
+ )
49
+ from .db import get_db
50
+ from . import identity, lessons
51
+
52
logger = logging.getLogger(__name__)

# Process-wide guard: start_curator_daemon() flips this so only one
# daemon thread is ever launched per process.
_started = False


# System prompt handed to the spawned curator child. {DESTRUCTIVE_CLAUSE}
# is substituted via str.replace() at spawn time (advisory vs destructive
# wording); the inventory dump is appended right after the trailing
# "INVENTORY" header.
CURATOR_PROMPT = """\
You are an autonomous CURATOR for thread-keeper's lessons + skills
library. You read the inventory below — every lesson slug, every
recently-touched skill, usage telemetry — and decide what to keep,
patch, consolidate, or prune.

Where the shadow_review observer LOOKS FOR new class-level learning,
your role is the inverse: review the EXISTING store for quality, dedup,
and freshness.

OUTPUT: write a REPORT.md to ~/.threadkeeper/curator/REPORT-<isodate>.md
via Write tool. The REPORT.md is your sole user-visible output; the
human reads it later and decides which recommendations to apply.

RUBRIC (per-entry decision matrix — answer for every lesson and every
recently-active skill):

KEEP — entry is class-level, in use, accurate. Note "KEEP: <slug>".

PATCH — entry is mostly right but missing a step, has outdated
example, or contradicts something more recent. Quote the exact
string to change and the replacement. Format:
PATCH: <slug>
old: "<exact substring>"
new: "<replacement>"
reason: <one line>

CONSOLIDATE — two or more entries cover overlapping territory and
would be stronger as one umbrella. Format:
CONSOLIDATE: <merged-slug>
merges: <slug-a>, <slug-b>, ...
keep_in_umbrella: <bullet list of what carries over>
reason: <one line on why they overlap>

PRUNE — entry is one-off incident narrative, env-specific transient,
superseded by a newer entry, or a **FALSE POSITIVE** (auto-created
by the background-review loop but never validated by actual use).
Specifically flag as PRUNE:
• origin=background_review AND use_count=0 AND patches=0 AND
created >14 days ago → strong false-positive signal: nobody ever
consulted it, and the agent that created it never came back to
refine it.
• SKILL_OUTCOME signals (in the events table) marking the skill
as 'wrong' more often than 'helped' → user-judgment override.
Format:
PRUNE: <slug>
reason: <one line; note "false_positive" if from the criteria above>

INVENTORY ORDERING — entries marked [PROTECTED] are pinned or
foreground-authored. NEVER suggest PATCH/CONSOLIDATE/PRUNE on those —
only KEEP. Always-OK to RECOMMEND that the user manually review them,
but the curator must not propose destructive changes.

PRIORITY ORDER inside the REPORT.md:
1. CONSOLIDATE recommendations first (highest leverage — merging two
overlapping entries clarifies the whole library).
2. PATCH recommendations next (low-risk, in-place improvements).
3. PRUNE recommendations last (highest-risk; require explicit human
confirmation).
4. KEEP entries summarised at the end as a short list of slugs.

OPEN with a one-paragraph LIBRARY HEALTH summary: total entries,
average use_count, most/least-used skill, oldest untouched entry.

CLOSE with the literal line `CURATOR_PASS_COMPLETE` so the parent
process knows the run finished cleanly.

CONSTRAINTS:
- Do NOT cite internal IDs (T-codes, cids, task IDs) in the REPORT.md.
Plain prose for the human reader.
- If the inventory is genuinely fine (no patches/consolidations/prunes
warranted), still write a REPORT.md that says so — the trail matters
even when nothing changes.
- {DESTRUCTIVE_CLAUSE}

INVENTORY
=========
"""
135
+
136
+
137
+ # ──────────────────────────────────────────────────────────────────────
138
+ # Pure functions: cursor, inventory collection
139
+ # ──────────────────────────────────────────────────────────────────────
140
+
141
+ def _last_curator_ts(conn: sqlite3.Connection) -> int:
142
+ """High-water timestamp of the most recent curator pass. Stored in
143
+ `target` of the latest `events.kind='curator_pass'` row so `summary`
144
+ is free for human-readable outcome. Returns 0 when no prior pass."""
145
+ try:
146
+ row = conn.execute(
147
+ "SELECT target FROM events WHERE kind='curator_pass' "
148
+ "ORDER BY id DESC LIMIT 1"
149
+ ).fetchone()
150
+ except sqlite3.OperationalError:
151
+ return 0
152
+ if not row or not row["target"]:
153
+ return 0
154
+ try:
155
+ return int(row["target"])
156
+ except (ValueError, TypeError):
157
+ return 0
158
+
159
+
160
def _record_curator_pass(conn: sqlite3.Connection,
                         ts: int,
                         outcome: str) -> None:
    """Append a `curator_pass` event row (best-effort).

    `target` carries the high-water timestamp read back by
    _last_curator_ts(); `summary` carries the human-readable outcome,
    truncated so a runaway message can't bloat the events table.
    A missing events table is swallowed — recording the pass is
    telemetry, never a reason to fail the pass itself.
    """
    insert_sql = (
        "INSERT INTO events (session_id, kind, target, summary, "
        "created_at) VALUES (?, 'curator_pass', ?, ?, ?)"
    )
    params = (
        identity._session_id or "",
        str(ts),
        outcome[:300],  # bounded summary
        int(time.time()),
    )
    try:
        conn.execute(insert_sql, params)
        conn.commit()
    except sqlite3.OperationalError:
        logger.debug("curator: failed to record pass", exc_info=True)
173
+
174
+
175
+ def _format_lesson(item: dict) -> str:
176
+ """One inventory line per lesson.
177
+
178
+ Lessons aren't pinned in the same way skills are — but lessons
179
+ flagged with `source=foreground` (i.e. user-typed via lesson_append
180
+ in a live session, not auto-spawned) get the PROTECTED marker so
181
+ the curator never proposes destructive changes against them."""
182
+ src = (item.get("source") or "").strip()
183
+ protected = " [PROTECTED]" if src in ("foreground", "user") else ""
184
+ ts = item.get("ts") or 0
185
+ age_d = (int(time.time()) - ts) // 86400 if ts else "?"
186
+ body_preview = (item.get("body") or "")[:200].replace("\n", " ")
187
+ if len(item.get("body") or "") > 200:
188
+ body_preview += "…"
189
+ return (
190
+ f"- LESSON {item['slug']}{protected} "
191
+ f"(source={src or '?'}, age={age_d}d)\n"
192
+ f" body: {body_preview}"
193
+ )
194
+
195
+
196
+ def _format_skill(row: dict) -> str:
197
+ """One inventory line per recently-touched skill row from
198
+ skill_usage. Foreground-origin and pinned skills are PROTECTED."""
199
+ origin = row.get("created_by_origin") or "?"
200
+ protected = ""
201
+ if row.get("pinned") or origin == "foreground":
202
+ protected = " [PROTECTED]"
203
+ now = int(time.time())
204
+ last_active = max(
205
+ row.get("last_used_at") or 0,
206
+ row.get("last_viewed_at") or 0,
207
+ row.get("last_patched_at") or 0,
208
+ row.get("created_at") or 0,
209
+ )
210
+ age_d = (now - last_active) // 86400 if last_active else "?"
211
+ return (
212
+ f"- SKILL {row['name']}{protected} "
213
+ f"(origin={origin}, uses={row.get('use_count', 0)}, "
214
+ f"views={row.get('view_count', 0)}, "
215
+ f"patches={row.get('patch_count', 0)}, "
216
+ f"last_active={age_d}d_ago, state={row.get('state', '?')})"
217
+ )
218
+
219
+
220
def _collect_inventory(conn: sqlite3.Connection) -> tuple[str, int, int]:
    """Assemble the plain-text inventory dump the curator child reads.

    Returns (dump_text, lesson_count, skill_count). One line per entry
    (via _format_lesson / _format_skill), grouped under `## LESSONS`
    and `## SKILLS` headings. Both scans are best-effort: a failing
    lessons iterator or a missing skill_usage table degrades to an
    empty section instead of raising.
    """
    # ---- Lessons ----
    lesson_lines: list[str] = []
    try:
        for entry in lessons.iter_lessons():
            lesson_lines.append(_format_lesson(entry))
    except Exception:
        # Partial results gathered before the failure are kept.
        logger.debug("curator: iter_lessons failed", exc_info=True)
    n_lessons = len(lesson_lines)

    # ---- Skills ----
    skill_query = (
        "SELECT name, created_at, created_by_origin, last_used_at, "
        "last_viewed_at, last_patched_at, use_count, view_count, "
        "patch_count, pinned, state "
        "FROM skill_usage "
        "WHERE state IN ('active', 'stale') "
        "ORDER BY COALESCE(last_used_at, last_viewed_at, "
        " last_patched_at, created_at) DESC"
    )
    skill_lines: list[str] = []
    try:
        for record in conn.execute(skill_query).fetchall():
            skill_lines.append(_format_skill(dict(record)))
    except sqlite3.OperationalError:
        logger.debug("curator: skill_usage scan failed", exc_info=True)
    n_skills = len(skill_lines)

    sections: list[str] = [f"## LESSONS (n={n_lessons})\n"]
    sections.extend(lesson_lines or ["(none)"])
    sections.append(f"\n## SKILLS (n={n_skills})\n")
    sections.extend(skill_lines or ["(none)"])

    return ("\n".join(sections), n_lessons, n_skills)
263
+
264
+
265
+ # ──────────────────────────────────────────────────────────────────────
266
+ # Synchronous pass + daemon loop
267
+ # ──────────────────────────────────────────────────────────────────────
268
+
269
def run_curator_pass(force: bool = False) -> str:
    """Execute one curator pass synchronously. Used by the daemon AND
    by the MCP tool for manual triggering / testing.

    Args:
        force: run even when CURATOR_INTERVAL_S disables the daemon
            (manual trigger / tests).

    Returns a short status string for observability:
    - 'disabled' — env knob off and not forced
    - 'below_threshold' — fewer than CURATOR_MIN_LESSONS lessons; skip
    - 'spawned task_id=…' — curator child launched
    - 'spawn_error: …' — spawn() rejected
    """
    if CURATOR_INTERVAL_S <= 0 and not force:
        return "disabled"
    conn = get_db()
    inventory, n_lessons, n_skills = _collect_inventory(conn)
    now = int(time.time())
    # Tiny libraries have nothing meaningful to consolidate — record the
    # skip (so the pass trail stays complete) and bail.
    if n_lessons < CURATOR_MIN_LESSONS:
        _record_curator_pass(
            conn, now,
            f"below_threshold lessons={n_lessons} skills={n_skills}",
        )
        return f"below_threshold lessons={n_lessons}"

    # Ensure reports dir exists before the child tries to Write into it.
    CURATOR_REPORTS_DIR.mkdir(parents=True, exist_ok=True)

    # Phase-1 default: advisory-only. CURATOR_DESTRUCTIVE=1 promotes
    # the child to "apply your own recommendations directly" mode and
    # widens the allowed-tools list to include skill_manage + lesson_append.
    if CURATOR_DESTRUCTIVE:
        destructive_clause = (
            "DESTRUCTIVE MODE ENABLED. After writing the REPORT.md you "
            "MAY apply your own PATCH / PRUNE / CONSOLIDATE recommendations "
            "directly via skill_manage(action='patch'|'delete'|'write_file') "
            "and lesson_append(...). Always cross-check against the "
            "[PROTECTED] marker — never touch protected entries even in "
            "destructive mode. Apply changes ONLY after the REPORT.md is "
            "written (audit trail first, mutation second)."
        )
        allowed_tools = (
            "mcp__thread-keeper__lesson_list,"
            "mcp__thread-keeper__lesson_get,"
            "mcp__thread-keeper__lesson_append,"
            "mcp__thread-keeper__skill_list,"
            "mcp__thread-keeper__skill_manage,"
            "Read,Write"
        )
    else:
        destructive_clause = (
            "ADVISORY MODE. Do NOT call lesson_append, skill_manage with "
            "action in {create,patch,delete,write_file}, or any other "
            "destructive tool. Your output is the REPORT.md ONLY — the "
            "human reviews and applies changes manually. Flip "
            "THREADKEEPER_CURATOR_DESTRUCTIVE=1 in env when ready to let "
            "the curator apply its own recommendations."
        )
        allowed_tools = (
            "mcp__thread-keeper__lesson_list,"
            "mcp__thread-keeper__lesson_get,"
            "mcp__thread-keeper__skill_list,"
            "Read,Write"
        )

    # str.replace (not str.format) so literal braces elsewhere in the
    # prompt — e.g. "{create,patch,delete,write_file}" — survive intact.
    full_prompt = (
        CURATOR_PROMPT.replace("{DESTRUCTIVE_CLAUSE}", destructive_clause)
        + inventory
        + "\n\n"
        + f"REPORT_PATH = {CURATOR_REPORTS_DIR}/REPORT-"
        f"{time.strftime('%Y%m%dT%H%M%S')}.md\n"
        + "Write the REPORT.md to that exact path."
    )

    # Imported lazily — presumably to avoid a circular import with the
    # tools package; TODO confirm against tools/spawn.py.
    from .tools.spawn import spawn  # type: ignore
    try:
        result = spawn(
            prompt=full_prompt,
            visible=False,
            capture_output=True,
            permission_mode="auto",
            role="curator",
            write_origin="curator",
            slim=True,  # curator child never needs the embeddings stack
            extra_allowed_tools=allowed_tools,
        )
    except Exception as e:
        # spawn() rejection (budget, etc.) is recorded, then surfaced.
        _record_curator_pass(conn, now, f"spawn_error: {e}")
        return f"spawn_error: {e}"

    _record_curator_pass(
        conn, now,
        f"spawned lessons={n_lessons} skills={n_skills} :: {str(result)[:140]}",
    )
    return str(result)
361
+
362
+
363
def _serve_loop() -> None:
    """Daemon body. Sleep → tick → sleep, until process dies.

    Runs on a daemon thread, so the loop never needs to exit cleanly —
    the interpreter tears it down with the process.
    """
    while True:
        try:
            run_curator_pass()
        except Exception:
            # A failing tick must never kill the daemon thread; debug-log only.
            logger.debug("curator tick failed", exc_info=True)
        time.sleep(CURATOR_INTERVAL_S)
371
+
372
+
373
def start_curator_daemon() -> None:
    """Idempotent daemon starter.

    No-ops when the daemon is already running or when the env knob
    disables it (CURATOR_INTERVAL_S <= 0). Same cascade prevention as
    start_shadow_daemon: slim children (SEMANTIC_AVAILABLE=False) never
    start the daemon, so spawn() cannot recurse through curator forks.
    """
    global _started
    if _started or CURATOR_INTERVAL_S <= 0:
        return
    from .config import SEMANTIC_AVAILABLE
    if not SEMANTIC_AVAILABLE:
        # Slim child: never fire the curator from here.
        return
    worker = threading.Thread(
        target=_serve_loop, name="curator", daemon=True,
    )
    worker.start()
    _started = True
+ _started = True