threadkeeper 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- threadkeeper/__init__.py +8 -0
- threadkeeper/_mcp.py +6 -0
- threadkeeper/_setup.py +299 -0
- threadkeeper/adapters/__init__.py +40 -0
- threadkeeper/adapters/_hook_helpers.py +72 -0
- threadkeeper/adapters/base.py +152 -0
- threadkeeper/adapters/claude_code.py +178 -0
- threadkeeper/adapters/claude_desktop.py +128 -0
- threadkeeper/adapters/codex.py +259 -0
- threadkeeper/adapters/copilot.py +195 -0
- threadkeeper/adapters/gemini.py +169 -0
- threadkeeper/adapters/vscode.py +144 -0
- threadkeeper/brief.py +735 -0
- threadkeeper/config.py +216 -0
- threadkeeper/curator.py +390 -0
- threadkeeper/db.py +474 -0
- threadkeeper/embeddings.py +232 -0
- threadkeeper/extract_daemon.py +125 -0
- threadkeeper/helpers.py +101 -0
- threadkeeper/i18n.py +342 -0
- threadkeeper/identity.py +237 -0
- threadkeeper/ingest.py +507 -0
- threadkeeper/lessons.py +170 -0
- threadkeeper/nudges.py +257 -0
- threadkeeper/process_health.py +202 -0
- threadkeeper/review_prompts.py +207 -0
- threadkeeper/search_proxy.py +160 -0
- threadkeeper/server.py +55 -0
- threadkeeper/shadow_review.py +358 -0
- threadkeeper/skill_watcher.py +96 -0
- threadkeeper/spawn_budget.py +246 -0
- threadkeeper/tools/__init__.py +2 -0
- threadkeeper/tools/concepts.py +111 -0
- threadkeeper/tools/consolidate.py +222 -0
- threadkeeper/tools/core_memory.py +109 -0
- threadkeeper/tools/correlation.py +116 -0
- threadkeeper/tools/curator.py +121 -0
- threadkeeper/tools/dialectic.py +359 -0
- threadkeeper/tools/dialog.py +131 -0
- threadkeeper/tools/distill.py +184 -0
- threadkeeper/tools/extract.py +411 -0
- threadkeeper/tools/graph.py +183 -0
- threadkeeper/tools/invariants.py +177 -0
- threadkeeper/tools/lessons.py +110 -0
- threadkeeper/tools/missed_spawns.py +142 -0
- threadkeeper/tools/peers.py +579 -0
- threadkeeper/tools/pickup.py +148 -0
- threadkeeper/tools/probes.py +251 -0
- threadkeeper/tools/process_health.py +90 -0
- threadkeeper/tools/session.py +34 -0
- threadkeeper/tools/shadow_review.py +106 -0
- threadkeeper/tools/skills.py +856 -0
- threadkeeper/tools/spawn.py +871 -0
- threadkeeper/tools/style.py +44 -0
- threadkeeper/tools/threads.py +299 -0
- threadkeeper-0.4.0.dist-info/METADATA +351 -0
- threadkeeper-0.4.0.dist-info/RECORD +61 -0
- threadkeeper-0.4.0.dist-info/WHEEL +5 -0
- threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
- threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
- threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Search-proxy daemon: parent processes (SEMANTIC_AVAILABLE=True) serve
|
|
2
|
+
semantic-search requests from spawned slim children (SEMANTIC_AVAILABLE=False).
|
|
3
|
+
|
|
4
|
+
Mechanism:
|
|
5
|
+
- A child without embeddings posts a `signals` row with kind='search_request'
|
|
6
|
+
addressed to the parent's cid. Payload is JSON: {query, k, mode, scope}.
|
|
7
|
+
- This daemon, running ONLY in processes where SEMANTIC_AVAILABLE=True,
|
|
8
|
+
polls signals every 500ms for unread 'search_request' rows addressed to
|
|
9
|
+
me (or broadcast). For each, runs the requested search and writes back
|
|
10
|
+
a 'search_response' signal to the requester. Marks the request read.
|
|
11
|
+
- The child's `search_via_parent` MCP tool wraps post + wait.
|
|
12
|
+
|
|
13
|
+
Why this exists: loading sentence-transformers in every spawned child costs
|
|
14
|
+
~300-500MB. Most spawned children only need to *write* a few notes/skills;
|
|
15
|
+
they rarely need to *search* semantically. When they do, delegating to
|
|
16
|
+
the existing parent is far cheaper than each child loading its own model.
|
|
17
|
+
|
|
18
|
+
Daemon is started lazily on first _ensure_session() call. No-op when
|
|
19
|
+
SEMANTIC_AVAILABLE=False — children's daemons stay silent, so each request
|
|
20
|
+
is answered by exactly one parent (or zero if none exists, in which case
|
|
21
|
+
the child's tool times out and falls back to FTS).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import logging
|
|
28
|
+
import threading
|
|
29
|
+
import time
|
|
30
|
+
from typing import Optional
|
|
31
|
+
|
|
32
|
+
from .config import SEMANTIC_AVAILABLE
|
|
33
|
+
from .db import get_db
|
|
34
|
+
from . import identity
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
_started = False
|
|
39
|
+
_POLL_INTERVAL_S = float(
|
|
40
|
+
__import__("os").environ.get("THREADKEEPER_SEARCH_PROXY_POLL_S", "0.5")
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# Maximum number of requests per poll tick — guard against runaway loops.
|
|
44
|
+
_MAX_BATCH = 10
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _serve_request(conn, sig_row) -> None:
    """Run the requested search and post a 'search_response' signal back.

    Args:
        conn: open database connection used both for searching and for
            writing the response.
        sig_row: the 'search_request' row from ``signals``. Its ``content``
            is expected to be a JSON object ``{query, k, mode, scope}``.

    Malformed input never raises out of this function: unparsable content
    is treated as an empty payload (answered with ``empty_query``), and an
    unusable ``k`` falls back to 5. Every exit path goes through
    ``_write_response`` so the request is answered and marked read instead
    of being picked up again on the next poll.
    """
    from .embeddings import _cosine_search, _dialog_cosine_search, _fts_search
    from .config import SEMANTIC_AVAILABLE as _sa

    try:
        payload = json.loads(sig_row["content"])
    except (ValueError, TypeError):
        # JSONDecodeError is a ValueError; TypeError covers NULL content.
        payload = {}
    if not isinstance(payload, dict):
        payload = {}

    query = str(payload.get("query", "")).strip()
    if not query:
        _write_response(conn, sig_row, {"error": "empty_query", "results": []})
        return

    # A non-numeric k (e.g. "ten", a list, Infinity) must not raise here:
    # an exception at this point would leave the request unread and it
    # would be re-served on every poll tick.
    try:
        k = int(payload.get("k", 5) or 5)
    except (TypeError, ValueError, OverflowError):
        k = 5
    if k <= 0 or k > 100:
        k = 5
    scope = str(payload.get("scope", "notes")).lower()  # 'notes' | 'dialog'
    mode = str(payload.get("mode", "hybrid")).lower()   # for dialog only

    hits: list[dict] = []
    try:
        if scope == "dialog":
            # Only run the searches the requested mode actually consumes.
            want_sem = mode != "fts"
            want_fts = mode != "semantic"
            sem = (_dialog_cosine_search(conn, query, k * 3)
                   if (_sa and want_sem) else [])
            fts = _fts_search(conn, query, k * 3) if want_fts else []
            if mode == "semantic":
                hits = sem[:k]
            elif mode == "fts":
                hits = fts[:k]
            else:
                from .embeddings import _rrf_combine
                hits = _rrf_combine([sem, fts], top_n=k)
        else:
            hits = _cosine_search(conn, query, k) if _sa else []
    except Exception as e:
        logger.debug("search_proxy serve failed: %s", e, exc_info=True)
        _write_response(conn, sig_row, {"error": str(e), "results": []})
        return

    # Trim payload: drop embedding blob, cap content length to keep signal small.
    out = []
    for h in hits:
        h2 = {k_: v for k_, v in h.items() if k_ != "embedding"}
        content = h2.get("content")
        if isinstance(content, str) and len(content) > 400:
            h2["content"] = content[:400] + "…"
        out.append(h2)
    _write_response(conn, sig_row, {"results": out, "scope": scope})
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _write_response(conn, request_row, body: dict) -> None:
    """Post a kind='search_response' signal back to the requester and mark
    the original request read.

    Args:
        conn: open database connection; the insert and the read-mark are
            committed together.
        request_row: the 'search_request' row being answered (``id`` and
            ``from_cid`` are read).
        body: JSON-serializable response payload.

    Database failures are logged at debug level and swallowed (best
    effort), but the pending transaction is rolled back so a half-applied
    response cannot be committed by a later write on the same connection.
    """
    now = int(time.time())
    self_cid = identity._detect_self_cid() or ""
    requester = request_row["from_cid"]

    # Serialize up front: if the body cannot be encoded, still answer (with
    # an error payload) so the request gets marked read instead of being
    # re-served on every poll.
    try:
        content = json.dumps(body)
    except (TypeError, ValueError) as e:
        logger.debug("search_proxy response not serializable: %s", e,
                     exc_info=True)
        content = json.dumps(
            {"error": "unserializable_response", "results": []}
        )

    try:
        conn.execute(
            "INSERT INTO signals (from_cid, to_cid, kind, content, created_at) "
            "VALUES (?, ?, 'search_response', ?, ?)",
            (self_cid, requester, content, now),
        )
        conn.execute(
            "UPDATE signals SET read_at=? WHERE id=?",
            (now, request_row["id"]),
        )
        conn.commit()
    except Exception as e:
        logger.debug("search_proxy write_response failed: %s", e, exc_info=True)
        # Undo a possibly half-applied INSERT so it is not committed later
        # without the matching read-mark.
        try:
            conn.rollback()
        except Exception:
            logger.debug("search_proxy rollback failed", exc_info=True)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _serve_loop() -> None:
    """Daemon body: poll ``signals`` for unread search requests, forever.

    Each tick selects up to ``_MAX_BATCH`` unread 'search_request' rows
    addressed to this process's cid (or to nobody, i.e. broadcast) that
    were not sent by this process, serves them in id order, then sleeps
    ``_POLL_INTERVAL_S`` seconds. While the own cid is not yet known the
    tick is skipped.

    The connection is always closed, even when a tick fails, and a failure
    while serving one request does not prevent the remaining requests of
    the batch from being served.
    """
    while True:
        try:
            self_cid = identity._detect_self_cid()
            if self_cid:
                conn = get_db()
                try:
                    rows = conn.execute(
                        "SELECT id, from_cid, to_cid, content, created_at "
                        "FROM signals "
                        "WHERE kind='search_request' AND read_at IS NULL "
                        " AND (to_cid = ? OR to_cid IS NULL) "
                        " AND from_cid != ? "
                        "ORDER BY id ASC LIMIT ?",
                        (self_cid, self_cid, _MAX_BATCH),
                    ).fetchall()
                    for r in rows:
                        try:
                            _serve_request(conn, r)
                        except Exception:
                            # Isolate per-request failures so one bad row
                            # cannot block every later request in the queue.
                            logger.debug(
                                "search_proxy request failed", exc_info=True
                            )
                finally:
                    conn.close()
        except Exception:
            logger.debug("search_proxy loop tick failed", exc_info=True)
        time.sleep(_POLL_INTERVAL_S)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def start_search_proxy() -> None:
    """Launch the search-proxy daemon thread at most once per process.

    Does nothing when the thread was already started, when
    SEMANTIC_AVAILABLE is false (so light children don't compete with the
    parent to answer requests), or when the poll interval is <= 0
    (disabled via env: test environments, or explicit opt-out).
    """
    global _started
    if _started or not SEMANTIC_AVAILABLE or _POLL_INTERVAL_S <= 0:
        return
    worker = threading.Thread(
        name="search_proxy", target=_serve_loop, daemon=True,
    )
    worker.start()
    _started = True
|
threadkeeper/server.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""threadkeeper.server — package entry point.
|
|
2
|
+
|
|
3
|
+
Importing each tools module triggers its @mcp.tool() decorators against the
|
|
4
|
+
shared FastMCP singleton in threadkeeper._mcp. After all imports, the
|
|
5
|
+
runtime is fully assembled; mcp.run() starts the stdio MCP loop.
|
|
6
|
+
|
|
7
|
+
To launch:
|
|
8
|
+
python -m threadkeeper.server
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
# Singleton must be importable before tool modules.
|
|
13
|
+
from ._mcp import mcp
|
|
14
|
+
|
|
15
|
+
# Core modules — registered for completeness; no @mcp.tool() decorators here.
|
|
16
|
+
# Import them so any import-time side effects happen in a predictable order.
|
|
17
|
+
from . import config # noqa: F401
|
|
18
|
+
from . import db # noqa: F401
|
|
19
|
+
from . import helpers # noqa: F401
|
|
20
|
+
from . import identity # noqa: F401
|
|
21
|
+
from . import embeddings # noqa: F401
|
|
22
|
+
from . import ingest # noqa: F401
|
|
23
|
+
from . import brief # noqa: F401
|
|
24
|
+
|
|
25
|
+
# Tool modules — each import registers a group of @mcp.tool() entries on
|
|
26
|
+
# the shared mcp instance. Order is deliberate: peers/spawn first because
|
|
27
|
+
# pickup imports spawn for auto_spawn; brief is already imported above so
|
|
28
|
+
# tools.threads can pull render_brief.
|
|
29
|
+
from .tools import threads # noqa: F401
|
|
30
|
+
from .tools import style # noqa: F401
|
|
31
|
+
from .tools import peers # noqa: F401
|
|
32
|
+
from .tools import spawn # noqa: F401
|
|
33
|
+
from .tools import probes # noqa: F401
|
|
34
|
+
from .tools import concepts # noqa: F401
|
|
35
|
+
from .tools import distill # noqa: F401
|
|
36
|
+
from .tools import core_memory # noqa: F401
|
|
37
|
+
from .tools import graph # noqa: F401
|
|
38
|
+
from .tools import correlation # noqa: F401
|
|
39
|
+
from .tools import extract # noqa: F401
|
|
40
|
+
from .tools import consolidate # noqa: F401
|
|
41
|
+
from .tools import invariants # noqa: F401
|
|
42
|
+
from .tools import missed_spawns # noqa: F401
|
|
43
|
+
from .tools import dialog # noqa: F401
|
|
44
|
+
from .tools import pickup # noqa: F401
|
|
45
|
+
from .tools import session # noqa: F401
|
|
46
|
+
from .tools import skills # noqa: F401
|
|
47
|
+
from .tools import dialectic # noqa: F401
|
|
48
|
+
from .tools import process_health # noqa: F401
|
|
49
|
+
from .tools import shadow_review # noqa: F401
|
|
50
|
+
from .tools import lessons # noqa: F401
|
|
51
|
+
from .tools import curator # noqa: F401
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
if __name__ == "__main__":
|
|
55
|
+
mcp.run()
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
"""Shadow-review daemon: an autonomous observer that periodically scans
|
|
2
|
+
recently-ingested dialog and decides whether any class-level learning
|
|
3
|
+
emerged worth materializing into a Claude skill — independent of whether
|
|
4
|
+
foreground Claude bothered to call close_thread.
|
|
5
|
+
|
|
6
|
+
The architecture has two layers:
|
|
7
|
+
|
|
8
|
+
1. PURE FUNCTIONS (below) — read dialog_messages diff since last pass,
|
|
9
|
+
build a context dump, decide whether the window is worth evaluating
|
|
10
|
+
at all (cheap char-count floor). Idempotent: tracks last-evaluated
|
|
11
|
+
timestamp via events.kind='shadow_review_pass'.
|
|
12
|
+
|
|
13
|
+
2. DAEMON / SPAWN (start_shadow_daemon) — periodic thread in the parent
|
|
14
|
+
thread-keeper process. On each tick: collect candidate window, if
|
|
15
|
+
non-trivial → spawn slim child with SHADOW_REVIEW_PROMPT + dialog
|
|
16
|
+
dump. Child IS the LLM evaluator; it decides yes/no and (when yes)
|
|
17
|
+
applies skill_manage / mark_skill_materialized.
|
|
18
|
+
|
|
19
|
+
Why this exists: foreground Claude is an unreliable narrator of when to
|
|
20
|
+
close threads / materialize skills. The shadow pass closes that gap.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import logging
|
|
27
|
+
import os
|
|
28
|
+
import sqlite3
|
|
29
|
+
import threading
|
|
30
|
+
import time
|
|
31
|
+
from typing import Optional
|
|
32
|
+
|
|
33
|
+
from .config import (
|
|
34
|
+
SHADOW_REVIEW_INTERVAL_S,
|
|
35
|
+
SHADOW_REVIEW_MIN_CHARS,
|
|
36
|
+
SHADOW_REVIEW_WINDOW_S,
|
|
37
|
+
)
|
|
38
|
+
from .db import get_db
|
|
39
|
+
from . import identity
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
_started = False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# The shadow prompt is what the spawned evaluator child sees. It encodes
|
|
47
|
+
# the class-vs-incident decision rubric inline so the child doesn't need
|
|
48
|
+
# to (and can't, in slim mode) load the ai-memory-learning-loop skill.
|
|
49
|
+
from .i18n import SHADOW_CLASS_SIGNAL_EXAMPLES
|
|
50
|
+
from .review_prompts import POSITIVE_EXAMPLES
|
|
51
|
+
|
|
52
|
+
SHADOW_REVIEW_PROMPT = f"""\
|
|
53
|
+
You are a SHADOW LEARNING OBSERVER for thread-keeper. You read a slice
|
|
54
|
+
of recent dialog from across ALL Claude sessions on this machine and
|
|
55
|
+
decide whether any CLASS-LEVEL learning emerged that's worth a durable
|
|
56
|
+
skill under ~/.claude/skills/.
|
|
57
|
+
|
|
58
|
+
CLASS-LEVEL signals (materialize):
|
|
59
|
+
{SHADOW_CLASS_SIGNAL_EXAMPLES}\
|
|
60
|
+
- a debugging insight that generalizes beyond the specific bug
|
|
61
|
+
- a workflow rule the user stated as policy
|
|
62
|
+
- a corrected misunderstanding (existing skill is wrong/incomplete)
|
|
63
|
+
- a recovery / cleanup procedure for flaky infra (the FIX outlives the
|
|
64
|
+
incident even when the symptom was env-specific)
|
|
65
|
+
|
|
66
|
+
NOT class-level (skip):
|
|
67
|
+
- one-off task descriptions
|
|
68
|
+
- session-transient confusion that resolved itself
|
|
69
|
+
- the user asking what something is
|
|
70
|
+
- you complimenting yourself or summarizing what just happened
|
|
71
|
+
- GENUINELY transient env errors with no durable rule ("rebooted, fixed",
|
|
72
|
+
"wrong dir, fixed"). NOTE: this is narrower than it sounds — see the
|
|
73
|
+
POSITIVE_EXAMPLES block below before defaulting to SKIP.
|
|
74
|
+
|
|
75
|
+
{POSITIVE_EXAMPLES}
|
|
76
|
+
|
|
77
|
+
PROCEDURE
|
|
78
|
+
1. Read the dialog window below.
|
|
79
|
+
2. If nothing class-level emerges → output exactly `SKIP: <one-line reason>` and stop.
|
|
80
|
+
3. If class-level learning is present:
|
|
81
|
+
a. PRIMARY: call `mcp__thread-keeper__lesson_append(title, body, summary, source='shadow')`
|
|
82
|
+
to write into ~/.threadkeeper/lessons.md (shared by every CLI).
|
|
83
|
+
- title: lowercase-hyphens slug describing a CLASS of task, not the incident
|
|
84
|
+
- body: markdown rationale + procedure
|
|
85
|
+
- summary: optional one-line TL;DR
|
|
86
|
+
b. OPTIONAL: also call `mcp__thread-keeper__skill_manage(...)` to mirror
|
|
87
|
+
under ~/.claude/skills/ when Claude-specific frontmatter
|
|
88
|
+
auto-triggering adds value beyond the lesson alone.
|
|
89
|
+
c. Output `MATERIALIZED: <slug>` on success.
|
|
90
|
+
|
|
91
|
+
CONSTRAINTS
|
|
92
|
+
- Be conservative. False negatives (skipping) cost nothing; false
|
|
93
|
+
positives pollute the skill store.
|
|
94
|
+
- Do NOT open/close memory threads. Your sole output is a skill write
|
|
95
|
+
or SKIP.
|
|
96
|
+
- Do NOT cite internal IDs in human-readable output (no T-codes, cids,
|
|
97
|
+
task IDs). The user style requires plain prose.
|
|
98
|
+
|
|
99
|
+
DIALOG WINDOW (most recent at the bottom)
|
|
100
|
+
=========================================
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _last_shadow_ts(conn: sqlite3.Connection) -> int:
    """Return the cursor left behind by the most recent shadow pass.

    The cursor is the integer stored in ``target`` of the newest
    `events.kind='shadow_review_pass'` row (``summary`` is reserved for
    the human-readable outcome).

    Returns 0 when the query fails with an OperationalError, when no prior
    pass exists, or when the stored value is empty or not an integer — the
    caller then falls back to a window-based floor.
    """
    query = (
        "SELECT target FROM events WHERE kind='shadow_review_pass' "
        "ORDER BY id DESC LIMIT 1"
    )
    try:
        latest = conn.execute(query).fetchone()
    except sqlite3.OperationalError:
        return 0

    mark = latest["target"] if latest else None
    if not mark:
        return 0
    try:
        return int(mark)
    except (ValueError, TypeError):
        return 0
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _record_shadow_pass(conn: sqlite3.Connection,
                        high_water_ts: int,
                        outcome: str) -> None:
    """Persist a 'shadow_review_pass' event so the next tick can resume.

    `high_water_ts` is written (as text) to `target`, which is where the
    cursor is read back from. `outcome` is a short human-readable status
    (e.g. 'no_window', 'spawned task_id=...', 'too_short'); it is cut to
    300 characters and stored in `summary`.

    An OperationalError from the database is logged at debug level and
    otherwise ignored.
    """
    event_values = (
        identity._session_id or "",
        str(high_water_ts),
        outcome[:300],
        int(time.time()),
    )
    try:
        conn.execute(
            "INSERT INTO events (session_id, kind, target, summary, created_at) "
            "VALUES (?, 'shadow_review_pass', ?, ?, ?)",
            event_values,
        )
        conn.commit()
    except sqlite3.OperationalError:
        logger.debug("shadow: failed to record pass", exc_info=True)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Opening lines of every prompt we ourselves inject into a spawned
|
|
151
|
+
# child. When a child writes its conversation to ~/.claude/projects/
|
|
152
|
+
# the parent ingests it back into dialog_messages — without this filter
|
|
153
|
+
# the next shadow pass picks up its OWN prior child's reasoning as the
|
|
154
|
+
# "recent dialog" and SKIPs ("dialog window contains only shadow-observer
|
|
155
|
+
# task framing"). Also catches the close_thread auto-review child whose
|
|
156
|
+
# prompt is built around "You are reviewing closed thread <T-code>".
|
|
157
|
+
#
|
|
158
|
+
# Match is on the leading characters (each prefix's own length) of ANY user message of a
|
|
159
|
+
# session, so we exclude every message of that session (not just the
|
|
160
|
+
# prompt itself — slim children's broadcasts, tool_results, and SKIP
|
|
161
|
+
# verdicts also pollute the window).
|
|
162
|
+
_INTERNAL_PROMPT_PREFIXES: tuple[str, ...] = (
|
|
163
|
+
"You are a SHADOW LEARNING OBSERVER",
|
|
164
|
+
"You are reviewing closed thread",
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Lines starting with these markers carry no semantic signal for
|
|
169
|
+
# class-level learning — they're verbose adapter-side renderings of
|
|
170
|
+
# tool_use / tool_result blocks (file dumps, shell output, search
|
|
171
|
+
# results). Inspired by Hermes Agent v0.12's review-fork upgrade that
|
|
172
|
+
# excludes prior-turn tool messages from the review summary so the
|
|
173
|
+
# fork sees a clean context. We keep `[thinking]` blocks — those ARE
|
|
174
|
+
# signal (chain-of-thought often contains the rule being learned).
|
|
175
|
+
_NOISE_LINE_PREFIXES: tuple[str, ...] = (
    "[tool_result]",
    "[tool_call]",
    "[tool_use]",
)


def _strip_tool_noise(text: str) -> str:
    """Remove lines that begin with a tool-artifact marker from a message.

    A line is dropped when, ignoring leading whitespace, it starts with
    one of `_NOISE_LINE_PREFIXES`. Markers appearing mid-line are kept.

    Returns the remaining lines joined back together with leading and
    trailing newlines trimmed; this is the empty string when every line
    was a tool artifact. Falsy input and text containing no marker at all
    are returned exactly as given (no trimming).
    """
    # Nothing to do for empty input or when no marker occurs anywhere.
    if not text or all(marker not in text for marker in _NOISE_LINE_PREFIXES):
        return text
    survivors = [
        line
        for line in text.split("\n")
        if not line.lstrip().startswith(_NOISE_LINE_PREFIXES)
    ]
    return "\n".join(survivors).strip("\n")
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _collect_window(conn: sqlite3.Connection,
                    floor_ts: int,
                    window_s: int) -> tuple[str, int, int]:
    """Gather the dialog messages a shadow pass should look at.

    Selects rows of `dialog_messages` with created_at greater than
    max(floor_ts, now - window_s), leaving out every session that contains
    a user message starting with one of `_INTERNAL_PROMPT_PREFIXES` (our
    own shadow-observer / close_thread reviewer prompts). All remaining
    sessions are mixed into one chronological dump.

    Returns (dump_text, high_water_ts, char_count):
    - dump_text: one "[role @session-tail]" section per message, tool
      noise stripped, each message capped at 1500 chars
    - high_water_ts: largest created_at among the selected rows, or the
      cutoff itself when nothing was selected
    - char_count: summed length of the rendered message bodies (input to
      the MIN_CHARS guard)
    """
    cutoff = max(floor_ts, int(time.time()) - max(1, window_s))

    # One `substr(content, 1, len(prefix)) = prefix` test per internal
    # prefix: an exact bytewise prefix comparison without LIKE patterns.
    internal_match = " OR ".join(
        "substr(content, 1, ?) = ?" for _ in _INTERNAL_PROMPT_PREFIXES
    )
    internal_params: list = [
        value
        for prefix in _INTERNAL_PROMPT_PREFIXES
        for value in (len(prefix), prefix)
    ]
    sql = (
        "SELECT role, content, created_at, session_id "
        "FROM dialog_messages "
        "WHERE created_at > ? "
        " AND session_id NOT IN ("
        " SELECT DISTINCT session_id FROM dialog_messages "
        f" WHERE role = 'user' AND ({internal_match})"
        " ) "
        "ORDER BY created_at ASC"
    )
    messages = conn.execute(sql, (cutoff, *internal_params)).fetchall()
    if not messages:
        return ("", cutoff, 0)

    sections: list[str] = []
    total_chars = 0
    newest = cutoff
    for msg in messages:
        text = _strip_tool_noise(msg["content"] or "")
        # The cursor moves past every selected row, including rows that
        # turn out to be pure tool noise, so they are not re-read next pass.
        newest = max(newest, int(msg["created_at"]))
        if not text:
            continue
        # Cap each turn at 1.5KB so one noisy block can't dominate the prompt.
        if len(text) > 1500:
            text = text[:1500] + "…"
        total_chars += len(text)
        session_tail = (msg["session_id"] or "?")[-6:]
        sections.append(f"[{msg['role']} @{session_tail}]\n{text}\n")
    return ("\n".join(sections), newest, total_chars)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def run_shadow_pass(force: bool = False) -> str:
    """Execute one shadow pass synchronously. Used by the daemon AND by
    the MCP tool for manual triggering / testing.

    Args:
        force: run even when SHADOW_REVIEW_INTERVAL_S <= 0.

    Returns a short status string for observability:
      - 'disabled'           — env knob off and not forced
      - 'no_window'          — no fresh dialog since last cursor
      - 'too_short'          — window exists but < MIN_CHARS
      - 'spawn_error: …'     — spawn() raised (budget cap, etc)
      - otherwise str() of whatever spawn() returned

    Every outcome except 'disabled' is recorded as a shadow_review_pass
    event together with the new high-water mark. The database connection
    opened for the pass is always closed before returning.
    """
    if SHADOW_REVIEW_INTERVAL_S <= 0 and not force:
        return "disabled"
    conn = get_db()
    try:
        floor = _last_shadow_ts(conn)
        dump, high_water, n_chars = _collect_window(
            conn, floor, SHADOW_REVIEW_WINDOW_S,
        )
        if n_chars == 0:
            _record_shadow_pass(conn, high_water, "no_window")
            return "no_window"
        if n_chars < SHADOW_REVIEW_MIN_CHARS:
            _record_shadow_pass(conn, high_water, "too_short")
            return "too_short"

        full_prompt = SHADOW_REVIEW_PROMPT + dump

        # Late import — spawn module imports identity / config; importing it
        # at module load time would create cycles.
        from .tools.spawn import spawn  # type: ignore
        try:
            result = spawn(
                prompt=full_prompt,
                visible=False,
                capture_output=True,
                permission_mode="auto",
                role="shadow_observer",
                write_origin="shadow_review",
                slim=True,
                extra_allowed_tools=(
                    "mcp__thread-keeper__lesson_append,"
                    "mcp__thread-keeper__lesson_list,"
                    "mcp__thread-keeper__skill_manage,"
                    "mcp__thread-keeper__skill_list,"
                    "mcp__thread-keeper__mark_skill_materialized,"
                    "Read,Write"
                ),
            )
        except Exception as e:
            _record_shadow_pass(conn, high_water, f"spawn_error: {e}")
            return f"spawn_error: {e}"

        _record_shadow_pass(conn, high_water, str(result)[:200])
        return str(result)
    finally:
        # One connection per pass: release it on every exit path so the
        # periodic daemon does not accumulate open connections.
        conn.close()
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _serve_loop() -> None:
    """Body of the shadow-review daemon thread; never returns.

    Runs one shadow pass, then sleeps SHADOW_REVIEW_INTERVAL_S seconds,
    and repeats. A failing pass is logged at debug level and does not
    stop the loop.
    """
    while True:
        try:
            run_shadow_pass()
        except Exception:
            # Keep the daemon alive; the next tick simply tries again.
            logger.debug("shadow_review tick failed", exc_info=True)
        time.sleep(SHADOW_REVIEW_INTERVAL_S)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def start_shadow_daemon() -> None:
    """Start the shadow-review daemon thread at most once per process.

    No-op when the thread was already started, when
    SHADOW_REVIEW_INTERVAL_S <= 0 (keeps tests quiet), or when
    SEMANTIC_AVAILABLE is false.

    The SEMANTIC_AVAILABLE gate is what prevents a recursive spawn
    cascade: spawned slim children run their own MCP server, which would
    otherwise start another shadow daemon that spawns further evaluator
    children, and so on. Per the design notes for this module, parents
    have SEMANTIC_AVAILABLE true while slim children have it false, so
    only the parent MCP process runs the daemon — the same gating that
    search_proxy uses.
    """
    global _started
    if _started or SHADOW_REVIEW_INTERVAL_S <= 0:
        return
    from .config import SEMANTIC_AVAILABLE
    if not SEMANTIC_AVAILABLE:
        return  # slim child: don't fire shadow review from here
    observer = threading.Thread(
        name="shadow_review", target=_serve_loop, daemon=True,
    )
    observer.start()
    _started = True
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Background daemon that watches ~/.claude/skills/*/SKILL.md for mtime
|
|
2
|
+
changes and updates skill_usage telemetry. Catches patches made via
|
|
3
|
+
external editors / direct Edit/Write tool calls that bypass skill_manage.
|
|
4
|
+
|
|
5
|
+
Tick interval is configurable; default 10s. Daemon thread, started lazily
|
|
6
|
+
on first _ensure_session() call so import-time side effects stay
|
|
7
|
+
minimal. Reads only — never writes to SKILL.md.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from .config import CLAUDE_SKILLS_DIR
|
|
18
|
+
from .db import get_db
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
_started = False
|
|
23
|
+
_tick_interval_s = float(os.environ.get(
|
|
24
|
+
'THREADKEEPER_SKILL_WATCH_INTERVAL_S', '10'))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _scan_once(conn) -> int:
    """Sync skill_usage telemetry with the SKILL.md files on disk.

    Walks the non-hidden sub-directories of CLAUDE_SKILLS_DIR. For each
    one containing a SKILL.md, makes sure a skill_usage row exists
    (created with origin 'foreground' when missing) and, when the file's
    mtime is newer than the stored last_patched_at (a missing or empty
    value counts as 0), sets last_patched_at to the mtime and increments
    patch_count.

    Commits only when at least one row was bumped. Returns the number of
    rows bumped; 0 when the skills directory does not exist.
    """
    if not CLAUDE_SKILLS_DIR.exists():
        return 0

    bumped = 0
    for entry in CLAUDE_SKILLS_DIR.iterdir():
        # Only real directories; dot-directories (e.g. .archive) are skipped.
        if not entry.is_dir() or entry.name.startswith('.'):
            continue
        skill_md = entry / 'SKILL.md'
        if not skill_md.exists():
            continue
        try:
            modified = int(skill_md.stat().st_mtime)
        except OSError:
            continue

        skill = entry.name
        # A skill first seen here was edited by the user, not created by
        # an agent, hence the 'foreground' origin.
        conn.execute(
            "INSERT INTO skill_usage (name, created_at, created_by_origin) "
            "VALUES (?, ?, 'foreground') ON CONFLICT(name) DO NOTHING",
            (skill, modified),
        )
        known = conn.execute(
            "SELECT last_patched_at FROM skill_usage WHERE name=?",
            (skill,),
        ).fetchone()
        last_patch = (known['last_patched_at'] if known else 0) or 0
        if modified > last_patch:
            conn.execute(
                "UPDATE skill_usage SET last_patched_at=?, "
                "patch_count=patch_count+1 WHERE name=?",
                (modified, skill),
            )
            bumped += 1

    if bumped:
        conn.commit()
    return bumped
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _watch_loop() -> None:
    """Body of the skill-watcher daemon thread; never returns.

    Each tick opens a database connection, runs one scan, closes the
    connection again, and then sleeps `_tick_interval_s` seconds. A
    failing tick is logged at debug level and the loop carries on.
    """
    while True:
        try:
            db = get_db()
            try:
                _scan_once(db)
            finally:
                # Release the per-tick connection even if the scan raised.
                db.close()
        except Exception:
            logger.debug("skill_watcher tick failed", exc_info=True)
        time.sleep(_tick_interval_s)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def start_skill_watcher() -> None:
    """Launch the skill-watcher daemon thread unless it is already running.

    Safe to call repeatedly. Does nothing when the configured tick
    interval is <= 0.
    """
    global _started
    if _started or _tick_interval_s <= 0:
        return
    watcher = threading.Thread(
        name='skill_watcher', target=_watch_loop, daemon=True,
    )
    watcher.start()
    _started = True
|