threadkeeper 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- threadkeeper/__init__.py +8 -0
- threadkeeper/_mcp.py +6 -0
- threadkeeper/_setup.py +299 -0
- threadkeeper/adapters/__init__.py +40 -0
- threadkeeper/adapters/_hook_helpers.py +72 -0
- threadkeeper/adapters/base.py +152 -0
- threadkeeper/adapters/claude_code.py +178 -0
- threadkeeper/adapters/claude_desktop.py +128 -0
- threadkeeper/adapters/codex.py +259 -0
- threadkeeper/adapters/copilot.py +195 -0
- threadkeeper/adapters/gemini.py +169 -0
- threadkeeper/adapters/vscode.py +144 -0
- threadkeeper/brief.py +735 -0
- threadkeeper/config.py +216 -0
- threadkeeper/curator.py +390 -0
- threadkeeper/db.py +474 -0
- threadkeeper/embeddings.py +232 -0
- threadkeeper/extract_daemon.py +125 -0
- threadkeeper/helpers.py +101 -0
- threadkeeper/i18n.py +342 -0
- threadkeeper/identity.py +237 -0
- threadkeeper/ingest.py +507 -0
- threadkeeper/lessons.py +170 -0
- threadkeeper/nudges.py +257 -0
- threadkeeper/process_health.py +202 -0
- threadkeeper/review_prompts.py +207 -0
- threadkeeper/search_proxy.py +160 -0
- threadkeeper/server.py +55 -0
- threadkeeper/shadow_review.py +358 -0
- threadkeeper/skill_watcher.py +96 -0
- threadkeeper/spawn_budget.py +246 -0
- threadkeeper/tools/__init__.py +2 -0
- threadkeeper/tools/concepts.py +111 -0
- threadkeeper/tools/consolidate.py +222 -0
- threadkeeper/tools/core_memory.py +109 -0
- threadkeeper/tools/correlation.py +116 -0
- threadkeeper/tools/curator.py +121 -0
- threadkeeper/tools/dialectic.py +359 -0
- threadkeeper/tools/dialog.py +131 -0
- threadkeeper/tools/distill.py +184 -0
- threadkeeper/tools/extract.py +411 -0
- threadkeeper/tools/graph.py +183 -0
- threadkeeper/tools/invariants.py +177 -0
- threadkeeper/tools/lessons.py +110 -0
- threadkeeper/tools/missed_spawns.py +142 -0
- threadkeeper/tools/peers.py +579 -0
- threadkeeper/tools/pickup.py +148 -0
- threadkeeper/tools/probes.py +251 -0
- threadkeeper/tools/process_health.py +90 -0
- threadkeeper/tools/session.py +34 -0
- threadkeeper/tools/shadow_review.py +106 -0
- threadkeeper/tools/skills.py +856 -0
- threadkeeper/tools/spawn.py +871 -0
- threadkeeper/tools/style.py +44 -0
- threadkeeper/tools/threads.py +299 -0
- threadkeeper-0.4.0.dist-info/METADATA +351 -0
- threadkeeper-0.4.0.dist-info/RECORD +61 -0
- threadkeeper-0.4.0.dist-info/WHEEL +5 -0
- threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
- threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
- threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Self-initiated pickup of stale unresolved threads.
|
|
2
|
+
|
|
3
|
+
Surfaces idle, unclaimed threads as candidates for autonomous work, lets a
|
|
4
|
+
caller claim one (optionally spawning a headless child to advance it), and
|
|
5
|
+
releases the claim when done.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
import time
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .._mcp import mcp
|
|
13
|
+
from ..db import get_db
|
|
14
|
+
from ..helpers import fmt_age, q
|
|
15
|
+
from .. import identity
|
|
16
|
+
from ..identity import _ensure_session, _detect_self_cid, _emit
|
|
17
|
+
from ..embeddings import _embed
|
|
18
|
+
from .spawn import spawn
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@mcp.tool()
def pickup_candidates(min_idle_days: int = 3, max_n: int = 5) -> str:
    """Surface unresolved threads that are stale and unclaimed — candidates
    for self-initiated pickup when context is free.

    Ranks by oldest last_touched_at among active+idle threads with no current
    claim. Adds a one-line summary so caller can decide which to claim.

    Args:
        min_idle_days: Minimum idle time in days. Negative values are
            clamped to 0; the clamped value is the one reported back.
        max_n: Maximum number of candidates. Values below 1 are clamped to 1.

    Returns:
        A header line followed by one line per candidate, or a
        `no_candidates ...` message when nothing matches.
    """
    conn = get_db()
    _ensure_session(conn)
    now_t = int(time.time())
    # Clamp once so the query and the messages agree on the effective values.
    idle_days = max(0, int(min_idle_days))
    limit = max(1, int(max_n))
    cutoff = now_t - idle_days * 86400
    rows = conn.execute(
        "SELECT id, question, state, last_touched_at, last_move "
        "FROM threads "
        "WHERE state IN ('active','idle') "
        "AND last_touched_at <= ? AND claimed_at IS NULL "
        "ORDER BY last_touched_at ASC LIMIT ?",
        (cutoff, limit),
    ).fetchall()
    if not rows:
        return f"no_candidates (no unclaimed thread idle >= {idle_days}d)"
    lines = [f"candidates n={len(rows)} idle>={idle_days}d"]
    for t in rows:
        idle = fmt_age(now_t - t["last_touched_at"])
        # claim_pickup treats `question` as nullable; do the same here so a
        # NULL question cannot crash the listing.
        question_short = (t["question"] or "")[:90]
        last_move_short = (t["last_move"] or "(no notes)")[:80]
        lines.append(
            f" {t['id']} [{t['state']}] q={q(question_short)} "
            f"idle={idle} last={q(last_move_short)}"
        )
    return "\n".join(lines)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@mcp.tool()
def claim_pickup(thread_id: str, plan: str = "",
                 spawn_role: str = "", auto_spawn: bool = False) -> str:
    """Claim a thread for self-initiated work. Marks it claimed by my cid.

    If `auto_spawn=True`, immediately spawns a headless child with the
    thread context (question + recent notes + plan) for parallel work.
    `spawn_role` defaults to 'executor' when auto_spawn is on.

    The claim is written with a conditional UPDATE: if another claimant
    takes the thread between the lookup and the write, this call reports
    `ERR already_claimed` instead of overwriting that claim.

    Returns `ok claimed thread=<id>` (plus ` | spawn: <result>` when a child
    was spawned), or an `ERR ...` string when the caller's cid cannot be
    detected, the thread does not exist, or someone else holds the claim.
    """
    conn = get_db()
    _ensure_session(conn)
    self_cid = _detect_self_cid()
    if not self_cid:
        return "ERR cannot_detect_self_cid"
    tid = thread_id.strip()

    def _already_claimed(row) -> str:
        # Shared formatter for the fast-path check and the lost-race path.
        return (
            f"ERR already_claimed by={(row['claimed_by_cid'] or '')[:8]} "
            f"at={fmt_age(int(time.time()) - row['claimed_at'])}_ago"
        )

    t = conn.execute(
        "SELECT id, question, state, claimed_at, claimed_by_cid "
        "FROM threads WHERE id=?",
        (tid,),
    ).fetchone()
    if not t:
        return f"ERR thread_not_found={tid}"
    if t["claimed_at"] and t["claimed_by_cid"] != self_cid:
        return _already_claimed(t)

    now_t = int(time.time())
    # Guard the write with the same condition as the check above, so the
    # check and the claim happen in one statement. COALESCE(...)=0 mirrors
    # the Python truthiness test on claimed_at (NULL or 0 means unclaimed).
    cur = conn.execute(
        "UPDATE threads SET claimed_at=?, claimed_by_cid=?, "
        "last_touched_at=? WHERE id=? "
        "AND (COALESCE(claimed_at, 0) = 0 OR claimed_by_cid=?)",
        (now_t, self_cid, now_t, tid, self_cid),
    )
    if cur.rowcount == 0:
        # Someone else claimed (or the row vanished) after our SELECT.
        holder = conn.execute(
            "SELECT claimed_at, claimed_by_cid FROM threads WHERE id=?",
            (tid,),
        ).fetchone()
        # End the transaction the UPDATE may have opened so no write lock
        # lingers; nothing from the failed claim is pending.
        conn.commit()
        if not holder:
            return f"ERR thread_not_found={tid}"
        return _already_claimed(holder)

    if plan:
        emb = _embed(plan)
        conn.execute(
            "INSERT INTO notes (thread_id, content, kind, created_at, "
            "session_id, embedding) VALUES (?,?,?,?,?,?)",
            (tid, f"PICKUP plan: {plan}", "move", now_t, identity._session_id, emb),
        )
    _emit(conn, "claim_pickup", target=tid,
          summary=plan[:140] if plan else (t["question"] or ""))
    conn.commit()

    spawn_info = ""
    if auto_spawn:
        # Give the child the eight most recent notes as context.
        notes = conn.execute(
            "SELECT kind, content FROM notes WHERE thread_id=? "
            "ORDER BY created_at DESC LIMIT 8",
            (tid,),
        ).fetchall()
        notes_block = "\n".join(
            f" [{n['kind']}] {n['content'][:240]}" for n in notes
        ) or " (no notes yet)"
        child_prompt = (
            f"Pickup task — make progress on a stale unresolved thread.\n\n"
            f"Thread question: {t['question']}\n\n"
            f"Recent notes:\n{notes_block}\n\n"
            f"Plan from caller: {plan or '(infer one and proceed)'}\n\n"
            f"Make ONE concrete advance. Add a note to the thread "
            f"(mcp__thread-keeper__note with thread_id={tid}, kind='move'). "
            f"When done, broadcast 'pickup-{tid}: <one-line result>'."
        )
        result = spawn(
            prompt=child_prompt,
            visible=False,
            permission_mode="auto",
            role=spawn_role or "executor",
            extra_allowed_tools="Read,Bash,Grep,Glob",
        )
        spawn_info = f" | spawn: {result}"
    return f"ok claimed thread={tid}{spawn_info}"
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@mcp.tool()
def release_pickup(thread_id: str) -> str:
    """Release a claim. Only the claimant can release.

    Returns `not_claimed` when the thread is missing or has no claimant,
    `ERR not_my_claim ...` when another cid holds it, and
    `ok released thread=<id>` after clearing the claim columns.
    """
    db = get_db()
    _ensure_session(db)
    me = _detect_self_cid()
    if not me:
        return "ERR cannot_detect_self_cid"

    tid = thread_id.strip()
    row = db.execute(
        "SELECT claimed_by_cid FROM threads WHERE id=?", (tid,),
    ).fetchone()
    holder = row["claimed_by_cid"] if row else None
    if not holder:
        return "not_claimed"
    if holder != me:
        return f"ERR not_my_claim by={holder[:8]}"

    # The caller owns the claim: clear both claim columns and log the event.
    db.execute(
        "UPDATE threads SET claimed_at=NULL, claimed_by_cid=NULL WHERE id=?",
        (tid,),
    )
    _emit(db, "release_pickup", target=tid)
    db.commit()
    return f"ok released thread={tid}"
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Self-diagnostic probes: claude-shape weak-spot tracking.
|
|
2
|
+
|
|
3
|
+
Probes encode known reliability sags (counting in long context, date math,
|
|
4
|
+
verbatim recall, format compliance, etc.). Each attempt → probe_results;
|
|
5
|
+
rolling stats → reliability cache; brief surfaces weak_spots.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import sqlite3
|
|
9
|
+
import time
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .._mcp import mcp
|
|
13
|
+
from ..db import get_db
|
|
14
|
+
from ..helpers import fmt_age, q, gen_probe_id
|
|
15
|
+
from .. import identity
|
|
16
|
+
from ..identity import _ensure_session, _detect_self_cid, _emit
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _grade_probe(grader: str, expected_pattern: Optional[str],
                 response: str) -> bool:
    """Decide whether `response` satisfies a probe's expectation.

    'manual' probes, and any probe without an expected pattern, always
    grade False — the caller confirms success via record_attempt(success=True).
    'exact' is a substring test; 'regex' is an `re.search` with DOTALL.
    An unknown grader or a pattern that fails to evaluate grades False.
    """
    if not expected_pattern or grader == "manual":
        return False
    if grader == "exact":
        return expected_pattern in response
    if grader != "regex":
        return False
    try:
        import re as _re
        found = _re.search(expected_pattern, response, _re.DOTALL)
    except Exception:
        # A bad pattern counts as a failed grade, not an error.
        return False
    return found is not None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _recompute_reliability(conn: sqlite3.Connection, category: str) -> dict:
    """Rebuild the cached reliability row for `category` from probe_results.

    Computes lifetime attempts/successes/last timestamp plus the failure
    rate over the trailing 7 and 30 days (None when a window has no rows),
    UPSERTs them into `reliability`, and returns the same values as a dict.
    Does not commit; the caller owns the transaction.
    """
    stamp = int(time.time())

    totals = conn.execute(
        "SELECT COUNT(*) c, SUM(success) s, MAX(created_at) last "
        "FROM probe_results WHERE category=?",
        (category,),
    ).fetchone()
    attempts = totals["c"] or 0
    successes = totals["s"] or 0
    last_at = totals["last"]

    # Failure rate per trailing window, in (7d, 30d) order.
    window_rates = []
    for days in (7, 30):
        row = conn.execute(
            "SELECT COUNT(*) c, SUM(success) s "
            "FROM probe_results WHERE category=? AND created_at >= ?",
            (category, stamp - days * 86400),
        ).fetchone()
        count = row["c"] or 0
        if count == 0:
            window_rates.append(None)
        else:
            window_rates.append((count - (row["s"] or 0)) / count)
    fr_7, fr_30 = window_rates

    conn.execute(
        "INSERT INTO reliability (category, attempts, successes, last_at, "
        "fail_rate_7d, fail_rate_30d, updated_at) VALUES (?,?,?,?,?,?,?) "
        "ON CONFLICT(category) DO UPDATE SET "
        " attempts=excluded.attempts, successes=excluded.successes, "
        " last_at=excluded.last_at, fail_rate_7d=excluded.fail_rate_7d, "
        " fail_rate_30d=excluded.fail_rate_30d, updated_at=excluded.updated_at",
        (category, attempts, successes, last_at, fr_7, fr_30, stamp),
    )
    return {
        "category": category,
        "attempts": attempts,
        "successes": successes,
        "last_at": last_at,
        "fail_rate_7d": fr_7,
        "fail_rate_30d": fr_30,
    }
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@mcp.tool()
def register_probe(category: str, prompt: str,
                   expected_pattern: str = "",
                   grader: str = "regex") -> str:
    """Register a self-test probe: a known weak-spot task with a verifier.

    `grader`: 'regex' (pattern match in response), 'exact' (substring), or
    'manual' (claude self-grades — always counts as failure unless caller
    explicitly confirms success via record_attempt). `expected_pattern`
    optional for 'manual'.

    Categories should be claude-shape: 'count_long_context',
    'date_arithmetic', 'recall_verbatim_block', 'detect_contradiction',
    'follow_negative_instruction', 'preserve_list_order',
    'respect_length_limit', 'needle_mid_context', 'fact_vs_inference',
    'notice_absence', 'strict_format_compliance', 'uncertainty_acknowledgment'.

    The category is stripped of surrounding whitespace before it is stored,
    and the same stripped value is used in the emitted event and the reply.

    Returns `ok id=<probe id> cat=<category>`, or an `ERR ...` string for an
    unknown grader or an empty category/prompt.
    """
    if grader not in ("regex", "exact", "manual"):
        return f"ERR bad_grader={grader}"
    # Normalize once so the stored row, the event and the reply all agree
    # (record_attempt reports the stripped category the same way).
    cat = category.strip()
    if not cat or not prompt.strip():
        return "ERR empty_category_or_prompt"
    conn = get_db()
    _ensure_session(conn)
    pid = gen_probe_id(conn)
    conn.execute(
        "INSERT INTO probes (id, category, prompt, expected_pattern, "
        "grader, created_at) VALUES (?,?,?,?,?,?)",
        # An empty pattern is stored as NULL.
        (pid, cat, prompt, expected_pattern or None,
         grader, int(time.time())),
    )
    _emit(conn, "probe_register", target=pid, summary=cat)
    conn.commit()
    return f"ok id={pid} cat={cat}"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@mcp.tool()
def run_probe(probe_id: str) -> str:
    """Surface a registered probe for self-attempt. Returns the prompt and
    the grader hint. After attempting, call record_attempt(category,
    success=true/false, probe_id=...) — the harness doesn't auto-grade
    because attempting and judging are the same model.

    Returns an `ERR ...` string when the probe is unknown or disabled.
    """
    db = get_db()
    _ensure_session(db)
    probe = db.execute(
        "SELECT id, category, prompt, expected_pattern, grader, enabled "
        "FROM probes WHERE id=?",
        (probe_id.strip(),),
    ).fetchone()
    if not probe:
        return f"ERR probe_not_found={probe_id}"
    if not probe["enabled"]:
        return f"ERR probe_disabled={probe_id}"

    header = f"probe={probe['id']} cat={probe['category']} grader={probe['grader']}"
    # The expectation line is only shown when a pattern was registered.
    expectation = (
        [f"expect={probe['expected_pattern']}"]
        if probe["expected_pattern"]
        else []
    )
    return "\n".join([header, *expectation, f"prompt={probe['prompt']}"])
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@mcp.tool()
def record_attempt(category: str, success: bool, note: str = "",
                   probe_id: str = "", latency_ms: int = 0) -> str:
    """Record a self-test outcome. Updates reliability aggregates.

    Use for both registered probes (pass `probe_id`) and ad-hoc self-
    observations — e.g. you noticed yourself miscounting items in this
    very turn → record_attempt('count_long_context', false, note='said 32, actual 47').

    Returns `ok rid=... cat=... attempts=... successes=... fail7d=...`, or
    an `ERR ...` string for an empty category or an unknown `probe_id`.
    """
    cat = category.strip()
    if not cat:
        return "ERR empty_category"

    conn = get_db()
    _ensure_session(conn)

    pid = probe_id.strip() or None
    if pid:
        known = conn.execute("SELECT 1 FROM probes WHERE id=?", (pid,)).fetchone()
        if not known:
            return f"ERR probe_not_found={pid}"

    now_t = int(time.time())
    cid = _detect_self_cid()
    outcome = 1 if success else 0
    verdict = "pass" if success else "fail"

    # Falsy latency and note are stored as NULL.
    inserted = conn.execute(
        "INSERT INTO probe_results (probe_id, category, session_id, cid, "
        "success, latency_ms, note, created_at) VALUES (?,?,?,?,?,?,?,?)",
        (pid, cat, identity._session_id, cid, outcome,
         latency_ms or None, note or None, now_t),
    )
    # Refresh the cached aggregates so the reply includes the new attempt.
    stats = _recompute_reliability(conn, cat)
    _emit(conn, "probe_attempt", target=cat,
          summary=f"{verdict}: {note[:120]}")
    conn.commit()

    recent = stats["fail_rate_7d"]
    if recent is None:
        recent_txt = "?"
    else:
        recent_txt = f"{recent:.2f}"
    return (
        f"ok rid={inserted.lastrowid} cat={cat} "
        f"attempts={stats['attempts']} successes={stats['successes']} "
        f"fail7d={recent_txt}"
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@mcp.tool()
def reliability_for(category: str, window_days: int = 30) -> str:
    """Reliability stats for one category over a window.

    Args:
        category: Category name; surrounding whitespace is ignored.
        window_days: Look-back window in days. Values below 1 are clamped
            to 1, and the clamped value is the one reported in the reply.

    Returns:
        One line with attempts, successes and success rate inside the
        window, the cached 7d/30d failure rates ('?' when not cached), and
        the age of the latest attempt; `cat=... no_data ...` when the window
        holds no attempts; `ERR empty_category` for a blank category.
    """
    cat = category.strip()
    if not cat:
        return "ERR empty_category"
    conn = get_db()
    now_t = int(time.time())
    # Clamp once so the query and the reported window cannot disagree.
    window = max(1, window_days)
    cutoff = now_t - window * 86400
    win = conn.execute(
        "SELECT COUNT(*) c, SUM(success) s, MAX(created_at) last "
        "FROM probe_results WHERE category=? AND created_at >= ?",
        (cat, cutoff),
    ).fetchone()
    n = win["c"] or 0
    if n == 0:
        return f"cat={cat} no_data window={window}d"
    s = win["s"] or 0
    rate = s / n  # success rate inside the requested window
    last = win["last"]
    # The 7d/30d failure rates come from the cached reliability row.
    cached = conn.execute(
        "SELECT fail_rate_7d, fail_rate_30d FROM reliability WHERE category=?",
        (cat,),
    ).fetchone()
    fr7 = (
        f"{cached['fail_rate_7d']:.2f}"
        if cached and cached["fail_rate_7d"] is not None
        else "?"
    )
    fr30 = (
        f"{cached['fail_rate_30d']:.2f}"
        if cached and cached["fail_rate_30d"] is not None
        else "?"
    )
    return (
        f"cat={cat} window={window}d attempts={n} success={s} "
        f"rate={rate:.2f} fail7d={fr7} fail30d={fr30} "
        f"last={fmt_age(now_t - last)}_ago"
    )
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@mcp.tool()
def weak_spots(top_n: int = 5) -> str:
    """List categories ranked by recent failure rate, plus registered probe
    categories with no attempts yet (= unknown, equally important to test).

    Ranked rows come from the reliability cache: only categories with a
    30-day failure rate and at least 3 recorded attempts, ordered by the
    7-day rate (falling back to the 30-day rate), limited to `top_n`
    (minimum 1). Returns `no_data ...` when both lists are empty.
    """
    db = get_db()
    stamp = int(time.time())
    ranked = db.execute(
        "SELECT category, fail_rate_7d, fail_rate_30d, attempts, last_at "
        "FROM reliability WHERE fail_rate_30d IS NOT NULL AND attempts >= 3 "
        "ORDER BY COALESCE(fail_rate_7d, fail_rate_30d) DESC LIMIT ?",
        (max(1, top_n),),
    ).fetchall()
    # Enabled probes whose category has no reliability row, or zero attempts.
    untested = db.execute(
        "SELECT DISTINCT p.category FROM probes p "
        "LEFT JOIN reliability r ON r.category = p.category "
        "WHERE p.enabled = 1 AND (r.category IS NULL OR r.attempts = 0)"
    ).fetchall()

    if not ranked and not untested:
        return "no_data (register probes via register_probe)"

    def _rate(value) -> str:
        # Missing rates render as '?'.
        return "?" if value is None else f"{value:.2f}"

    report = [f"weak n={len(ranked)}"]
    for row in ranked:
        if row["last_at"]:
            age = fmt_age(stamp - row["last_at"])
        else:
            age = "?"
        report.append(
            f" {row['category']} fail7d={_rate(row['fail_rate_7d'])} "
            f"fail30d={_rate(row['fail_rate_30d'])} "
            f"n={row['attempts']} last={age}_ago"
        )
    if untested:
        report.append(f"unknown n={len(untested)}")
        report.extend(f" {row['category']} (never_tested)" for row in untested)
    return "\n".join(report)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""MCP tools for inspecting and pruning orphaned thread-keeper processes.
|
|
2
|
+
|
|
3
|
+
Each Claude client spawns its own thread-keeper subprocess. Crashed clients
|
|
4
|
+
leave orphan processes that hold RAM (especially with sentence-transformers
|
|
5
|
+
loaded). These tools surface the situation and let you clean up.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .._mcp import mcp
|
|
9
|
+
from .. import process_health
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@mcp.tool()
def mp_health() -> str:
    """Diagnostic snapshot of every running thread-keeper server process
    on this machine. Shows pid, parent status, RSS, heartbeat age, and
    whether each is classified as orphaned (parent gone + no fresh
    heartbeat from its session).

    Self (the process answering this call) is always marked is_self=true
    and never flagged as orphan."""
    procs = process_health.scan()
    if not procs:
        return "no_mp_processes_running"

    total_kb = sum(entry["rss_kb"] for entry in procs)
    orphan_count = sum(1 for entry in procs if entry.get("is_orphaned"))
    live_count = len(procs) - orphan_count

    report = [
        f"total={len(procs)} live={live_count} orphans={orphan_count} "
        f"rss_total={total_kb // 1024}MB"
    ]
    for entry in procs:
        # "self" wins over the orphan/live classification.
        if entry["is_self"]:
            flag = "self"
        elif entry["is_orphaned"]:
            flag = "ORPHAN"
        else:
            flag = "live"
        heartbeat = entry["heartbeat_age_s"]
        hb_disp = "?" if heartbeat is None else f"{heartbeat}s"
        parent = "alive" if entry["parent_alive"] else "dead"
        rss_mb = entry["rss_kb"] // 1024
        report.append(
            f" pid={entry['pid']:<6} ppid={entry['ppid']:<6} ({parent}) "
            f"rss={rss_mb}MB hb={hb_disp} etime={entry['etime']} "
            f"[{flag}] {entry.get('orphan_reason','-')}"
        )
    if orphan_count:
        report.append(
            f"\nCleanup plan: mp_cleanup(dry_run=False) would SIGTERM "
            f"{orphan_count} orphan(s); add force=True for SIGKILL."
        )
    return "\n".join(report)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@mcp.tool()
def mp_cleanup(dry_run: bool = True, force: bool = False) -> str:
    """Kill orphaned thread-keeper processes (parent gone AND heartbeat
    stale for > 5 minutes). Defaults to dry-run — pass dry_run=False to
    actually send signals. force=True uses SIGKILL instead of SIGTERM.

    Never touches the current process or processes whose parent is still
    alive. Safe to run repeatedly.

    The dry-run plan names the signal that the matching real run would send
    (SIGKILL when force=True) and suggests the call with the same `force`.

    Returns `nothing_to_do ...` when there are no orphans, a plan listing in
    dry-run mode, or a per-pid result listing after applying.
    """
    result = process_health.cleanup(dry_run=dry_run, force=force)
    procs = result["all_procs"]
    orphans = result["orphans"]
    if not orphans:
        return (
            f"nothing_to_do: {len(procs)} mp process(es) running, "
            "all healthy"
        )
    # One source of truth for the signal name, used by plan and apply output.
    sig_name = "SIGKILL" if force else "SIGTERM"
    if dry_run:
        lines = [f"plan dry_run=True orphans={len(orphans)}"]
        for p in orphans:
            rss_mb = p["rss_kb"] // 1024
            lines.append(
                f" would {sig_name} pid={p['pid']} rss={rss_mb}MB "
                f"reason={p['orphan_reason']}"
            )
        free_mb = sum(p["rss_kb"] for p in orphans) // 1024
        # Suggest the call that reproduces this plan, including force.
        apply_call = (
            "mp_cleanup(dry_run=False, force=True)"
            if force
            else "mp_cleanup(dry_run=False)"
        )
        lines.append(
            f"\napprox {free_mb}MB to be freed; call "
            f"{apply_call} to apply."
        )
        return "\n".join(lines)
    # Apply
    lines = [
        f"applied {sig_name}: "
        f"killed={len(result['killed'])} failed={len(result['failed'])}"
    ]
    lines.extend(f" ok pid={pid}" for pid in result["killed"])
    lines.extend(
        f" ERR pid={failure['pid']} {failure['err']}"
        for failure in result["failed"]
    )
    return "\n".join(lines)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Session lifecycle close-out: mark the active session ended and optionally
|
|
2
|
+
record a terse summary note for future briefs."""
|
|
3
|
+
|
|
4
|
+
import sqlite3
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from .._mcp import mcp
|
|
8
|
+
from ..db import get_db
|
|
9
|
+
from ..helpers import fmt_age
|
|
10
|
+
from ..embeddings import _embed
|
|
11
|
+
from .. import identity
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@mcp.tool()
def session_end(summary: str = "") -> str:
    """Mark current session ended with optional terse summary.

    Stamps `ended_at` on the session row, stores a non-empty `summary` as a
    thread-less 'session_summary' note, commits, and clears the in-memory
    session id/start. Returns `no_active_session` when no session is open.
    """
    if identity._session_id is None:
        return "no_active_session"

    conn = get_db()
    ended = int(time.time())
    sid = identity._session_id
    # Without a recorded start the reported duration is zero.
    began = identity._session_start or ended

    conn.execute("UPDATE sessions SET ended_at=? WHERE id=?", (ended, sid))
    if summary:
        vector = _embed(summary)
        conn.execute(
            "INSERT INTO notes (thread_id, content, kind, created_at, session_id, embedding) "
            "VALUES (NULL,?,?,?,?,?)",
            (summary, "session_summary", ended, sid, vector),
        )
    conn.commit()

    identity._session_id = None
    identity._session_start = None
    duration = fmt_age(ended - began)
    return f"closed sess={sid} dur={duration}"
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""MCP tools for the shadow-review machinery.
|
|
2
|
+
|
|
3
|
+
shadow_review_run(force=False, dry_run=False)
|
|
4
|
+
Trigger one shadow pass NOW. `force=True` overrides the
|
|
5
|
+
SHADOW_REVIEW_INTERVAL_S=0 disable. `dry_run=True` returns the prompt
|
|
6
|
+
that WOULD be spawned (no actual spawn) — useful for inspecting
|
|
7
|
+
candidate windows or building tests.
|
|
8
|
+
|
|
9
|
+
shadow_review_status()
|
|
10
|
+
Diagnostic snapshot: env config, cursor position, last 5 passes.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
from .._mcp import mcp
|
|
19
|
+
from ..db import get_db
|
|
20
|
+
from ..identity import _ensure_session
|
|
21
|
+
from ..shadow_review import (
|
|
22
|
+
SHADOW_REVIEW_PROMPT,
|
|
23
|
+
_collect_window,
|
|
24
|
+
_last_shadow_ts,
|
|
25
|
+
_record_shadow_pass,
|
|
26
|
+
run_shadow_pass,
|
|
27
|
+
)
|
|
28
|
+
from ..config import (
|
|
29
|
+
SHADOW_REVIEW_INTERVAL_S,
|
|
30
|
+
SHADOW_REVIEW_MIN_CHARS,
|
|
31
|
+
SHADOW_REVIEW_WINDOW_S,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@mcp.tool()
def shadow_review_run(force: bool = False, dry_run: bool = False) -> str:
    """Fire one shadow-review pass.

    `force=True` runs even when the daemon is disabled (interval=0). Used
    by tests and one-shot triage.

    `dry_run=True` short-circuits before the spawn — returns the dialog
    dump that WOULD be evaluated, plus n_chars and high-water cursor. No
    spawn. No cursor advance. Use this to inspect candidate windows
    before paying for an evaluator child.
    """
    conn = get_db()
    _ensure_session(conn)
    if not dry_run:
        return run_shadow_pass(force=force)

    # Dry run: collect the window since the last cursor and preview it.
    cursor_ts = _last_shadow_ts(conn)
    dump, high_water, n_chars = _collect_window(
        conn, cursor_ts, SHADOW_REVIEW_WINDOW_S,
    )
    if n_chars == 0:
        return "dry_run: no_window (nothing new since last cursor)"

    preview_limit = 2000
    head = dump[:preview_limit]
    suffix = "…(truncated for display)" if len(dump) > preview_limit else ""
    would_spawn = "yes" if n_chars >= SHADOW_REVIEW_MIN_CHARS else "no"
    return (
        f"dry_run: n_chars={n_chars} high_water_ts={high_water} "
        f"min_chars={SHADOW_REVIEW_MIN_CHARS} "
        f"would_spawn={would_spawn}\n\n"
        f"--- prompt preview ---\n"
        f"{SHADOW_REVIEW_PROMPT[:400]}…\n\n"
        f"--- dialog window head ---\n{head}{suffix}"
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@mcp.tool()
def shadow_review_status() -> str:
    """Show shadow-review configuration + last 5 passes.

    Snapshot for sanity-checking that the daemon is alive and advancing
    its cursor: prints the configured interval/window/min_chars, the
    current cursor timestamp with its age, and the five most recent
    'shadow_review_pass' events (age plus the first 120 characters of each
    summary). A failing events query is reported as "(none)".
    """
    conn = get_db()
    _ensure_session(conn)
    floor = _last_shadow_ts(conn)
    now = int(time.time())

    config_line = (
        f"interval_s={SHADOW_REVIEW_INTERVAL_S:.0f} "
        f"window_s={SHADOW_REVIEW_WINDOW_S} "
        f"min_chars={SHADOW_REVIEW_MIN_CHARS}"
    )
    if floor:
        cursor_line = f"cursor_ts={floor} (age={now - floor}s)"
    else:
        cursor_line = "cursor_ts=0 (no prior pass)"
    report = [config_line, cursor_line, "", "recent passes (newest first):"]

    try:
        passes = conn.execute(
            "SELECT created_at, summary FROM events "
            "WHERE kind='shadow_review_pass' "
            "ORDER BY id DESC LIMIT 5"
        ).fetchall()
    except Exception:
        # Best-effort: any query failure is shown as no passes.
        passes = []

    if not passes:
        report.append(" (none)")
        return "\n".join(report)

    for event in passes:
        created = event["created_at"]
        age = (now - int(created)) if created else 0
        snippet = (event["summary"] or "")[:120]
        report.append(f" {age}s_ago {snippet}")
    return "\n".join(report)
|