threadkeeper 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. threadkeeper/__init__.py +8 -0
  2. threadkeeper/_mcp.py +6 -0
  3. threadkeeper/_setup.py +299 -0
  4. threadkeeper/adapters/__init__.py +40 -0
  5. threadkeeper/adapters/_hook_helpers.py +72 -0
  6. threadkeeper/adapters/base.py +152 -0
  7. threadkeeper/adapters/claude_code.py +178 -0
  8. threadkeeper/adapters/claude_desktop.py +128 -0
  9. threadkeeper/adapters/codex.py +259 -0
  10. threadkeeper/adapters/copilot.py +195 -0
  11. threadkeeper/adapters/gemini.py +169 -0
  12. threadkeeper/adapters/vscode.py +144 -0
  13. threadkeeper/brief.py +735 -0
  14. threadkeeper/config.py +216 -0
  15. threadkeeper/curator.py +390 -0
  16. threadkeeper/db.py +474 -0
  17. threadkeeper/embeddings.py +232 -0
  18. threadkeeper/extract_daemon.py +125 -0
  19. threadkeeper/helpers.py +101 -0
  20. threadkeeper/i18n.py +342 -0
  21. threadkeeper/identity.py +237 -0
  22. threadkeeper/ingest.py +507 -0
  23. threadkeeper/lessons.py +170 -0
  24. threadkeeper/nudges.py +257 -0
  25. threadkeeper/process_health.py +202 -0
  26. threadkeeper/review_prompts.py +207 -0
  27. threadkeeper/search_proxy.py +160 -0
  28. threadkeeper/server.py +55 -0
  29. threadkeeper/shadow_review.py +358 -0
  30. threadkeeper/skill_watcher.py +96 -0
  31. threadkeeper/spawn_budget.py +246 -0
  32. threadkeeper/tools/__init__.py +2 -0
  33. threadkeeper/tools/concepts.py +111 -0
  34. threadkeeper/tools/consolidate.py +222 -0
  35. threadkeeper/tools/core_memory.py +109 -0
  36. threadkeeper/tools/correlation.py +116 -0
  37. threadkeeper/tools/curator.py +121 -0
  38. threadkeeper/tools/dialectic.py +359 -0
  39. threadkeeper/tools/dialog.py +131 -0
  40. threadkeeper/tools/distill.py +184 -0
  41. threadkeeper/tools/extract.py +411 -0
  42. threadkeeper/tools/graph.py +183 -0
  43. threadkeeper/tools/invariants.py +177 -0
  44. threadkeeper/tools/lessons.py +110 -0
  45. threadkeeper/tools/missed_spawns.py +142 -0
  46. threadkeeper/tools/peers.py +579 -0
  47. threadkeeper/tools/pickup.py +148 -0
  48. threadkeeper/tools/probes.py +251 -0
  49. threadkeeper/tools/process_health.py +90 -0
  50. threadkeeper/tools/session.py +34 -0
  51. threadkeeper/tools/shadow_review.py +106 -0
  52. threadkeeper/tools/skills.py +856 -0
  53. threadkeeper/tools/spawn.py +871 -0
  54. threadkeeper/tools/style.py +44 -0
  55. threadkeeper/tools/threads.py +299 -0
  56. threadkeeper-0.4.0.dist-info/METADATA +351 -0
  57. threadkeeper-0.4.0.dist-info/RECORD +61 -0
  58. threadkeeper-0.4.0.dist-info/WHEEL +5 -0
  59. threadkeeper-0.4.0.dist-info/entry_points.txt +2 -0
  60. threadkeeper-0.4.0.dist-info/licenses/LICENSE +21 -0
  61. threadkeeper-0.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,177 @@
1
+ """Invariance detection MCP tool.
2
+
3
+ Extracted from server.py. Finds recurring assistant-side response patterns
4
+ that survive prompt variance — clusters of responses with high mutual
5
+ similarity whose preceding user prompts are diverse. High-scoring clusters
6
+ are candidates for "things I always say" regardless of what was asked.
7
+
8
+ Requires semantic embeddings (sentence-transformers) — without them the
9
+ tool returns ERR.
10
+ """
11
+
12
+ import sqlite3
13
+ import time
14
+ from typing import Optional
15
+
16
+ from .._mcp import mcp
17
+ from ..db import get_db
18
+ from ..config import SEMANTIC_AVAILABLE
19
+ from ..helpers import fmt_age, q
20
+ from ..identity import _ensure_session
21
+
22
+
23
@mcp.tool()
def find_invariants(window_days: int = 14,
                    min_cluster_size: int = 3,
                    response_cohesion: float = 0.85,
                    top_n: int = 10,
                    max_messages: int = 10000) -> str:
    """Find recurring assistant-side patterns that survive prompt variance.

    Algorithm:
      1. Pull recent assistant messages that have an embedding from
         dialog_messages (newest first, at most max(100, max_messages)).
      2. Greedy seed clustering: each still-unassigned message becomes a
         seed and absorbs every unassigned message whose similarity to the
         seed is >= response_cohesion.
      3. For each cluster of at least min_cluster_size members, look up
         each member's most recent earlier user prompt in the same session.
      4. Score = avg_response_similarity * (1 - avg_prompt_similarity).
         High = the response keeps its shape while the prompts vary.

    Args:
        window_days: Look-back window in days (clamped to >= 1).
        min_cluster_size: Minimum members per cluster, and minimum number
            of recovered prompts per cluster (clamped to >= 1).
        response_cohesion: Similarity threshold for joining a seed.
        top_n: Maximum clusters reported (clamped to >= 1).
        max_messages: Row cap for the message query (clamped to >= 100).

    Returns:
        A multi-line report with one header line plus, per cluster, a
        stats line and a truncated sample; or a single status line
        (``ERR ...``, ``insufficient_data ...``, ``no_clusters ...``,
        ``no_invariants ...``) when nothing can be reported.
    """
    if not SEMANTIC_AVAILABLE:
        return "ERR semantic_off (need sentence-transformers + embeddings)"
    try:
        import numpy as _np  # type: ignore
    except ImportError:
        return "ERR numpy_unavailable"

    # Clamp like the other numeric knobs. A value <= 0 used to let empty
    # lists through to _np.stack(), which raises ValueError.
    min_size = max(1, int(min_cluster_size))

    # NOTE(review): this module imports _ensure_session but never calls
    # it here — confirm whether skipping it is intentional.
    conn = get_db()
    cutoff = int(time.time()) - max(1, int(window_days)) * 86400
    # Aggressive filter: subagent jsonls (project='subagents') are mostly
    # boilerplate role-intros and pollute clusters. Skip those + common
    # subagent-shape kickoff phrases. We want main-conversation responses.
    rows = conn.execute(
        "SELECT uuid, session_id, content, created_at, embedding "
        "FROM dialog_messages "
        "WHERE role='assistant' AND embedding IS NOT NULL "
        "AND created_at >= ? "
        "AND project != 'subagents' "
        "AND content NOT LIKE '[thinking]%' "
        "AND content NOT LIKE 'I''m Claude Code%' "
        "AND content NOT LIKE 'Hello! I''m Claude Code%' "
        "AND content NOT LIKE 'I''ll help you%' "
        "AND content NOT LIKE 'I understand you want me to%' "
        "AND content NOT LIKE '<summary>%' "
        "AND length(content) >= 120 "
        "ORDER BY created_at DESC LIMIT ?",
        (cutoff, max(100, int(max_messages))),
    ).fetchall()
    if len(rows) < min_size:
        return f"insufficient_data n={len(rows)} need>={min_size}"

    embs = _np.stack([
        _np.frombuffer(r["embedding"], dtype="float32") for r in rows
    ])  # (N, D)
    N = embs.shape[0]
    # Dot products equal cosine similarity only for unit-length vectors;
    # the stored embeddings are presumed normalized — TODO confirm.
    sim = embs @ embs.T  # (N, N)

    # Greedy seed clustering. Members are compared to the seed only, not
    # to each other, so this is not single-link clustering.
    assigned = _np.zeros(N, dtype=bool)
    clusters: list[list[int]] = []
    threshold = float(response_cohesion)
    for i in range(N):
        if assigned[i]:
            continue
        assigned[i] = True
        # Every index below i is already assigned at this point, so the
        # mask selects exactly the unassigned j > i, in ascending order.
        members = _np.flatnonzero(~assigned & (sim[i] >= threshold))
        assigned[members] = True
        cluster = [i] + members.tolist()
        if len(cluster) >= min_size:
            clusters.append(cluster)

    if not clusters:
        return (
            f"no_clusters (n={N}, threshold={threshold}, "
            f"min={min_size}) — try lower threshold"
        )

    invariants = []
    for cl in clusters:
        cl_arr = _np.array(cl)
        sub_sim = sim[_np.ix_(cl_arr, cl_arr)]
        n = len(cl)
        # Mean of the off-diagonal entries. Subtract the actual trace
        # rather than assuming each self-similarity is exactly 1.0.
        if n > 1:
            cohesion = (sub_sim.sum() - _np.trace(sub_sim)) / (n * (n - 1))
        else:
            cohesion = 1.0

        # Gather the preceding user prompt for each cluster member
        # (same session, latest one strictly before the response).
        prompt_embs = []
        for idx in cl:
            r = rows[idx]
            ts = r["created_at"]
            sid = r["session_id"]
            if not sid:
                continue
            ur = conn.execute(
                "SELECT embedding FROM dialog_messages "
                "WHERE session_id=? AND role='user' AND created_at < ? "
                "AND embedding IS NOT NULL "
                "AND content NOT LIKE '[tool_result]%' "
                "AND content NOT LIKE '[Image%' "
                "ORDER BY created_at DESC LIMIT 1",
                (sid, ts),
            ).fetchone()
            if ur and ur["embedding"]:
                prompt_embs.append(
                    _np.frombuffer(ur["embedding"], dtype="float32")
                )
        # Too few prompts recovered to judge prompt diversity.
        if len(prompt_embs) < min_size:
            continue
        pe = _np.stack(prompt_embs)
        psim = pe @ pe.T
        pn = len(prompt_embs)
        if pn > 1:
            avg_psim = (psim.sum() - _np.trace(psim)) / (pn * (pn - 1))
        else:
            # A single prompt carries no diversity signal.
            avg_psim = 1.0
        diversity = 1.0 - float(avg_psim)
        score = float(cohesion) * diversity

        # Representative: longest message in the cluster, flattened to
        # one line and truncated to 240 characters.
        rep_idx = max(cl, key=lambda i: len(rows[i]["content"]))
        rep = rows[rep_idx]["content"][:240].replace("\n", " ")
        if len(rows[rep_idx]["content"]) > 240:
            rep += "…"
        invariants.append({
            "size": n,
            "cohesion": float(cohesion),
            "diversity": diversity,
            "score": score,
            "sample": rep,
        })

    invariants.sort(key=lambda x: x["score"], reverse=True)
    invariants = invariants[: max(1, int(top_n))]
    if not invariants:
        return "no_invariants (clusters had insufficient prompt variety)"

    out = [
        f"invariants n={len(invariants)} window={window_days}d "
        f"threshold={threshold} pool={N}"
    ]
    for inv in invariants:
        out.append(
            f" size={inv['size']} cohesion={inv['cohesion']:.2f} "
            f"diversity={inv['diversity']:.2f} score={inv['score']:.2f}"
        )
        out.append(f" sample: {inv['sample']}")
    return "\n".join(out)
@@ -0,0 +1,110 @@
1
+ """MCP tools that expose the CLI-agnostic lessons store.
2
+
3
+ lesson_append(title, body, summary, source)
4
+ Materialize a class-level lesson into ~/.threadkeeper/lessons.md.
5
+ Idempotent on slug — re-calling with the same title overwrites the
6
+ existing section.
7
+
8
+ lesson_list(k=20)
9
+ Compact listing for inspection / diagnostics.
10
+
11
+ lesson_get(slug)
12
+ Return the full body of a single lesson by slug.
13
+
14
+ The learning loop (review_thread + shadow_review) writes here instead
15
+ of (or in addition to) ~/.claude/skills/*/SKILL.md so non-Claude CLIs
16
+ share the procedural-knowledge surface. Each CLI's per-user
17
+ instructions file references this path via the managed thread-keeper
18
+ block written by `_setup.py`.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ from datetime import datetime
23
+ from typing import Optional
24
+
25
+ from .._mcp import mcp
26
+ from ..identity import _ensure_session
27
+ from ..db import get_db
28
+ from ..lessons import (
29
+ append_lesson,
30
+ iter_lessons,
31
+ count_lessons,
32
+ get_path,
33
+ )
34
+
35
+
36
@mcp.tool()
def lesson_append(
    title: str,
    body: str,
    summary: str = "",
    source: str = "",
) -> str:
    """Write a class-level lesson into the shared lessons store.

    `title` is handed to the store, which returns the slug it filed the
    lesson under. Per the module documentation, the slug is stable, so
    calling again with the same title overwrites the existing section.

    `body` is the markdown content of the lesson.

    `summary` is an optional one-line TL;DR for long bodies.

    `source` is an optional provenance tag, e.g. a thread id or "shadow".

    Returns ``ok slug=<slug> path=<path>`` on success, or
    ``ERR empty_title`` / ``ERR empty_body`` when the corresponding
    argument is blank after stripping whitespace.
    """
    db = get_db()
    _ensure_session(db)

    # Title is validated before body, so a call with both blank reports
    # the title error.
    required = (
        (title, "ERR empty_title"),
        (body, "ERR empty_body"),
    )
    for text, error in required:
        if not text.strip():
            return error

    stored_slug = append_lesson(
        title=title,
        body=body,
        summary=summary,
        source=source,
    )
    return f"ok slug={stored_slug} path={get_path()}"
68
+
69
+
70
@mcp.tool()
def lesson_list(k: int = 20) -> str:
    """List materialized lessons, newest first, one line per lesson.

    The first line reports the total count and the store path. Each
    following line shows the lesson's age, slug, source tag ("?" when
    empty) and the first 60 characters of its whitespace-collapsed body.
    At most max(1, k) lessons are listed. Returns ``no_lessons`` when
    the store is empty.
    """
    db = get_db()
    _ensure_session(db)

    lessons = list(iter_lessons())
    if not lessons:
        return "no_lessons"
    lessons = sorted(lessons, key=lambda entry: entry["ts"], reverse=True)

    now = int(datetime.now().timestamp())
    lines: list[str] = [f"lessons total={len(lessons)} path={get_path()}"]
    for entry in lessons[:max(1, k)]:
        # Timestamps in the future are shown as age zero.
        elapsed = max(0, now - entry["ts"])
        if elapsed < 60:
            age = f"{elapsed}s"
        elif elapsed < 3600:
            age = f"{elapsed // 60}m"
        elif elapsed < 86400:
            age = f"{elapsed // 3600}h"
        else:
            age = f"{elapsed // 86400}d"

        # Collapse every whitespace run to one space before truncating.
        snippet = " ".join(entry["body"].split())[:60]
        src = entry["source"] or "?"
        lines.append(f" {age:>5s} {entry['slug']:30s} src={src:8s} {snippet}")
    return "\n".join(lines)
99
+
100
+
101
@mcp.tool()
def lesson_get(slug: str) -> str:
    """Return the full body of the lesson stored under `slug`.

    Intended as the follow-up to `lesson_list` when an entry deserves a
    full read. Returns ``ERR not_found slug=<slug>`` when no lesson has
    that slug.
    """
    db = get_db()
    _ensure_session(db)

    # First lesson whose slug matches exactly, or None.
    found = next(
        (entry for entry in iter_lessons() if entry["slug"] == slug),
        None,
    )
    if found is None:
        return f"ERR not_found slug={slug}"
    return found["body"]
@@ -0,0 +1,142 @@
1
+ """Missed-spawn detection MCP tool.
2
+
3
+ Scans recent assistant messages for response shapes that signal
4
+ decomposable work (multiple top-level numbered items or multiple
5
+ markdown section headers) and checks whether the conversation
6
+ actually called spawn() around the same time. Responses with
7
+ decomposable shape but no nearby spawn() are flagged as
8
+ `missed_spawn` candidates — places where the agent answered
9
+ linearly when it could have parallelized.
10
+
11
+ This is a behavioral mirror: it doesn't change anything, it tells
12
+ you how often spawn() reflex actually fires.
13
+ """
14
+
15
+ import re
16
+ import sqlite3
17
+ import time
18
+ from datetime import datetime, timezone
19
+
20
+ from .._mcp import mcp
21
+ from ..db import get_db
22
+ from ..helpers import fmt_age, q
23
+ from ..identity import _ensure_session
24
+
25
+
26
+ # Top-level numbered enumeration in markdown. Allows up to 3 leading
27
+ # spaces, optional ** wrap. Each match = one numbered item.
28
+ _NUMBERED_RE = re.compile(r"(?m)^[ \t]{0,3}(?:\*\*)?\d+[\.\)][ \t]+")
29
+
30
+ # H2 / H3 markdown headers. We don't count H1 (rare in chat replies).
31
+ _HEADER_RE = re.compile(r"(?m)^#{2,3}\s+\S")
32
+
33
+ # Time window (seconds) around an assistant message in which a tasks row
34
+ # counts as "the spawn for this response". 10 min is generous.
35
+ _SPAWN_PROXIMITY_S = 600
36
+
37
+
38
@mcp.tool()
def find_missed_spawns(window_days: int = 14,
                       min_response_len: int = 400,
                       min_numbered: int = 2,
                       min_headers: int = 3,
                       top_n: int = 10,
                       max_messages: int = 5000) -> str:
    """Report decomposable assistant responses that had no nearby spawn().

    Steps:
      1. Load recent assistant messages: last `window_days` days (at
         least 1), content length >= max(100, min_response_len),
         project other than 'subagents', newest first, capped at
         max(100, max_messages) rows.
      2. Count top-level numbered items and H2/H3 headers in each.
      3. Keep a response as decomposable when numbered >= `min_numbered`
         OR headers >= `min_headers`.
      4. A decomposable response is a missed spawn when no tasks row with
         parent_cid equal to its session_id has started_at within
         _SPAWN_PROXIMITY_S seconds either side of the response.
      5. Rank missed spawns by numbered + headers and list the top
         max(1, top_n).

    Returns a one-line status when nothing is found at some stage,
    otherwise a header line followed by one line per reported response.
    """
    db = get_db()
    _ensure_session(db)

    now = int(time.time())
    since = now - max(1, int(window_days)) * 86400
    length_floor = max(100, int(min_response_len))
    row_cap = max(100, int(max_messages))

    messages = db.execute(
        "SELECT uuid, session_id, content, created_at "
        "FROM dialog_messages "
        "WHERE role='assistant' AND created_at >= ? "
        "AND project != 'subagents' "
        "AND length(content) >= ? "
        "AND content NOT LIKE '[thinking]%' "
        "AND content NOT LIKE '[tool_result]%' "
        "AND content NOT LIKE '<summary>%' "
        "ORDER BY created_at DESC LIMIT ?",
        (since, length_floor, row_cap),
    ).fetchall()
    if not messages:
        return f"insufficient_data scanned=0 window_days={window_days}"

    # Keep only responses whose shape suggests separable sub-tasks.
    decomposable = []
    for msg in messages:
        text = msg["content"] or ""
        numbered_count = len(_NUMBERED_RE.findall(text))
        header_count = len(_HEADER_RE.findall(text))
        if numbered_count >= min_numbered or header_count >= min_headers:
            decomposable.append({
                "uuid": msg["uuid"],
                "session_id": msg["session_id"],
                "content": text,
                "created_at": msg["created_at"],
                "numbered": numbered_count,
                "headers": header_count,
            })

    if not decomposable:
        return (f"scanned={len(messages)} decomposable=0 — no responses "
                "matched decomp shape thresholds")

    def _nearby_spawns(cand) -> int:
        # Number of tasks rows for this session started close in time.
        when = cand["created_at"]
        row = db.execute(
            "SELECT COUNT(*) cnt FROM tasks "
            "WHERE parent_cid = ? "
            "AND started_at BETWEEN ? AND ?",
            (cand["session_id"],
             when - _SPAWN_PROXIMITY_S,
             when + _SPAWN_PROXIMITY_S),
        ).fetchone()
        return row["cnt"]

    missed = [cand for cand in decomposable if _nearby_spawns(cand) == 0]
    if not missed:
        return (f"scanned={len(messages)} decomposable={len(decomposable)} "
                "missed=0 — every decomposable response had a nearby spawn()")

    # Highest decomposition intensity first; ties keep newest-first order.
    ranked = sorted(
        missed,
        key=lambda cand: cand["numbered"] + cand["headers"],
        reverse=True,
    )
    shown = ranked[: max(1, int(top_n))]

    report = [
        f"missed_spawn scanned={len(messages)} decomposable={len(decomposable)} "
        f"missed={len(missed)} window={window_days}d"
    ]
    for cand in shown:
        stamp = datetime.fromtimestamp(cand["created_at"], tz=timezone.utc)
        iso = stamp.strftime("%Y-%m-%dT%H:%MZ")
        sid_short = (cand["session_id"] or "?")[:8]
        text = cand["content"]
        sample = text[:120].replace("\n", " ")
        if len(text) > 120:
            sample += "…"
        report.append(
            f" {iso} sid={sid_short} nbr={cand['numbered']} "
            f"hdr={cand['headers']} age={fmt_age(now - cand['created_at'])}_ago "
            f"{q(sample)}"
        )
    return "\n".join(report)